diff options
Diffstat (limited to 'drivers')
211 files changed, 4573 insertions, 3244 deletions
diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c index 5d4b72e21161..569a4a662dcd 100644 --- a/drivers/ata/pata_ftide010.c +++ b/drivers/ata/pata_ftide010.c @@ -256,14 +256,12 @@ static struct ata_port_operations pata_ftide010_port_ops = { .qc_issue = ftide010_qc_issue, }; -static struct ata_port_info ftide010_port_info[] = { - { - .flags = ATA_FLAG_SLAVE_POSS, - .mwdma_mask = ATA_MWDMA2, - .udma_mask = ATA_UDMA6, - .pio_mask = ATA_PIO4, - .port_ops = &pata_ftide010_port_ops, - }, +static struct ata_port_info ftide010_port_info = { + .flags = ATA_FLAG_SLAVE_POSS, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .pio_mask = ATA_PIO4, + .port_ops = &pata_ftide010_port_ops, }; #if IS_ENABLED(CONFIG_SATA_GEMINI) @@ -349,6 +347,7 @@ static int pata_ftide010_gemini_cable_detect(struct ata_port *ap) } static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { struct device *dev = ftide->dev; @@ -373,7 +372,13 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, /* Flag port as SATA-capable */ if (gemini_sata_bridge_enabled(sg, is_ata1)) - ftide010_port_info[0].flags |= ATA_FLAG_SATA; + pi->flags |= ATA_FLAG_SATA; + + /* This device has broken DMA, only PIO works */ + if (of_machine_is_compatible("itian,sq201")) { + pi->mwdma_mask = 0; + pi->udma_mask = 0; + } /* * We assume that a simple 40-wire cable is used in the PATA mode. @@ -435,6 +440,7 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, } #else static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { return -ENOTSUPP; @@ -446,7 +452,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - const struct ata_port_info pi = ftide010_port_info[0]; + struct ata_port_info pi = ftide010_port_info; const struct ata_port_info *ppi[] = { &pi, NULL }; struct ftide010 *ftide; struct resource *res; @@ -490,6 +496,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) * are ATA0. This will also set up the cable types. */ ret = pata_ftide010_gemini_init(ftide, + &pi, (res->start == 0x63400000)); if (ret) goto err_dis_clk; diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 8e2e4757adcb..5a42ae4078c2 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -185,7 +185,7 @@ EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); int of_pm_clk_add_clks(struct device *dev) { struct clk **clks; - unsigned int i, count; + int i, count; int ret; if (!dev || !dev->of_node) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b55b245e8052..fd1e19f1a49f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -84,6 +84,18 @@ MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); /* + * How long a persistent grant is allowed to remain allocated without being in + * use. The time is in seconds, 0 means indefinitely long. + */ + +static unsigned int xen_blkif_pgrant_timeout = 60; +module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout, + uint, 0644); +MODULE_PARM_DESC(persistent_grant_unused_seconds, + "Time in seconds an unused persistent grant is allowed to " + "remain allocated. Default is 60, 0 means unlimited."); + +/* * Maximum number of rings/queues blkback supports, allow as many queues as there * are CPUs if user has not specified a value. */ @@ -123,6 +135,13 @@ module_param(log_stats, int, 0644); /* Number of free pages to remove on each call to gnttab_free_pages */ #define NUM_BATCH_FREE_PAGES 10 +static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt) +{ + return xen_blkif_pgrant_timeout && + (jiffies - persistent_gnt->last_used >= + HZ * xen_blkif_pgrant_timeout); +} + static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) { unsigned long flags; @@ -236,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, } } - bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE); - set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->active = true; /* Add new node and rebalance tree. */ rb_link_node(&(persistent_gnt->node), parent, new); rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts); @@ -261,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, else if (gref > data->gnt) node = node->rb_right; else { - if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { + if (data->active) { pr_alert_ratelimited("requesting a grant already in use\n"); return NULL; } - set_bit(PERSISTENT_GNT_ACTIVE, data->flags); + data->active = true; atomic_inc(&ring->persistent_gnt_in_use); return data; } @@ -276,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, static void put_persistent_gnt(struct xen_blkif_ring *ring, struct persistent_gnt *persistent_gnt) { - if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (!persistent_gnt->active) pr_alert_ratelimited("freeing a grant already unused\n"); - set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); - clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->last_used = jiffies; + persistent_gnt->active = false; atomic_dec(&ring->persistent_gnt_in_use); } @@ -371,26 +389,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) struct persistent_gnt *persistent_gnt; struct rb_node *n; unsigned int num_clean, total; - bool scan_used = false, clean_used = false; + bool scan_used = false; struct rb_root *root; - if (ring->persistent_gnt_c < xen_blkif_max_pgrants || - (ring->persistent_gnt_c == xen_blkif_max_pgrants && - !ring->blkif->vbd.overflow_max_grants)) { - goto out; - } - if (work_busy(&ring->persistent_purge_work)) { pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); goto out; } - num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; - num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; - num_clean = min(ring->persistent_gnt_c, num_clean); - if ((num_clean == 0) || - (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use)))) - goto out; + if (ring->persistent_gnt_c < xen_blkif_max_pgrants || + (ring->persistent_gnt_c == xen_blkif_max_pgrants && + !ring->blkif->vbd.overflow_max_grants)) { + num_clean = 0; + } else { + num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + + num_clean; + num_clean = min(ring->persistent_gnt_c, num_clean); + pr_debug("Going to purge at least %u persistent grants\n", + num_clean); + } /* * At this point, we can assure that there will be no calls @@ -401,9 +419,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) * number of grants. */ - total = num_clean; - - pr_debug("Going to purge %u persistent grants\n", num_clean); + total = 0; BUG_ON(!list_empty(&ring->persistent_purge_list)); root = &ring->persistent_gnts; @@ -412,46 +428,37 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); - if (clean_used) { - clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + if (persistent_gnt->active) continue; - } - - if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (!scan_used && !persistent_gnt_timeout(persistent_gnt)) continue; - if (!scan_used && - (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags))) + if (scan_used && total >= num_clean) continue; rb_erase(&persistent_gnt->node, root); list_add(&persistent_gnt->remove_node, &ring->persistent_purge_list); - if (--num_clean == 0) - goto finished; + total++; } /* - * If we get here it means we also need to start cleaning + * Check whether we also need to start cleaning * grants that were used since last purge in order to cope * with the requested num */ - if (!scan_used && !clean_used) { - pr_debug("Still missing %u purged frames\n", num_clean); + if (!scan_used && total < num_clean) { + pr_debug("Still missing %u purged frames\n", num_clean - total); scan_used = true; goto purge_list; } -finished: - if (!clean_used) { - pr_debug("Finished scanning for grants to clean, removing used flag\n"); - clean_used = true; - goto purge_list; - } - ring->persistent_gnt_c -= (total - num_clean); - ring->blkif->vbd.overflow_max_grants = 0; + if (total) { + ring->persistent_gnt_c -= total; + ring->blkif->vbd.overflow_max_grants = 0; - /* We can defer this work */ - schedule_work(&ring->persistent_purge_work); - pr_debug("Purged %u/%u\n", (total - num_clean), total); + /* We can defer this work */ + schedule_work(&ring->persistent_purge_work); + pr_debug("Purged %u/%u\n", num_clean, total); + } out: return; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index ecb35fe8ca8d..1d3002d773f7 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -233,16 +233,6 @@ struct xen_vbd { struct backend_info; -/* Number of available flags */ -#define PERSISTENT_GNT_FLAGS_SIZE 2 -/* This persistent grant is currently in use */ -#define PERSISTENT_GNT_ACTIVE 0 -/* - * This persistent grant has been used, this flag is set when we remove the - * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently. - */ -#define PERSISTENT_GNT_WAS_ACTIVE 1 - /* Number of requests that we can fit in a ring */ #define XEN_BLKIF_REQS_PER_PAGE 32 @@ -250,7 +240,8 @@ struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; - DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); + unsigned long last_used; + bool active; struct rb_node node; struct list_head remove_node; }; @@ -278,7 +269,6 @@ struct xen_blkif_ring { wait_queue_head_t pending_free_wq; /* Tree to store persistent grants. */ - spinlock_t pers_gnts_lock; struct rb_root persistent_gnts; unsigned int persistent_gnt_c; atomic_t persistent_gnt_in_use; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8986adab9bf5..a71d817e900d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -46,6 +46,7 @@ #include <linux/scatterlist.h> #include <linux/bitmap.h> #include <linux/list.h> +#include <linux/workqueue.h> #include <xen/xen.h> #include <xen/xenbus.h> @@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq) static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; +static struct delayed_work blkfront_work; +static LIST_HEAD(info_list); /* * Maximum number of segments in indirect requests, the actual value used by @@ -216,6 +219,7 @@ struct blkfront_info /* Save uncomplete reqs and bios for migration. */ struct list_head requests; struct bio_list bio_list; + struct list_head info_list; }; static unsigned int nr_minors; @@ -1759,6 +1763,12 @@ abort_transaction: return err; } +static void free_info(struct blkfront_info *info) +{ + list_del(&info->info_list); + kfree(info); +} + /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1880,7 +1890,10 @@ again: destroy_blkring: blkif_free(info, 0); - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); + dev_set_drvdata(&dev->dev, NULL); return err; @@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev, info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); + mutex_lock(&blkfront_mutex); + list_add(&info->info_list, &info_list); + mutex_unlock(&blkfront_mutex); + return 0; } @@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST) indirect_segments = 0; info->max_indirect_segments = indirect_segments; + + if (info->feature_persistent) { + mutex_lock(&blkfront_mutex); + schedule_delayed_work(&blkfront_work, HZ * 10); + mutex_unlock(&blkfront_mutex); + } } /* @@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) mutex_unlock(&info->mutex); if (!bdev) { - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); return 0; } @@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) if (info && !bdev->bd_openers) { xlvbd_release_gendisk(info); disk->private_data = NULL; - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); } mutex_unlock(&bdev->bd_mutex); @@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); xlvbd_release_gendisk(info); disk->private_data = NULL; - kfree(info); + free_info(info); } out: @@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = { .is_ready = blkfront_is_ready, }; +static void purge_persistent_grants(struct blkfront_info *info) +{ + unsigned int i; + unsigned long flags; + + for (i = 0; i < info->nr_rings; i++) { + struct blkfront_ring_info *rinfo = &info->rinfo[i]; + struct grant *gnt_list_entry, *tmp; + + spin_lock_irqsave(&rinfo->ring_lock, flags); + + if (rinfo->persistent_gnts_c == 0) { + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + continue; + } + + list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, + node) { + if (gnt_list_entry->gref == GRANT_INVALID_REF || + gnttab_query_foreign_access(gnt_list_entry->gref)) + continue; + + list_del(&gnt_list_entry->node); + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + rinfo->persistent_gnts_c--; + __free_page(gnt_list_entry->page); + kfree(gnt_list_entry); + } + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + } +} + +static void blkfront_delay_work(struct work_struct *work) +{ + struct blkfront_info *info; + bool need_schedule_work = false; + + mutex_lock(&blkfront_mutex); + + list_for_each_entry(info, &info_list, info_list) { + if (info->feature_persistent) { + need_schedule_work = true; + mutex_lock(&info->mutex); + purge_persistent_grants(info); + mutex_unlock(&info->mutex); + } + } + + if (need_schedule_work) + schedule_delayed_work(&blkfront_work, HZ * 10); + + mutex_unlock(&blkfront_mutex); +} + static int __init xlblk_init(void) { int ret; @@ -2626,6 +2708,15 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_has_pv_disk_devices()) + return -ENODEV; + + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { + pr_warn("xen_blk: can't get major %d with name %s\n", + XENVBD_MAJOR, DEV_NAME); + return -ENODEV; + } + if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -2641,14 +2732,7 @@ static int __init xlblk_init(void) xen_blkif_max_queues = nr_cpus; } - if (!xen_has_pv_disk_devices()) - return -ENODEV; - - if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { - printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", - XENVBD_MAJOR, DEV_NAME); - return -ENODEV; - } + INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work); ret = xenbus_register_frontend(&blkfront_driver); if (ret) { @@ -2663,6 +2747,8 @@ module_init(xlblk_init); static void __exit xlblk_exit(void) { + cancel_delayed_work_sync(&blkfront_work); + xenbus_unregister_driver(&blkfront_driver); unregister_blkdev(XENVBD_MAJOR, DEV_NAME); kfree(minors); diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 2df11cc08a46..845b0314ce3a 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -200,6 +200,7 @@ config BT_HCIUART_RTL depends on BT_HCIUART depends on BT_HCIUART_SERDEV depends on GPIOLIB + depends on ACPI select BT_HCIUART_3WIRE select BT_RTL help diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index ed2a5c7cb77f..4593baff2bc9 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -144,8 +144,10 @@ static int mtk_setup_fw(struct hci_dev *hdev) fw_size = fw->size; /* The size of patch header is 30 bytes, should be skip */ - if (fw_size < 30) - return -EINVAL; + if (fw_size < 30) { + err = -EINVAL; + goto free_fw; + } fw_size -= 30; fw_ptr += 30; @@ -172,8 +174,8 @@ static int mtk_setup_fw(struct hci_dev *hdev) fw_ptr += dlen; } +free_fw: release_firmware(fw); - return err; } diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index c9bac9dc4637..e4fe954e63a9 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -498,32 +498,29 @@ static int sysc_check_registers(struct sysc *ddata) /** * syc_ioremap - ioremap register space for the interconnect target module - * @ddata: deviec driver data + * @ddata: device driver data * * Note that the interconnect target module registers can be anywhere - * within the first child device address space. For example, SGX has - * them at offset 0x1fc00 in the 32MB module address space. We just - * what we need around the interconnect target module registers. + * within the interconnect target module range. For example, SGX has + * them at offset 0x1fc00 in the 32MB module address space. And cpsw + * has them at offset 0x1200 in the CPSW_WR child. Usually the + * the interconnect target module registers are at the beginning of + * the module range though. */ static int sysc_ioremap(struct sysc *ddata) { - u32 size = 0; - - if (ddata->offsets[SYSC_SYSSTATUS] >= 0) - size = ddata->offsets[SYSC_SYSSTATUS]; - else if (ddata->offsets[SYSC_SYSCONFIG] >= 0) - size = ddata->offsets[SYSC_SYSCONFIG]; - else if (ddata->offsets[SYSC_REVISION] >= 0) - size = ddata->offsets[SYSC_REVISION]; - else - return -EINVAL; + int size; - size &= 0xfff00; - size += SZ_256; + size = max3(ddata->offsets[SYSC_REVISION], + ddata->offsets[SYSC_SYSCONFIG], + ddata->offsets[SYSC_SYSSTATUS]); + + if (size < 0 || (size + sizeof(u32)) > ddata->module_size) + return -EINVAL; ddata->module_va = devm_ioremap(ddata->dev, ddata->module_pa, - size); + size + sizeof(u32)); if (!ddata->module_va) return -EIO; @@ -1224,10 +1221,10 @@ static int sysc_child_suspend_noirq(struct device *dev) if (!pm_runtime_status_suspended(dev)) { error = pm_generic_runtime_suspend(dev); if (error) { - dev_err(dev, "%s error at %i: %i\n", - __func__, __LINE__, error); + dev_warn(dev, "%s busy at %i: %i\n", + __func__, __LINE__, error); - return error; + return 0; } error = sysc_runtime_suspend(ddata->dev); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 113fc6edb2b0..a5d5a96479bf 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2546,7 +2546,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || (arg == CDSL_CURRENT || arg == CDSL_NONE)) return cdi->ops->drive_status(cdi, CDSL_CURRENT); - if (((int)arg >= cdi->capacity)) + if (arg >= cdi->capacity) return -EINVAL; return cdrom_slot_status(cdi, arg); } diff --git a/drivers/clk/clk-npcm7xx.c b/drivers/clk/clk-npcm7xx.c index 740af90a9508..c5edf8f2fd19 100644 --- a/drivers/clk/clk-npcm7xx.c +++ b/drivers/clk/clk-npcm7xx.c @@ -558,8 +558,8 @@ static void __init npcm7xx_clk_init(struct device_node *clk_np) if (!clk_base) goto npcm7xx_init_error; - npcm7xx_clk_data = kzalloc(sizeof(*npcm7xx_clk_data->hws) * - NPCM7XX_NUM_CLOCKS + sizeof(npcm7xx_clk_data), GFP_KERNEL); + npcm7xx_clk_data = kzalloc(struct_size(npcm7xx_clk_data, hws, + NPCM7XX_NUM_CLOCKS), GFP_KERNEL); if (!npcm7xx_clk_data) goto npcm7xx_init_np_err; diff --git a/drivers/clk/x86/clk-st.c b/drivers/clk/x86/clk-st.c index fb62f3938008..3a0996f2d556 100644 --- a/drivers/clk/x86/clk-st.c +++ b/drivers/clk/x86/clk-st.c @@ -46,7 +46,7 @@ static int st_clk_probe(struct platform_device *pdev) clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents), 0, st_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL); - clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_25M]->clk); + clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk); hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux", 0, st_data->base + MISCCLKCNTL1, OSCCLKENB, diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 110483f0e3fb..e26a40971b26 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -379,9 +379,20 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == -1) idx = i; /* first enabled state */ if (s->target_residency > data->predicted_us) { - if (!tick_nohz_tick_stopped()) + if (data->predicted_us < TICK_USEC) break; + if (!tick_nohz_tick_stopped()) { + /* + * If the state selected so far is shallow, + * waking up early won't hurt, so retain the + * tick in that case and let the governor run + * again in the next iteration of the loop. + */ + expected_interval = drv->states[idx].target_residency; + break; + } + /* * If the state selected so far is shallow and this * state's target residency matches the time till the diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 6e61cc93c2b0..d7aa7d7ff102 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -679,10 +679,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, int ret = 0; if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) { - crypto_ablkcipher_set_flags(ablkcipher, - CRYPTO_TFM_RES_BAD_KEY_LEN); dev_err(jrdev, "key size mismatch\n"); - return -EINVAL; + goto badkey; } ctx->cdata.keylen = keylen; @@ -715,7 +713,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return ret; badkey: crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return 0; + return -EINVAL; } /* diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 578ea63a3109..f26d62e5533a 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -71,8 +71,8 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, @@ -90,8 +90,8 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } /* RSA Job Completion handler */ @@ -417,13 +417,13 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, goto unmap_p; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_q; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -451,7 +451,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_q: dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); unmap_p: @@ -504,13 +504,13 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, goto unmap_dq; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_qinv; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -538,7 +538,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_qinv: dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); unmap_dq: diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index f4f258075b89..acdd72016ffe 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -190,7 +190,8 @@ static void caam_jr_dequeue(unsigned long devarg) BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0); /* Unmap just-run descriptor so we can post-process */ - dma_unmap_single(dev, jrp->outring[hw_idx].desc, + dma_unmap_single(dev, + caam_dma_to_cpu(jrp->outring[hw_idx].desc), jrp->entinfo[sw_idx].desc_size, DMA_TO_DEVICE); diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h index 9a476bb6d4c7..af596455b420 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_dev.h +++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h @@ -35,6 +35,7 @@ struct nitrox_cmdq { /* requests in backlog queues */ atomic_t backlog_count; + int write_idx; /* command size 32B/64B */ u8 instr_size; u8 qno; @@ -87,7 +88,7 @@ struct nitrox_bh { struct bh_data *slc; }; -/* NITROX-5 driver state */ +/* NITROX-V driver state */ #define NITROX_UCODE_LOADED 0 #define NITROX_READY 1 diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c index ebe267379ac9..4d31df07777f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_lib.c +++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c @@ -36,6 +36,7 @@ static int cmdq_common_init(struct nitrox_cmdq *cmdq) cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN); cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN); cmdq->qsize = (qsize + PKT_IN_ALIGN); + cmdq->write_idx = 0; spin_lock_init(&cmdq->response_lock); spin_lock_init(&cmdq->cmdq_lock); diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index deaefd532aaa..4a362fc22f62 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -42,6 +42,16 @@ * Invalid flag options in AES-CCM IV. */ +static inline int incr_index(int index, int count, int max) +{ + if ((index + count) >= max) + index = index + count - max; + else + index += count; + + return index; +} + /** * dma_free_sglist - unmap and free the sg lists. * @ndev: N5 device @@ -426,30 +436,29 @@ static void post_se_instr(struct nitrox_softreq *sr, struct nitrox_cmdq *cmdq) { struct nitrox_device *ndev = sr->ndev; - union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell; - u64 offset; + int idx; u8 *ent; spin_lock_bh(&cmdq->cmdq_lock); - /* get the next write offset */ - offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno); - pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset); + idx = cmdq->write_idx; /* copy the instruction */ - ent = cmdq->head + pkt_in_baoff_dbell.s.aoff; + ent = cmdq->head + (idx * cmdq->instr_size); memcpy(ent, &sr->instr, cmdq->instr_size); - /* flush the command queue updates */ - dma_wmb(); - sr->tstamp = jiffies; atomic_set(&sr->status, REQ_POSTED); response_list_add(sr, cmdq); + sr->tstamp = jiffies; + /* flush the command queue updates */ + dma_wmb(); /* Ring doorbell with count 1 */ writeq(1, cmdq->dbell_csr_addr); /* orders the doorbell rings */ mmiowb(); + cmdq->write_idx = incr_index(idx, 1, ndev->qlen); + spin_unlock_bh(&cmdq->cmdq_lock); } @@ -459,6 +468,9 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) struct nitrox_softreq *sr, *tmp; int ret = 0; + if (!atomic_read(&cmdq->backlog_count)) + return 0; + spin_lock_bh(&cmdq->backlog_lock); list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) { @@ -466,7 +478,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) /* submit until space available */ if (unlikely(cmdq_full(cmdq, ndev->qlen))) { - ret = -EBUSY; + ret = -ENOSPC; break; } /* delete from backlog list */ @@ -491,23 +503,20 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) { struct nitrox_cmdq *cmdq = sr->cmdq; struct nitrox_device *ndev = sr->ndev; - int ret = -EBUSY; + + /* try to post backlog requests */ + post_backlog_cmds(cmdq); if (unlikely(cmdq_full(cmdq, ndev->qlen))) { if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -EAGAIN; - + return -ENOSPC; + /* add to backlog list */ backlog_list_add(sr, cmdq); - } else { - ret = post_backlog_cmds(cmdq); - if (ret) { - backlog_list_add(sr, cmdq); - return ret; - } - post_se_instr(sr, cmdq); - ret = -EINPROGRESS; + return -EBUSY; } - return ret; + post_se_instr(sr, cmdq); + + return -EINPROGRESS; } /** @@ -624,11 +633,9 @@ int nitrox_process_se_request(struct nitrox_device *ndev, */ sr->instr.fdata[0] = *((u64 *)&req->gph); sr->instr.fdata[1] = 0; - /* flush the soft_req changes before posting the cmd */ - wmb(); ret = nitrox_enqueue_request(sr); - if (ret == -EAGAIN) + if (ret == -ENOSPC) goto send_fail; return ret; diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index a53a0e6ba024..7725b6ee14ef 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -96,6 +96,10 @@ enum csk_flags { CSK_CONN_INLINE, /* Connection on HW */ }; +enum chtls_cdev_state { + CHTLS_CDEV_STATE_UP = 1 +}; + struct listen_ctx { struct sock *lsk; struct chtls_dev *cdev; @@ -146,6 +150,7 @@ struct chtls_dev { unsigned int send_page_order; int max_host_sndbuf; struct key_map kmap; + unsigned int cdev_state; }; struct chtls_hws { diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index 9b07f9165658..f59b044ebd25 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -160,6 +160,7 @@ static void chtls_register_dev(struct chtls_dev *cdev) tlsdev->hash = chtls_create_hash; tlsdev->unhash = chtls_destroy_hash; tls_register_device(&cdev->tlsdev); + cdev->cdev_state = CHTLS_CDEV_STATE_UP; } static void chtls_unregister_dev(struct chtls_dev *cdev) @@ -281,8 +282,10 @@ static void chtls_free_all_uld(void) struct chtls_dev *cdev, *tmp; mutex_lock(&cdev_mutex); - list_for_each_entry_safe(cdev, tmp, &cdev_list, list) - chtls_free_uld(cdev); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list) { + if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) + chtls_free_uld(cdev); + } mutex_unlock(&cdev_mutex); } diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 5285ece4f33a..b71895871be3 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -107,24 +107,23 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; @@ -147,24 +146,23 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 8bd9aff0f55f..e9954a7d4694 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -116,32 +116,39 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { + blkcipher_walk_init(&walk, dst, src, nbytes); + + ret = blkcipher_walk_virt(desc, &walk); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); - - ret = blkcipher_walk_virt(desc, &walk); iv = walk.iv; memset(tweak, 0, AES_BLOCK_SIZE); aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); if (enc) aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); else aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 502b94fb116a..b6e9df11115d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1012,13 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (r) return r; - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) { - parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; - if (!parser->ctx->preamble_presented) { - parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; - parser->ctx->preamble_presented = true; - } - } + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + parser->job->preamble_status |= + AMDGPU_PREAMBLE_IB_PRESENT; if (parser->ring && parser->ring != ring) return -EINVAL; @@ -1207,26 +1203,24 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int r; + job = p->job; + p->job = NULL; + + r = drm_sched_job_init(&job->base, entity, p->filp); + if (r) + goto error_unlock; + + /* No memory allocation is allowed while holding the mn lock */ amdgpu_mn_lock(p->mn); amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = e->robj; if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - amdgpu_mn_unlock(p->mn); - return -ERESTARTSYS; + r = -ERESTARTSYS; + goto error_abort; } } - job = p->job; - p->job = NULL; - - r = drm_sched_job_init(&job->base, entity, p->filp); - if (r) { - amdgpu_job_free(job); - amdgpu_mn_unlock(p->mn); - return r; - } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); @@ -1241,6 +1235,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_cs_post_dependencies(p); + if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && + !p->ctx->preamble_presented) { + job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; + p->ctx->preamble_presented = true; + } + cs->out.handle = seq; job->uf_sequence = seq; @@ -1258,6 +1258,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_mn_unlock(p->mn); return 0; + +error_abort: + dma_fence_put(&job->base.s_fence->finished); + job->base.s_fence = NULL; + +error_unlock: + amdgpu_job_free(job); + amdgpu_mn_unlock(p->mn); + return r; } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 5518e623fed2..51b5e977ca88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -164,8 +164,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } + need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || + (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; dma_fence_put(tmp); @@ -196,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } skip_preamble = ring->current_ctx == fence_ctx; - need_ctx_switch = ring->current_ctx != fence_ctx; if (job && ring->funcs->emit_cntxcntl) { if (need_ctx_switch) status |= AMDGPU_HAVE_CTX_SWITCH; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 8f98629fbe59..7b4e657a95c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1932,14 +1932,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) amdgpu_fence_wait_empty(ring); } - mutex_lock(&adev->pm.mutex); - /* update battery/ac status */ - if (power_supply_is_system_supplied() > 0) - adev->pm.ac_power = true; - else - adev->pm.ac_power = false; - mutex_unlock(&adev->pm.mutex); - if (adev->powerplay.pp_funcs->dispatch_tasks) { if (!amdgpu_device_has_dc_support(adev)) { mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ece0ac703e27..b17771dd5ce7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -172,6 +172,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, * is validated on next vm use to avoid fault. * */ list_move_tail(&base->vm_status, &vm->evicted); + base->moved = true; } /** @@ -369,7 +370,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, uint64_t addr; int r; - addr = amdgpu_bo_gpu_offset(bo); entries = amdgpu_bo_size(bo) / 8; if (pte_support_ats) { @@ -401,6 +401,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; + addr = amdgpu_bo_gpu_offset(bo); if (ats_entries) { uint64_t ats_value; @@ -2483,28 +2484,52 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size * * @adev: amdgpu_device pointer - * @vm_size: the default vm size if it's set auto + * @min_vm_size: the minimum vm size in GB if it's set auto * @fragment_size_default: Default PTE fragment size * @max_level: max VMPT level * @max_bits: max address space size in bits * */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, unsigned max_bits) { + unsigned int max_size = 1 << (max_bits - 30); + unsigned int vm_size; uint64_t tmp; /* adjust vm size first */ if (amdgpu_vm_size != -1) { - unsigned max_size = 1 << (max_bits - 30); - vm_size = amdgpu_vm_size; if (vm_size > max_size) { dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n", amdgpu_vm_size, max_size); vm_size = max_size; } + } else { + struct sysinfo si; + unsigned int phys_ram_gb; + + /* Optimal VM size depends on the amount of physical + * RAM available. Underlying requirements and + * assumptions: + * + * - Need to map system memory and VRAM from all GPUs + * - VRAM from other GPUs not known here + * - Assume VRAM <= system memory + * - On GFX8 and older, VM space can be segmented for + * different MTYPEs + * - Need to allow room for fragmentation, guard pages etc. + * + * This adds up to a rough guess of system memory x3. + * Round up to power of two to maximize the available + * VM size with the given page table size. + */ + si_meminfo(&si); + phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + + (1 << 30) - 1) >> 30; + vm_size = roundup_pow_of_two( + min(max(phys_ram_gb * 3, min_vm_size), max_size)); } adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 67a15d439ac0..9fa9df0c5e7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -321,7 +321,7 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, unsigned max_bits); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5cd45210113f..5a9534a82d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5664,6 +5664,11 @@ static int gfx_v8_0_set_powergating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_RLC_SMU_HS | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GFX_DMG)) + adev->gfx.rlc.funcs->enter_safe_mode(adev); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5713,7 +5718,11 @@ static int gfx_v8_0_set_powergating_state(void *handle, default: break; } - + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_RLC_SMU_HS | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GFX_DMG)) + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 75317f283c69..ad151fefa41f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -632,12 +632,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev) amdgpu_gart_table_vram_unpin(adev); } -static void gmc_v6_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client) { @@ -935,8 +929,9 @@ static int gmc_v6_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - gmc_v6_0_gart_fini(adev); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 36dc367c4b45..f8d8a3a73e42 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -747,19 +747,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) } /** - * gmc_v7_0_gart_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - -/** * gmc_v7_0_vm_decode_fault - print human readable fault info * * @adev: amdgpu_device pointer @@ -1095,8 +1082,9 @@ static int gmc_v7_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); kfree(adev->gmc.vm_fault_info); - gmc_v7_0_gart_fini(adev); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 70fc97b59b4f..9333109b210d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -969,19 +969,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) } /** - * gmc_v8_0_gart_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - -/** * gmc_v8_0_vm_decode_fault - print human readable fault info * * @adev: amdgpu_device pointer @@ -1199,8 +1186,9 @@ static int gmc_v8_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); kfree(adev->gmc.vm_fault_info); - gmc_v8_0_gart_fini(adev); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 399a5db27649..72f8018fa2a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -942,26 +942,12 @@ static int gmc_v9_0_sw_init(void *handle) return 0; } -/** - * gmc_v9_0_gart_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static void gmc_v9_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - gmc_v9_0_gart_fini(adev); /* * TODO: @@ -974,7 +960,9 @@ static int gmc_v9_0_sw_fini(void *handle) */ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 3f57f6463dc8..cb79a93c2eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -65,8 +65,6 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev, int min_temp, int max_temp); static int kv_init_fps_limits(struct amdgpu_device *adev); -static void kv_dpm_powergate_uvd(void *handle, bool gate); -static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate); static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate); static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate); @@ -1354,8 +1352,6 @@ static int kv_dpm_enable(struct amdgpu_device *adev) return ret; } - kv_update_current_ps(adev, adev->pm.dpm.boot_ps); - if (adev->irq.installed && amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) { ret = kv_set_thermal_temperature_range(adev, KV_TEMP_RANGE_MIN, KV_TEMP_RANGE_MAX); @@ -1374,6 +1370,8 @@ static int kv_dpm_enable(struct amdgpu_device *adev) static void kv_dpm_disable(struct amdgpu_device *adev) { + struct kv_power_info *pi = kv_get_pi(adev); + amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, AMDGPU_THERMAL_IRQ_LOW_TO_HIGH); amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, @@ -1387,8 +1385,10 @@ static void kv_dpm_disable(struct amdgpu_device *adev) /* powerup blocks */ kv_dpm_powergate_acp(adev, false); kv_dpm_powergate_samu(adev, false); - kv_dpm_powergate_vce(adev, false); - kv_dpm_powergate_uvd(adev, false); + if (pi->caps_vce_pg) /* power on the VCE block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); + if (pi->caps_uvd_pg) /* power on the UVD block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_UVDPowerON); kv_enable_smc_cac(adev, false); kv_enable_didt(adev, false); @@ -1551,7 +1551,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev, int ret; if (amdgpu_new_state->evclk > 0 && amdgpu_current_state->evclk == 0) { - kv_dpm_powergate_vce(adev, false); if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1573,7 +1572,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev, kv_enable_vce_dpm(adev, true); } else if (amdgpu_new_state->evclk == 0 && amdgpu_current_state->evclk > 0) { kv_enable_vce_dpm(adev, false); - kv_dpm_powergate_vce(adev, true); } return 0; @@ -1702,24 +1700,32 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate) } } -static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate) +static void kv_dpm_powergate_vce(void *handle, bool gate) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct kv_power_info *pi = kv_get_pi(adev); - - if (pi->vce_power_gated == gate) - return; + int ret; pi->vce_power_gated = gate; - if (!pi->caps_vce_pg) - return; - - if (gate) - amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF); - else - amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); + if (gate) { + /* stop the VCE block */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + kv_enable_vce_dpm(adev, false); + if (pi->caps_vce_pg) /* power off the VCE block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF); + } else { + if (pi->caps_vce_pg) /* power on the VCE block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); + kv_enable_vce_dpm(adev, true); + /* re-init the VCE block */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); + } } + static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate) { struct kv_power_info *pi = kv_get_pi(adev); @@ -3061,7 +3067,7 @@ static int kv_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; mutex_unlock(&adev->pm.mutex); - + amdgpu_pm_compute_clocks(adev); return ret; } @@ -3313,6 +3319,9 @@ static int kv_set_powergating_by_smu(void *handle, case AMD_IP_BLOCK_TYPE_UVD: kv_dpm_powergate_uvd(handle, gate); break; + case AMD_IP_BLOCK_TYPE_VCE: + kv_dpm_powergate_vce(handle, gate); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index db327b412562..1de96995e690 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -6887,7 +6887,6 @@ static int si_dpm_enable(struct amdgpu_device *adev) si_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true); si_thermal_start_thermal_controller(adev); - ni_update_current_ps(adev, boot_ps); return 0; } @@ -7763,7 +7762,7 @@ static int si_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; mutex_unlock(&adev->pm.mutex); - + amdgpu_pm_compute_clocks(adev); return ret; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index fbe878ae1e8c..4ba0003a9d32 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -480,12 +480,20 @@ void pp_rv_set_display_requirement(struct pp_smu *pp, { struct dc_context *ctx = pp->ctx; struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + struct pp_display_clock_request clock = {0}; - if (!pp_funcs || !pp_funcs->display_configuration_changed) + if (!pp_funcs || !pp_funcs->display_clock_voltage_request) return; - amdgpu_dpm_display_configuration_changed(adev); + clock.clock_type = amd_pp_dcf_clock; + clock.clock_freq_in_khz = req->hard_min_dcefclk_khz; + pp_funcs->display_clock_voltage_request(pp_handle, &clock); + + clock.clock_type = amd_pp_f_clock; + clock.clock_freq_in_khz = req->hard_min_fclk_khz; + pp_funcs->display_clock_voltage_request(pp_handle, &clock); } void pp_rv_set_wm_ranges(struct pp_smu *pp, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 567867915d32..37eaf72ace54 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -754,8 +754,12 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) * fail-safe mode */ if (dc_is_hdmi_signal(link->connector_signal) || - dc_is_dvi_signal(link->connector_signal)) + dc_is_dvi_signal(link->connector_signal)) { + if (prev_sink != NULL) + dc_sink_release(prev_sink); + return false; + } default: break; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 11d834f94220..98358b4b36de 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -199,7 +199,6 @@ vma_create(struct drm_i915_gem_object *obj, vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { - i915_ppgtt_get(i915_vm_to_ppgtt(vm)); list_add_tail(&vma->obj_link, &obj->vma_list); } @@ -807,9 +806,6 @@ static void __i915_vma_destroy(struct i915_vma *vma) if (vma->obj) rb_erase(&vma->obj_node, &vma->obj->vma_tree); - if (!i915_vma_is_ggtt(vma)) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); kfree(iter); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index b725835b47ef..769f3f586661 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -962,9 +962,6 @@ void i915_audio_component_init(struct drm_i915_private *dev_priv) { int ret; - if (INTEL_INFO(dev_priv)->num_pipes == 0) - return; - ret = component_add(dev_priv->drm.dev, &i915_audio_component_bind_ops); if (ret < 0) { DRM_ERROR("failed to add audio component (%d)\n", ret); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed3fa1c8a983..4a3c8ee9a973 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2988,6 +2988,7 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; int dst_x = plane_state->base.dst.x1; + int dst_w = drm_rect_width(&plane_state->base.dst); int pipe_src_w = crtc_state->pipe_src_w; int max_width = skl_max_plane_width(fb, 0, rotation); int max_height = 4096; @@ -3009,10 +3010,10 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, * screen may cause FIFO underflow and display corruption. */ if ((IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && - (dst_x + w < 4 || dst_x > pipe_src_w - 4)) { + (dst_x + dst_w < 4 || dst_x > pipe_src_w - 4)) { DRM_DEBUG_KMS("requested plane X %s position %d invalid (valid range %d-%d)\n", - dst_x + w < 4 ? "end" : "start", - dst_x + w < 4 ? dst_x + w : dst_x, + dst_x + dst_w < 4 ? "end" : "start", + dst_x + dst_w < 4 ? dst_x + dst_w : dst_x, 4, pipe_src_w - 4); return -ERANGE; } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index a9076402dcb0..192972a7d287 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -943,8 +943,12 @@ static int intel_hdmi_hdcp_write(struct intel_digital_port *intel_dig_port, ret = i2c_transfer(adapter, &msg, 1); if (ret == 1) - return 0; - return ret >= 0 ? -EIO : ret; + ret = 0; + else if (ret >= 0) + ret = -EIO; + + kfree(write_buf); + return ret; } static diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 5dae16ccd9f1..3e085c5f2b81 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -74,7 +74,7 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, DRM_DEBUG_KMS("Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 100); + wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 400); if (current_mode != mode) DRM_ERROR("LSPCON mode hasn't settled\n"); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 978782a77629..28d191192945 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -132,6 +132,11 @@ static void mtk_ovl_config(struct mtk_ddp_comp *comp, unsigned int w, writel(0x0, comp->regs + DISP_REG_OVL_RST); } +static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp) +{ + return 4; +} + static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { unsigned int reg; @@ -157,6 +162,11 @@ static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx) static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt) { + /* The return value in switch "MEM_MODE_INPUT_FORMAT_XXX" + * is defined in mediatek HW data sheet. + * The alphabet order in XXX is no relation to data + * arrangement in memory. + */ switch (fmt) { default: case DRM_FORMAT_RGB565: @@ -221,6 +231,7 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .stop = mtk_ovl_stop, .enable_vblank = mtk_ovl_enable_vblank, .disable_vblank = mtk_ovl_disable_vblank, + .layer_nr = mtk_ovl_layer_nr, .layer_on = mtk_ovl_layer_on, .layer_off = mtk_ovl_layer_off, .layer_config = mtk_ovl_layer_config, diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 585943c81e1f..b0a5cffe345a 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -31,14 +31,31 @@ #define RDMA_REG_UPDATE_INT BIT(0) #define DISP_REG_RDMA_GLOBAL_CON 0x0010 #define RDMA_ENGINE_EN BIT(0) +#define RDMA_MODE_MEMORY BIT(1) #define DISP_REG_RDMA_SIZE_CON_0 0x0014 +#define RDMA_MATRIX_ENABLE BIT(17) +#define RDMA_MATRIX_INT_MTX_SEL GENMASK(23, 20) +#define RDMA_MATRIX_INT_MTX_BT601_to_RGB (6 << 20) #define DISP_REG_RDMA_SIZE_CON_1 0x0018 #define DISP_REG_RDMA_TARGET_LINE 0x001c +#define DISP_RDMA_MEM_CON 0x0024 +#define MEM_MODE_INPUT_FORMAT_RGB565 (0x000 << 4) +#define MEM_MODE_INPUT_FORMAT_RGB888 (0x001 << 4) +#define MEM_MODE_INPUT_FORMAT_RGBA8888 (0x002 << 4) +#define MEM_MODE_INPUT_FORMAT_ARGB8888 (0x003 << 4) +#define MEM_MODE_INPUT_FORMAT_UYVY (0x004 << 4) +#define MEM_MODE_INPUT_FORMAT_YUYV (0x005 << 4) +#define MEM_MODE_INPUT_SWAP BIT(8) +#define DISP_RDMA_MEM_SRC_PITCH 0x002c +#define DISP_RDMA_MEM_GMC_SETTING_0 0x0030 #define DISP_REG_RDMA_FIFO_CON 0x0040 #define RDMA_FIFO_UNDERFLOW_EN BIT(31) #define RDMA_FIFO_PSEUDO_SIZE(bytes) (((bytes) / 16) << 16) #define RDMA_OUTPUT_VALID_FIFO_THRESHOLD(bytes) ((bytes) / 16) #define RDMA_FIFO_SIZE(rdma) ((rdma)->data->fifo_size) +#define DISP_RDMA_MEM_START_ADDR 0x0f00 + +#define RDMA_MEM_GMC 0x40402020 struct mtk_disp_rdma_data { unsigned int fifo_size; @@ -138,12 +155,87 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width, writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON); } +static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma, + unsigned int fmt) +{ + /* The return value in switch "MEM_MODE_INPUT_FORMAT_XXX" + * is defined in mediatek HW data sheet. + * The alphabet order in XXX is no relation to data + * arrangement in memory. + */ + switch (fmt) { + default: + case DRM_FORMAT_RGB565: + return MEM_MODE_INPUT_FORMAT_RGB565; + case DRM_FORMAT_BGR565: + return MEM_MODE_INPUT_FORMAT_RGB565 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_RGB888: + return MEM_MODE_INPUT_FORMAT_RGB888; + case DRM_FORMAT_BGR888: + return MEM_MODE_INPUT_FORMAT_RGB888 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_RGBA8888: + return MEM_MODE_INPUT_FORMAT_ARGB8888; + case DRM_FORMAT_BGRX8888: + case DRM_FORMAT_BGRA8888: + return MEM_MODE_INPUT_FORMAT_ARGB8888 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + return MEM_MODE_INPUT_FORMAT_RGBA8888; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + return MEM_MODE_INPUT_FORMAT_RGBA8888 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_UYVY: + return MEM_MODE_INPUT_FORMAT_UYVY; + case DRM_FORMAT_YUYV: + return MEM_MODE_INPUT_FORMAT_YUYV; + } +} + +static unsigned int mtk_rdma_layer_nr(struct mtk_ddp_comp *comp) +{ + return 1; +} + +static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, + struct mtk_plane_state *state) +{ + struct mtk_disp_rdma *rdma = comp_to_rdma(comp); + struct mtk_plane_pending_state *pending = &state->pending; + unsigned int addr = pending->addr; + unsigned int pitch = pending->pitch & 0xffff; + unsigned int fmt = pending->format; + unsigned int con; + + con = rdma_fmt_convert(rdma, fmt); + writel_relaxed(con, comp->regs + DISP_RDMA_MEM_CON); + + if (fmt == DRM_FORMAT_UYVY || fmt == DRM_FORMAT_YUYV) { + rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_ENABLE, RDMA_MATRIX_ENABLE); + rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_INT_MTX_SEL, + RDMA_MATRIX_INT_MTX_BT601_to_RGB); + } else { + rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_ENABLE, 0); + } + + writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR); + writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH); + writel(RDMA_MEM_GMC, comp->regs + DISP_RDMA_MEM_GMC_SETTING_0); + rdma_update_bits(comp, DISP_REG_RDMA_GLOBAL_CON, + RDMA_MODE_MEMORY, RDMA_MODE_MEMORY); +} + static const struct mtk_ddp_comp_funcs mtk_disp_rdma_funcs = { .config = mtk_rdma_config, .start = mtk_rdma_start, .stop = mtk_rdma_stop, .enable_vblank = mtk_rdma_enable_vblank, .disable_vblank = mtk_rdma_disable_vblank, + .layer_nr = mtk_rdma_layer_nr, + .layer_config = mtk_rdma_layer_config, }; static int mtk_disp_rdma_bind(struct device *dev, struct device *master, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 2d6aa150a9ff..0b976dfd04df 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -45,7 +45,8 @@ struct mtk_drm_crtc { bool pending_needs_vblank; struct drm_pending_vblank_event *event; - struct drm_plane planes[OVL_LAYER_NR]; + struct drm_plane *planes; + unsigned int layer_nr; bool pending_planes; void __iomem *config_regs; @@ -171,9 +172,9 @@ static void mtk_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - mtk_ddp_comp_enable_vblank(ovl, &mtk_crtc->base); + mtk_ddp_comp_enable_vblank(comp, &mtk_crtc->base); return 0; } @@ -181,9 +182,9 @@ static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) static void mtk_drm_crtc_disable_vblank(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - mtk_ddp_comp_disable_vblank(ovl); + mtk_ddp_comp_disable_vblank(comp); } static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc) @@ -286,7 +287,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) } /* Initially configure all planes */ - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -334,7 +335,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; unsigned int i; /* @@ -343,7 +344,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) * queue update module registers on vblank. */ if (state->pending_config) { - mtk_ddp_comp_config(ovl, state->pending_width, + mtk_ddp_comp_config(comp, state->pending_width, state->pending_height, state->pending_vrefresh, 0); @@ -351,14 +352,14 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) } if (mtk_crtc->pending_planes) { - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; plane_state = to_mtk_plane_state(plane->state); if (plane_state->pending.config) { - mtk_ddp_comp_layer_config(ovl, i, plane_state); + mtk_ddp_comp_layer_config(comp, i, plane_state); plane_state->pending.config = false; } } @@ -370,12 +371,12 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; int ret; DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); - ret = mtk_smi_larb_get(ovl->larb_dev); + ret = mtk_smi_larb_get(comp->larb_dev); if (ret) { DRM_ERROR("Failed to get larb: %d\n", ret); return; @@ -383,7 +384,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, ret = mtk_crtc_ddp_hw_init(mtk_crtc); if (ret) { - mtk_smi_larb_put(ovl->larb_dev); + mtk_smi_larb_put(comp->larb_dev); return; } @@ -395,7 +396,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; int i; DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); @@ -403,7 +404,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, return; /* Set all pending plane state to disabled */ - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -418,7 +419,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); mtk_crtc_ddp_hw_fini(mtk_crtc); - mtk_smi_larb_put(ovl->larb_dev); + mtk_smi_larb_put(comp->larb_dev); mtk_crtc->enabled = false; } @@ -450,7 +451,7 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc, if (mtk_crtc->event) mtk_crtc->pending_needs_vblank = true; - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -516,7 +517,7 @@ err_cleanup_crtc: return ret; } -void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl) +void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_drm_private *priv = crtc->dev->dev_private; @@ -598,7 +599,12 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mtk_crtc->ddp_comp[i] = comp; } - for (zpos = 0; zpos < OVL_LAYER_NR; zpos++) { + mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); + mtk_crtc->planes = devm_kzalloc(dev, mtk_crtc->layer_nr * + sizeof(struct drm_plane), + GFP_KERNEL); + + for (zpos = 0; zpos < mtk_crtc->layer_nr; zpos++) { type = (zpos == 0) ? DRM_PLANE_TYPE_PRIMARY : (zpos == 1) ? DRM_PLANE_TYPE_CURSOR : DRM_PLANE_TYPE_OVERLAY; @@ -609,7 +615,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0], - &mtk_crtc->planes[1], pipe); + mtk_crtc->layer_nr > 1 ? &mtk_crtc->planes[1] : + NULL, pipe); if (ret < 0) goto unprepare; drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index 9d9410c67ae9..091adb2087eb 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -18,13 +18,12 @@ #include "mtk_drm_ddp_comp.h" #include "mtk_drm_plane.h" -#define OVL_LAYER_NR 4 #define MTK_LUT_SIZE 512 #define MTK_MAX_BPC 10 #define MTK_MIN_BPC 3 void mtk_drm_crtc_commit(struct drm_crtc *crtc); -void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl); +void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp); int mtk_drm_crtc_create(struct drm_device *drm_dev, const enum mtk_ddp_comp_id *path, unsigned int path_len); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 87e4191c250e..546b3e3b300b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -106,6 +106,8 @@ #define OVL1_MOUT_EN_COLOR1 0x1 #define GAMMA_MOUT_EN_RDMA1 0x1 #define RDMA0_SOUT_DPI0 0x2 +#define RDMA0_SOUT_DPI1 0x3 +#define RDMA0_SOUT_DSI1 0x1 #define RDMA0_SOUT_DSI2 0x4 #define RDMA0_SOUT_DSI3 0x5 #define RDMA1_SOUT_DPI0 0x2 @@ -122,6 +124,8 @@ #define DPI0_SEL_IN_RDMA2 0x3 #define DPI1_SEL_IN_RDMA1 (0x1 << 8) #define DPI1_SEL_IN_RDMA2 (0x3 << 8) +#define DSI0_SEL_IN_RDMA1 0x1 +#define DSI0_SEL_IN_RDMA2 0x4 #define DSI1_SEL_IN_RDMA1 0x1 #define DSI1_SEL_IN_RDMA2 0x4 #define DSI2_SEL_IN_RDMA1 (0x1 << 16) @@ -224,6 +228,12 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI0) { *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; value = RDMA0_SOUT_DPI0; + } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI1) { + *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; + value = RDMA0_SOUT_DPI1; + } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI1) { + *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; + value = RDMA0_SOUT_DSI1; } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI2) { *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; value = RDMA0_SOUT_DSI2; @@ -282,6 +292,9 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DPI1) { *addr = DISP_REG_CONFIG_DPI_SEL_IN; value = DPI1_SEL_IN_RDMA1; + } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DSI0) { + *addr = DISP_REG_CONFIG_DSIE_SEL_IN; + value = DSI0_SEL_IN_RDMA1; } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DSI1) { *addr = DISP_REG_CONFIG_DSIO_SEL_IN; value = DSI1_SEL_IN_RDMA1; @@ -297,8 +310,11 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DPI1) { *addr = DISP_REG_CONFIG_DPI_SEL_IN; value = DPI1_SEL_IN_RDMA2; - } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI1) { + } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI0) { *addr = DISP_REG_CONFIG_DSIE_SEL_IN; + value = DSI0_SEL_IN_RDMA2; + } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI1) { + *addr = DISP_REG_CONFIG_DSIO_SEL_IN; value = DSI1_SEL_IN_RDMA2; } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI2) { *addr = DISP_REG_CONFIG_DSIE_SEL_IN; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 7413ffeb3c9d..8399229e6ad2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -78,6 +78,7 @@ struct mtk_ddp_comp_funcs { void (*stop)(struct mtk_ddp_comp *comp); void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc); void (*disable_vblank)(struct mtk_ddp_comp *comp); + unsigned int (*layer_nr)(struct mtk_ddp_comp *comp); void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx); void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx); void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx, @@ -128,6 +129,14 @@ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp) comp->funcs->disable_vblank(comp); } +static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->layer_nr) + return comp->funcs->layer_nr(comp); + + return 0; +} + static inline void mtk_ddp_comp_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 39721119713b..47ec604289b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -381,7 +381,7 @@ static int mtk_drm_bind(struct device *dev) err_deinit: mtk_drm_kms_deinit(drm); err_free: - drm_dev_unref(drm); + drm_dev_put(drm); return ret; } @@ -390,7 +390,7 @@ static void mtk_drm_unbind(struct device *dev) struct mtk_drm_private *private = dev_get_drvdata(dev); drm_dev_unregister(private->drm); - drm_dev_unref(private->drm); + drm_dev_put(private->drm); private->drm = NULL; } @@ -564,7 +564,7 @@ static int mtk_drm_remove(struct platform_device *pdev) drm_dev_unregister(drm); mtk_drm_kms_deinit(drm); - drm_dev_unref(drm); + drm_dev_put(drm); component_master_del(&pdev->dev, &mtk_drm_ops); pm_runtime_disable(&pdev->dev); @@ -580,29 +580,24 @@ static int mtk_drm_sys_suspend(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); struct drm_device *drm = private->drm; + int ret; - drm_kms_helper_poll_disable(drm); - - private->suspend_state = drm_atomic_helper_suspend(drm); - if (IS_ERR(private->suspend_state)) { - drm_kms_helper_poll_enable(drm); - return PTR_ERR(private->suspend_state); - } - + ret = drm_mode_config_helper_suspend(drm); DRM_DEBUG_DRIVER("mtk_drm_sys_suspend\n"); - return 0; + + return ret; } static int mtk_drm_sys_resume(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); struct drm_device *drm = private->drm; + int ret; - drm_atomic_helper_resume(drm, private->suspend_state); - drm_kms_helper_poll_enable(drm); - + ret = drm_mode_config_helper_resume(drm); DRM_DEBUG_DRIVER("mtk_drm_sys_resume\n"); - return 0; + + return ret; } #endif diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 90837f7c7d0f..f4c7516eb989 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -302,14 +302,18 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) return clamp_val(reg, 0, 1023) & (0xff << 2); } -static u16 adt7475_read_word(struct i2c_client *client, int reg) +static int adt7475_read_word(struct i2c_client *client, int reg) { - u16 val; + int val1, val2; - val = i2c_smbus_read_byte_data(client, reg); - val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8); + val1 = i2c_smbus_read_byte_data(client, reg); + if (val1 < 0) + return val1; + val2 = i2c_smbus_read_byte_data(client, reg + 1); + if (val2 < 0) + return val2; - return val; + return val1 | (val2 << 8); } static void adt7475_write_word(struct i2c_client *client, int reg, u16 val) @@ -962,13 +966,14 @@ static ssize_t show_pwmfreq(struct device *dev, struct device_attribute *attr, { struct adt7475_data *data = adt7475_update_device(dev); struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); - int i = clamp_val(data->range[sattr->index] & 0xf, 0, - ARRAY_SIZE(pwmfreq_table) - 1); + int idx; if (IS_ERR(data)) return PTR_ERR(data); + idx = clamp_val(data->range[sattr->index] & 0xf, 0, + ARRAY_SIZE(pwmfreq_table) - 1); - return sprintf(buf, "%d\n", pwmfreq_table[i]); + return sprintf(buf, "%d\n", pwmfreq_table[idx]); } static ssize_t set_pwmfreq(struct device *dev, struct device_attribute *attr, @@ -1004,6 +1009,10 @@ static ssize_t pwm_use_point2_pwm_at_crit_show(struct device *dev, char *buf) { struct adt7475_data *data = adt7475_update_device(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + return sprintf(buf, "%d\n", !!(data->config4 & CONFIG4_MAXDUTY)); } diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index e9e6aeabbf84..71d3445ba869 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -17,7 +17,7 @@ * Bi-directional Current/Power Monitor with I2C Interface * Datasheet: http://www.ti.com/product/ina230 * - * Copyright (C) 2012 Lothar Felten <l-felten@ti.com> + * Copyright (C) 2012 Lothar Felten <lothar.felten@gmail.com> * Thanks to Jan Volkering * * This program is free software; you can redistribute it and/or modify @@ -329,6 +329,15 @@ static int ina2xx_set_shunt(struct ina2xx_data *data, long val) return 0; } +static ssize_t ina2xx_show_shunt(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%li\n", data->rshunt); +} + static ssize_t ina2xx_store_shunt(struct device *dev, struct device_attribute *da, const char *buf, size_t count) @@ -403,7 +412,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_store_shunt, + ina2xx_show_shunt, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index c6bd61e4695a..944f5b63aecd 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -63,6 +63,7 @@ #include <linux/bitops.h> #include <linux/dmi.h> #include <linux/io.h> +#include <linux/nospec.h> #include "lm75.h" #define USE_ALTERNATE @@ -2689,6 +2690,7 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr, return err; if (val > NUM_TEMP) return -EINVAL; + val = array_index_nospec(val, NUM_TEMP + 1); if (val && (!(data->have_temp & BIT(val - 1)) || !data->temp_src[val - 1])) return -EINVAL; diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index 6ec65adaba49..c33dcfb87993 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -110,8 +110,8 @@ static int sclhi(struct i2c_algo_bit_data *adap) } #ifdef DEBUG if (jiffies != start && i2c_debug >= 3) - pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go " - "high\n", jiffies - start); + pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go high\n", + jiffies - start); #endif done: @@ -171,8 +171,9 @@ static int i2c_outb(struct i2c_adapter *i2c_adap, unsigned char c) setsda(adap, sb); udelay((adap->udelay + 1) / 2); if (sclhi(adap) < 0) { /* timed out */ - bit_dbg(1, &i2c_adap->dev, "i2c_outb: 0x%02x, " - "timeout at bit #%d\n", (int)c, i); + bit_dbg(1, &i2c_adap->dev, + "i2c_outb: 0x%02x, timeout at bit #%d\n", + (int)c, i); return -ETIMEDOUT; } /* FIXME do arbitration here: @@ -185,8 +186,8 @@ static int i2c_outb(struct i2c_adapter *i2c_adap, unsigned char c) } sdahi(adap); if (sclhi(adap) < 0) { /* timeout */ - bit_dbg(1, &i2c_adap->dev, "i2c_outb: 0x%02x, " - "timeout at ack\n", (int)c); + bit_dbg(1, &i2c_adap->dev, + "i2c_outb: 0x%02x, timeout at ack\n", (int)c); return -ETIMEDOUT; } @@ -215,8 +216,9 @@ static int i2c_inb(struct i2c_adapter *i2c_adap) sdahi(adap); for (i = 0; i < 8; i++) { if (sclhi(adap) < 0) { /* timeout */ - bit_dbg(1, &i2c_adap->dev, "i2c_inb: timeout at bit " - "#%d\n", 7 - i); + bit_dbg(1, &i2c_adap->dev, + "i2c_inb: timeout at bit #%d\n", + 7 - i); return -ETIMEDOUT; } indata *= 2; @@ -265,8 +267,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!scl) { - printk(KERN_WARNING "%s: SCL unexpected low " - "while pulling SDA low!\n", name); + printk(KERN_WARNING + "%s: SCL unexpected low while pulling SDA low!\n", + name); goto bailout; } @@ -278,8 +281,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!scl) { - printk(KERN_WARNING "%s: SCL unexpected low " - "while pulling SDA high!\n", name); + printk(KERN_WARNING + "%s: SCL unexpected low while pulling SDA high!\n", + name); goto bailout; } @@ -291,8 +295,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!sda) { - printk(KERN_WARNING "%s: SDA unexpected low " - "while pulling SCL low!\n", name); + printk(KERN_WARNING + "%s: SDA unexpected low while pulling SCL low!\n", + name); goto bailout; } @@ -304,8 +309,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!sda) { - printk(KERN_WARNING "%s: SDA unexpected low " - "while pulling SCL high!\n", name); + printk(KERN_WARNING + "%s: SDA unexpected low while pulling SCL high!\n", + name); goto bailout; } @@ -352,8 +358,8 @@ static int try_address(struct i2c_adapter *i2c_adap, i2c_start(adap); } if (i && ret) - bit_dbg(1, &i2c_adap->dev, "Used %d tries to %s client at " - "0x%02x: %s\n", i + 1, + bit_dbg(1, &i2c_adap->dev, + "Used %d tries to %s client at 0x%02x: %s\n", i + 1, addr & 1 ? "read from" : "write to", addr >> 1, ret == 1 ? "success" : "failed, timeout?"); return ret; @@ -442,8 +448,9 @@ static int readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msg) if (inval <= 0 || inval > I2C_SMBUS_BLOCK_MAX) { if (!(flags & I2C_M_NO_RD_ACK)) acknak(i2c_adap, 0); - dev_err(&i2c_adap->dev, "readbytes: invalid " - "block length (%d)\n", inval); + dev_err(&i2c_adap->dev, + "readbytes: invalid block length (%d)\n", + inval); return -EPROTO; } /* The original count value accounts for the extra @@ -506,8 +513,8 @@ static int bit_doAddress(struct i2c_adapter *i2c_adap, struct i2c_msg *msg) return -ENXIO; } if (flags & I2C_M_RD) { - bit_dbg(3, &i2c_adap->dev, "emitting repeated " - "start condition\n"); + bit_dbg(3, &i2c_adap->dev, + "emitting repeated start condition\n"); i2c_repstart(adap); /* okay, now switch into reading mode */ addr |= 0x01; @@ -564,8 +571,8 @@ static int bit_xfer(struct i2c_adapter *i2c_adap, } ret = bit_doAddress(i2c_adap, pmsg); if ((ret != 0) && !nak_ok) { - bit_dbg(1, &i2c_adap->dev, "NAK from " - "device addr 0x%02x msg #%d\n", + bit_dbg(1, &i2c_adap->dev, + "NAK from device addr 0x%02x msg #%d\n", msgs[i].addr, i); goto bailout; } diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index e18442b9973a..94d94b4a9a0d 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -708,7 +708,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev) i2c_set_adapdata(adap, dev); if (dev->pm_disabled) { - dev_pm_syscore_device(dev->dev, true); irq_flags = IRQF_NO_SUSPEND; } else { irq_flags = IRQF_SHARED | IRQF_COND_SUSPEND; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 1a8d2da5b000..b5750fd85125 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -434,6 +434,9 @@ static int dw_i2c_plat_suspend(struct device *dev) { struct dw_i2c_dev *i_dev = dev_get_drvdata(dev); + if (i_dev->pm_disabled) + return 0; + i_dev->disable(i_dev); i2c_dw_prepare_clk(i_dev, false); @@ -444,7 +447,9 @@ static int dw_i2c_plat_resume(struct device *dev) { struct dw_i2c_dev *i_dev = dev_get_drvdata(dev); - i2c_dw_prepare_clk(i_dev, true); + if (!i_dev->pm_disabled) + i2c_dw_prepare_clk(i_dev, true); + i_dev->init(i_dev); return 0; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 941c223f6491..04b60a349d7e 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1415,6 +1415,13 @@ static void i801_add_tco(struct i801_priv *priv) } #ifdef CONFIG_ACPI +static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv, + acpi_physical_address address) +{ + return address >= priv->smba && + address <= pci_resource_end(priv->pci_dev, SMBBAR); +} + static acpi_status i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, u64 *value, void *handler_context, void *region_context) @@ -1430,7 +1437,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, */ mutex_lock(&priv->acpi_lock); - if (!priv->acpi_reserved) { + if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) { priv->acpi_reserved = true; dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 439e8778f849..818cab14e87c 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -507,8 +507,6 @@ static void sh_mobile_i2c_dma_callback(void *data) pd->pos = pd->msg->len; pd->stop_after_dma = true; - i2c_release_dma_safe_msg_buf(pd->msg, pd->dma_buf); - iic_set_clr(pd, ICIC, 0, ICIC_TDMAE | ICIC_RDMAE); } @@ -602,8 +600,8 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd) dma_async_issue_pending(chan); } -static int start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, - bool do_init) +static void start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, + bool do_init) { if (do_init) { /* Initialize channel registers */ @@ -627,7 +625,6 @@ static int start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, /* Enable all interrupts to begin with */ iic_wr(pd, ICIC, ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); - return 0; } static int poll_dte(struct sh_mobile_i2c_data *pd) @@ -698,9 +695,7 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, pd->send_stop = i == num - 1 || msg->flags & I2C_M_STOP; pd->stop_after_dma = false; - err = start_ch(pd, msg, do_start); - if (err) - break; + start_ch(pd, msg, do_start); if (do_start) i2c_op(pd, OP_START, 0); @@ -709,6 +704,10 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, timeout = wait_event_timeout(pd->wait, pd->sr & (ICSR_TACK | SW_DONE), adapter->timeout); + + /* 'stop_after_dma' tells if DMA transfer was complete */ + i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, pd->stop_after_dma); + if (!timeout) { dev_err(pd->dev, "Transfer request timed out\n"); if (pd->dma_direction != DMA_NONE) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index f15737763608..9ee9a15e7134 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2293,21 +2293,22 @@ u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold) EXPORT_SYMBOL_GPL(i2c_get_dma_safe_msg_buf); /** - * i2c_release_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg - * @msg: the message to be synced with + * i2c_put_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg * @buf: the buffer obtained from i2c_get_dma_safe_msg_buf(). May be NULL. + * @msg: the message which the buffer corresponds to + * @xferred: bool saying if the message was transferred */ -void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf) +void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred) { if (!buf || buf == msg->buf) return; - if (msg->flags & I2C_M_RD) + if (xferred && msg->flags & I2C_M_RD) memcpy(msg->buf, buf, msg->len); kfree(buf); } -EXPORT_SYMBOL_GPL(i2c_release_dma_safe_msg_buf); +EXPORT_SYMBOL_GPL(i2c_put_dma_safe_msg_buf); MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>"); MODULE_DESCRIPTION("I2C-Bus main module"); diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 46b855a42884..c2ca9e4b5160 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -45,6 +45,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> +#include <rdma/ib_sa.h> #include <rdma/ib.h> #include <rdma/rdma_netlink.h> #include <net/netlink.h> @@ -61,6 +62,7 @@ struct addr_req { struct rdma_dev_addr *addr, void *context); unsigned long timeout; struct delayed_work work; + bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */ int status; u32 seq; }; @@ -219,60 +221,75 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr) } EXPORT_SYMBOL(rdma_addr_size_kss); -void rdma_copy_addr(struct rdma_dev_addr *dev_addr, - const struct net_device *dev, - const unsigned char *dst_dev_addr) +/** + * rdma_copy_src_l2_addr - Copy netdevice source addresses + * @dev_addr: Destination address pointer where to copy the addresses + * @dev: Netdevice whose source addresses to copy + * + * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice. + * This includes unicast address, broadcast address, device type and + * interface index. + */ +void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); - if (dst_dev_addr) - memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; } -EXPORT_SYMBOL(rdma_copy_addr); +EXPORT_SYMBOL(rdma_copy_src_l2_addr); -int rdma_translate_ip(const struct sockaddr *addr, - struct rdma_dev_addr *dev_addr) +static struct net_device * +rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) { - struct net_device *dev; + struct net_device *dev = NULL; + int ret = -EADDRNOTAVAIL; - if (dev_addr->bound_dev_if) { - dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (!dev) - return -ENODEV; - rdma_copy_addr(dev_addr, dev, NULL); - dev_put(dev); - return 0; - } - - switch (addr->sa_family) { + switch (src_in->sa_family) { case AF_INET: - dev = ip_dev_find(dev_addr->net, - ((const struct sockaddr_in *)addr)->sin_addr.s_addr); - - if (!dev) - return -EADDRNOTAVAIL; - - rdma_copy_addr(dev_addr, dev, NULL); - dev_put(dev); + dev = __ip_dev_find(net, + ((const struct sockaddr_in *)src_in)->sin_addr.s_addr, + false); + if (dev) + ret = 0; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - rcu_read_lock(); - for_each_netdev_rcu(dev_addr->net, dev) { - if (ipv6_chk_addr(dev_addr->net, - &((const struct sockaddr_in6 *)addr)->sin6_addr, + for_each_netdev_rcu(net, dev) { + if (ipv6_chk_addr(net, + &((const struct sockaddr_in6 *)src_in)->sin6_addr, dev, 1)) { - rdma_copy_addr(dev_addr, dev, NULL); + ret = 0; break; } } - rcu_read_unlock(); break; #endif } - return 0; + return ret ? ERR_PTR(ret) : dev; +} + +int rdma_translate_ip(const struct sockaddr *addr, + struct rdma_dev_addr *dev_addr) +{ + struct net_device *dev; + + if (dev_addr->bound_dev_if) { + dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + if (!dev) + return -ENODEV; + rdma_copy_src_l2_addr(dev_addr, dev); + dev_put(dev); + return 0; + } + + rcu_read_lock(); + dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr); + if (!IS_ERR(dev)) + rdma_copy_src_l2_addr(dev_addr, dev); + rcu_read_unlock(); + return PTR_ERR_OR_ZERO(dev); } EXPORT_SYMBOL(rdma_translate_ip); @@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req) spin_unlock_bh(&lock); } -static int ib_nl_fetch_ha(const struct dst_entry *dst, - struct rdma_dev_addr *dev_addr, +static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) return -EADDRNOTAVAIL; - /* We fill in what we can, the response will fill the rest */ - rdma_copy_addr(dev_addr, dst->dev, NULL); return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); } @@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst, neigh_event_send(n, NULL); ret = -ENODATA; } else { - rdma_copy_addr(dev_addr, dst->dev, n->ha); + memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN); } neigh_release(n); @@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, (const void *)&dst_in6->sin6_addr; sa_family_t family = dst_in->sa_family; - /* Gateway + ARPHRD_INFINIBAND -> IB router */ - if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) - return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); + /* If we have a gateway in IB mode then it must be an IB network */ + if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) + return ib_nl_fetch_ha(dev_addr, daddr, seq, family); else return dst_fetch_ha(dst, dev_addr, daddr); } -static int addr4_resolve(struct sockaddr_in *src_in, - const struct sockaddr_in *dst_in, +static int addr4_resolve(struct sockaddr *src_sock, + const struct sockaddr *dst_sock, struct rdma_dev_addr *addr, struct rtable **prt) { + struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock; + const struct sockaddr_in *dst_in = + (const struct sockaddr_in *)dst_sock; + __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; struct rtable *rt; @@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in, if (ret) return ret; - src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; - /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're - * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network - * type accordingly. - */ - if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) - addr->network = RDMA_NETWORK_IPV4; - addr->hoplimit = ip4_dst_hoplimit(&rt->dst); *prt = rt; @@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in, } #if IS_ENABLED(CONFIG_IPV6) -static int addr6_resolve(struct sockaddr_in6 *src_in, - const struct sockaddr_in6 *dst_in, +static int addr6_resolve(struct sockaddr *src_sock, + const struct sockaddr *dst_sock, struct rdma_dev_addr *addr, struct dst_entry **pdst) { + struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock; + const struct sockaddr_in6 *dst_in = + (const struct sockaddr_in6 *)dst_sock; struct flowi6 fl6; struct dst_entry *dst; - struct rt6_info *rt; int ret; memset(&fl6, 0, sizeof fl6); @@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, if (ret < 0) return ret; - rt = (struct rt6_info *)dst; - if (ipv6_addr_any(&src_in->sin6_addr)) { - src_in->sin6_family = AF_INET6; + if (ipv6_addr_any(&src_in->sin6_addr)) src_in->sin6_addr = fl6.saddr; - } - - /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're - * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network - * type accordingly. - */ - if (rt->rt6i_flags & RTF_GATEWAY && - ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) - addr->network = RDMA_NETWORK_IPV6; addr->hoplimit = ip6_dst_hoplimit(dst); @@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, return 0; } #else -static int addr6_resolve(struct sockaddr_in6 *src_in, - const struct sockaddr_in6 *dst_in, +static int addr6_resolve(struct sockaddr *src_sock, + const struct sockaddr *dst_sock, struct rdma_dev_addr *addr, struct dst_entry **pdst) { @@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, static int addr_resolve_neigh(const struct dst_entry *dst, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, + unsigned int ndev_flags, u32 seq) { - if (dst->dev->flags & IFF_LOOPBACK) { - int ret; + int ret = 0; - ret = rdma_translate_ip(dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, addr->src_dev_addr, - MAX_ADDR_LEN); + if (ndev_flags & IFF_LOOPBACK) { + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + } else { + if (!(ndev_flags & IFF_NOARP)) { + /* If the device doesn't do ARP internally */ + ret = fetch_ha(dst, addr, dst_in, seq); + } + } + return ret; +} - return ret; +static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, + const struct sockaddr *dst_in, + const struct dst_entry *dst, + const struct net_device *ndev) +{ + int ret = 0; + + if (dst->dev->flags & IFF_LOOPBACK) + ret = rdma_translate_ip(dst_in, dev_addr); + else + rdma_copy_src_l2_addr(dev_addr, dst->dev); + + /* + * If there's a gateway and type of device not ARPHRD_INFINIBAND, + * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the + * network type accordingly. + */ + if (has_gateway(dst, dst_in->sa_family) && + ndev->type != ARPHRD_INFINIBAND) + dev_addr->network = dst_in->sa_family == AF_INET ? + RDMA_NETWORK_IPV4 : + RDMA_NETWORK_IPV6; + else + dev_addr->network = RDMA_NETWORK_IB; + + return ret; +} + +static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, + unsigned int *ndev_flags, + const struct sockaddr *dst_in, + const struct dst_entry *dst) +{ + struct net_device *ndev = READ_ONCE(dst->dev); + + *ndev_flags = ndev->flags; + /* A physical device must be the RDMA device to use */ + if (ndev->flags & IFF_LOOPBACK) { + /* + * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or + * loopback IP address. So if route is resolved to loopback + * interface, translate that to a real ndev based on non + * loopback IP address. + */ + ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); + if (!ndev) + return -ENODEV; } - /* If the device doesn't do ARP internally */ - if (!(dst->dev->flags & IFF_NOARP)) - return fetch_ha(dst, addr, dst_in, seq); + return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); +} + +static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) +{ + struct net_device *ndev; - rdma_copy_addr(addr, dst->dev, NULL); + ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr); + if (IS_ERR(ndev)) + return PTR_ERR(ndev); + /* + * Since we are holding the rcu, reading net and ifindex + * are safe without any additional reference; because + * change_net_namespace() in net/core/dev.c does rcu sync + * after it changes the state to IFF_DOWN and before + * updating netdev fields {net, ifindex}. + */ + addr->net = dev_net(ndev); + addr->bound_dev_if = ndev->ifindex; return 0; } +static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr) +{ + addr->net = &init_net; + addr->bound_dev_if = 0; +} + static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, bool resolve_neigh, + bool resolve_by_gid_attr, u32 seq) { - struct net_device *ndev; - struct dst_entry *dst; + struct dst_entry *dst = NULL; + unsigned int ndev_flags = 0; + struct rtable *rt = NULL; int ret; if (!addr->net) { @@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in, return -EINVAL; } - if (src_in->sa_family == AF_INET) { - struct rtable *rt = NULL; - const struct sockaddr_in *dst_in4 = - (const struct sockaddr_in *)dst_in; - - ret = addr4_resolve((struct sockaddr_in *)src_in, - dst_in4, addr, &rt); - if (ret) - return ret; - - if (resolve_neigh) - ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); - - if (addr->bound_dev_if) { - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); - } else { - ndev = rt->dst.dev; - dev_hold(ndev); + rcu_read_lock(); + if (resolve_by_gid_attr) { + if (!addr->sgid_attr) { + rcu_read_unlock(); + pr_warn_ratelimited("%s: missing gid_attr\n", __func__); + return -EINVAL; } - - ip_rt_put(rt); - } else { - const struct sockaddr_in6 *dst_in6 = - (const struct sockaddr_in6 *)dst_in; - - ret = addr6_resolve((struct sockaddr_in6 *)src_in, - dst_in6, addr, - &dst); - if (ret) + /* + * If the request is for a specific gid attribute of the + * rdma_dev_addr, derive net from the netdevice of the + * GID attribute. + */ + ret = set_addr_netns_by_gid_rcu(addr); + if (ret) { + rcu_read_unlock(); return ret; - - if (resolve_neigh) - ret = addr_resolve_neigh(dst, dst_in, addr, seq); - - if (addr->bound_dev_if) { - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); - } else { - ndev = dst->dev; - dev_hold(ndev); } - - dst_release(dst); } - - if (ndev) { - if (ndev->flags & IFF_LOOPBACK) - ret = rdma_translate_ip(dst_in, addr); - else - addr->bound_dev_if = ndev->ifindex; - dev_put(ndev); + if (src_in->sa_family == AF_INET) { + ret = addr4_resolve(src_in, dst_in, addr, &rt); + dst = &rt->dst; + } else { + ret = addr6_resolve(src_in, dst_in, addr, &dst); } + if (ret) { + rcu_read_unlock(); + goto done; + } + ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); + rcu_read_unlock(); + + /* + * Resolve neighbor destination address if requested and + * only if src addr translation didn't fail. + */ + if (!ret && resolve_neigh) + ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); + if (src_in->sa_family == AF_INET) + ip_rt_put(rt); + else + dst_release(dst); +done: + /* + * Clear the addr net to go back to its original state, only if it was + * derived from GID attribute in this context. + */ + if (resolve_by_gid_attr) + rdma_addr_set_net_defaults(addr); return ret; } @@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work) src_in = (struct sockaddr *)&req->src_addr; dst_in = (struct sockaddr *)&req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, - true, req->seq); + true, req->resolve_by_gid_attr, + req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) { req->status = -ETIMEDOUT; } else if (req->status == -ENODATA) { @@ -589,6 +662,7 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), + bool resolve_by_gid_attr, void *context) { struct sockaddr *src_in, *dst_in; @@ -617,10 +691,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, req->addr = addr; req->callback = callback; req->context = context; + req->resolve_by_gid_attr = resolve_by_gid_attr; INIT_DELAYED_WORK(&req->work, process_one_req); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); - req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); + req->status = addr_resolve(src_in, dst_in, addr, true, + req->resolve_by_gid_attr, req->seq); switch (req->status) { case 0: req->timeout = jiffies; @@ -641,25 +717,53 @@ err: } EXPORT_SYMBOL(rdma_resolve_ip); -int rdma_resolve_ip_route(struct sockaddr *src_addr, - const struct sockaddr *dst_addr, - struct rdma_dev_addr *addr) +int roce_resolve_route_from_path(struct sa_path_rec *rec, + const struct ib_gid_attr *attr) { - struct sockaddr_storage ssrc_addr = {}; - struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid, dgid; + struct rdma_dev_addr dev_addr = {}; + int ret; - if (src_addr) { - if (src_addr->sa_family != dst_addr->sa_family) - return -EINVAL; + if (rec->roce.route_resolved) + return 0; - memcpy(src_in, src_addr, rdma_addr_size(src_addr)); - } else { - src_in->sa_family = dst_addr->sa_family; - } + rdma_gid2ip(&sgid._sockaddr, &rec->sgid); + rdma_gid2ip(&dgid._sockaddr, &rec->dgid); + + if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) + return -EINVAL; + + if (!attr || !attr->ndev) + return -EINVAL; + + dev_addr.net = &init_net; + dev_addr.sgid_attr = attr; + + ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr, + &dev_addr, false, true, 0); + if (ret) + return ret; + + if ((dev_addr.network == RDMA_NETWORK_IPV4 || + dev_addr.network == RDMA_NETWORK_IPV6) && + rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) + return -EINVAL; - return addr_resolve(src_in, dst_addr, addr, false, 0); + rec->roce.route_resolved = true; + return 0; } +/** + * rdma_addr_cancel - Cancel resolve ip request + * @addr: Pointer to address structure given previously + * during rdma_resolve_ip(). + * rdma_addr_cancel() is synchronous function which cancels any pending + * request if there is any. + */ void rdma_addr_cancel(struct rdma_dev_addr *addr) { struct addr_req *req, *temp_req; @@ -687,11 +791,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) * guarentees no work is running and none will be started. */ cancel_delayed_work_sync(&found->work); - - if (found->callback) - found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr, - found->addr, found->context); - kfree(found); } EXPORT_SYMBOL(rdma_addr_cancel); @@ -710,7 +809,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr, int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *dmac, const struct net_device *ndev, + u8 *dmac, const struct ib_gid_attr *sgid_attr, int *hoplimit) { struct rdma_dev_addr dev_addr; @@ -726,12 +825,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); - dev_addr.bound_dev_if = ndev->ifindex; dev_addr.net = &init_net; + dev_addr.sgid_attr = sgid_attr; init_completion(&ctx.comp); ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, - &dev_addr, 1000, resolve_cb, &ctx); + &dev_addr, 1000, resolve_cb, true, &ctx); if (ret) return ret; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 0bee1f4b914e..8957d31d60ca 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1252,6 +1252,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr) } EXPORT_SYMBOL(rdma_hold_gid_attr); +/** + * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice + * which must be in UP state. + * + * @attr:Pointer to the GID attribute + * + * Returns pointer to netdevice if the netdevice was attached to GID and + * netdevice is in UP state. Caller must hold RCU lock as this API + * reads the netdev flags which can change while netdevice migrates to + * different net namespace. Returns ERR_PTR with error code otherwise. + * + */ +struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + struct ib_device *device = entry->attr.device; + struct net_device *ndev = ERR_PTR(-ENODEV); + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table; + unsigned long flags; + bool valid; + + table = rdma_gid_table(device, port_num); + + read_lock_irqsave(&table->rwlock, flags); + valid = is_gid_entry_valid(table->data_vec[attr->index]); + if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP)) + ndev = attr->ndev; + read_unlock_irqrestore(&table->rwlock, flags); + return ndev; +} + static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f72677291b69..a57c8b823302 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1900,7 +1900,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { - rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); + rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev); } else { if (!cma_protocol_roce(listen_id) && cma_any_addr(cma_src_addr(id_priv))) { @@ -1950,7 +1950,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, goto err; if (net_dev) { - rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); + rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev); } else { if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), @@ -2880,13 +2880,11 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); } static int cma_resolve_loopback(struct rdma_id_private *id_priv) @@ -2983,16 +2981,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return -EINVAL; memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - atomic_inc(&id_priv->refcount); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { if (dst_addr->sa_family == AF_IB) { ret = cma_resolve_ib_addr(id_priv); } else { - ret = rdma_resolve_ip(cma_src_addr(id_priv), - dst_addr, &id->route.addr.dev_addr, - timeout_ms, addr_handler, id_priv); + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, + false, id_priv); } } if (ret) @@ -3001,7 +2999,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return 0; err: cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_addr); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 77c7005c396c..d7399d5b1cb6 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -338,7 +338,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *dmac, const struct net_device *ndev, + u8 *dmac, const struct ib_gid_attr *sgid_attr, int *hoplimit); +void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev); +struct sa_path_rec; +int roce_resolve_route_from_path(struct sa_path_rec *rec, + const struct ib_gid_attr *attr); + +struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index af5ad6a56ae4..9271f7290005 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work) IB_POLL_BATCH); if (completed >= IB_POLL_BUDGET_WORKQUEUE || ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) - queue_work(ib_comp_wq, &cq->work); + queue_work(cq->comp_wq, &cq->work); } static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) { - queue_work(ib_comp_wq, &cq->work); + queue_work(cq->comp_wq, &cq->work); } /** @@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); break; case IB_POLL_WORKQUEUE: + case IB_POLL_UNBOUND_WORKQUEUE: cq->comp_handler = ib_cq_completion_workqueue; INIT_WORK(&cq->work, ib_cq_poll_work); ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ? + ib_comp_wq : ib_comp_unbound_wq; break; default: ret = -EINVAL; @@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq) irq_poll_disable(&cq->iop); break; case IB_POLL_WORKQUEUE: + case IB_POLL_UNBOUND_WORKQUEUE: cancel_work_sync(&cq->work); break; default: diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index db3b6271f09d..5a680a88aa87 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -61,6 +61,7 @@ struct ib_client_data { }; struct workqueue_struct *ib_comp_wq; +struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); @@ -269,7 +270,7 @@ struct ib_device *ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); - spin_lock_init(&device->client_data_lock); + rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); @@ -285,6 +286,7 @@ EXPORT_SYMBOL(ib_alloc_device); */ void ib_dealloc_device(struct ib_device *device) { + WARN_ON(!list_empty(&device->client_data_list)); WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && device->reg_state != IB_DEV_UNINITIALIZED); rdma_restrack_clean(&device->res); @@ -295,9 +297,8 @@ EXPORT_SYMBOL(ib_dealloc_device); static int add_client_context(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; - unsigned long flags; - context = kmalloc(sizeof *context, GFP_KERNEL); + context = kmalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; @@ -306,9 +307,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client context->going_down = false; down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); + write_lock_irq(&device->client_data_lock); list_add(&context->list, &device->client_data_list); - spin_unlock_irqrestore(&device->client_data_lock, flags); + write_unlock_irq(&device->client_data_lock); up_write(&lists_rwsem); return 0; @@ -524,6 +525,8 @@ int ib_register_device(struct ib_device *device, goto port_cleanup; } + device->index = __dev_new_index(); + ret = ib_device_register_rdmacg(device); if (ret) { pr_warn("Couldn't register device with rdma cgroup\n"); @@ -550,7 +553,6 @@ int ib_register_device(struct ib_device *device, if (!add_client_context(device, client) && client->add) client->add(device); - device->index = __dev_new_index(); down_write(&lists_rwsem); list_add_tail(&device->core_list, &device_list); up_write(&lists_rwsem); @@ -585,21 +587,20 @@ void ib_unregister_device(struct ib_device *device) down_write(&lists_rwsem); list_del(&device->core_list); - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + write_lock_irq(&device->client_data_lock); + list_for_each_entry(context, &device->client_data_list, list) context->going_down = true; - spin_unlock_irqrestore(&device->client_data_lock, flags); + write_unlock_irq(&device->client_data_lock); downgrade_write(&lists_rwsem); - list_for_each_entry_safe(context, tmp, &device->client_data_list, - list) { + list_for_each_entry(context, &device->client_data_list, list) { if (context->client->remove) context->client->remove(device, context->data); } up_read(&lists_rwsem); - ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); + ib_device_unregister_rdmacg(device); mutex_unlock(&device_mutex); @@ -609,10 +610,13 @@ void ib_unregister_device(struct ib_device *device) kfree(device->port_pkey_list); down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + write_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, + list) { + list_del(&context->list); kfree(context); - spin_unlock_irqrestore(&device->client_data_lock, flags); + } + write_unlock_irqrestore(&device->client_data_lock, flags); up_write(&lists_rwsem); device->reg_state = IB_DEV_UNREGISTERED; @@ -662,9 +666,8 @@ EXPORT_SYMBOL(ib_register_client); */ void ib_unregister_client(struct ib_client *client) { - struct ib_client_data *context, *tmp; + struct ib_client_data *context; struct ib_device *device; - unsigned long flags; mutex_lock(&device_mutex); @@ -676,14 +679,14 @@ void ib_unregister_client(struct ib_client *client) struct ib_client_data *found_context = NULL; down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + write_lock_irq(&device->client_data_lock); + list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { context->going_down = true; found_context = context; break; } - spin_unlock_irqrestore(&device->client_data_lock, flags); + write_unlock_irq(&device->client_data_lock); up_write(&lists_rwsem); if (client->remove) @@ -697,11 +700,11 @@ void ib_unregister_client(struct ib_client *client) } down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); + write_lock_irq(&device->client_data_lock); list_del(&found_context->list); - kfree(found_context); - spin_unlock_irqrestore(&device->client_data_lock, flags); + write_unlock_irq(&device->client_data_lock); up_write(&lists_rwsem); + kfree(found_context); } mutex_unlock(&device_mutex); @@ -722,13 +725,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) void *ret = NULL; unsigned long flags; - spin_lock_irqsave(&device->client_data_lock, flags); + read_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { ret = context->data; break; } - spin_unlock_irqrestore(&device->client_data_lock, flags); + read_unlock_irqrestore(&device->client_data_lock, flags); return ret; } @@ -749,7 +752,7 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, struct ib_client_data *context; unsigned long flags; - spin_lock_irqsave(&device->client_data_lock, flags); + write_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { context->data = data; @@ -760,7 +763,7 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, device->name, client->name); out: - spin_unlock_irqrestore(&device->client_data_lock, flags); + write_unlock_irqrestore(&device->client_data_lock, flags); } EXPORT_SYMBOL(ib_set_client_data); @@ -1166,10 +1169,19 @@ static int __init ib_core_init(void) goto err; } + ib_comp_unbound_wq = + alloc_workqueue("ib-comp-unb-wq", + WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | + WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); + if (!ib_comp_unbound_wq) { + ret = -ENOMEM; + goto err_comp; + } + ret = class_register(&ib_class); if (ret) { pr_warn("Couldn't create InfiniBand device class\n"); - goto err_comp; + goto err_comp_unbound; } ret = rdma_nl_init(); @@ -1218,6 +1230,8 @@ err_ibnl: rdma_nl_exit(); err_sysfs: class_unregister(&ib_class); +err_comp_unbound: + destroy_workqueue(ib_comp_unbound_wq); err_comp: destroy_workqueue(ib_comp_wq); err: @@ -1236,6 +1250,7 @@ static void __exit ib_core_cleanup(void) addr_cleanup(); rdma_nl_exit(); class_unregister(&ib_class); + destroy_workqueue(ib_comp_unbound_wq); destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ef459f2f2eeb..c355379e7534 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, int ret2, qpn; u8 mgmt_class, vclass; + if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) || + (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num))) + return ERR_PTR(-EPROTONOSUPPORT); + /* Validate parameters */ qpn = get_spl_qp_index(qp_type); if (qpn == -1) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid QP Type %d\n", - qp_type); + dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n", + __func__, qp_type); goto error1; } if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid RMPP Version %u\n", - rmpp_version); + dev_dbg_ratelimited(&device->dev, + "%s: invalid RMPP Version %u\n", + __func__, rmpp_version); goto error1; } /* Validate MAD registration request if supplied */ if (mad_reg_req) { if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid Class Version %u\n", - mad_reg_req->mgmt_class_version); + dev_dbg_ratelimited(&device->dev, + "%s: invalid Class Version %u\n", + __func__, + mad_reg_req->mgmt_class_version); goto error1; } if (!recv_handler) { - dev_notice(&device->dev, - "ib_register_mad_agent: no recv_handler\n"); + dev_dbg_ratelimited(&device->dev, + "%s: no recv_handler\n", __func__); goto error1; } if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { @@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, */ if (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } else if (mad_reg_req->mgmt_class == 0) { @@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, * Class 0 is reserved in IBA and is used for * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE */ - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0\n"); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0\n", + __func__); goto error1; } else if (is_vendor_class(mad_reg_req->mgmt_class)) { /* @@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, * ensure supplied OUI is not zero */ if (!is_vendor_oui(mad_reg_req->oui)) { - dev_notice(&device->dev, - "ib_register_mad_agent: No OUI specified for class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: No OUI specified for class 0x%x\n", + __func__, + mad_reg_req->mgmt_class); goto error1; } } /* Make sure class supplied is consistent with RMPP */ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { if (rmpp_version) { - dev_notice(&device->dev, - "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: RMPP version for non-RMPP class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } @@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, IB_MGMT_CLASS_SUBN_LID_ROUTED) && (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid SM QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } else { @@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid GS QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } @@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid port %d\n", - port_num); + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n", + __func__, port_num); ret = ERR_PTR(-ENODEV); goto error1; } - /* Verify the QP requested is supported. For example, Ethernet devices - * will not have QP0 */ + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0. + */ if (!port_priv->qp_info[qpn].qp) { - dev_notice(&device->dev, - "ib_register_mad_agent: QP %d not supported\n", qpn); + dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n", + __func__, qpn); ret = ERR_PTR(-EPROTONOSUPPORT); goto error1; } @@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device, cq_size *= 2; port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, - IB_POLL_WORKQUEUE); + IB_POLL_UNBOUND_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 6eb64c6f0802..752a55c6bdce 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f) uverbs_uobject_put(uobj); } -static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) -{ - struct ib_device *ib_dev = ibcontext->device; - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - down_write(&owning_mm->mmap_sem); - ib_dev->disassociate_ucontext(ibcontext); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); -} - /* * Drop the ucontext off the ufile and completely disconnect it from the * ib_device @@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { struct ib_ucontext *ucontext = ufile->ucontext; + struct ib_device *ib_dev = ucontext->device; int ret; - if (reason == RDMA_REMOVE_DRIVER_REMOVE) - ufile_disassociate_ucontext(ucontext); + /* + * If we are closing the FD then the user mmap VMAs must have + * already been destroyed as they hold on to the filep, otherwise + * they need to be zap'd. + */ + if (reason == RDMA_REMOVE_DRIVER_REMOVE) { + uverbs_user_mmap_disassociate(ufile); + if (ib_dev->disassociate_ucontext) + ib_dev->disassociate_ucontext(ucontext); + } - put_pid(ucontext->tgid); - ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); /* * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove * the error return. */ - ret = ucontext->device->dealloc_ucontext(ucontext); + ret = ib_dev->dealloc_ucontext(ucontext); WARN_ON(ret); ufile->ucontext = NULL; @@ -882,6 +852,8 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); if (!uverbs_destroy_uobject(obj, reason)) ret = 0; + else + atomic_set(&obj->usecnt, 0); } return ret; } diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index f962f2a593ba..4886d2bba7c7 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi); void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); +void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7b794a14d6e8..d3d6275b3b7e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1227,46 +1227,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) return src_path_mask; } -static int roce_resolve_route_from_path(struct sa_path_rec *rec, - const struct ib_gid_attr *attr) -{ - struct rdma_dev_addr dev_addr = {}; - union { - struct sockaddr _sockaddr; - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; - } sgid_addr, dgid_addr; - int ret; - - if (rec->roce.route_resolved) - return 0; - if (!attr || !attr->ndev) - return -EINVAL; - - dev_addr.bound_dev_if = attr->ndev->ifindex; - /* TODO: Use net from the ib_gid_attr once it is added to it, - * until than, limit itself to init_net. - */ - dev_addr.net = &init_net; - - rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); - rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); - - /* validate the route */ - ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, - &dgid_addr._sockaddr, &dev_addr); - if (ret) - return ret; - - if ((dev_addr.network == RDMA_NETWORK_IPV4 || - dev_addr.network == RDMA_NETWORK_IPV6) && - rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) - return -EINVAL; - - rec->roce.route_resolved = true; - return 0; -} - static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, struct rdma_ah_attr *ah_attr, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7fd14ead7b37..0b04dbff884f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1183,7 +1183,7 @@ err_put: return ret; } -static ssize_t show_node_type(struct device *device, +static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); @@ -1198,8 +1198,9 @@ static ssize_t show_node_type(struct device *device, default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); } } +static DEVICE_ATTR_RO(node_type); -static ssize_t show_sys_image_guid(struct device *device, +static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); @@ -1210,8 +1211,9 @@ static ssize_t show_sys_image_guid(struct device *device, be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); } +static DEVICE_ATTR_RO(sys_image_guid); -static ssize_t show_node_guid(struct device *device, +static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); @@ -1222,8 +1224,9 @@ static ssize_t show_node_guid(struct device *device, be16_to_cpu(((__be16 *) &dev->node_guid)[2]), be16_to_cpu(((__be16 *) &dev->node_guid)[3])); } +static DEVICE_ATTR_RO(node_guid); -static ssize_t show_node_desc(struct device *device, +static ssize_t node_desc_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); @@ -1231,9 +1234,9 @@ static ssize_t show_node_desc(struct device *device, return sprintf(buf, "%.64s\n", dev->node_desc); } -static ssize_t set_node_desc(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t node_desc_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) { struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device_modify desc = {}; @@ -1249,8 +1252,9 @@ static ssize_t set_node_desc(struct device *device, return count; } +static DEVICE_ATTR_RW(node_desc); -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, +static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); @@ -1259,19 +1263,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); return strlen(buf); } +static DEVICE_ATTR_RO(fw_ver); + +static struct attribute *ib_dev_attrs[] = { + &dev_attr_node_type.attr, + &dev_attr_node_guid.attr, + &dev_attr_sys_image_guid.attr, + &dev_attr_fw_ver.attr, + &dev_attr_node_desc.attr, + NULL, +}; -static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); -static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); -static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); -static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); - -static struct device_attribute *ib_class_attributes[] = { - &dev_attr_node_type, - &dev_attr_sys_image_guid, - &dev_attr_node_guid, - &dev_attr_node_desc, - &dev_attr_fw_ver, +static const struct attribute_group dev_attr_group = { + .attrs = ib_dev_attrs, }; static void free_port_list_attributes(struct ib_device *device) @@ -1311,16 +1315,13 @@ int ib_device_register_sysfs(struct ib_device *device, if (ret) return ret; + device->groups[0] = &dev_attr_group; + class_dev->groups = device->groups; + ret = device_add(class_dev); if (ret) goto err; - for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { - ret = device_create_file(class_dev, ib_class_attributes[i]); - if (ret) - goto err_unregister; - } - device->ports_parent = kobject_create_and_add("ports", &class_dev->kobj); if (!device->ports_parent) { @@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device, err_put: free_port_list_attributes(device); - -err_unregister: device_del(class_dev); - err: return ret; } void ib_device_unregister_sysfs(struct ib_device *device) { - int i; - - /* Hold kobject until ib_dealloc_device() */ - kobject_get(&device->dev.kobj); + /* Hold device until ib_dealloc_device() */ + get_device(&device->dev); free_port_list_attributes(device); @@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device) free_hsag(&device->dev.kobj, device->hw_stats_ag); } - for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) - device_remove_file(&device->dev, ib_class_attributes[i]); - device_unregister(&device->dev); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ec8fb289621f..5f437d1570fb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -124,6 +124,8 @@ static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); static DEFINE_IDR(multicast_idr); +static const struct file_operations ucma_fops; + static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { @@ -1581,6 +1583,10 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, f = fdget(cmd.fd); if (!f.file) return -ENOENT; + if (f.file->f_op != &ucma_fops) { + ret = -EINVAL; + goto file_put; + } /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(f.file->private_data, cmd.id); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a41792dbae1f..fec5d489e311 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -86,6 +86,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct vm_area_struct **vma_list; unsigned long lock_limit; unsigned long cur_base; + struct mm_struct *mm; unsigned long npages; int ret; int i; @@ -107,25 +108,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (!can_do_mlock()) return ERR_PTR(-EPERM); - umem = kzalloc(sizeof *umem, GFP_KERNEL); - if (!umem) - return ERR_PTR(-ENOMEM); + if (access & IB_ACCESS_ON_DEMAND) { + umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + umem->is_odp = 1; + } else { + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + } umem->context = context; umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; umem->writable = ib_access_writable(access); + umem->owning_mm = mm = current->mm; + mmgrab(mm); if (access & IB_ACCESS_ON_DEMAND) { - ret = ib_umem_odp_get(context, umem, access); + ret = ib_umem_odp_get(to_ib_umem_odp(umem), access); if (ret) goto umem_kfree; return umem; } - umem->odp_data = NULL; - /* We assume the memory is from hugetlb until proved otherwise */ umem->hugetlb = 1; @@ -147,14 +155,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); - current->mm->pinned_vm += npages; - if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { - up_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); + mm->pinned_vm += npages; + if ((mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { + up_write(&mm->mmap_sem); ret = -ENOMEM; goto vma; } - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); cur_base = addr & PAGE_MASK; @@ -172,14 +180,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, sg_list_start = umem->sg_head.sgl; - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); while (npages) { ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); if (ret < 0) { - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); goto umem_release; } @@ -197,7 +205,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* preparing for next loop */ sg_list_start = sg; } - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, @@ -224,21 +232,32 @@ out: free_page((unsigned long) vma_list); free_page((unsigned long) page_list); umem_kfree: - if (ret) + if (ret) { + mmdrop(umem->owning_mm); kfree(umem); + } return ret ? ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); -static void ib_umem_account(struct work_struct *work) +static void __ib_umem_release_tail(struct ib_umem *umem) +{ + mmdrop(umem->owning_mm); + if (umem->is_odp) + kfree(to_ib_umem_odp(umem)); + else + kfree(umem); +} + +static void ib_umem_release_defer(struct work_struct *work) { struct ib_umem *umem = container_of(work, struct ib_umem, work); - down_write(&umem->mm->mmap_sem); - umem->mm->pinned_vm -= umem->diff; - up_write(&umem->mm->mmap_sem); - mmput(umem->mm); - kfree(umem); + down_write(&umem->owning_mm->mmap_sem); + umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem); + up_write(&umem->owning_mm->mmap_sem); + + __ib_umem_release_tail(umem); } /** @@ -248,52 +267,36 @@ static void ib_umem_account(struct work_struct *work) void ib_umem_release(struct ib_umem *umem) { struct ib_ucontext *context = umem->context; - struct mm_struct *mm; - struct task_struct *task; - unsigned long diff; - if (umem->odp_data) { - ib_umem_odp_release(umem); + if (umem->is_odp) { + ib_umem_odp_release(to_ib_umem_odp(umem)); + __ib_umem_release_tail(umem); return; } __ib_umem_release(umem->context->device, umem, 1); - task = get_pid_task(umem->context->tgid, PIDTYPE_PID); - if (!task) - goto out; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - goto out; - - diff = ib_umem_num_pages(umem); - /* * We may be called with the mm's mmap_sem already held. This * can happen when a userspace munmap() is the call that drops * the last reference to our file and calls our release * method. If there are memory regions to destroy, we'll end * up here and not be able to take the mmap_sem. In that case - * we defer the vm_locked accounting to the system workqueue. + * we defer the vm_locked accounting a workqueue. */ if (context->closing) { - if (!down_write_trylock(&mm->mmap_sem)) { - INIT_WORK(&umem->work, ib_umem_account); - umem->mm = mm; - umem->diff = diff; - + if (!down_write_trylock(&umem->owning_mm->mmap_sem)) { + INIT_WORK(&umem->work, ib_umem_release_defer); queue_work(ib_wq, &umem->work); return; } - } else - down_write(&mm->mmap_sem); + } else { + down_write(&umem->owning_mm->mmap_sem); + } + umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem); + up_write(&umem->owning_mm->mmap_sem); - mm->pinned_vm -= diff; - up_write(&mm->mmap_sem); - mmput(mm); -out: - kfree(umem); + __ib_umem_release_tail(umem); } EXPORT_SYMBOL(ib_umem_release); @@ -303,7 +306,7 @@ int ib_umem_page_count(struct ib_umem *umem) int n; struct scatterlist *sg; - if (umem->odp_data) + if (umem->is_odp) return ib_umem_num_pages(umem); n = 0; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 6ec748eccff7..2b4c5e7dd5a1 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n) struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); - return ib_umem_start(umem_odp->umem); + return ib_umem_start(&umem_odp->umem); } /* Note that the representation of the intervals in the interval tree @@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n) struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); - return ib_umem_end(umem_odp->umem) - 1; + return ib_umem_end(&umem_odp->umem) - 1; } INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, node_start, node_last, static, rbt_ib_umem) -static void ib_umem_notifier_start_account(struct ib_umem *item) +static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) { - mutex_lock(&item->odp_data->umem_mutex); - - /* Only update private counters for this umem if it has them. - * Otherwise skip it. All page faults will be delayed for this umem. */ - if (item->odp_data->mn_counters_active) { - int notifiers_count = item->odp_data->notifiers_count++; - - if (notifiers_count == 0) - /* Initialize the completion object for waiting on - * notifiers. Since notifier_count is zero, no one - * should be waiting right now. */ - reinit_completion(&item->odp_data->notifier_completion); - } - mutex_unlock(&item->odp_data->umem_mutex); -} - -static void ib_umem_notifier_end_account(struct ib_umem *item) -{ - mutex_lock(&item->odp_data->umem_mutex); - - /* Only update private counters for this umem if it has them. - * Otherwise skip it. All page faults will be delayed for this umem. */ - if (item->odp_data->mn_counters_active) { + mutex_lock(&umem_odp->umem_mutex); + if (umem_odp->notifiers_count++ == 0) /* - * This sequence increase will notify the QP page fault that - * the page that is going to be mapped in the spte could have - * been freed. + * Initialize the completion object for waiting on + * notifiers. Since notifier_count is zero, no one should be + * waiting right now. */ - ++item->odp_data->notifiers_seq; - if (--item->odp_data->notifiers_count == 0) - complete_all(&item->odp_data->notifier_completion); - } - mutex_unlock(&item->odp_data->umem_mutex); + reinit_completion(&umem_odp->notifier_completion); + mutex_unlock(&umem_odp->umem_mutex); } -/* Account for a new mmu notifier in an ib_ucontext. */ -static void ib_ucontext_notifier_start_account(struct ib_ucontext *context) +static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) { - atomic_inc(&context->notifier_count); + mutex_lock(&umem_odp->umem_mutex); + /* + * This sequence increase will notify the QP page fault that the page + * that is going to be mapped in the spte could have been freed. + */ + ++umem_odp->notifiers_seq; + if (--umem_odp->notifiers_count == 0) + complete_all(&umem_odp->notifier_completion); + mutex_unlock(&umem_odp->umem_mutex); } -/* Account for a terminating mmu notifier in an ib_ucontext. - * - * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since - * the function takes the semaphore itself. */ -static void ib_ucontext_notifier_end_account(struct ib_ucontext *context) +static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, + u64 start, u64 end, void *cookie) { - int zero_notifiers = atomic_dec_and_test(&context->notifier_count); - - if (zero_notifiers && - !list_empty(&context->no_private_counters)) { - /* No currently running mmu notifiers. Now is the chance to - * add private accounting to all previously added umems. */ - struct ib_umem_odp *odp_data, *next; - - /* Prevent concurrent mmu notifiers from working on the - * no_private_counters list. */ - down_write(&context->umem_rwsem); - - /* Read the notifier_count again, with the umem_rwsem - * semaphore taken for write. */ - if (!atomic_read(&context->notifier_count)) { - list_for_each_entry_safe(odp_data, next, - &context->no_private_counters, - no_private_counters) { - mutex_lock(&odp_data->umem_mutex); - odp_data->mn_counters_active = true; - list_del(&odp_data->no_private_counters); - complete_all(&odp_data->notifier_completion); - mutex_unlock(&odp_data->umem_mutex); - } - } - - up_write(&context->umem_rwsem); - } -} + struct ib_umem *umem = &umem_odp->umem; -static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start, - u64 end, void *cookie) { /* * Increase the number of notifiers running, to * prevent any further fault handling on this MR. */ - ib_umem_notifier_start_account(item); - item->odp_data->dying = 1; + ib_umem_notifier_start_account(umem_odp); + umem_odp->dying = 1; /* Make sure that the fact the umem is dying is out before we release * all pending page faults. */ smp_wmb(); - complete_all(&item->odp_data->notifier_completion); - item->context->invalidate_range(item, ib_umem_start(item), - ib_umem_end(item)); + complete_all(&umem_odp->notifier_completion); + umem->context->invalidate_range(umem_odp, ib_umem_start(umem), + ib_umem_end(umem)); return 0; } static void ib_umem_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); - - if (!context->invalidate_range) - return; - - ib_ucontext_notifier_start_account(context); - down_read(&context->umem_rwsem); - rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, - ULLONG_MAX, - ib_umem_notifier_release_trampoline, - true, - NULL); - up_read(&context->umem_rwsem); + struct ib_ucontext_per_mm *per_mm = + container_of(mn, struct ib_ucontext_per_mm, mn); + + down_read(&per_mm->umem_rwsem); + if (per_mm->active) + rbt_ib_umem_for_each_in_range( + &per_mm->umem_tree, 0, ULLONG_MAX, + ib_umem_notifier_release_trampoline, true, NULL); + up_read(&per_mm->umem_rwsem); } -static int invalidate_page_trampoline(struct ib_umem *item, u64 start, +static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_start_account(item); - item->context->invalidate_range(item, start, start + PAGE_SIZE); + item->umem.context->invalidate_range(item, start, start + PAGE_SIZE); ib_umem_notifier_end_account(item); return 0; } -static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, - u64 end, void *cookie) +static int invalidate_range_start_trampoline(struct ib_umem_odp *item, + u64 start, u64 end, void *cookie) { ib_umem_notifier_start_account(item); - item->context->invalidate_range(item, start, end); + item->umem.context->invalidate_range(item, start, end); return 0; } @@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, unsigned long end, bool blockable) { - struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); - int ret; - - if (!context->invalidate_range) - return 0; + struct ib_ucontext_per_mm *per_mm = + container_of(mn, struct ib_ucontext_per_mm, mn); if (blockable) - down_read(&context->umem_rwsem); - else if (!down_read_trylock(&context->umem_rwsem)) + down_read(&per_mm->umem_rwsem); + else if (!down_read_trylock(&per_mm->umem_rwsem)) return -EAGAIN; - ib_ucontext_notifier_start_account(context); - ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start, - end, - invalidate_range_start_trampoline, - blockable, NULL); - up_read(&context->umem_rwsem); + if (!per_mm->active) { + up_read(&per_mm->umem_rwsem); + /* + * At this point active is permanently set and visible to this + * CPU without a lock, that fact is relied on to skip the unlock + * in range_end. + */ + return 0; + } - return ret; + return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end, + invalidate_range_start_trampoline, + blockable, NULL); } -static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, +static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_end_account(item); @@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); + struct ib_ucontext_per_mm *per_mm = + container_of(mn, struct ib_ucontext_per_mm, mn); - if (!context->invalidate_range) + if (unlikely(!per_mm->active)) return; - /* - * TODO: we currently bail out if there is any sleepable work to be done - * in ib_umem_notifier_invalidate_range_start so we shouldn't really block - * here. But this is ugly and fragile. - */ - down_read(&context->umem_rwsem); - rbt_ib_umem_for_each_in_range(&context->umem_tree, start, + rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end, invalidate_range_end_trampoline, true, NULL); - up_read(&context->umem_rwsem); - ib_ucontext_notifier_end_account(context); + up_read(&per_mm->umem_rwsem); } static const struct mmu_notifier_ops ib_umem_notifiers = { @@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = { .invalidate_range_end = ib_umem_notifier_invalidate_range_end, }; -struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, - unsigned long addr, - size_t size) +static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) { - struct ib_umem *umem; + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; + struct ib_umem *umem = &umem_odp->umem; + + down_write(&per_mm->umem_rwsem); + if (likely(ib_umem_start(umem) != ib_umem_end(umem))) + rbt_ib_umem_insert(&umem_odp->interval_tree, + &per_mm->umem_tree); + up_write(&per_mm->umem_rwsem); +} + +static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) +{ + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; + struct ib_umem *umem = &umem_odp->umem; + + down_write(&per_mm->umem_rwsem); + if (likely(ib_umem_start(umem) != ib_umem_end(umem))) + rbt_ib_umem_remove(&umem_odp->interval_tree, + &per_mm->umem_tree); + complete_all(&umem_odp->notifier_completion); + + up_write(&per_mm->umem_rwsem); +} + +static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, + struct mm_struct *mm) +{ + struct ib_ucontext_per_mm *per_mm; + int ret; + + per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL); + if (!per_mm) + return ERR_PTR(-ENOMEM); + + per_mm->context = ctx; + per_mm->mm = mm; + per_mm->umem_tree = RB_ROOT_CACHED; + init_rwsem(&per_mm->umem_rwsem); + per_mm->active = ctx->invalidate_range; + + rcu_read_lock(); + per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + rcu_read_unlock(); + + WARN_ON(mm != current->mm); + + per_mm->mn.ops = &ib_umem_notifiers; + ret = mmu_notifier_register(&per_mm->mn, per_mm->mm); + if (ret) { + dev_err(&ctx->device->dev, + "Failed to register mmu_notifier %d\n", ret); + goto out_pid; + } + + list_add(&per_mm->ucontext_list, &ctx->per_mm_list); + return per_mm; + +out_pid: + put_pid(per_mm->tgid); + kfree(per_mm); + return ERR_PTR(ret); +} + +static int get_per_mm(struct ib_umem_odp *umem_odp) +{ + struct ib_ucontext *ctx = umem_odp->umem.context; + struct ib_ucontext_per_mm *per_mm; + + /* + * Generally speaking we expect only one or two per_mm in this list, + * so no reason to optimize this search today. + */ + mutex_lock(&ctx->per_mm_list_lock); + list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) { + if (per_mm->mm == umem_odp->umem.owning_mm) + goto found; + } + + per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm); + if (IS_ERR(per_mm)) { + mutex_unlock(&ctx->per_mm_list_lock); + return PTR_ERR(per_mm); + } + +found: + umem_odp->per_mm = per_mm; + per_mm->odp_mrs_count++; + mutex_unlock(&ctx->per_mm_list_lock); + + return 0; +} + +static void free_per_mm(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu)); +} + +void put_per_mm(struct ib_umem_odp *umem_odp) +{ + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; + struct ib_ucontext *ctx = umem_odp->umem.context; + bool need_free; + + mutex_lock(&ctx->per_mm_list_lock); + umem_odp->per_mm = NULL; + per_mm->odp_mrs_count--; + need_free = per_mm->odp_mrs_count == 0; + if (need_free) + list_del(&per_mm->ucontext_list); + mutex_unlock(&ctx->per_mm_list_lock); + + if (!need_free) + return; + + /* + * NOTE! mmu_notifier_unregister() can happen between a start/end + * callback, resulting in an start/end, and thus an unbalanced + * lock. This doesn't really matter to us since we are about to kfree + * the memory that holds the lock, however LOCKDEP doesn't like this. + */ + down_write(&per_mm->umem_rwsem); + per_mm->active = false; + up_write(&per_mm->umem_rwsem); + + WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); + mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm); + put_pid(per_mm->tgid); + mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm); +} + +struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, + unsigned long addr, size_t size) +{ + struct ib_ucontext *ctx = per_mm->context; struct ib_umem_odp *odp_data; + struct ib_umem *umem; int pages = size >> PAGE_SHIFT; int ret; - umem = kzalloc(sizeof(*umem), GFP_KERNEL); - if (!umem) + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); + if (!odp_data) return ERR_PTR(-ENOMEM); - - umem->context = context; + umem = &odp_data->umem; + umem->context = ctx; umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; umem->writable = 1; - - odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); - if (!odp_data) { - ret = -ENOMEM; - goto out_umem; - } - odp_data->umem = umem; + umem->is_odp = 1; + odp_data->per_mm = per_mm; mutex_init(&odp_data->umem_mutex); init_completion(&odp_data->notifier_completion); @@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, goto out_page_list; } - down_write(&context->umem_rwsem); - context->odp_mrs_count++; - rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree); - if (likely(!atomic_read(&context->notifier_count))) - odp_data->mn_counters_active = true; - else - list_add(&odp_data->no_private_counters, - &context->no_private_counters); - up_write(&context->umem_rwsem); - - umem->odp_data = odp_data; + /* + * Caller must ensure that the umem_odp that the per_mm came from + * cannot be freed during the call to ib_alloc_odp_umem. + */ + mutex_lock(&ctx->per_mm_list_lock); + per_mm->odp_mrs_count++; + mutex_unlock(&ctx->per_mm_list_lock); + add_umem_to_per_mm(odp_data); - return umem; + return odp_data; out_page_list: vfree(odp_data->page_list); out_odp_data: kfree(odp_data); -out_umem: - kfree(umem); return ERR_PTR(ret); } EXPORT_SYMBOL(ib_alloc_odp_umem); -int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, - int access) +int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) { + struct ib_umem *umem = &umem_odp->umem; + /* + * NOTE: This must called in a process context where umem->owning_mm + * == current->mm + */ + struct mm_struct *mm = umem->owning_mm; int ret_val; - struct pid *our_pid; - struct mm_struct *mm = get_task_mm(current); - - if (!mm) - return -EINVAL; if (access & IB_ACCESS_HUGETLB) { struct vm_area_struct *vma; @@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, umem->hugetlb = 0; } - /* Prevent creating ODP MRs in child processes */ - rcu_read_lock(); - our_pid = get_task_pid(current->group_leader, PIDTYPE_PID); - rcu_read_unlock(); - put_pid(our_pid); - if (context->tgid != our_pid) { - ret_val = -EINVAL; - goto out_mm; - } - - umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL); - if (!umem->odp_data) { - ret_val = -ENOMEM; - goto out_mm; - } - umem->odp_data->umem = umem; - - mutex_init(&umem->odp_data->umem_mutex); + mutex_init(&umem_odp->umem_mutex); - init_completion(&umem->odp_data->notifier_completion); + init_completion(&umem_odp->notifier_completion); if (ib_umem_num_pages(umem)) { - umem->odp_data->page_list = - vzalloc(array_size(sizeof(*umem->odp_data->page_list), + umem_odp->page_list = + vzalloc(array_size(sizeof(*umem_odp->page_list), ib_umem_num_pages(umem))); - if (!umem->odp_data->page_list) { - ret_val = -ENOMEM; - goto out_odp_data; - } + if (!umem_odp->page_list) + return -ENOMEM; - umem->odp_data->dma_list = - vzalloc(array_size(sizeof(*umem->odp_data->dma_list), + umem_odp->dma_list = + vzalloc(array_size(sizeof(*umem_odp->dma_list), ib_umem_num_pages(umem))); - if (!umem->odp_data->dma_list) { + if (!umem_odp->dma_list) { ret_val = -ENOMEM; goto out_page_list; } } - /* - * When using MMU notifiers, we will get a - * notification before the "current" task (and MM) is - * destroyed. We use the umem_rwsem semaphore to synchronize. - */ - down_write(&context->umem_rwsem); - context->odp_mrs_count++; - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) - rbt_ib_umem_insert(&umem->odp_data->interval_tree, - &context->umem_tree); - if (likely(!atomic_read(&context->notifier_count)) || - context->odp_mrs_count == 1) - umem->odp_data->mn_counters_active = true; - else - list_add(&umem->odp_data->no_private_counters, - &context->no_private_counters); - downgrade_write(&context->umem_rwsem); - - if (context->odp_mrs_count == 1) { - /* - * Note that at this point, no MMU notifier is running - * for this context! - */ - atomic_set(&context->notifier_count, 0); - INIT_HLIST_NODE(&context->mn.hlist); - context->mn.ops = &ib_umem_notifiers; - /* - * Lock-dep detects a false positive for mmap_sem vs. - * umem_rwsem, due to not grasping downgrade_write correctly. - */ - lockdep_off(); - ret_val = mmu_notifier_register(&context->mn, mm); - lockdep_on(); - if (ret_val) { - pr_err("Failed to register mmu_notifier %d\n", ret_val); - ret_val = -EBUSY; - goto out_mutex; - } - } - - up_read(&context->umem_rwsem); + ret_val = get_per_mm(umem_odp); + if (ret_val) + goto out_dma_list; + add_umem_to_per_mm(umem_odp); - /* - * Note that doing an mmput can cause a notifier for the relevant mm. - * If the notifier is called while we hold the umem_rwsem, this will - * cause a deadlock. Therefore, we release the reference only after we - * released the semaphore. - */ - mmput(mm); return 0; -out_mutex: - up_read(&context->umem_rwsem); - vfree(umem->odp_data->dma_list); +out_dma_list: + vfree(umem_odp->dma_list); out_page_list: - vfree(umem->odp_data->page_list); -out_odp_data: - kfree(umem->odp_data); -out_mm: - mmput(mm); + vfree(umem_odp->page_list); return ret_val; } -void ib_umem_odp_release(struct ib_umem *umem) +void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { - struct ib_ucontext *context = umem->context; + struct ib_umem *umem = &umem_odp->umem; /* * Ensure that no more pages are mapped in the umem. @@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem) * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ - ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem), + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), ib_umem_end(umem)); - down_write(&context->umem_rwsem); - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) - rbt_ib_umem_remove(&umem->odp_data->interval_tree, - &context->umem_tree); - context->odp_mrs_count--; - if (!umem->odp_data->mn_counters_active) { - list_del(&umem->odp_data->no_private_counters); - complete_all(&umem->odp_data->notifier_completion); - } - - /* - * Downgrade the lock to a read lock. This ensures that the notifiers - * (who lock the mutex for reading) will be able to finish, and we - * will be able to enventually obtain the mmu notifiers SRCU. Note - * that since we are doing it atomically, no other user could register - * and unregister while we do the check. - */ - downgrade_write(&context->umem_rwsem); - if (!context->odp_mrs_count) { - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(context->tgid, - PIDTYPE_PID); - if (owning_process == NULL) - /* - * The process is already dead, notifier were removed - * already. - */ - goto out; - - owning_mm = get_task_mm(owning_process); - if (owning_mm == NULL) - /* - * The process' mm is already dead, notifier were - * removed already. - */ - goto out_put_task; - mmu_notifier_unregister(&context->mn, owning_mm); - - mmput(owning_mm); - -out_put_task: - put_task_struct(owning_process); - } -out: - up_read(&context->umem_rwsem); - - vfree(umem->odp_data->dma_list); - vfree(umem->odp_data->page_list); - kfree(umem->odp_data); - kfree(umem); + remove_umem_from_per_mm(umem_odp); + put_per_mm(umem_odp); + vfree(umem_odp->dma_list); + vfree(umem_odp->page_list); } /* @@ -544,7 +492,7 @@ out: * @access_mask: access permissions needed for this page. * @current_seq: sequence number for synchronization with invalidations. * the sequence number is taken from - * umem->odp_data->notifiers_seq. + * umem_odp->notifiers_seq. * * The function returns -EFAULT if the DMA mapping operation fails. It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. @@ -554,12 +502,13 @@ out: * umem. */ static int ib_umem_odp_map_dma_single_page( - struct ib_umem *umem, + struct ib_umem_odp *umem_odp, int page_index, struct page *page, u64 access_mask, unsigned long current_seq) { + struct ib_umem *umem = &umem_odp->umem; struct ib_device *dev = umem->context->device; dma_addr_t dma_addr; int stored_page = 0; @@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page( * handle case of a racing notifier. This check also allows us to bail * early if we have a notifier running in parallel with us. */ - if (ib_umem_mmu_notifier_retry(umem, current_seq)) { + if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) { ret = -EAGAIN; goto out; } - if (!(umem->odp_data->dma_list[page_index])) { + if (!(umem_odp->dma_list[page_index])) { dma_addr = ib_dma_map_page(dev, page, 0, BIT(umem->page_shift), @@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page( ret = -EFAULT; goto out; } - umem->odp_data->dma_list[page_index] = dma_addr | access_mask; - umem->odp_data->page_list[page_index] = page; + umem_odp->dma_list[page_index] = dma_addr | access_mask; + umem_odp->page_list[page_index] = page; umem->npages++; stored_page = 1; - } else if (umem->odp_data->page_list[page_index] == page) { - umem->odp_data->dma_list[page_index] |= access_mask; + } else if (umem_odp->page_list[page_index] == page) { + umem_odp->dma_list[page_index] |= access_mask; } else { pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", - umem->odp_data->page_list[page_index], page); + umem_odp->page_list[page_index], page); /* Better remove the mapping now, to prevent any further * damage. */ remove_existing_mapping = 1; @@ -605,7 +554,7 @@ out: if (remove_existing_mapping && umem->context->invalidate_range) { invalidate_page_trampoline( - umem, + umem_odp, ib_umem_start(umem) + (page_index >> umem->page_shift), ib_umem_start(umem) + ((page_index + 1) >> umem->page_shift), @@ -621,7 +570,7 @@ out: * * Pins the range of pages passed in the argument, and maps them to * DMA addresses. The DMA addresses of the mapped pages is updated in - * umem->odp_data->dma_list. + * umem_odp->dma_list. * * Returns the number of pages mapped in success, negative error code * for failure. @@ -629,7 +578,7 @@ out: * the function from completing its task. * An -ENOENT error code indicates that userspace process is being terminated * and mm was already destroyed. - * @umem: the umem to map and pin + * @umem_odp: the umem to map and pin * @user_virt: the address from which we need to map. * @bcnt: the minimal number of bytes to pin and map. The mapping might be * bigger due to alignment, and may also be smaller in case of an error @@ -639,13 +588,15 @@ out: * range. * @current_seq: the MMU notifiers sequance value for synchronization with * invalidations. the sequance number is read from - * umem->odp_data->notifiers_seq before calling this function + * umem_odp->notifiers_seq before calling this function */ -int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, - u64 access_mask, unsigned long current_seq) +int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, + u64 bcnt, u64 access_mask, + unsigned long current_seq) { + struct ib_umem *umem = &umem_odp->umem; struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; + struct mm_struct *owning_mm = umem_odp->umem.owning_mm; struct page **local_page_list = NULL; u64 page_mask, off; int j, k, ret = 0, start_idx, npages = 0, page_shift; @@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, user_virt = user_virt & page_mask; bcnt += off; /* Charge for the first page offset as well. */ - owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID); - if (owning_process == NULL) { + /* + * owning_process is allowed to be NULL, this means somehow the mm is + * existing beyond the lifetime of the originating process.. Presumably + * mmget_not_zero will fail in this case. + */ + owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID); + if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) { ret = -EINVAL; - goto out_no_task; - } - - owning_mm = get_task_mm(owning_process); - if (owning_mm == NULL) { - ret = -ENOENT; goto out_put_task; } @@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, break; bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); - mutex_lock(&umem->odp_data->umem_mutex); + mutex_lock(&umem_odp->umem_mutex); for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) { if (user_virt & ~page_mask) { p += PAGE_SIZE; @@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, } ret = ib_umem_odp_map_dma_single_page( - umem, k, local_page_list[j], + umem_odp, k, local_page_list[j], access_mask, current_seq); if (ret < 0) break; @@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, p = page_to_phys(local_page_list[j]); k++; } - mutex_unlock(&umem->odp_data->umem_mutex); + mutex_unlock(&umem_odp->umem_mutex); if (ret < 0) { /* Release left over pages when handling errors. */ @@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, mmput(owning_mm); out_put_task: - put_task_struct(owning_process); -out_no_task: + if (owning_process) + put_task_struct(owning_process); free_page((unsigned long)local_page_list); return ret; } EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); -void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, +void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 bound) { + struct ib_umem *umem = &umem_odp->umem; int idx; u64 addr; struct ib_device *dev = umem->context->device; @@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem->odp_data->umem_mutex); + mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { idx = (addr - ib_umem_start(umem)) >> umem->page_shift; - if (umem->odp_data->page_list[idx]) { - struct page *page = umem->odp_data->page_list[idx]; - dma_addr_t dma = umem->odp_data->dma_list[idx]; + if (umem_odp->page_list[idx]) { + struct page *page = umem_odp->page_list[idx]; + dma_addr_t dma = umem_odp->dma_list[idx]; dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; WARN_ON(!dma_addr); @@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, /* on demand pinning support */ if (!umem->context->invalidate_range) put_page(page); - umem->odp_data->page_list[idx] = NULL; - umem->odp_data->dma_list[idx] = 0; + umem_odp->page_list[idx] = NULL; + umem_odp->dma_list[idx] = 0; umem->npages--; } } - mutex_unlock(&umem->odp_data->umem_mutex); + mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); @@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, return -EAGAIN; next = rbt_ib_umem_iter_next(node, start, last - 1); umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem->umem, start, last, cookie) || ret_val; + ret_val = cb(umem, start, last, cookie) || ret_val; } return ret_val; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 5df8e548cc14..c97935a0c7c6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -100,13 +100,14 @@ struct ib_uverbs_device { atomic_t refcount; int num_comp_vectors; struct completion comp; - struct device *dev; + struct device dev; + /* First group for device attributes, NULL terminated array */ + const struct attribute_group *groups[2]; struct ib_device __rcu *ib_dev; int devnum; struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; - struct kobject kobj; struct srcu_struct disassociate_srcu; struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; @@ -146,7 +147,6 @@ struct ib_uverbs_file { struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; struct list_head list; - int is_closed; /* * To access the uobjects list hw_destroy_rwsem must be held for write @@ -158,6 +158,9 @@ struct ib_uverbs_file { spinlock_t uobjects_lock; struct list_head uobjects; + struct mutex umap_lock; + struct list_head umaps; + u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; @@ -218,12 +221,6 @@ struct ib_ucq_object { u32 async_events_reported; }; -struct ib_uflow_resources; -struct ib_uflow_object { - struct ib_uobject uobject; - struct ib_uflow_resources *resources; -}; - extern const struct file_operations uverbs_event_fops; void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a21d5214afc3..91d3e4029cd5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, /* ufile is required when some objects are released */ ucontext->ufile = file; - rcu_read_lock(); - ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); - rcu_read_unlock(); - ucontext->closing = 0; + ucontext->closing = false; ucontext->cleanup_retryable = false; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - ucontext->umem_tree = RB_ROOT_CACHED; - init_rwsem(&ucontext->umem_rwsem); - ucontext->odp_mrs_count = 0; - INIT_LIST_HEAD(&ucontext->no_private_counters); - + mutex_init(&ucontext->per_mm_list_lock); + INIT_LIST_HEAD(&ucontext->per_mm_list); if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) ucontext->invalidate_range = NULL; @@ -172,7 +166,6 @@ err_fd: put_unused_fd(resp.async_fd); err_free: - put_pid(ucontext->tgid); ib_dev->dealloc_ucontext(ucontext); err_alloc: @@ -2747,16 +2740,7 @@ out_put: return ret ? ret : in_len; } -struct ib_uflow_resources { - size_t max; - size_t num; - size_t collection_num; - size_t counters_num; - struct ib_counters **counters; - struct ib_flow_action **collection; -}; - -static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) +struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) { struct ib_uflow_resources *resources; @@ -2786,6 +2770,7 @@ err: return NULL; } +EXPORT_SYMBOL(flow_resources_alloc); void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) { @@ -2804,10 +2789,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) kfree(uflow_res->counters); kfree(uflow_res); } +EXPORT_SYMBOL(ib_uverbs_flow_resources_free); -static void flow_resources_add(struct ib_uflow_resources *uflow_res, - enum ib_flow_spec_type type, - void *ibobj) +void flow_resources_add(struct ib_uflow_resources *uflow_res, + enum ib_flow_spec_type type, + void *ibobj) { WARN_ON(uflow_res->num >= uflow_res->max); @@ -2828,6 +2814,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res, uflow_res->num++; } +EXPORT_SYMBOL(flow_resources_add); static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, @@ -3462,7 +3449,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow_resp resp; struct ib_uobject *uobj; - struct ib_uflow_object *uflow; struct ib_flow *flow_id; struct ib_uverbs_flow_attr *kern_flow_attr; struct ib_flow_attr *flow_attr; @@ -3601,13 +3587,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = PTR_ERR(flow_id); goto err_free; } - atomic_inc(&qp->usecnt); - flow_id->qp = qp; - flow_id->device = qp->device; - flow_id->uobject = uobj; - uobj->object = flow_id; - uflow = container_of(uobj, typeof(*uflow), uobject); - uflow->resources = uflow_res; + + ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res); memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 1a6b229e3db3..0e95a5888274 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -57,6 +57,7 @@ struct bundle_priv { struct ib_uverbs_attr *uattrs; DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); + DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); /* * Must be last. bundle ends in a flex array which overlaps @@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } +static int uverbs_process_idrs_array(struct bundle_priv *pbundle, + const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + struct ib_uverbs_attr *uattr, + u32 attr_bkey) +{ + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + size_t array_len; + u32 *idr_vals; + int ret = 0; + size_t i; + + if (uattr->attr_data.reserved) + return -EINVAL; + + if (uattr->len % sizeof(u32)) + return -EINVAL; + + array_len = uattr->len / sizeof(u32); + if (array_len < spec->u2.objs_arr.min_len || + array_len > spec->u2.objs_arr.max_len) + return -EINVAL; + + attr->uobjects = + uverbs_alloc(&pbundle->bundle, + array_size(array_len, sizeof(*attr->uobjects))); + if (IS_ERR(attr->uobjects)) + return PTR_ERR(attr->uobjects); + + /* + * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects + * to store idrs array and avoid additional memory allocation. The + * idrs array is offset to the end of the uobjects array so we will be + * able to read idr and replace with a pointer. + */ + idr_vals = (u32 *)(attr->uobjects + array_len) - array_len; + + if (uattr->len > sizeof(uattr->data)) { + ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data), + uattr->len); + if (ret) + return -EFAULT; + } else { + memcpy(idr_vals, &uattr->data, uattr->len); + } + + for (i = 0; i != array_len; i++) { + attr->uobjects[i] = uverbs_get_uobject_from_file( + spec->u2.objs_arr.obj_type, pbundle->bundle.ufile, + spec->u2.objs_arr.access, idr_vals[i]); + if (IS_ERR(attr->uobjects[i])) { + ret = PTR_ERR(attr->uobjects[i]); + break; + } + } + + attr->len = i; + __set_bit(attr_bkey, pbundle->spec_finalize); + return ret; +} + +static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + bool commit) +{ + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + int current_ret; + int ret = 0; + size_t i; + + for (i = 0; i != attr->len; i++) { + current_ret = uverbs_finalize_object( + attr->uobjects[i], spec->u2.objs_arr.access, commit); + if (!ret) + ret = current_ret; + } + + return ret; +} + static int uverbs_process_attr(struct bundle_priv *pbundle, const struct uverbs_api_attr *attr_uapi, struct ib_uverbs_attr *uattr, u32 attr_bkey) @@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, } break; + + case UVERBS_ATTR_TYPE_IDRS_ARRAY: + return uverbs_process_idrs_array(pbundle, attr_uapi, + &e->objs_arr_attr, uattr, + attr_bkey); default: return -EOPNOTSUPP; } @@ -384,6 +470,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) unsigned int i; int ret = 0; + /* fast path for simple uobjects */ i = -1; while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, i + 1)) < key_bitmap_len) { @@ -397,6 +484,32 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) ret = current_ret; } + i = -1; + while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + const struct uverbs_api_attr *attr_uapi; + void __rcu **slot; + int current_ret; + + slot = uapi_get_attr_for_method( + pbundle, + pbundle->method_key | uapi_bkey_to_key_attr(i)); + if (WARN_ON(!slot)) + continue; + + attr_uapi = srcu_dereference( + *slot, + &pbundle->bundle.ufile->device->disassociate_srcu); + + if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { + current_ret = uverbs_free_idrs_array( + attr_uapi, &attr->objs_arr_attr, commit); + if (!ret) + ret = current_ret; + } + } + for (memblock = pbundle->allocated_mem; memblock;) { struct bundle_alloc_head *tmp = memblock; @@ -461,6 +574,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, memset(pbundle->bundle.attr_present, 0, sizeof(pbundle->bundle.attr_present)); memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); + memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); destroy_ret = bundle_destroy(pbundle, ret == 0); @@ -611,3 +725,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, return 0; } EXPORT_SYMBOL(uverbs_copy_to); + +int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val) +{ + const struct uverbs_attr *attr; + + attr = uverbs_attr_get(attrs_bundle, idx); + if (IS_ERR(attr)) { + if ((PTR_ERR(attr) != -ENOENT) || !def_val) + return PTR_ERR(attr); + + *to = *def_val; + } else { + *to = attr->ptr_attr.data; + } + + if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(_uverbs_get_const); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 823beca448e1..8d56773aac56 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -45,6 +45,7 @@ #include <linux/cdev.h> #include <linux/anon_inodes.h> #include <linux/slab.h> +#include <linux/sched/mm.h> #include <linux/uaccess.h> @@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw) return ret; } -static void ib_uverbs_release_dev(struct kobject *kobj) +static void ib_uverbs_release_dev(struct device *device) { struct ib_uverbs_device *dev = - container_of(kobj, struct ib_uverbs_device, kobj); + container_of(device, struct ib_uverbs_device, dev); uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); kfree(dev); } -static struct kobj_type ib_uverbs_dev_ktype = { - .release = ib_uverbs_release_dev, -}; - static void ib_uverbs_release_async_event_file(struct kref *ref) { struct ib_uverbs_async_event_file *file = @@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); - kobject_put(&file->device->kobj); + put_device(&file->device->dev); kfree(file); } @@ -816,6 +813,226 @@ out: } /* + * Each time we map IO memory into user space this keeps track of the mapping. + * When the device is hot-unplugged we 'zap' the mmaps in user space to point + * to the zero page and allow the hot unplug to proceed. + * + * This is necessary for cases like PCI physical hot unplug as the actual BAR + * memory may vanish after this and access to it from userspace could MCE. + * + * RDMA drivers supporting disassociation must have their user space designed + * to cope in some way with their IO pages going to the zero page. + */ +struct rdma_umap_priv { + struct vm_area_struct *vma; + struct list_head list; +}; + +static const struct vm_operations_struct rdma_umap_ops; + +static void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma) +{ + struct ib_uverbs_file *ufile = vma->vm_file->private_data; + + priv->vma = vma; + vma->vm_private_data = priv; + vma->vm_ops = &rdma_umap_ops; + + mutex_lock(&ufile->umap_lock); + list_add(&priv->list, &ufile->umaps); + mutex_unlock(&ufile->umap_lock); +} + +/* + * The VMA has been dup'd, initialize the vm_private_data with a new tracking + * struct + */ +static void rdma_umap_open(struct vm_area_struct *vma) +{ + struct ib_uverbs_file *ufile = vma->vm_file->private_data; + struct rdma_umap_priv *opriv = vma->vm_private_data; + struct rdma_umap_priv *priv; + + if (!opriv) + return; + + /* We are racing with disassociation */ + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + goto out_zap; + /* + * Disassociation already completed, the VMA should already be zapped. + */ + if (!ufile->ucontext) + goto out_unlock; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + goto out_unlock; + rdma_umap_priv_init(priv, vma); + + up_read(&ufile->hw_destroy_rwsem); + return; + +out_unlock: + up_read(&ufile->hw_destroy_rwsem); +out_zap: + /* + * We can't allow the VMA to be created with the actual IO pages, that + * would break our API contract, and it can't be stopped at this + * point, so zap it. + */ + vma->vm_private_data = NULL; + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); +} + +static void rdma_umap_close(struct vm_area_struct *vma) +{ + struct ib_uverbs_file *ufile = vma->vm_file->private_data; + struct rdma_umap_priv *priv = vma->vm_private_data; + + if (!priv) + return; + + /* + * The vma holds a reference on the struct file that created it, which + * in turn means that the ib_uverbs_file is guaranteed to exist at + * this point. + */ + mutex_lock(&ufile->umap_lock); + list_del(&priv->list); + mutex_unlock(&ufile->umap_lock); + kfree(priv); +} + +static const struct vm_operations_struct rdma_umap_ops = { + .open = rdma_umap_open, + .close = rdma_umap_close, +}; + +static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, + struct vm_area_struct *vma, + unsigned long size) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + struct rdma_umap_priv *priv; + + if (vma->vm_end - vma->vm_start != size) + return ERR_PTR(-EINVAL); + + /* Driver is using this wrong, must be called by ib_uverbs_mmap */ + if (WARN_ON(!vma->vm_file || + vma->vm_file->private_data != ufile)) + return ERR_PTR(-EINVAL); + lockdep_assert_held(&ufile->device->disassociate_srcu); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + return priv; +} + +/* + * Map IO memory into a process. This is to be called by drivers as part of + * their mmap() functions if they wish to send something like PCI-E BAR memory + * to userspace. + */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); + + if (IS_ERR(priv)) + return PTR_ERR(priv); + + vma->vm_page_prot = prot; + if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { + kfree(priv); + return -EAGAIN; + } + + rdma_umap_priv_init(priv, vma); + return 0; +} +EXPORT_SYMBOL(rdma_user_mmap_io); + +/* + * The page case is here for a slightly different reason, the driver expects + * to be able to free the page it is sharing to user space when it destroys + * its ucontext, which means we need to zap the user space references. + * + * We could handle this differently by providing an API to allocate a shared + * page and then only freeing the shared page when the last ufile is + * destroyed. + */ +int rdma_user_mmap_page(struct ib_ucontext *ucontext, + struct vm_area_struct *vma, struct page *page, + unsigned long size) +{ + struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); + + if (IS_ERR(priv)) + return PTR_ERR(priv); + + if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, + vma->vm_page_prot)) { + kfree(priv); + return -EAGAIN; + } + + rdma_umap_priv_init(priv, vma); + return 0; +} +EXPORT_SYMBOL(rdma_user_mmap_page); + +void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) +{ + struct rdma_umap_priv *priv, *next_priv; + + lockdep_assert_held(&ufile->hw_destroy_rwsem); + + while (1) { + struct mm_struct *mm = NULL; + + /* Get an arbitrary mm pointer that hasn't been cleaned yet */ + mutex_lock(&ufile->umap_lock); + if (!list_empty(&ufile->umaps)) { + mm = list_first_entry(&ufile->umaps, + struct rdma_umap_priv, list) + ->vma->vm_mm; + mmget(mm); + } + mutex_unlock(&ufile->umap_lock); + if (!mm) + return; + + /* + * The umap_lock is nested under mmap_sem since it used within + * the vma_ops callbacks, so we have to clean the list one mm + * at a time to get the lock ordering right. Typically there + * will only be one mm, so no big deal. + */ + down_write(&mm->mmap_sem); + mutex_lock(&ufile->umap_lock); + list_for_each_entry_safe (priv, next_priv, &ufile->umaps, + list) { + struct vm_area_struct *vma = priv->vma; + + if (vma->vm_mm != mm) + continue; + list_del_init(&priv->list); + + zap_vma_ptes(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); + } + mutex_unlock(&ufile->umap_lock); + up_write(&mm->mmap_sem); + mmput(mm); + } +} + +/* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and @@ -838,6 +1055,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; + get_device(&dev->dev); srcu_key = srcu_read_lock(&dev->disassociate_srcu); mutex_lock(&dev->lists_mutex); ib_dev = srcu_dereference(dev->ib_dev, @@ -875,9 +1093,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); init_rwsem(&file->hw_destroy_rwsem); + mutex_init(&file->umap_lock); + INIT_LIST_HEAD(&file->umaps); filp->private_data = file; - kobject_get(&dev->kobj); list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); @@ -898,6 +1117,7 @@ err: if (atomic_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); + put_device(&dev->dev); return ret; } @@ -908,10 +1128,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); mutex_lock(&file->device->lists_mutex); - if (!file->is_closed) { - list_del(&file->list); - file->is_closed = 1; - } + list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); if (file->async_file) @@ -950,17 +1167,15 @@ static struct ib_client uverbs_client = { .remove = ib_uverbs_remove_one }; -static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, +static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, char *buf) { + struct ib_uverbs_device *dev = + container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; - struct ib_uverbs_device *dev = dev_get_drvdata(device); struct ib_device *ib_dev; - if (!dev) - return -ENODEV; - srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) @@ -969,18 +1184,17 @@ static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, return ret; } -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR_RO(ibdev); -static ssize_t show_dev_abi_version(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t abi_version_show(struct device *device, + struct device_attribute *attr, char *buf) { - struct ib_uverbs_device *dev = dev_get_drvdata(device); + struct ib_uverbs_device *dev = + container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; - if (!dev) - return -ENODEV; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) @@ -989,7 +1203,17 @@ static ssize_t show_dev_abi_version(struct device *device, return ret; } -static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); +static DEVICE_ATTR_RO(abi_version); + +static struct attribute *ib_dev_attrs[] = { + &dev_attr_abi_version.attr, + &dev_attr_ibdev.attr, + NULL, +}; + +static const struct attribute_group dev_attr_group = { + .attrs = ib_dev_attrs, +}; static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); @@ -1027,14 +1251,21 @@ static void ib_uverbs_add_one(struct ib_device *device) return; } + device_initialize(&uverbs_dev->dev); + uverbs_dev->dev.class = uverbs_class; + uverbs_dev->dev.parent = device->dev.parent; + uverbs_dev->dev.release = ib_uverbs_release_dev; + uverbs_dev->groups[0] = &dev_attr_group; + uverbs_dev->dev.groups = uverbs_dev->groups; atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); - kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); + rcu_assign_pointer(uverbs_dev->ib_dev, device); + uverbs_dev->num_comp_vectors = device->num_comp_vectors; devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (devnum >= IB_UVERBS_MAX_DEVICES) @@ -1046,47 +1277,30 @@ static void ib_uverbs_add_one(struct ib_device *device) else base = IB_UVERBS_BASE_DEV + devnum; - rcu_assign_pointer(uverbs_dev->ib_dev, device); - uverbs_dev->num_comp_vectors = device->num_comp_vectors; - if (ib_uverbs_create_uapi(device, uverbs_dev)) - goto err; + goto err_uapi; + + uverbs_dev->dev.devt = base; + dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); - cdev_init(&uverbs_dev->cdev, NULL); + cdev_init(&uverbs_dev->cdev, + device->mmap ? &uverbs_mmap_fops : &uverbs_fops); uverbs_dev->cdev.owner = THIS_MODULE; - uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; - cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj); - kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->cdev, base, 1)) - goto err_cdev; - - uverbs_dev->dev = device_create(uverbs_class, device->dev.parent, - uverbs_dev->cdev.dev, uverbs_dev, - "uverbs%d", uverbs_dev->devnum); - if (IS_ERR(uverbs_dev->dev)) - goto err_cdev; - - if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) - goto err_class; - if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) - goto err_class; - ib_set_client_data(device, &uverbs_client, uverbs_dev); + ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); + if (ret) + goto err_uapi; + ib_set_client_data(device, &uverbs_client, uverbs_dev); return; -err_class: - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - -err_cdev: - cdev_del(&uverbs_dev->cdev); +err_uapi: clear_bit(devnum, dev_map); - err: if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kobject_put(&uverbs_dev->kobj); + put_device(&uverbs_dev->dev); return; } @@ -1107,8 +1321,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, while (!list_empty(&uverbs_dev->uverbs_file_list)) { file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); - file->is_closed = 1; - list_del(&file->list); + list_del_init(&file->list); kref_get(&file->ref); /* We must release the mutex before going ahead and calling @@ -1156,9 +1369,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) if (!uverbs_dev) return; - dev_set_drvdata(uverbs_dev->dev, NULL); - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - cdev_del(&uverbs_dev->cdev); + cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); clear_bit(uverbs_dev->devnum, dev_map); if (device->disassociate_ucontext) { @@ -1182,7 +1393,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) if (wait_clients) wait_for_completion(&uverbs_dev->comp); - kobject_put(&uverbs_dev->kobj); + put_device(&uverbs_dev->dev); } static char *uverbs_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index d8cfafe23bd9..cb9486ad5c67 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( if (IS_ERR(action)) return PTR_ERR(action); - atomic_set(&action->usecnt, 0); - action->device = ib_dev; - action->type = IB_FLOW_ACTION_ESP; - action->uobject = uobj; - uobj->object = action; + uverbs_flow_action_fill_action(action, uobj, ib_dev, + IB_FLOW_ACTION_ESP); return 0; } diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 73ea6f0db88f..cdf5ced2c84f 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi, if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) method_elm->driver_method |= is_driver; + /* + * Like other uobject based things we only support a single + * uobject being NEW'd or DESTROY'd + */ + if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { + u8 access = attr->attr.u2.objs_arr.access; + + if (WARN_ON(access == UVERBS_ACCESS_NEW || + access == UVERBS_ACCESS_DESTROY)) + return -EINVAL; + } + attr_slot = uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), sizeof(*attr_slot)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6ee03d6089eb..c36be384fe34 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device, ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ah_attr->roce.dmac, - sgid_attr->ndev, &hop_limit); + sgid_attr, &hop_limit); grh->hop_limit = hop_limit; return ret; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index bbfb86eb2d24..bc2b9e038439 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -833,6 +833,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) "Failed to destroy Shadow QP"); return rc; } + bnxt_qplib_free_qp_res(&rdev->qplib_res, + &rdev->qp1_sqp->qplib_qp); mutex_lock(&rdev->qp_lock); list_del(&rdev->qp1_sqp->list); atomic_dec(&rdev->qp_count); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e426b990c1dd..6eaa4b2abb78 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -36,6 +36,8 @@ * Description: Fast Path Operators */ +#define dev_fmt(fmt) "QPLIB: " fmt + #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/sched.h> @@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) if (!qp->sq.flushed) { dev_dbg(&scq->hwq.pdev->dev, - "QPLIB: FP: Adding to SQ Flush list = %p", - qp); + "FP: Adding to SQ Flush list = %p\n", qp); bnxt_qplib_cancel_phantom_processing(qp); list_add_tail(&qp->sq_flush, &scq->sqf_head); qp->sq.flushed = true; @@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) if (!qp->srq) { if (!qp->rq.flushed) { dev_dbg(&rcq->hwq.pdev->dev, - "QPLIB: FP: Adding to RQ Flush list = %p", - qp); + "FP: Adding to RQ Flush list = %p\n", qp); list_add_tail(&qp->rq_flush, &rcq->rqf_head); qp->rq.flushed = true; } @@ -196,7 +196,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - struct bnxt_qplib_q *sq = &qp->rq; + struct bnxt_qplib_q *sq = &qp->sq; int rc = 0; if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { @@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, if (!qp->sq_hdr_buf) { rc = -ENOMEM; dev_err(&res->pdev->dev, - "QPLIB: Failed to create sq_hdr_buf"); + "Failed to create sq_hdr_buf\n"); goto fail; } } @@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, if (!qp->rq_hdr_buf) { rc = -ENOMEM; dev_err(&res->pdev->dev, - "QPLIB: Failed to create rq_hdr_buf"); + "Failed to create rq_hdr_buf\n"); goto fail; } } @@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data) num_cqne_processed++; else dev_warn(&nq->pdev->dev, - "QPLIB: cqn - type 0x%x not handled", - type); + "cqn - type 0x%x not handled\n", type); spin_unlock_bh(&cq->compl_lock); break; } @@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data) num_srqne_processed++; else dev_warn(&nq->pdev->dev, - "QPLIB: SRQ event 0x%x not handled", + "SRQ event 0x%x not handled\n", nqsrqe->event); break; } @@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data) break; default: dev_warn(&nq->pdev->dev, - "QPLIB: nqe with type = 0x%x not handled", - type); + "nqe with type = 0x%x not handled\n", type); break; } raw_cons++; @@ -396,7 +394,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, rc = irq_set_affinity_hint(nq->vector, &nq->mask); if (rc) { dev_warn(&nq->pdev->dev, - "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", + "set affinity failed; vector: %d nq_idx: %d\n", nq->vector, nq_indx); } nq->requested = true; @@ -443,7 +441,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); if (rc) { dev_err(&nq->pdev->dev, - "QPLIB: Failed to request irq for nq-idx %d", nq_idx); + "Failed to request irq for nq-idx %d\n", nq_idx); goto fail; } @@ -662,8 +660,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, spin_lock(&srq_hwq->lock); if (srq->start_idx == srq->last_idx) { - dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!", - srq->id); + dev_err(&srq_hwq->pdev->dev, + "FP: SRQ (0x%x) is full!\n", srq->id); rc = -EINVAL; spin_unlock(&srq_hwq->lock); goto done; @@ -1324,7 +1322,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } } if (i == res->sgid_tbl.max) - dev_warn(&res->pdev->dev, "QPLIB: SGID not found??"); + dev_warn(&res->pdev->dev, "SGID not found??\n"); qp->ah.hop_limit = sb->hop_limit; qp->ah.traffic_class = sb->traffic_class; @@ -1536,7 +1534,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, if (bnxt_qplib_queue_full(sq)) { dev_err(&sq->hwq.pdev->dev, - "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x", + "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, sq->q_full_delta); rc = -ENOMEM; @@ -1561,7 +1559,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, /* Copy the inline data */ if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { dev_warn(&sq->hwq.pdev->dev, - "QPLIB: Inline data length > 96 detected"); + "Inline data length > 96 detected\n"); data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; } else { data_len = wqe->inline_len; @@ -1776,7 +1774,7 @@ done: queue_work(qp->scq->nq->cqn_wq, &nq_work->work); } else { dev_err(&sq->hwq.pdev->dev, - "QPLIB: FP: Failed to allocate SQ nq_work!"); + "FP: Failed to allocate SQ nq_work!\n"); rc = -ENOMEM; } } @@ -1815,13 +1813,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; dev_dbg(&rq->hwq.pdev->dev, - "%s Error QP. Scheduling for poll_cq\n", - __func__); + "%s: Error QP. Scheduling for poll_cq\n", __func__); goto queue_err; } if (bnxt_qplib_queue_full(rq)) { dev_err(&rq->hwq.pdev->dev, - "QPLIB: FP: QP (0x%x) RQ is full!", qp->id); + "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } @@ -1870,7 +1867,7 @@ queue_err: queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); } else { dev_err(&rq->hwq.pdev->dev, - "QPLIB: FP: Failed to allocate RQ nq_work!"); + "FP: Failed to allocate RQ nq_work!\n"); rc = -ENOMEM; } } @@ -1932,7 +1929,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) if (!cq->dpi) { dev_err(&rcfw->pdev->dev, - "QPLIB: FP: CREATE_CQ failed due to NULL DPI"); + "FP: CREATE_CQ failed due to NULL DPI\n"); return -EINVAL; } req.dpi = cpu_to_le32(cq->dpi->dpi); @@ -2172,7 +2169,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, * comes back */ dev_dbg(&cq->hwq.pdev->dev, - "FP:Got Phantom CQE"); + "FP: Got Phantom CQE\n"); sq->condition = false; sq->single = true; rc = 0; @@ -2189,7 +2186,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, peek_raw_cq_cons++; } dev_err(&cq->hwq.pdev->dev, - "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x", + "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); rc = -EINVAL; } @@ -2213,7 +2210,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, le64_to_cpu(hwcqe->qp_handle)); if (!qp) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: Process Req qp is NULL"); + "FP: Process Req qp is NULL\n"); return -EINVAL; } sq = &qp->sq; @@ -2221,16 +2218,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); if (cqe_sq_cons > sq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process req reported "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x", + "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n", cqe_sq_cons, sq->hwq.max_elements); return -EINVAL; } if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + "%s: QP in Flush QP = %p\n", __func__, qp); goto done; } /* Require to walk the sq's swq to fabricate CQEs for all previously @@ -2262,9 +2257,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, hwcqe->status != CQ_REQ_STATUS_OK) { cqe->status = hwcqe->status; dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Processed Req "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id[%d] = 0x%llx with status 0x%x", + "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", sw_sq_cons, cqe->wr_id, cqe->status); cqe++; (*budget)--; @@ -2330,12 +2323,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); + dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n"); return -EINVAL; } if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + "%s: QP in Flush QP = %p\n", __func__, qp); goto done; } @@ -2356,9 +2349,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, return -EINVAL; if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process RC "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x", + "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n", wr_id_idx, srq->hwq.max_elements); return -EINVAL; } @@ -2371,9 +2362,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, rq = &qp->rq; if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process RC "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x", + "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", wr_id_idx, rq->hwq.max_elements); return -EINVAL; } @@ -2409,12 +2398,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); + dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n"); return -EINVAL; } if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + "%s: QP in Flush QP = %p\n", __func__, qp); goto done; } cqe = *pcqe; @@ -2439,9 +2428,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process UD "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x", + "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n", wr_id_idx, srq->hwq.max_elements); return -EINVAL; } @@ -2454,9 +2441,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, rq = &qp->rq; if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process UD "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x", + "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", wr_id_idx, rq->hwq.max_elements); return -EINVAL; } @@ -2508,13 +2493,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { - dev_err(&cq->hwq.pdev->dev, - "QPLIB: process_cq Raw/QP1 qp is NULL"); + dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n"); return -EINVAL; } if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + "%s: QP in Flush QP = %p\n", __func__, qp); goto done; } cqe = *pcqe; @@ -2543,14 +2527,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, srq = qp->srq; if (!srq) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: SRQ used but not defined??"); + "FP: SRQ used but not defined??\n"); return -EINVAL; } if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process Raw/QP1 "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x", + "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n", wr_id_idx, srq->hwq.max_elements); return -EINVAL; } @@ -2563,9 +2545,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, rq = &qp->rq; if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: ix 0x%x exceeded RQ max 0x%x", + "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", wr_id_idx, rq->hwq.max_elements); return -EINVAL; } @@ -2600,14 +2580,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, /* Check the Status */ if (hwcqe->status != CQ_TERMINAL_STATUS_OK) dev_warn(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process Terminal Error status = 0x%x", + "FP: CQ Process Terminal Error status = 0x%x\n", hwcqe->status); qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process terminal qp is NULL"); + "FP: CQ Process terminal qp is NULL\n"); return -EINVAL; } @@ -2623,16 +2603,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, if (cqe_cons > sq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process terminal reported "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x", + "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n", cqe_cons, sq->hwq.max_elements); goto do_rq; } if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + "%s: QP in Flush QP = %p\n", __func__, qp); goto sq_done; } @@ -2673,16 +2651,14 @@ do_rq: goto done; } else if (cqe_cons > rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Processed terminal "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x", + "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", cqe_cons, rq->hwq.max_elements); goto done; } if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + "%s: QP in Flush QP = %p\n", __func__, qp); rc = 0; goto done; } @@ -2704,7 +2680,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq, /* Check the Status */ if (hwcqe->status != CQ_CUTOFF_STATUS_OK) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process Cutoff Error status = 0x%x", + "FP: CQ Process Cutoff Error status = 0x%x\n", hwcqe->status); return -EINVAL; } @@ -2724,16 +2700,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, spin_lock_irqsave(&cq->flush_lock, flags); list_for_each_entry(qp, &cq->sqf_head, sq_flush) { - dev_dbg(&cq->hwq.pdev->dev, - "QPLIB: FP: Flushing SQ QP= %p", - qp); + dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp); __flush_sq(&qp->sq, qp, &cqe, &budget); } list_for_each_entry(qp, &cq->rqf_head, rq_flush) { - dev_dbg(&cq->hwq.pdev->dev, - "QPLIB: FP: Flushing RQ QP= %p", - qp); + dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp); __flush_rq(&qp->rq, qp, &cqe, &budget); } spin_unlock_irqrestore(&cq->flush_lock, flags); @@ -2801,7 +2773,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, goto exit; default: dev_err(&cq->hwq.pdev->dev, - "QPLIB: process_cq unknown type 0x%lx", + "process_cq unknown type 0x%lx\n", hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK); rc = -EINVAL; @@ -2814,7 +2786,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, * next one */ dev_err(&cq->hwq.pdev->dev, - "QPLIB: process_cqe error rc = 0x%x", rc); + "process_cqe error rc = 0x%x\n", rc); } raw_cons++; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 2852d350ada1..14105004d258 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -35,6 +35,9 @@ * * Description: RDMA Controller HW interface */ + +#define dev_fmt(fmt) "QPLIB: " fmt + #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/pci.h> @@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { dev_err(&rcfw->pdev->dev, - "QPLIB: RCFW not initialized, reject opcode 0x%x", - opcode); + "RCFW not initialized, reject opcode 0x%x\n", opcode); return -EINVAL; } if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { - dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!"); + dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n"); return -EINVAL; } @@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, */ spin_lock_irqsave(&cmdq->lock, flags); if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) { - dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!"); + dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n"); spin_unlock_irqrestore(&cmdq->lock, flags); return -EAGAIN; } @@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; if (!cmdqe) { dev_err(&rcfw->pdev->dev, - "QPLIB: RCFW request failed with no cmdqe!"); + "RCFW request failed with no cmdqe!\n"); goto done; } /* Copy a segment of the req cmd to the cmdq */ @@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) { /* send failed */ - dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed", + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n", cookie, opcode); return rc; } @@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, rc = __wait_for_resp(rcfw, cookie); if (rc) { /* timed out */ - dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec", + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n", cookie, opcode, RCFW_CMD_WAIT_TIME_MS); set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags); return rc; @@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, if (evnt->status) { /* failed with status */ - dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x", + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n", cookie, opcode, evnt->status); rc = -EFAULT; } @@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, qp_id = le32_to_cpu(err_event->xid); qp = rcfw->qp_tbl[qp_id].qp_handle; dev_dbg(&rcfw->pdev->dev, - "QPLIB: Received QP error notification"); + "Received QP error notification\n"); dev_dbg(&rcfw->pdev->dev, - "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", + "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", qp_id, err_event->req_err_state_reason, err_event->res_err_state_reason); if (!qp) @@ -323,13 +325,13 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, crsqe->resp = NULL; } else { dev_err(&rcfw->pdev->dev, - "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x", + "CMD %s resp->cookie = %#x, evnt->cookie = %#x\n", crsqe->resp ? "mismatch" : "collision", crsqe->resp ? crsqe->resp->cookie : 0, mcookie); } if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap)) dev_warn(&rcfw->pdev->dev, - "QPLIB: CMD bit %d was not requested", cbit); + "CMD bit %d was not requested\n", cbit); cmdq->cons += crsqe->req_size; crsqe->req_size = 0; @@ -376,14 +378,13 @@ static void bnxt_qplib_service_creq(unsigned long data) (rcfw, (struct creq_func_event *)creqe)) rcfw->creq_func_event_processed++; else - dev_warn - (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled", - type); + dev_warn(&rcfw->pdev->dev, + "aeqe:%#x Not handled\n", type); break; default: - dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with "); dev_warn(&rcfw->pdev->dev, - "QPLIB: op_event = 0x%x not handled", type); + "creqe with op_event = 0x%x not handled\n", + type); break; } raw_cons++; @@ -551,7 +552,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, HWQ_TYPE_L2_CMPL)) { dev_err(&rcfw->pdev->dev, - "QPLIB: HW channel CREQ allocation failed"); + "HW channel CREQ allocation failed\n"); goto fail; } rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; @@ -560,7 +561,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, HWQ_TYPE_CTX)) { dev_err(&rcfw->pdev->dev, - "QPLIB: HW channel CMDQ allocation failed"); + "HW channel CMDQ allocation failed\n"); goto fail; } @@ -616,7 +617,7 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size); if (indx != rcfw->bmap_size) dev_err(&rcfw->pdev->dev, - "QPLIB: disabling RCFW with pending cmd-bit %lx", indx); + "disabling RCFW with pending cmd-bit %lx\n", indx); kfree(rcfw->cmdq_bitmap); rcfw->bmap_size = 0; @@ -681,8 +682,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, RCFW_COMM_BASE_OFFSET, RCFW_COMM_SIZE); if (!rcfw->cmdq_bar_reg_iomem) { - dev_err(&rcfw->pdev->dev, - "QPLIB: CMDQ BAR region %d mapping failed", + dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n", rcfw->cmdq_bar_reg); return -ENOMEM; } @@ -697,13 +697,12 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, res_base = pci_resource_start(pdev, rcfw->creq_bar_reg); if (!res_base) dev_err(&rcfw->pdev->dev, - "QPLIB: CREQ BAR region %d resc start is 0!", + "CREQ BAR region %d resc start is 0!\n", rcfw->creq_bar_reg); rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off, 4); if (!rcfw->creq_bar_reg_iomem) { - dev_err(&rcfw->pdev->dev, - "QPLIB: CREQ BAR region %d mapping failed", + dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n", rcfw->creq_bar_reg); return -ENOMEM; } @@ -717,7 +716,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true); if (rc) { dev_err(&rcfw->pdev->dev, - "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc); + "Failed to request IRQ for CREQ rc = 0x%x\n", rc); bnxt_qplib_disable_rcfw_channel(rcfw); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 539a5d44e6db..59eeac55626f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -36,6 +36,8 @@ * Description: QPLib resource manager */ +#define dev_fmt(fmt) "QPLIB: " fmt + #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/interrupt.h> @@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, pbl->pg_map_arr[i]); else dev_warn(&pdev->dev, - "QPLIB: PBL free pg_arr[%d] empty?!", - i); + "PBL free pg_arr[%d] empty?!\n", i); pbl->pg_arr[i] = NULL; } } @@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl *pkey_tbl) { if (!pkey_tbl->tbl) - dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present"); + dev_dbg(&res->pdev->dev, "PKEY tbl not present\n"); else kfree(pkey_tbl->tbl); @@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd) { if (test_and_set_bit(pd->id, pdt->tbl)) { - dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d", + dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n", pd->id); return -EINVAL; } @@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res, struct bnxt_qplib_dpi *dpi) { if (dpi->dpi >= dpit->max) { - dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi); + dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi); return -EINVAL; } if (test_and_set_bit(dpi->dpi, dpit->tbl)) { - dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d", + dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n", dpi->dpi); return -EINVAL; } @@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, u32 dbr_len, bytes; if (dpit->dbr_bar_reg_iomem) { - dev_err(&res->pdev->dev, - "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg); + dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n", + dbr_bar_reg); return -EALREADY; } bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg); if (!bar_reg_base) { - dev_err(&res->pdev->dev, - "QPLIB: BAR region %d resc start failed", dbr_bar_reg); + dev_err(&res->pdev->dev, "BAR region %d resc start failed\n", + dbr_bar_reg); return -ENOMEM; } dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset; if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) { - dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d", - dbr_len); + dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len); return -ENOMEM; } @@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, dbr_len); if (!dpit->dbr_bar_reg_iomem) { dev_err(&res->pdev->dev, - "QPLIB: FP: DBR BAR region %d mapping failed", - dbr_bar_reg); + "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg); return -ENOMEM; } @@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, &stats->dma_map, GFP_KERNEL); if (!stats->dma) { - dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed"); + dev_err(&pdev->dev, "Stats DMA allocation failed\n"); return -ENOMEM; } return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 4097f3fa25c5..c80f2eaaf1a3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -36,6 +36,8 @@ * Description: Slow Path Operators */ +#define dev_fmt(fmt) "QPLIB: " fmt + #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/sched.h> @@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); if (!sbuf) { dev_err(&rcfw->pdev->dev, - "QPLIB: SP: QUERY_FUNC alloc side buffer failed"); + "SP: QUERY_FUNC alloc side buffer failed\n"); return -ENOMEM; } @@ -186,8 +188,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, (void *)&resp, NULL, 0); if (rc) { - dev_err(&res->pdev->dev, - "QPLIB: Failed to set function resources"); + dev_err(&res->pdev->dev, "Failed to set function resources\n"); } return rc; } @@ -199,7 +200,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, { if (index >= sgid_tbl->max) { dev_err(&res->pdev->dev, - "QPLIB: Index %d exceeded SGID table max (%d)", + "Index %d exceeded SGID table max (%d)\n", index, sgid_tbl->max); return -EINVAL; } @@ -217,13 +218,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int index; if (!sgid_tbl) { - dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); + dev_err(&res->pdev->dev, "SGID table not allocated\n"); return -EINVAL; } /* Do we need a sgid_lock here? */ if (!sgid_tbl->active) { - dev_err(&res->pdev->dev, - "QPLIB: SGID table has no active entries"); + dev_err(&res->pdev->dev, "SGID table has no active entries\n"); return -ENOMEM; } for (index = 0; index < sgid_tbl->max; index++) { @@ -231,7 +231,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, break; } if (index == sgid_tbl->max) { - dev_warn(&res->pdev->dev, "GID not found in the SGID table"); + dev_warn(&res->pdev->dev, "GID not found in the SGID table\n"); return 0; } /* Remove GID from the SGID table */ @@ -244,7 +244,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, RCFW_CMD_PREP(req, DELETE_GID, cmd_flags); if (sgid_tbl->hw_id[index] == 0xFFFF) { dev_err(&res->pdev->dev, - "QPLIB: GID entry contains an invalid HW id"); + "GID entry contains an invalid HW id\n"); return -EINVAL; } req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]); @@ -258,7 +258,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, sgid_tbl->vlan[index] = 0; sgid_tbl->active--; dev_dbg(&res->pdev->dev, - "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x", + "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n", index, sgid_tbl->hw_id[index], sgid_tbl->active); sgid_tbl->hw_id[index] = (u16)-1; @@ -277,20 +277,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int i, free_idx; if (!sgid_tbl) { - dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); + dev_err(&res->pdev->dev, "SGID table not allocated\n"); return -EINVAL; } /* Do we need a sgid_lock here? */ if (sgid_tbl->active == sgid_tbl->max) { - dev_err(&res->pdev->dev, "QPLIB: SGID table is full"); + dev_err(&res->pdev->dev, "SGID table is full\n"); return -ENOMEM; } free_idx = sgid_tbl->max; for (i = 0; i < sgid_tbl->max; i++) { if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) { dev_dbg(&res->pdev->dev, - "QPLIB: SGID entry already exist in entry %d!", - i); + "SGID entry already exist in entry %d!\n", i); *index = i; return -EALREADY; } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero, @@ -301,7 +300,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } if (free_idx == sgid_tbl->max) { dev_err(&res->pdev->dev, - "QPLIB: SGID table is FULL but count is not MAX??"); + "SGID table is FULL but count is not MAX??\n"); return -ENOMEM; } if (update) { @@ -348,7 +347,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, sgid_tbl->vlan[free_idx] = 1; dev_dbg(&res->pdev->dev, - "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x", + "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n", free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active); *index = free_idx; @@ -404,7 +403,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, } if (index >= pkey_tbl->max) { dev_err(&res->pdev->dev, - "QPLIB: Index %d exceeded PKEY table max (%d)", + "Index %d exceeded PKEY table max (%d)\n", index, pkey_tbl->max); return -EINVAL; } @@ -419,14 +418,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res, int i, rc = 0; if (!pkey_tbl) { - dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); + dev_err(&res->pdev->dev, "PKEY table not allocated\n"); return -EINVAL; } /* Do we need a pkey_lock here? */ if (!pkey_tbl->active) { - dev_err(&res->pdev->dev, - "QPLIB: PKEY table has no active entries"); + dev_err(&res->pdev->dev, "PKEY table has no active entries\n"); return -ENOMEM; } for (i = 0; i < pkey_tbl->max; i++) { @@ -435,8 +433,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res, } if (i == pkey_tbl->max) { dev_err(&res->pdev->dev, - "QPLIB: PKEY 0x%04x not found in the pkey table", - *pkey); + "PKEY 0x%04x not found in the pkey table\n", *pkey); return -ENOMEM; } memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey)); @@ -453,13 +450,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, int i, free_idx, rc = 0; if (!pkey_tbl) { - dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); + dev_err(&res->pdev->dev, "PKEY table not allocated\n"); return -EINVAL; } /* Do we need a pkey_lock here? */ if (pkey_tbl->active == pkey_tbl->max) { - dev_err(&res->pdev->dev, "QPLIB: PKEY table is full"); + dev_err(&res->pdev->dev, "PKEY table is full\n"); return -ENOMEM; } free_idx = pkey_tbl->max; @@ -471,7 +468,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, } if (free_idx == pkey_tbl->max) { dev_err(&res->pdev->dev, - "QPLIB: PKEY table is FULL but count is not MAX??"); + "PKEY table is FULL but count is not MAX??\n"); return -ENOMEM; } /* Add PKEY to the pkey_tbl */ @@ -555,8 +552,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw) int rc; if (mrw->lkey == 0xFFFFFFFF) { - dev_info(&res->pdev->dev, - "QPLIB: SP: Free a reserved lkey MRW"); + dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n"); return 0; } @@ -666,9 +662,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, pages++; if (pages > MAX_PBL_LVL_1_PGS) { - dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages "); dev_err(&res->pdev->dev, - "requested (0x%x) exceeded max (0x%x)", + "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n", pages, MAX_PBL_LVL_1_PGS); return -ENOMEM; } @@ -684,7 +679,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, HWQ_TYPE_CTX); if (rc) { dev_err(&res->pdev->dev, - "SP: Reg MR memory allocation failed"); + "SP: Reg MR memory allocation failed\n"); return -ENOMEM; } /* Write to the hwq */ @@ -795,7 +790,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); if (!sbuf) { dev_err(&rcfw->pdev->dev, - "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed"); + "SP: QUERY_ROCE_STATS alloc side buffer failed\n"); return -ENOMEM; } diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0f83cbec33f3..615413bd3e8d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref) ep->com.local_addr.ss_family); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); - if (ep->mpa_skb) - kfree_skb(ep->mpa_skb); + kfree_skb(ep->mpa_skb); } if (!skb_queue_empty(&ep->com.ep_skb_list)) skb_queue_purge(&ep->com.ep_skb_list); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index b3203afa3b1d..e78dd9afac86 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1685,6 +1685,12 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { + + /* for user qps, qhp->wq.flushed is protected by qhp->mutex */ + if (qhp->wq.flushed) + return; + + qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq, 0); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); @@ -2807,8 +2813,7 @@ err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); err_free_skb: - if (srq->destroy_skb) - kfree_skb(srq->destroy_skb); + kfree_skb(srq->destroy_skb); err_free_srq_idx: c4iw_free_srq_idx(&rhp->rdev, srq->idx); err_free_wr_wait: diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index f451ba912f47..a8dcf82ab7cb 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -8,12 +8,41 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ - eprom.o exp_rcv.o file_ops.o firmware.o \ - init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ - qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ - uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ - verbs_txreq.o vnic_main.o vnic_sdma.o +hfi1-y := \ + affinity.o \ + chip.o \ + device.o \ + driver.o \ + efivar.o \ + eprom.o \ + exp_rcv.o \ + file_ops.o \ + firmware.o \ + init.o \ + intr.o \ + mad.o \ + mmu_rb.o \ + msix.o \ + pcie.o \ + pio.o \ + pio_copy.o \ + platform.o \ + qp.o \ + qsfp.o \ + rc.o \ + ruc.o \ + sdma.o \ + sysfs.o \ + trace.o \ + uc.o \ + ud.o \ + user_exp_rcv.o \ + user_pages.o \ + user_sdma.o \ + verbs.o \ + verbs_txreq.o \ + vnic_main.o \ + vnic_sdma.o ifdef CONFIG_DEBUG_FS hfi1-y += debugfs.o diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index bedd5fba33b0..2baf38cc1e23 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) set = &entry->def_intr; cpumask_set_cpu(cpu, &set->mask); cpumask_set_cpu(cpu, &set->used); - for (i = 0; i < dd->num_msix_entries; i++) { + for (i = 0; i < dd->msix_info.max_requested; i++) { struct hfi1_msix_entry *other_msix; - other_msix = &dd->msix_entries[i]; + other_msix = &dd->msix_info.msix_entries[i]; if (other_msix->type != IRQ_SDMA || other_msix == msix) continue; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2c19bf772451..290c3b5b22f3 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -67,8 +67,6 @@ #include "debugfs.h" #include "fault.h" -#define NUM_IB_PORTS 1 - uint kdeth_qp; module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); @@ -1100,9 +1098,9 @@ struct err_reg_info { const char *desc; }; -#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START) -#define NUM_DC_ERRS (IS_DC_END - IS_DC_START) -#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START) +#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START) +#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START) +#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START) /* * Helpers for building HFI and DC error interrupt table entries. Different @@ -8178,7 +8176,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) /** * is_rcv_urgent_int() - User receive context urgent IRQ handler * @dd: valid dd - * @source: logical IRQ source (ofse from IS_RCVURGENT_START) + * @source: logical IRQ source (offset from IS_RCVURGENT_START) * * RX block receive urgent interrupt. Source is < 160. * @@ -8228,7 +8226,7 @@ static const struct is_table is_table[] = { is_sdma_eng_err_name, is_sdma_eng_err_int }, { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, is_sendctxt_err_name, is_sendctxt_err_int }, -{ IS_SDMA_START, IS_SDMA_END, +{ IS_SDMA_START, IS_SDMA_IDLE_END, is_sdma_eng_name, is_sdma_eng_int }, { IS_VARIOUS_START, IS_VARIOUS_END, is_various_name, is_various_int }, @@ -8254,7 +8252,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) /* avoids a double compare by walking the table in-order */ for (entry = &is_table[0]; entry->is_name; entry++) { - if (source < entry->end) { + if (source <= entry->end) { trace_hfi1_interrupt(dd, entry, source); entry->is_int(dd, source - entry->start); return; @@ -8273,7 +8271,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) * context DATA IRQs are threaded and are not supported by this handler. * */ -static irqreturn_t general_interrupt(int irq, void *data) +irqreturn_t general_interrupt(int irq, void *data) { struct hfi1_devdata *dd = data; u64 regs[CCE_NUM_INT_CSRS]; @@ -8306,7 +8304,7 @@ static irqreturn_t general_interrupt(int irq, void *data) return handled; } -static irqreturn_t sdma_interrupt(int irq, void *data) +irqreturn_t sdma_interrupt(int irq, void *data) { struct sdma_engine *sde = data; struct hfi1_devdata *dd = sde->dd; @@ -8398,7 +8396,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) * invoked) is finished. The intent is to avoid extra interrupts while we * are processing packets anyway. */ -static irqreturn_t receive_context_interrupt(int irq, void *data) +irqreturn_t receive_context_interrupt(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; struct hfi1_devdata *dd = rcd->dd; @@ -8438,7 +8436,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data) * Receive packet thread handler. This expects to be invoked with the * receive interrupt still blocked. */ -static irqreturn_t receive_context_thread(int irq, void *data) +irqreturn_t receive_context_thread(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; int present; @@ -9648,30 +9646,10 @@ void qsfp_event(struct work_struct *work) } } -static void init_qsfp_int(struct hfi1_devdata *dd) +void init_qsfp_int(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd = dd->pport; - u64 qsfp_mask, cce_int_mask; - const int qsfp1_int_smask = QSFP1_INT % 64; - const int qsfp2_int_smask = QSFP2_INT % 64; - - /* - * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0 - * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR, - * therefore just one of QSFP1_INT/QSFP2_INT can be used to find - * the index of the appropriate CSR in the CCEIntMask CSR array - */ - cce_int_mask = read_csr(dd, CCE_INT_MASK + - (8 * (QSFP1_INT / 64))); - if (dd->hfi1_id) { - cce_int_mask &= ~((u64)1 << qsfp1_int_smask); - write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)), - cce_int_mask); - } else { - cce_int_mask &= ~((u64)1 << qsfp2_int_smask); - write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)), - cce_int_mask); - } + u64 qsfp_mask; qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); /* Clear current status to avoid spurious interrupts */ @@ -9688,6 +9666,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd) write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, qsfp_mask); + + /* Enable the appropriate QSFP IRQ source */ + if (!dd->hfi1_id) + set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true); + else + set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true); } /* @@ -11928,10 +11912,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK; } - if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) + if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) { + set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt, + IS_RCVAVAIL_START + rcd->ctxt, true); rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) + } + if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) { + set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt, + IS_RCVAVAIL_START + rcd->ctxt, false); rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; + } if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { @@ -11961,6 +11951,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; + if (op & HFI1_RCVCTRL_URGENT_ENB) + set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt, + IS_RCVURGENT_START + rcd->ctxt, true); + if (op & HFI1_RCVCTRL_URGENT_DIS) + set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt, + IS_RCVURGENT_START + rcd->ctxt, false); + hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); @@ -12959,63 +12956,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp) return ret; } +/* ========================================================================= */ + /** - * get_int_mask - get 64 bit int mask - * @dd - the devdata - * @i - the csr (relative to CCE_INT_MASK) + * read_mod_write() - Calculate the IRQ register index and set/clear the bits + * @dd: valid devdata + * @src: IRQ source to determine register index from + * @bits: the bits to set or clear + * @set: true == set the bits, false == clear the bits * - * Returns the mask with the urgent interrupt mask - * bit clear for kernel receive contexts. */ -static u64 get_int_mask(struct hfi1_devdata *dd, u32 i) +static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits, + bool set) { - u64 mask = U64_MAX; /* default to no change */ - - if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) { - int j = (i - (IS_RCVURGENT_START / 64)) * 64; - int k = !j ? IS_RCVURGENT_START % 64 : 0; + u64 reg; + u16 idx = src / BITS_PER_REGISTER; - if (j) - j -= IS_RCVURGENT_START % 64; - /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */ - for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++) - /* convert to bit in mask and clear */ - mask &= ~BIT_ULL(k); - } - return mask; + spin_lock(&dd->irq_src_lock); + reg = read_csr(dd, CCE_INT_MASK + (8 * idx)); + if (set) + reg |= bits; + else + reg &= ~bits; + write_csr(dd, CCE_INT_MASK + (8 * idx), reg); + spin_unlock(&dd->irq_src_lock); } -/* ========================================================================= */ - -/* - * Enable/disable chip from delivering interrupts. +/** + * set_intr_bits() - Enable/disable a range (one or more) IRQ sources + * @dd: valid devdata + * @first: first IRQ source to set/clear + * @last: last IRQ source (inclusive) to set/clear + * @set: true == set the bits, false == clear the bits + * + * If first == last, set the exact source. */ -void set_intr_state(struct hfi1_devdata *dd, u32 enable) +int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set) { - int i; + u64 bits = 0; + u64 bit; + u16 src; - /* - * In HFI, the mask needs to be 1 to allow interrupts. - */ - if (enable) { - /* enable all interrupts but urgent on kernel contexts */ - for (i = 0; i < CCE_NUM_INT_CSRS; i++) { - u64 mask = get_int_mask(dd, i); + if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES) + return -EINVAL; - write_csr(dd, CCE_INT_MASK + (8 * i), mask); - } + if (last < first) + return -ERANGE; - init_qsfp_int(dd); - } else { - for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_MASK + (8 * i), 0ull); + for (src = first; src <= last; src++) { + bit = src % BITS_PER_REGISTER; + /* wrapped to next register? */ + if (!bit && bits) { + read_mod_write(dd, src - 1, bits, set); + bits = 0; + } + bits |= BIT_ULL(bit); } + read_mod_write(dd, last, bits, set); + + return 0; } /* * Clear all interrupt sources on the chip. */ -static void clear_all_interrupts(struct hfi1_devdata *dd) +void clear_all_interrupts(struct hfi1_devdata *dd) { int i; @@ -13039,38 +13044,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); } -/** - * hfi1_clean_up_interrupts() - Free all IRQ resources - * @dd: valid device data data structure - * - * Free the MSIx and assoicated PCI resources, if they have been allocated. - */ -void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) -{ - int i; - struct hfi1_msix_entry *me = dd->msix_entries; - - /* remove irqs - must happen before disabling/turning off */ - for (i = 0; i < dd->num_msix_entries; i++, me++) { - if (!me->arg) /* => no irq, no affinity */ - continue; - hfi1_put_irq_affinity(dd, me); - pci_free_irq(dd->pcidev, i, me->arg); - } - - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; - - pci_free_irq_vectors(dd->pcidev); -} - /* * Remap the interrupt source from the general handler to the given MSI-X * interrupt. */ -static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) +void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) { u64 reg; int m, n; @@ -13094,8 +13072,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) write_csr(dd, CCE_INT_MAP + (8 * m), reg); } -static void remap_sdma_interrupts(struct hfi1_devdata *dd, - int engine, int msix_intr) +void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr) { /* * SDMA engine interrupt sources grouped by type, rather than @@ -13104,204 +13081,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, * SDMAProgress * SDMAIdle */ - remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine, - msix_intr); - remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine, - msix_intr); - remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine, - msix_intr); -} - -static int request_msix_irqs(struct hfi1_devdata *dd) -{ - int first_general, last_general; - int first_sdma, last_sdma; - int first_rx, last_rx; - int i, ret = 0; - - /* calculate the ranges we are going to use */ - first_general = 0; - last_general = first_general + 1; - first_sdma = last_general; - last_sdma = first_sdma + dd->num_sdma; - first_rx = last_sdma; - last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts; - - /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ - dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; - - /* - * Sanity check - the code expects all SDMA chip source - * interrupts to be in the same CSR, starting at bit 0. Verify - * that this is true by checking the bit location of the start. - */ - BUILD_BUG_ON(IS_SDMA_START % 64); - - for (i = 0; i < dd->num_msix_entries; i++) { - struct hfi1_msix_entry *me = &dd->msix_entries[i]; - const char *err_info; - irq_handler_t handler; - irq_handler_t thread = NULL; - void *arg = NULL; - int idx; - struct hfi1_ctxtdata *rcd = NULL; - struct sdma_engine *sde = NULL; - char name[MAX_NAME_SIZE]; - - /* obtain the arguments to pci_request_irq */ - if (first_general <= i && i < last_general) { - idx = i - first_general; - handler = general_interrupt; - arg = dd; - snprintf(name, sizeof(name), - DRIVER_NAME "_%d", dd->unit); - err_info = "general"; - me->type = IRQ_GENERAL; - } else if (first_sdma <= i && i < last_sdma) { - idx = i - first_sdma; - sde = &dd->per_sdma[idx]; - handler = sdma_interrupt; - arg = sde; - snprintf(name, sizeof(name), - DRIVER_NAME "_%d sdma%d", dd->unit, idx); - err_info = "sdma"; - remap_sdma_interrupts(dd, idx, i); - me->type = IRQ_SDMA; - } else if (first_rx <= i && i < last_rx) { - idx = i - first_rx; - rcd = hfi1_rcd_get_by_index_safe(dd, idx); - if (rcd) { - /* - * Set the interrupt register and mask for this - * context's interrupt. - */ - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; - rcd->imask = ((u64)1) << - ((IS_RCVAVAIL_START + idx) % 64); - handler = receive_context_interrupt; - thread = receive_context_thread; - arg = rcd; - snprintf(name, sizeof(name), - DRIVER_NAME "_%d kctxt%d", - dd->unit, idx); - err_info = "receive context"; - remap_intr(dd, IS_RCVAVAIL_START + idx, i); - me->type = IRQ_RCVCTXT; - rcd->msix_intr = i; - hfi1_rcd_put(rcd); - } - } else { - /* not in our expected range - complain, then - * ignore it - */ - dd_dev_err(dd, - "Unexpected extra MSI-X interrupt %d\n", i); - continue; - } - /* no argument, no interrupt */ - if (!arg) - continue; - /* make sure the name is terminated */ - name[sizeof(name) - 1] = 0; - me->irq = pci_irq_vector(dd->pcidev, i); - ret = pci_request_irq(dd->pcidev, i, handler, thread, arg, - name); - if (ret) { - dd_dev_err(dd, - "unable to allocate %s interrupt, irq %d, index %d, err %d\n", - err_info, me->irq, idx, ret); - return ret; - } - /* - * assign arg after pci_request_irq call, so it will be - * cleaned up - */ - me->arg = arg; - - ret = hfi1_get_irq_affinity(dd, me); - if (ret) - dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - } - - return ret; -} - -void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) -{ - int i; - - for (i = 0; i < dd->vnic.num_ctxt; i++) { - struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; - struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; - - synchronize_irq(me->irq); - } -} - -void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) -{ - struct hfi1_devdata *dd = rcd->dd; - struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; - - if (!me->arg) /* => no irq, no affinity */ - return; - - hfi1_put_irq_affinity(dd, me); - pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); - - me->arg = NULL; -} - -void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) -{ - struct hfi1_devdata *dd = rcd->dd; - struct hfi1_msix_entry *me; - int idx = rcd->ctxt; - void *arg = rcd; - int ret; - - rcd->msix_intr = dd->vnic.msix_idx++; - me = &dd->msix_entries[rcd->msix_intr]; - - /* - * Set the interrupt register and mask for this - * context's interrupt. - */ - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; - rcd->imask = ((u64)1) << - ((IS_RCVAVAIL_START + idx) % 64); - me->type = IRQ_RCVCTXT; - me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); - remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - - ret = pci_request_irq(dd->pcidev, rcd->msix_intr, - receive_context_interrupt, - receive_context_thread, arg, - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); - if (ret) { - dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", - me->irq, idx, ret); - return; - } - /* - * assign arg after pci_request_irq call, so it will be - * cleaned up - */ - me->arg = arg; - - ret = hfi1_get_irq_affinity(dd, me); - if (ret) { - dd_dev_err(dd, - "unable to pin IRQ %d\n", ret); - pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); - } + remap_intr(dd, IS_SDMA_START + engine, msix_intr); + remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr); + remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr); } /* * Set the general handler to accept all interrupts, remap all * chip interrupts back to MSI-X 0. */ -static void reset_interrupts(struct hfi1_devdata *dd) +void reset_interrupts(struct hfi1_devdata *dd) { int i; @@ -13314,54 +13103,33 @@ static void reset_interrupts(struct hfi1_devdata *dd) write_csr(dd, CCE_INT_MAP + (8 * i), 0); } +/** + * set_up_interrupts() - Initialize the IRQ resources and state + * @dd: valid devdata + * + */ static int set_up_interrupts(struct hfi1_devdata *dd) { - u32 total; - int ret, request; - - /* - * Interrupt count: - * 1 general, "slow path" interrupt (includes the SDMA engines - * slow source, SDMACleanupDone) - * N interrupts - one per used SDMA engine - * M interrupt - one per kernel receive context - * V interrupt - one for each VNIC context - */ - total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; - - /* ask for MSI-X interrupts */ - request = request_msix(dd, total); - if (request < 0) { - ret = request; - goto fail; - } else { - dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), - GFP_KERNEL); - if (!dd->msix_entries) { - ret = -ENOMEM; - goto fail; - } - /* using MSI-X */ - dd->num_msix_entries = total; - dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); - } + int ret; /* mask all interrupts */ - set_intr_state(dd, 0); + set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false); + /* clear all pending interrupts */ clear_all_interrupts(dd); /* reset general handler mask, chip MSI-X mappings */ reset_interrupts(dd); - ret = request_msix_irqs(dd); + /* ask for MSI-X interrupts */ + ret = msix_initialize(dd); if (ret) - goto fail; + return ret; - return 0; + ret = msix_request_irqs(dd); + if (ret) + msix_clean_up_interrupts(dd); -fail: - hfi1_clean_up_interrupts(dd); return ret; } @@ -14914,20 +14682,16 @@ err_exit: } /** - * Allocate and initialize the device structure for the hfi. + * hfi1_init_dd() - Initialize most of the dd structure. * @dev: the pci_dev for hfi1_ib device * @ent: pci_device_id struct for this dev * - * Also allocates, initializes, and returns the devdata struct for this - * device instance - * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, - const struct pci_device_id *ent) +int hfi1_init_dd(struct hfi1_devdata *dd) { - struct hfi1_devdata *dd; + struct pci_dev *pdev = dd->pcidev; struct hfi1_pportdata *ppd; u64 reg; int i, ret; @@ -14938,13 +14702,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "Functional simulator" }; struct pci_dev *parent = pdev->bus->self; - u32 sdma_engines; + u32 sdma_engines = chip_sdma_engines(dd); - dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * - sizeof(struct hfi1_pportdata)); - if (IS_ERR(dd)) - goto bail; - sdma_engines = chip_sdma_engines(dd); ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { int vl; @@ -15123,6 +14882,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + /* + * This should probably occur in hfi1_pcie_init(), but historically + * occurs after the do_pcie_gen3_transition() code. + */ + tune_pcie_caps(dd); + /* start setting dd values and adjusting CSRs */ init_early_variables(dd); @@ -15235,14 +15000,13 @@ bail_free_cntrs: free_cntrs(dd); bail_clear_intr: hfi1_comp_vectors_clean_up(dd); - hfi1_clean_up_interrupts(dd); + msix_clean_up_interrupts(dd); bail_cleanup: hfi1_pcie_ddcleanup(dd); bail_free: hfi1_free_devdata(dd); - dd = ERR_PTR(ret); bail: - return dd; + return ret; } static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 36b04d6300e5..6b9c8f12dff8 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -52,9 +52,7 @@ */ /* sizes */ -#define CCE_NUM_MSIX_VECTORS 256 -#define CCE_NUM_INT_CSRS 12 -#define CCE_NUM_INT_MAP_CSRS 96 +#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64)) #define NUM_INTERRUPT_SOURCES 768 #define RXE_NUM_CONTEXTS 160 #define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */ @@ -161,34 +159,49 @@ (CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \ CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT) -/* interrupt source numbers */ -#define IS_GENERAL_ERR_START 0 -#define IS_SDMAENG_ERR_START 16 -#define IS_SENDCTXT_ERR_START 32 -#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */ +/* Specific IRQ sources */ +#define CCE_ERR_INT 0 +#define RXE_ERR_INT 1 +#define MISC_ERR_INT 2 +#define PIO_ERR_INT 4 +#define SDMA_ERR_INT 5 +#define EGRESS_ERR_INT 6 +#define TXE_ERR_INT 7 +#define PBC_INT 240 +#define GPIO_ASSERT_INT 241 +#define QSFP1_INT 242 +#define QSFP2_INT 243 +#define TCRIT_INT 244 + +/* interrupt source ranges */ +#define IS_FIRST_SOURCE CCE_ERR_INT +#define IS_GENERAL_ERR_START 0 +#define IS_SDMAENG_ERR_START 16 +#define IS_SENDCTXT_ERR_START 32 +#define IS_SDMA_START 192 +#define IS_SDMA_PROGRESS_START 208 +#define IS_SDMA_IDLE_START 224 #define IS_VARIOUS_START 240 #define IS_DC_START 248 #define IS_RCVAVAIL_START 256 #define IS_RCVURGENT_START 416 #define IS_SENDCREDIT_START 576 #define IS_RESERVED_START 736 -#define IS_MAX_SOURCES 768 +#define IS_LAST_SOURCE 767 /* derived interrupt source values */ -#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START -#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START -#define IS_SENDCTXT_ERR_END IS_SDMA_START -#define IS_SDMA_END IS_VARIOUS_START -#define IS_VARIOUS_END IS_DC_START -#define IS_DC_END IS_RCVAVAIL_START -#define IS_RCVAVAIL_END IS_RCVURGENT_START -#define IS_RCVURGENT_END IS_SENDCREDIT_START -#define IS_SENDCREDIT_END IS_RESERVED_START -#define IS_RESERVED_END IS_MAX_SOURCES - -/* absolute interrupt numbers for QSFP1Int and QSFP2Int */ -#define QSFP1_INT 242 -#define QSFP2_INT 243 +#define IS_GENERAL_ERR_END 7 +#define IS_SDMAENG_ERR_END 31 +#define IS_SENDCTXT_ERR_END 191 +#define IS_SDMA_END 207 +#define IS_SDMA_PROGRESS_END 223 +#define IS_SDMA_IDLE_END 239 +#define IS_VARIOUS_END 244 +#define IS_DC_END 255 +#define IS_RCVAVAIL_END 415 +#define IS_RCVURGENT_END 575 +#define IS_SENDCREDIT_END 735 +#define IS_RESERVED_END IS_LAST_SOURCE /* DCC_CFG_PORT_CONFIG logical link states */ #define LSTATE_DOWN 0x1 @@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); +irqreturn_t general_interrupt(int irq, void *data); +irqreturn_t sdma_interrupt(int irq, void *data); +irqreturn_t receive_context_interrupt(int irq, void *data); +irqreturn_t receive_context_thread(int irq, void *data); + +int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set); +void init_qsfp_int(struct hfi1_devdata *dd); +void clear_all_interrupts(struct hfi1_devdata *dd); +void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); +void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); +void reset_interrupts(struct hfi1_devdata *dd); + /* * Interrupt source table. * diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1fc75647e47b..c22ebc774a6a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS | + HFI1_RCVCTRL_URGENT_DIS, uctxt); /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt); /* @@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; + rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB; if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index d9470317983f..d3d5717650e9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -80,6 +80,7 @@ #include "qsfp.h" #include "platform.h" #include "affinity.h" +#include "msix.h" /* bumped 1 from s/w major version of TrueScale */ #define HFI1_CHIP_VERS_MAJ 3U @@ -620,6 +621,8 @@ struct rvt_sge_state; #define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000 #define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000 #define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000 +#define HFI1_RCVCTRL_URGENT_ENB 0x40000 +#define HFI1_RCVCTRL_URGENT_DIS 0x80000 /* partition enforcement flags */ #define HFI1_PART_ENFORCE_IN 0x1 @@ -667,6 +670,14 @@ struct hfi1_msix_entry { struct irq_affinity_notify notify; }; +struct hfi1_msix_info { + /* lock to synchronize in_use_msix access */ + spinlock_t msix_lock; + DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS); + struct hfi1_msix_entry *msix_entries; + u16 max_requested; +}; + /* per-SL CCA information */ struct cca_timer { struct hrtimer hrtimer; @@ -992,7 +1003,6 @@ struct hfi1_vnic_data { struct idr vesw_idr; u8 rmt_start; u8 num_ctxt; - u32 msix_idx; }; struct hfi1_vnic_vport_info; @@ -1205,11 +1215,6 @@ struct hfi1_devdata { struct diag_client *diag_client; - /* MSI-X information */ - struct hfi1_msix_entry *msix_entries; - u32 num_msix_entries; - u32 first_dyn_msix_idx; - /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; @@ -1223,6 +1228,9 @@ struct hfi1_devdata { */ struct timer_list synth_stats_timer; + /* MSI-X information */ + struct hfi1_msix_info msix_info; + /* * device counters */ @@ -1349,6 +1357,8 @@ struct hfi1_devdata { /* vnic data */ struct hfi1_vnic_data vnic; + /* Lock to protect IRQ SRC register access */ + spinlock_t irq_src_lock; }; static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) @@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); void set_all_slowpath(struct hfi1_devdata *dd); -void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); -void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); -void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); extern const struct pci_device_id hfi1_pci_tbl[]; void hfi1_make_ud_req_9B(struct rvt_qp *qp, @@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_CTXT_WAITING_URG 4 /* free up any allocated data at closes */ -struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, - const struct pci_device_id *ent); +int hfi1_init_dd(struct hfi1_devdata *dd); void hfi1_free_devdata(struct hfi1_devdata *dd); -struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, @@ -1974,16 +1979,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd); /* Hook for sysfs read of QSFP */ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); -int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); -void hfi1_clean_up_interrupts(struct hfi1_devdata *dd); +int hfi1_pcie_init(struct hfi1_devdata *dd); void hfi1_pcie_cleanup(struct pci_dev *pdev); int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); -int request_msix(struct hfi1_devdata *dd, u32 msireq); int restore_pci_variables(struct hfi1_devdata *dd); int save_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); +void tune_pcie_caps(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, enum platform_config_table_type_encoding @@ -2124,19 +2128,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) return base_sdma_integrity; } -/* - * hfi1_early_err is used (only!) to print early errors before devdata is - * allocated, or when dd->pcidev may not be valid, and at the tail end of - * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is - * the same as dd_dev_err, but is used when the message really needs - * the IB port# to be definitive as to what's happening.. - */ -#define hfi1_early_err(dev, fmt, ...) \ - dev_err(dev, fmt, ##__VA_ARGS__) - -#define hfi1_early_info(dev, fmt, ...) \ - dev_info(dev, fmt, ##__VA_ARGS__) - #define dd_dev_emerg(dd, fmt, ...) \ dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 758d273c32cf..1e770a133779 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -83,6 +83,8 @@ #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ +#define NUM_IB_PORTS 1 + /* * Number of user receive contexts we are configured to use (to allow for more * pio buffers per ctxt, etc.) Zero means use one user context per CPU. @@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, ppd->part_enforce |= HFI1_PART_ENFORCE_IN; if (loopback) { - hfi1_early_err(&pdev->dev, - "Faking data partition 0x8001 in idx %u\n", - !default_pkey_idx); + dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n", + !default_pkey_idx); ppd->pkeys[!default_pkey_idx] = 0x8001; } @@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, return; bail: - - hfi1_early_err(&pdev->dev, - "Congestion Control Agent disabled for port %d\n", port); + dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); } /* @@ -833,6 +832,23 @@ wq_error: } /** + * enable_general_intr() - Enable the IRQs that will be handled by the + * general interrupt handler. + * @dd: valid devdata + * + */ +static void enable_general_intr(struct hfi1_devdata *dd) +{ + set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true); + set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true); + set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true); + set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true); + set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true); + set_intr_bits(dd, IS_DC_START, IS_DC_END, true); + set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true); +} + +/** * hfi1_init - do the actual initialization sequence on the chip * @dd: the hfi1_ib device * @reinit: re-initializing, so don't allocate new memory @@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); ret = lastfail; } + /* enable IRQ */ hfi1_rcd_put(rcd); } @@ -954,7 +971,8 @@ done: HFI1_STATUS_INITTED; if (!ret) { /* enable all interrupts from the chip */ - set_intr_state(dd, 1); + enable_general_intr(dd); + init_qsfp_int(dd); /* chip is OK for user apps; mark it as initialized */ for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd) } dd->flags &= ~HFI1_INITTED; - /* mask and clean up interrupts, but not errors */ - set_intr_state(dd, 0); - hfi1_clean_up_interrupts(dd); + /* mask and clean up interrupts */ + set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false); + msix_clean_up_interrupts(dd); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) kobject_put(&dd->kobj); } -/* - * Allocate our primary per-unit data structure. Must be done via verbs - * allocator, because the verbs cleanup process both does cleanup and - * free of the data structure. +/** + * hfi1_alloc_devdata - Allocate our primary per-unit data structure. + * @pdev: Valid PCI device + * @extra: How many bytes to alloc past the default + * + * Must be done via verbs allocator, because the verbs cleanup process + * both does cleanup and free of the data structure. * "extra" is for chip-specific data. * * Use the idr mechanism to get a unit number for this unit. */ -struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) +static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, + size_t extra) { unsigned long flags; struct hfi1_devdata *dd; @@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) idr_preload_end(); if (ret < 0) { - hfi1_early_err(&pdev->dev, - "Could not allocate unit ID: error %d\n", -ret); + dev_err(&pdev->dev, + "Could not allocate unit ID: error %d\n", -ret); goto bail; } rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); @@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) spin_lock_init(&dd->pio_map_lock); mutex_init(&dd->dc8051_lock); init_waitqueue_head(&dd->event_queue); + spin_lock_init(&dd->irq_src_lock); dd->int_counter = alloc_percpu(u64); if (!dd->int_counter) { @@ -1604,23 +1627,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } -static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) +static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) { if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - hfi1_early_err(dev, "Receive header queue count too small\n"); + dd_dev_err(dd, "Receive header queue count too small\n"); return -EINVAL; } if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - hfi1_early_err(dev, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); + dd_dev_err(dd, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); return -EINVAL; } if (thecnt % HDRQ_INCREMENT) { - hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", - thecnt, HDRQ_INCREMENT); + dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", + thecnt, HDRQ_INCREMENT); return -EINVAL; } @@ -1639,22 +1662,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Validate dev ids */ if (!(ent->device == PCI_DEVICE_ID_INTEL0 || ent->device == PCI_DEVICE_ID_INTEL1)) { - hfi1_early_err(&pdev->dev, - "Failing on unknown Intel deviceid 0x%x\n", - ent->device); + dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n", + ent->device); ret = -ENODEV; goto bail; } + /* Allocate the dd so we can get to work */ + dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * + sizeof(struct hfi1_pportdata)); + if (IS_ERR(dd)) { + ret = PTR_ERR(dd); + goto bail; + } + /* Validate some global module parameters */ - ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); + ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt); if (ret) goto bail; /* use the encoding function as a sanitization check */ if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { - hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", - hfi1_hdrq_entsize); + dd_dev_err(dd, "Invalid HdrQ Entry size %u\n", + hfi1_hdrq_entsize); ret = -EINVAL; goto bail; } @@ -1676,10 +1706,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) clamp_val(eager_buffer_size, MIN_EAGER_BUFFER * 8, MAX_EAGER_BUFFER_TOTAL); - hfi1_early_info(&pdev->dev, "Eager buffer size %u\n", - eager_buffer_size); + dd_dev_info(dd, "Eager buffer size %u\n", + eager_buffer_size); } else { - hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n"); + dd_dev_err(dd, "Invalid Eager buffer size of 0\n"); ret = -EINVAL; goto bail; } @@ -1687,7 +1717,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* restrict value of hfi1_rcvarr_split */ hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100); - ret = hfi1_pcie_init(pdev, ent); + ret = hfi1_pcie_init(dd); if (ret) goto bail; @@ -1695,12 +1725,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * Do device-specific initialization, function table setup, dd * allocation, etc. */ - dd = hfi1_init_dd(pdev, ent); - - if (IS_ERR(dd)) { - ret = PTR_ERR(dd); + ret = hfi1_init_dd(dd); + if (ret) goto clean_bail; /* error already printed */ - } ret = create_workqueues(dd); if (ret) @@ -1731,7 +1758,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j); if (initfail || ret) { - hfi1_clean_up_interrupts(dd); + msix_clean_up_interrupts(dd); stop_timers(dd); flush_workqueue(ib_wq); for (pidx = 0; pidx < dd->num_pports; ++pidx) { diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c new file mode 100644 index 000000000000..d920b165d696 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "hfi.h" +#include "affinity.h" +#include "sdma.h" + +/** + * msix_initialize() - Calculate, request and configure MSIx IRQs + * @dd: valid hfi1 devdata + * + */ +int msix_initialize(struct hfi1_devdata *dd) +{ + u32 total; + int ret; + struct hfi1_msix_entry *entries; + + /* + * MSIx interrupt count: + * one for the general, "slow path" interrupt + * one per used SDMA engine + * one per kernel receive context + * one for each VNIC context + * ...any new IRQs should be added here. + */ + total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; + + if (total >= CCE_NUM_MSIX_VECTORS) + return -EINVAL; + + ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX); + if (ret < 0) { + dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret); + return ret; + } + + entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries), + GFP_KERNEL); + if (!entries) { + pci_free_irq_vectors(dd->pcidev); + return -ENOMEM; + } + + dd->msix_info.msix_entries = entries; + spin_lock_init(&dd->msix_info.msix_lock); + bitmap_zero(dd->msix_info.in_use_msix, total); + dd->msix_info.max_requested = total; + dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); + + return 0; +} + +/** + * msix_request_irq() - Allocate a free MSIx IRQ + * @dd: valid devdata + * @arg: context information for the IRQ + * @handler: IRQ handler + * @thread: IRQ thread handler (could be NULL) + * @idx: zero base idx if multiple devices are needed + * @type: affinty IRQ type + * + * Allocated an MSIx vector if available, and then create the appropriate + * meta data needed to keep track of the pci IRQ request. + * + * Return: + * < 0 Error + * >= 0 MSIx vector + * + */ +static int msix_request_irq(struct hfi1_devdata *dd, void *arg, + irq_handler_t handler, irq_handler_t thread, + u32 idx, enum irq_type type) +{ + unsigned long nr; + int irq; + int ret; + const char *err_info; + char name[MAX_NAME_SIZE]; + struct hfi1_msix_entry *me; + + /* Allocate an MSIx vector */ + spin_lock(&dd->msix_info.msix_lock); + nr = find_first_zero_bit(dd->msix_info.in_use_msix, + dd->msix_info.max_requested); + if (nr < dd->msix_info.max_requested) + __set_bit(nr, dd->msix_info.in_use_msix); + spin_unlock(&dd->msix_info.msix_lock); + + if (nr == dd->msix_info.max_requested) + return -ENOSPC; + + /* Specific verification and determine the name */ + switch (type) { + case IRQ_GENERAL: + /* general interrupt must be MSIx vector 0 */ + if (nr) { + spin_lock(&dd->msix_info.msix_lock); + __clear_bit(nr, dd->msix_info.in_use_msix); + spin_unlock(&dd->msix_info.msix_lock); + dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n", + nr); + return -EINVAL; + } + snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); + err_info = "general"; + break; + case IRQ_SDMA: + snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", + dd->unit, idx); + err_info = "sdma"; + break; + case IRQ_RCVCTXT: + snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", + dd->unit, idx); + err_info = "receive context"; + break; + case IRQ_OTHER: + default: + return -EINVAL; + } + name[sizeof(name) - 1] = 0; + + irq = pci_irq_vector(dd->pcidev, nr); + ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name); + if (ret) { + dd_dev_err(dd, + "%s: request for IRQ %d failed, MSIx %d, err %d\n", + err_info, irq, idx, ret); + spin_lock(&dd->msix_info.msix_lock); + __clear_bit(nr, dd->msix_info.in_use_msix); + spin_unlock(&dd->msix_info.msix_lock); + return ret; + } + + /* + * assign arg after pci_request_irq call, so it will be + * cleaned up + */ + me = &dd->msix_info.msix_entries[nr]; + me->irq = irq; + me->arg = arg; + me->type = type; + + /* This is a request, so a failure is not fatal */ + ret = hfi1_get_irq_affinity(dd, me); + if (ret) + dd_dev_err(dd, "unable to pin IRQ %d\n", ret); + + return nr; +} + +/** + * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs + * @rcd: valid rcd context + * + */ +int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) +{ + int nr; + + nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt, + receive_context_thread, rcd->ctxt, IRQ_RCVCTXT); + if (nr < 0) + return nr; + + /* + * Set the interrupt register and mask for this + * context's interrupt. + */ + rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64; + rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64); + rcd->msix_intr = nr; + remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr); + + return 0; +} + +/** + * msix_request_smda_ira() - Helper for getting SDMA IRQ resources + * @sde: valid sdma engine + * + */ +int msix_request_sdma_irq(struct sdma_engine *sde) +{ + int nr; + + nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL, + sde->this_idx, IRQ_SDMA); + if (nr < 0) + return nr; + sde->msix_intr = nr; + remap_sdma_interrupts(sde->dd, sde->this_idx, nr); + + return 0; +} + +/** + * enable_sdma_src() - Helper to enable SDMA IRQ srcs + * @dd: valid devdata structure + * @i: index of SDMA engine + */ +static void enable_sdma_srcs(struct hfi1_devdata *dd, int i) +{ + set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true); + set_intr_bits(dd, IS_SDMA_PROGRESS_START + i, + IS_SDMA_PROGRESS_START + i, true); + set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true); + set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i, + true); +} + +/** + * msix_request_irqs() - Allocate all MSIx IRQs + * @dd: valid devdata structure + * + * Helper function to request the used MSIx IRQs. + * + */ +int msix_request_irqs(struct hfi1_devdata *dd) +{ + int i; + int ret; + + ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL); + if (ret < 0) + return ret; + + for (i = 0; i < dd->num_sdma; i++) { + struct sdma_engine *sde = &dd->per_sdma[i]; + + ret = msix_request_sdma_irq(sde); + if (ret) + return ret; + enable_sdma_srcs(sde->dd, i); + } + + for (i = 0; i < dd->n_krcv_queues; i++) { + struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i); + + if (rcd) + ret = msix_request_rcd_irq(rcd); + hfi1_rcd_put(rcd); + if (ret) + return ret; + } + + return 0; +} + +/** + * msix_free_irq() - Free the specified MSIx resources and IRQ + * @dd: valid devdata + * @msix_intr: MSIx vector to free. + * + */ +void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr) +{ + struct hfi1_msix_entry *me; + + if (msix_intr >= dd->msix_info.max_requested) + return; + + me = &dd->msix_info.msix_entries[msix_intr]; + + if (!me->arg) /* => no irq, no affinity */ + return; + + hfi1_put_irq_affinity(dd, me); + pci_free_irq(dd->pcidev, msix_intr, me->arg); + + me->arg = NULL; + + spin_lock(&dd->msix_info.msix_lock); + __clear_bit(msix_intr, dd->msix_info.in_use_msix); + spin_unlock(&dd->msix_info.msix_lock); +} + +/** + * hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources + * @dd: valid device data data structure + * + * Free the MSIx and associated PCI resources, if they have been allocated. + */ +void msix_clean_up_interrupts(struct hfi1_devdata *dd) +{ + int i; + struct hfi1_msix_entry *me = dd->msix_info.msix_entries; + + /* remove irqs - must happen before disabling/turning off */ + for (i = 0; i < dd->msix_info.max_requested; i++, me++) + msix_free_irq(dd, i); + + /* clean structures */ + kfree(dd->msix_info.msix_entries); + dd->msix_info.msix_entries = NULL; + dd->msix_info.max_requested = 0; + + pci_free_irq_vectors(dd->pcidev); +} + +/** + * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize + * @dd: valid devdata + */ +void msix_vnic_synchronize_irq(struct hfi1_devdata *dd) +{ + int i; + + for (i = 0; i < dd->vnic.num_ctxt; i++) { + struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; + struct hfi1_msix_entry *me; + + me = &dd->msix_info.msix_entries[rcd->msix_intr]; + + synchronize_irq(me->irq); + } +} diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h new file mode 100644 index 000000000000..a514881632a4 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/msix.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef _HFI1_MSIX_H +#define _HFI1_MSIX_H + +#include "hfi.h" + +/* MSIx interface */ +int msix_initialize(struct hfi1_devdata *dd); +int msix_request_irqs(struct hfi1_devdata *dd); +void msix_clean_up_interrupts(struct hfi1_devdata *dd); +int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd); +int msix_request_sdma_irq(struct sdma_engine *sde); +void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); + +/* VNIC interface */ +void msix_vnic_synchronize_irq(struct hfi1_devdata *dd); + +#endif diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index eec83757d55f..cb7fc9fb3c9e 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -61,19 +61,12 @@ */ /* - * Code to adjust PCIe capabilities. - */ -static void tune_pcie_caps(struct hfi1_devdata *); - -/* * Do all the common PCIe setup and initialization. - * devdata is not yet allocated, and is not allocated until after this - * routine returns success. Therefore dd_dev_err() can't be used for error - * printing. */ -int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) +int hfi1_pcie_init(struct hfi1_devdata *dd) { int ret; + struct pci_dev *pdev = dd->pcidev; ret = pci_enable_device(pdev); if (ret) { @@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) * about that, it appears. If the original BAR was retained * in the kernel data structures, this may be OK. */ - hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n", - -ret); - goto done; + dd_dev_err(dd, "pci enable failed: error %d\n", -ret); + return ret; } ret = pci_request_regions(pdev, DRIVER_NAME); if (ret) { - hfi1_early_err(&pdev->dev, - "pci_request_regions fails: err %d\n", -ret); + dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret); goto bail; } @@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) */ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (ret) { - hfi1_early_err(&pdev->dev, - "Unable to set DMA mask: %d\n", ret); + dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret); goto bail; } ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); @@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); } if (ret) { - hfi1_early_err(&pdev->dev, - "Unable to set DMA consistent mask: %d\n", ret); + dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret); goto bail; } pci_set_master(pdev); (void)pci_enable_pcie_error_reporting(pdev); - goto done; + return 0; bail: hfi1_pcie_cleanup(pdev); -done: return ret; } @@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) dd_dev_err(dd, "WC mapping of send buffers failed\n"); goto nomem; } - dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); + dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE); dd->physaddr = addr; /* used for io_remap, etc. */ @@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/* - * Returns: - * - actual number of interrupts allocated or - * - error - */ -int request_msix(struct hfi1_devdata *dd, u32 msireq) -{ - int nvec; - - nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX); - if (nvec < 0) { - dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); - return nvec; - } - - tune_pcie_caps(dd); - - return nvec; -} - /* restore command and BARs after a reset has wiped them out */ int restore_pci_variables(struct hfi1_devdata *dd) { @@ -479,14 +447,19 @@ error: * Check and optionally adjust them to maximize our throughput. */ static int hfi1_pcie_caps; -module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO); +module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); uint aspm_mode = ASPM_MODE_DISABLED; -module_param_named(aspm, aspm_mode, uint, S_IRUGO); +module_param_named(aspm, aspm_mode, uint, 0444); MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); -static void tune_pcie_caps(struct hfi1_devdata *dd) +/** + * tune_pcie_caps() - Code to adjust PCIe capabilities. + * @dd: Valid device data structure + * + */ +void tune_pcie_caps(struct hfi1_devdata *dd) { struct pci_dev *parent; u16 rc_mpss, rc_mps, ep_mpss, ep_mps; @@ -1032,6 +1005,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) const u8 (*ctle_tunings)[4]; uint static_ctle_mode; int return_error = 0; + u32 target_width; /* PCIe Gen3 is for the ASIC only */ if (dd->icode != ICODE_RTL_SILICON) @@ -1071,6 +1045,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) return 0; } + /* Previous Gen1/Gen2 bus width */ + target_width = dd->lbus_width; + /* * Do the Gen3 transition. Steps are those of the PCIe Gen3 * recipe. @@ -1439,11 +1416,12 @@ retry: dd_dev_info(dd, "%s: new speed and width: %s\n", __func__, dd->lbus_info); - if (dd->lbus_speed != target_speed) { /* not target */ + if (dd->lbus_speed != target_speed || + dd->lbus_width < target_width) { /* not target */ /* maybe retry */ do_retry = retry_count < pcie_retry; - dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n", - pcie_target, do_retry ? ", retrying" : ""); + dd_dev_err(dd, "PCIe link speed or width did not match target%s\n", + do_retry ? ", retrying" : ""); retry_count++; if (do_retry) { msleep(100); /* allow time to settle */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9b1e84a6b1cc..54d9ff171059 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -285,17 +285,13 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, * hfi1_check_send_wqe - validate wqe * @qp - The qp * @wqe - The built wqe - * - * validate wqe. This is called - * prior to inserting the wqe into - * the ring but after the wqe has been - * setup. + * @call_send - Determine if the send should be posted or scheduled. * * Returns 0 on success, -EINVAL on failure * */ int hfi1_check_send_wqe(struct rvt_qp *qp, - struct rvt_swqe *wqe) + struct rvt_swqe *wqe, bool *call_send) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct rvt_ah *ah; @@ -305,6 +301,8 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; + if (wqe->length > qp->pmtu) + *call_send = false; break; case IB_QPT_SMI: ah = ibah_to_rvtah(wqe->ud_wr.ah); @@ -321,7 +319,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, default: break; } - return wqe->length <= piothreshold; + return 0; } /** diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 88e326d6cc49..7a9b67e82a96 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2444,7 +2444,7 @@ nodesc: * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit - * @count: pointer to a u32 which, after return will contain the total number of + * @count: pointer to a u16 which, after return will contain the total number of * sdma_txreqs removed from the tx_list. This will include sdma_txreqs * whose SDMA descriptors are submitted to the ring and the sdma_txreqs * which are added to SDMA engine flush list if the SDMA engine state is @@ -2468,7 +2468,7 @@ nodesc: * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list, u32 *count_out) + struct list_head *tx_list, u16 *count_out) { struct sdma_txreq *tx, *tx_next; int ret = 0; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 46c775f255d1..c076eef081e8 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -62,16 +62,6 @@ /* Hardware limit for SDMA packet size */ #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1) -#define SDMA_TXREQ_S_OK 0 -#define SDMA_TXREQ_S_SENDERROR 1 -#define SDMA_TXREQ_S_ABORTED 2 -#define SDMA_TXREQ_S_SHUTDOWN 3 - -/* flags bits */ -#define SDMA_TXREQ_F_URGENT 0x0001 -#define SDMA_TXREQ_F_AHG_COPY 0x0002 -#define SDMA_TXREQ_F_USE_AHG 0x0004 - #define SDMA_MAP_NONE 0 #define SDMA_MAP_SINGLE 1 #define SDMA_MAP_PAGE 2 @@ -415,6 +405,7 @@ struct sdma_engine { struct list_head flushlist; struct cpumask cpu_mask; struct kobject kobj; + u32 msix_intr; }; int sdma_init(struct hfi1_devdata *dd, u8 port); @@ -858,7 +849,7 @@ int sdma_send_txreq(struct sdma_engine *sde, int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, struct list_head *tx_list, - u32 *count); + u16 *count_out); int sdma_ahg_alloc(struct sdma_engine *sde); void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a3a7b33196d6..825e475dc9fe 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 static unsigned initial_pkt_count = 8; -static int user_sdma_send_pkts(struct user_sdma_request *req, - unsigned maxpkts); +static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); @@ -187,7 +186,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; pq->n_max_reqs = hfi1_sdma_comp_ring_size; - pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); atomic_set(&pq->n_locked, 0); @@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, - (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); + !atomic_read(&pq->n_reqs)); kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); @@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid) return mapping[hash]; } +/** + * hfi1_user_sdma_process_request() - Process and start a user sdma request + * @fd: valid file descriptor + * @iovec: array of io vectors to process + * @dim: overall iovec array size + * @count: number of io vector array entries processed + */ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count) @@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, u8 opcode, sc, vl; u16 pkey; u32 slid; - int req_queued = 0; u16 dlid; u32 selector; @@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->data_len = 0; req->pq = pq; req->cq = cq; - req->status = -1; req->ahg_idx = -1; req->iov_idx = 0; req->sent = 0; @@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqcomp = 0; req->seqsubmitted = 0; req->tids = NULL; - req->done = 0; req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); + /* The request is initialized, count it */ + atomic_inc(&pq->n_reqs); + if (req_opcode(info.ctrl) == EXPECTED) { /* expected must have a TID info and at least one data vector */ if (req->data_iovs < 2) { @@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { req->data_iovs = i; - req->status = ret; goto free_req; } req->data_len += req->iovs[i].iov.iov_len; @@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); - atomic_inc(&pq->n_reqs); - req_queued = 1; + pq->state = SDMA_PKT_Q_ACTIVE; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); - if (unlikely(ret < 0 && ret != -EBUSY)) { - req->status = ret; + if (unlikely(ret < 0 && ret != -EBUSY)) goto free_req; - } - - /* - * It is possible that the SDMA engine would have processed all the - * submitted packets by the time we get here. Therefore, only set - * packet queue state to ACTIVE if there are still uncompleted - * requests. - */ - if (atomic_read(&pq->n_reqs)) - xchg(&pq->state, SDMA_PKT_Q_ACTIVE); /* * This is a somewhat blocking send implementation. @@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { - if (ret != -EBUSY) { - req->status = ret; - WRITE_ONCE(req->has_error, 1); - if (READ_ONCE(req->seqcomp) == - req->seqsubmitted - 1) - goto free_req; - return ret; - } + if (ret != -EBUSY) + goto free_req; wait_event_interruptible_timeout( pq->busy.wait_dma, (pq->state == SDMA_PKT_Q_ACTIVE), @@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, *count += idx; return 0; free_req: - user_sdma_free_request(req, true); - if (req_queued) + /* + * If the submitted seqsubmitted == npkts, the completion routine + * controls the final state. If sequbmitted < npkts, wait for any + * outstanding packets to finish before cleaning up. + */ + if (req->seqsubmitted < req->info.npkts) { + if (req->seqsubmitted) + wait_event(pq->busy.wait_dma, + (req->seqcomp == req->seqsubmitted - 1)); + user_sdma_free_request(req, true); pq_update(pq); - set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); + set_comp_state(pq, cq, info.comp_idx, ERROR, ret); + } return ret; } @@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req, return ret; } -static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) +static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) { - int ret = 0, count; + int ret = 0; + u16 count; unsigned npkts = 0; struct user_sdma_txreq *tx = NULL; struct hfi1_user_sdma_pkt_q *pq = NULL; @@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) changes = set_txreq_header_ahg(req, tx, datalen); - if (changes < 0) + if (changes < 0) { + ret = changes; goto free_tx; + } } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + @@ -917,7 +915,6 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1365,11 +1362,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, return idx; } -/* - * SDMA tx request completion callback. Called when the SDMA progress - * state machine gets notification that the SDMA descriptors for this - * tx request have been processed by the DMA engine. Called in - * interrupt context. +/** + * user_sdma_txreq_cb() - SDMA tx request completion callback. + * @txreq: valid sdma tx request + * @status: success/failure of request + * + * Called when the SDMA progress state machine gets notification that + * the SDMA descriptors for this tx request have been processed by the + * DMA engine. Called in interrupt context. + * Only do work on completed sequences. */ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) { @@ -1378,7 +1379,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) struct user_sdma_request *req; struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; - u16 idx; + enum hfi1_sdma_comp_state state = COMPLETE; if (!tx->req) return; @@ -1391,39 +1392,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) SDMA_DBG(req, "SDMA completion with error %d", status); WRITE_ONCE(req->has_error, 1); + state = ERROR; } req->seqcomp = tx->seqnum; kmem_cache_free(pq->txreq_cache, tx); - tx = NULL; - - idx = req->info.comp_idx; - if (req->status == -1 && status == SDMA_TXREQ_S_OK) { - if (req->seqcomp == req->info.npkts - 1) { - req->status = 0; - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, COMPLETE, 0); - } - } else { - if (status != SDMA_TXREQ_S_OK) - req->status = status; - if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) && - (READ_ONCE(req->done) || - READ_ONCE(req->has_error))) { - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, ERROR, req->status); - } - } + + /* sequence isn't complete? We are done */ + if (req->seqcomp != req->info.npkts - 1) + return; + + user_sdma_free_request(req, false); + set_comp_state(pq, cq, req->info.comp_idx, state, status); + pq_update(pq); } static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) { - if (atomic_dec_and_test(&pq->n_reqs)) { - xchg(&pq->state, SDMA_PKT_Q_INACTIVE); + if (atomic_dec_and_test(&pq->n_reqs)) wake_up(&pq->wait); - } } static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) @@ -1448,6 +1435,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) if (!node) continue; + req->iovs[i].node = NULL; + if (unpin) hfi1_mmu_rb_remove(req->pq->handler, &node->rb); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index d2bc77f75253..14dfd757dafd 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size, #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) +enum pkt_q_sdma_state { + SDMA_PKT_Q_ACTIVE, + SDMA_PKT_Q_DEFERRED, +}; /* * Maximum retry attempts to submit a TX request @@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q { struct user_sdma_request *reqs; unsigned long *req_in_use; struct iowait busy; - unsigned state; + enum pkt_q_sdma_state state; wait_queue_head_t wait; unsigned long unpinned; struct mmu_rb_handler *handler; @@ -203,14 +204,12 @@ struct user_sdma_request { s8 ahg_idx; /* Writeable fields shared with interrupt */ - u64 seqcomp ____cacheline_aligned_in_smp; - u64 seqsubmitted; - /* status of the last txreq completed */ - int status; + u16 seqcomp ____cacheline_aligned_in_smp; + u16 seqsubmitted; /* Send side fields */ struct list_head txps ____cacheline_aligned_in_smp; - u64 seqnum; + u16 seqnum; /* * KDETH.OFFSET (TID) field * The offset can cover multiple packets, depending on the @@ -228,7 +227,6 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; - u8 done; u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; @@ -248,7 +246,7 @@ struct user_sdma_txreq { struct user_sdma_request *req; u16 flags; unsigned int busycount; - u64 seqnum; + u16 seqnum; }; int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index a4d06502f06d..269ec338581b 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -343,7 +343,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); -int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); +int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c643d80c5a53..c9876d9e3cb9 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, uctxt->seq_cnt = 1; uctxt->is_vnic = true; - hfi1_set_vnic_msix_info(uctxt); + msix_request_rcd_irq(uctxt); hfi1_stats.sps_ctxts++; dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); @@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); - hfi1_reset_vnic_msix_info(uctxt); - /* * Disable receive context and interrupt available, reset all * RcvCtxtCtrl bits to default values. @@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); + /* msix_intr will always be > 0, only clean up if this is true */ + if (uctxt->msix_intr) + msix_free_irq(dd, uctxt->msix_intr); + uctxt->event_flags = 0; hfi1_clear_tids(uctxt); @@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); /* ensure irqs see the change */ - hfi1_vnic_synchronize_irq(dd); + msix_vnic_synchronize_irq(dd); /* remove unread skbs */ for (i = 0; i < vinfo->num_rx_q; i++) { @@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) rc = hfi1_vnic_txreq_init(dd); if (rc) goto txreq_fail; - - dd->vnic.msix_idx = dd->first_dyn_msix_idx; } for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9a24fd0ee3e7..d443e26b67d1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -219,19 +219,11 @@ struct hns_roce_uar { unsigned long logic_idx; }; -struct hns_roce_vma_data { - struct list_head list; - struct vm_area_struct *vma; - struct mutex *vma_list_mutex; -}; - struct hns_roce_ucontext { struct ib_ucontext ibucontext; struct hns_roce_uar uar; struct list_head page_list; struct mutex page_mutex; - struct list_head vma_list; - struct mutex vma_list_mutex; }; struct hns_roce_pd { @@ -738,6 +730,7 @@ struct hns_roce_work { struct hns_roce_dev *hr_dev; struct work_struct work; u32 qpn; + u32 cqn; int event_type; int sub_type; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0218c0f8c2a7..7ccf3774eb71 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3995,13 +3995,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work) { struct hns_roce_work *irq_work = container_of(work, struct hns_roce_work, work); + struct device *dev = irq_work->hr_dev->dev; u32 qpn = irq_work->qpn; + u32 cqn = irq_work->cqn; switch (irq_work->event_type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + dev_info(dev, "Path migrated succeeded.\n"); + break; + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + dev_warn(dev, "Path migration failed.\n"); + break; + case HNS_ROCE_EVENT_TYPE_COMM_EST: + dev_info(dev, "Communication established.\n"); + break; + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + dev_warn(dev, "Send queue drained.\n"); + break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + dev_err(dev, "Local work queue catastrophic error.\n"); + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + switch (irq_work->sub_type) { + case HNS_ROCE_LWQCE_QPC_ERROR: + dev_err(dev, "QP %d, QPC error.\n", qpn); + break; + case HNS_ROCE_LWQCE_MTU_ERROR: + dev_err(dev, "QP %d, MTU error.\n", qpn); + break; + case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: + dev_err(dev, "QP %d, WQE BA addr error.\n", qpn); + break; + case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: + dev_err(dev, "QP %d, WQE addr error.\n", qpn); + break; + case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: + dev_err(dev, "QP %d, WQE shift error.\n", qpn); + break; + default: + dev_err(dev, "Unhandled sub_event type %d.\n", + irq_work->sub_type); + break; + } + break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + dev_err(dev, "Invalid request local work queue error.\n"); + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + dev_err(dev, "Local access violation work queue error.\n"); hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + switch (irq_work->sub_type) { + case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: + dev_err(dev, "QP %d, R_key violation.\n", qpn); + break; + case HNS_ROCE_LAVWQE_LENGTH_ERROR: + dev_err(dev, "QP %d, length error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_VA_ERROR: + dev_err(dev, "QP %d, VA error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_PD_ERROR: + dev_err(dev, "QP %d, PD error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_RW_ACC_ERROR: + dev_err(dev, "QP %d, rw acc error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: + dev_err(dev, "QP %d, key state error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: + dev_err(dev, "QP %d, MR operation error.\n", qpn); + break; + default: + dev_err(dev, "Unhandled sub_event type %d.\n", + irq_work->sub_type); + break; + } + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + dev_warn(dev, "SRQ limit reach.\n"); + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + dev_warn(dev, "SRQ last wqe reach.\n"); + break; + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + dev_err(dev, "SRQ catas error.\n"); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + dev_err(dev, "CQ 0x%x access err.\n", cqn); + break; + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + dev_warn(dev, "CQ 0x%x overflow\n", cqn); + break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + dev_warn(dev, "DB overflow.\n"); + break; + case HNS_ROCE_EVENT_TYPE_FLR: + dev_warn(dev, "Function level reset.\n"); break; default: break; @@ -4011,7 +4101,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work) } static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, u32 qpn) + struct hns_roce_eq *eq, + u32 qpn, u32 cqn) { struct hns_roce_work *irq_work; @@ -4022,6 +4113,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; irq_work->qpn = qpn; + irq_work->cqn = cqn; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; queue_work(hr_dev->irq_workq, &(irq_work->work)); @@ -4058,124 +4150,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) hns_roce_write64_k(doorbell, eq->doorbell); } -static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, - u32 qpn) -{ - struct device *dev = hr_dev->dev; - int sub_type; - - dev_warn(dev, "Local work queue catastrophic error.\n"); - sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, - HNS_ROCE_V2_AEQE_SUB_TYPE_S); - switch (sub_type) { - case HNS_ROCE_LWQCE_QPC_ERROR: - dev_warn(dev, "QP %d, QPC error.\n", qpn); - break; - case HNS_ROCE_LWQCE_MTU_ERROR: - dev_warn(dev, "QP %d, MTU error.\n", qpn); - break; - case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: - dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn); - break; - case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: - dev_warn(dev, "QP %d, WQE addr error.\n", qpn); - break; - case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: - dev_warn(dev, "QP %d, WQE shift error.\n", qpn); - break; - default: - dev_err(dev, "Unhandled sub_event type %d.\n", sub_type); - break; - } -} - -static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, u32 qpn) -{ - struct device *dev = hr_dev->dev; - int sub_type; - - dev_warn(dev, "Local access violation work queue error.\n"); - sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, - HNS_ROCE_V2_AEQE_SUB_TYPE_S); - switch (sub_type) { - case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: - dev_warn(dev, "QP %d, R_key violation.\n", qpn); - break; - case HNS_ROCE_LAVWQE_LENGTH_ERROR: - dev_warn(dev, "QP %d, length error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_VA_ERROR: - dev_warn(dev, "QP %d, VA error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_PD_ERROR: - dev_err(dev, "QP %d, PD error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_RW_ACC_ERROR: - dev_warn(dev, "QP %d, rw acc error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: - dev_warn(dev, "QP %d, key state error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: - dev_warn(dev, "QP %d, MR operation error.\n", qpn); - break; - default: - dev_err(dev, "Unhandled sub_event type %d.\n", sub_type); - break; - } -} - -static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, - int event_type, u32 qpn) -{ - struct device *dev = hr_dev->dev; - - switch (event_type) { - case HNS_ROCE_EVENT_TYPE_COMM_EST: - dev_warn(dev, "Communication established.\n"); - break; - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - dev_warn(dev, "Send queue drained.\n"); - break; - case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn); - break; - case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - dev_warn(dev, "Invalid request local work queue error.\n"); - break; - case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn); - break; - default: - break; - } - - hns_roce_qp_event(hr_dev, qpn, event_type); -} - -static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, - int event_type, u32 cqn) -{ - struct device *dev = hr_dev->dev; - - switch (event_type) { - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - dev_warn(dev, "CQ 0x%x access err.\n", cqn); - break; - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - dev_warn(dev, "CQ 0x%x overflow\n", cqn); - break; - default: - break; - } - - hns_roce_cq_event(hr_dev, cqn, event_type); -} - static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) { u32 buf_chk_sz; @@ -4251,31 +4225,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: - dev_warn(dev, "Path migrated succeeded.\n"); - break; case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - dev_warn(dev, "Path migration failed.\n"); - break; case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, - qpn); + hns_roce_qp_event(hr_dev, qpn, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - dev_warn(dev, "SRQ not support.\n"); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, - cqn); + hns_roce_cq_event(hr_dev, cqn, event_type); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - dev_warn(dev, "DB overflow.\n"); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -4284,10 +4250,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, le64_to_cpu(aeqe->event.cmd.out_param)); break; case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - dev_warn(dev, "CEQ overflow.\n"); break; case HNS_ROCE_EVENT_TYPE_FLR: - dev_warn(dev, "Function level reset.\n"); break; default: dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", @@ -4304,7 +4268,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, dev_warn(dev, "cons_index overflow, set back to 0.\n"); eq->cons_index = 0; } - hns_roce_v2_init_irq_work(hr_dev, eq, qpn); + hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); } set_eq_cons_index_v2(eq); @@ -5125,6 +5089,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) create_singlethread_workqueue("hns_roce_irq_workqueue"); if (!hr_dev->irq_workq) { dev_err(dev, "Create irq workqueue failed!\n"); + ret = -ENOMEM; goto err_request_irq_fail; } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c5cae9a38c04..6edb547baee8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -344,8 +344,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, if (ret) goto error_fail_uar_alloc; - INIT_LIST_HEAD(&context->vma_list); - mutex_init(&context->vma_list_mutex); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { INIT_LIST_HEAD(&context->page_list); mutex_init(&context->page_mutex); @@ -376,76 +374,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) return 0; } -static void hns_roce_vma_open(struct vm_area_struct *vma) -{ - vma->vm_ops = NULL; -} - -static void hns_roce_vma_close(struct vm_area_struct *vma) -{ - struct hns_roce_vma_data *vma_data; - - vma_data = (struct hns_roce_vma_data *)vma->vm_private_data; - vma_data->vma = NULL; - mutex_lock(vma_data->vma_list_mutex); - list_del(&vma_data->list); - mutex_unlock(vma_data->vma_list_mutex); - kfree(vma_data); -} - -static const struct vm_operations_struct hns_roce_vm_ops = { - .open = hns_roce_vma_open, - .close = hns_roce_vma_close, -}; - -static int hns_roce_set_vma_data(struct vm_area_struct *vma, - struct hns_roce_ucontext *context) -{ - struct list_head *vma_head = &context->vma_list; - struct hns_roce_vma_data *vma_data; - - vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL); - if (!vma_data) - return -ENOMEM; - - vma_data->vma = vma; - vma_data->vma_list_mutex = &context->vma_list_mutex; - vma->vm_private_data = vma_data; - vma->vm_ops = &hns_roce_vm_ops; - - mutex_lock(&context->vma_list_mutex); - list_add(&vma_data->list, vma_head); - mutex_unlock(&context->vma_list_mutex); - - return 0; -} - static int hns_roce_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct hns_roce_dev *hr_dev = to_hr_dev(context->device); - if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) - return -EINVAL; + switch (vma->vm_pgoff) { + case 0: + return rdma_user_mmap_io(context, vma, + to_hr_ucontext(context)->uar.pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot)); + + /* vm_pgoff: 1 -- TPTR */ + case 1: + if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) + return -EINVAL; + /* + * FIXME: using io_remap_pfn_range on the dma address returned + * by dma_alloc_coherent is totally wrong. + */ + return rdma_user_mmap_io(context, vma, + hr_dev->tptr_dma_addr >> PAGE_SHIFT, + hr_dev->tptr_size, + vma->vm_page_prot); - if (vma->vm_pgoff == 0) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (io_remap_pfn_range(vma, vma->vm_start, - to_hr_ucontext(context)->uar.pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - } else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr && - hr_dev->tptr_size) { - /* vm_pgoff: 1 -- TPTR */ - if (io_remap_pfn_range(vma, vma->vm_start, - hr_dev->tptr_dma_addr >> PAGE_SHIFT, - hr_dev->tptr_size, - vma->vm_page_prot)) - return -EAGAIN; - } else + default: return -EINVAL; - - return hns_roce_set_vma_data(vma, to_hr_ucontext(context)); + } } static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, @@ -471,21 +427,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext) { - struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); - struct hns_roce_vma_data *vma_data, *n; - struct vm_area_struct *vma; - - mutex_lock(&context->vma_list_mutex); - list_for_each_entry_safe(vma_data, n, &context->vma_list, list) { - vma = vma_data->vma; - zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); - - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); - vma->vm_ops = NULL; - list_del(&vma_data->list); - kfree(vma_data); - } - mutex_unlock(&context->vma_list_mutex); } static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 423818a7d333..771eb6bd0785 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, unsigned long flags; rtnl_lock(); - for_each_netdev_rcu(&init_net, ip_dev) { + for_each_netdev(&init_net, ip_dev) { if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) && (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) || (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ca0f1ee26091..bf3cdb88aaf5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1138,144 +1138,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) return 0; } -static void mlx4_ib_vma_open(struct vm_area_struct *area) -{ - /* vma_open is called when a new VMA is created on top of our VMA. - * This is done through either mremap flow or split_vma (usually due - * to mlock, madvise, munmap, etc.). We do not support a clone of the - * vma, as this VMA is strongly hardware related. Therefore we set the - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from - * calling us again and trying to do incorrect actions. We assume that - * the original vma size is exactly a single page that there will be no - * "splitting" operations on. - */ - area->vm_ops = NULL; -} - -static void mlx4_ib_vma_close(struct vm_area_struct *area) -{ - struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data; - - /* It's guaranteed that all VMAs opened on a FD are closed before the - * file itself is closed, therefore no sync is needed with the regular - * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync - * with accessing the vma as part of mlx4_ib_disassociate_ucontext. - * The close operation is usually called under mm->mmap_sem except when - * process is exiting. The exiting case is handled explicitly as part - * of mlx4_ib_disassociate_ucontext. - */ - mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *) - area->vm_private_data; - - /* set the vma context pointer to null in the mlx4_ib driver's private - * data to protect against a race condition in mlx4_ib_dissassociate_ucontext(). - */ - mlx4_ib_vma_priv_data->vma = NULL; -} - -static const struct vm_operations_struct mlx4_ib_vm_ops = { - .open = mlx4_ib_vma_open, - .close = mlx4_ib_vma_close -}; - static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { - int i; - struct vm_area_struct *vma; - struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); - - /* need to protect from a race on closing the vma as part of - * mlx4_ib_vma_close(). - */ - for (i = 0; i < HW_BAR_COUNT; i++) { - vma = context->hw_bar_info[i].vma; - if (!vma) - continue; - - zap_vma_ptes(context->hw_bar_info[i].vma, - context->hw_bar_info[i].vma->vm_start, PAGE_SIZE); - - context->hw_bar_info[i].vma->vm_flags &= - ~(VM_SHARED | VM_MAYSHARE); - /* context going to be destroyed, should not access ops any more */ - context->hw_bar_info[i].vma->vm_ops = NULL; - } -} - -static void mlx4_ib_set_vma_data(struct vm_area_struct *vma, - struct mlx4_ib_vma_private_data *vma_private_data) -{ - vma_private_data->vma = vma; - vma->vm_private_data = vma_private_data; - vma->vm_ops = &mlx4_ib_vm_ops; } static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); - struct mlx4_ib_ucontext *mucontext = to_mucontext(context); - - if (vma->vm_end - vma->vm_start != PAGE_SIZE) - return -EINVAL; - - if (vma->vm_pgoff == 0) { - /* We prevent double mmaping on same context */ - if (mucontext->hw_bar_info[HW_BAR_DB].vma) - return -EINVAL; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (io_remap_pfn_range(vma, vma->vm_start, - to_mucontext(context)->uar.pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]); + switch (vma->vm_pgoff) { + case 0: + return rdma_user_mmap_io(context, vma, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot)); - } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { - /* We prevent double mmaping on same context */ - if (mucontext->hw_bar_info[HW_BAR_BF].vma) + case 1: + if (dev->dev->caps.bf_reg_size == 0) return -EINVAL; + return rdma_user_mmap_io( + context, vma, + to_mucontext(context)->uar.pfn + + dev->dev->caps.num_uars, + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - - if (io_remap_pfn_range(vma, vma->vm_start, - to_mucontext(context)->uar.pfn + - dev->dev->caps.num_uars, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]); - - } else if (vma->vm_pgoff == 3) { + case 3: { struct mlx4_clock_params params; int ret; - /* We prevent double mmaping on same context */ - if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma) - return -EINVAL; - ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); - if (ret) return ret; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (io_remap_pfn_range(vma, vma->vm_start, - (pci_resource_start(dev->dev->persist->pdev, - params.bar) + - params.offset) - >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx4_ib_set_vma_data(vma, - &mucontext->hw_bar_info[HW_BAR_CLOCK]); - } else { - return -EINVAL; + return rdma_user_mmap_io( + context, vma, + (pci_resource_start(dev->dev->persist->pdev, + params.bar) + + params.offset) >> + PAGE_SHIFT, + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); } - return 0; + default: + return -EINVAL; + } } static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 81ffc007e0a1..d844831179cf 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) if (!list_empty(&group->pending_list)) req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - if ((method == IB_MGMT_METHOD_GET_RESP)) { + if (method == IB_MGMT_METHOD_GET_RESP) { if (req) { send_reply_to_slave(req->func, group, &req->sa_mad, status); --group->func[req->func].num_pend_reqs; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e10dccc7958f..8850dfc3826d 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -80,16 +80,11 @@ enum hw_bar_type { HW_BAR_COUNT }; -struct mlx4_ib_vma_private_data { - struct vm_area_struct *vma; -}; - struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; struct list_head db_page_list; struct mutex db_page_mutex; - struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; struct list_head wqn_ranges_list; struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */ }; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 1a29f47f836e..4ee4af450720 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -7,7 +7,9 @@ #include <rdma/ib_verbs.h> #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> +#include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_umem.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> @@ -16,6 +18,24 @@ #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> +static int +mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, + enum mlx5_flow_namespace_type *namespace) +{ + switch (table_type) { + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: + *namespace = MLX5_FLOW_NAMESPACE_BYPASS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: + *namespace = MLX5_FLOW_NAMESPACE_EGRESS; + break; + default: + return -EINVAL; + } + + return 0; +} + static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { [MLX5_IB_FLOW_TYPE_NORMAL] = { .type = UVERBS_ATTR_TYPE_PTR_IN, @@ -38,11 +58,15 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { }, }; +#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; + struct ib_uobject **arr_flow_actions; + struct ib_uflow_resources *uflow_res; void *devx_obj; int dest_id, dest_type; void *cmd_in; @@ -52,6 +76,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + int len, ret, i; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -61,7 +86,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( dest_qp = uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS && + ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) + return -EINVAL; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && + (dest_devx || dest_qp)) return -EINVAL; if (dest_devx) { @@ -75,7 +107,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( */ if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) return -EINVAL; - } else { + } else if (dest_qp) { struct mlx5_ib_qp *mqp; qp = uverbs_attr_get_obj(attrs, @@ -92,6 +124,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( else dest_id = mqp->raw_packet_qp.rq.tirn; dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } else { + dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } if (dev->rep) @@ -101,16 +135,38 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); inlen = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - fs_matcher = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, + + uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); + if (!uflow_res) + return -ENOMEM; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); + for (i = 0; i < len; i++) { + struct mlx5_ib_flow_action *maction = + to_mflow_act(arr_flow_actions[i]->object); + + ret = parse_flow_flow_action(maction, false, &flow_act); + if (ret) + goto err_out; + flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, + arr_flow_actions[i]->object); + } + + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + cmd_in, inlen, dest_id, dest_type); - if (IS_ERR(flow_handler)) - return PTR_ERR(flow_handler); + if (IS_ERR(flow_handler)) { + ret = PTR_ERR(flow_handler); + goto err_out; + } - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); return 0; +err_out: + ib_uverbs_flow_resources_free(uflow_res); + return ret; } static int flow_matcher_cleanup(struct ib_uobject *uobject, @@ -134,12 +190,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); struct mlx5_ib_flow_matcher *obj; + u32 flags; int err; obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); if (!obj) return -ENOMEM; + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; obj->mask_len = uverbs_attr_get_len( attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); err = uverbs_copy_from(&obj->matcher_mask, @@ -165,6 +223,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + goto end; + + if (flags) { + err = mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); + if (err) + goto end; + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -175,6 +246,248 @@ end: return err; } +void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + switch (maction->flow_action_raw.sub_type) { + case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: + mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.action_id); + break; + case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: + mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.action_id); + break; + case MLX5_IB_FLOW_ACTION_DECAP: + break; + default: + break; + } +} + +static struct ib_flow_action * +mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, + enum mlx5_ib_uapi_flow_table_type ft_type, + u8 num_actions, void *in) +{ + enum mlx5_flow_namespace_type namespace; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ERR_PTR(-EINVAL); + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return ERR_PTR(-ENOMEM); + + ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in, + &maction->flow_action_raw.action_id); + + if (ret) { + kfree(maction); + return ERR_PTR(ret); + } + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; + maction->flow_action_raw.dev = dev; + + return &maction->ib_action; +} + +static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + enum mlx5_ib_uapi_flow_table_type ft_type; + struct ib_flow_action *action; + size_t num_actions; + void *in; + int len; + int ret; + + if (!mlx5_ib_modify_header_supported(mdev)) + return -EOPNOTSUPP; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + + if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) + return -EINVAL; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); + if (ret) + return ret; + + num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); + if (IS_ERR(action)) + return PTR_ERR(action); + + uverbs_flow_action_fill_action(action, uobj, uobj->context->device, + IB_FLOW_ACTION_UNSPECIFIED); + + return 0; +} + +static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, + u8 packet_reformat_type, + u8 ft_type) +{ + switch (packet_reformat_type) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE(ibdev->mdev, + encap_general_header); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, + reformat_l2_to_l3_tunnel); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, + reformat_l3_tunnel_to_l2); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); + break; + default: + break; + } + + return false; +} + +static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) +{ + switch (dv_prt) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_ib_flow_action_create_packet_reformat_ctx( + struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_action *maction, + u8 ft_type, u8 dv_prt, + void *in, size_t len) +{ + enum mlx5_flow_namespace_type namespace; + u8 prm_prt; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ret; + + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); + if (ret) + return ret; + + ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace, + &maction->flow_action_raw.action_id); + if (ret) + return ret; + + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; + maction->flow_action_raw.dev = dev; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; + enum mlx5_ib_uapi_flow_table_type ft_type; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); + if (ret) + return ret; + + ret = uverbs_get_const(&dv_prt, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); + if (ret) + return ret; + + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) + return -EOPNOTSUPP; + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return -ENOMEM; + + if (dv_prt == + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_DECAP; + maction->flow_action_raw.dev = mdev; + } else { + void *in; + int len; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto free_maction; + } + + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, + maction, ft_type, dv_prt, in, len); + if (ret) + goto free_maction; + } + + uverbs_flow_action_fill_action(&maction->ib_action, uobj, + uobj->context->device, + IB_FLOW_ACTION_UNSPECIFIED); + return 0; + +free_maction: + kfree(maction); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_CREATE_FLOW, UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, @@ -195,7 +508,12 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ACCESS_READ), UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ)); + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_READ, 1, + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, @@ -210,6 +528,44 @@ ADD_UVERBS_METHODS(mlx5_ib_fs, &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( + set_action_in_add_action_in_auto)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, + UVERBS_ATTR_MIN_SIZE(1), + UA_ALLOC_AND_COPY, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + enum mlx5_ib_uapi_flow_action_packet_reformat_type, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +ADD_UVERBS_METHODS( + mlx5_ib_flow_actions, + UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); + +DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_FLOW_MATCHER_CREATE, UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, MLX5_IB_OBJECT_FLOW_MATCHER, @@ -224,7 +580,10 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, UVERBS_ATTR_TYPE(u8), - UA_MANDATORY)); + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + enum ib_flow_flags, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, @@ -247,6 +606,7 @@ int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) root[i++] = &flow_objects; root[i++] = &mlx5_ib_fs; + root[i++] = &mlx5_ib_flow_actions; return i; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 131a1286a767..853574345d91 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1778,8 +1778,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_mdev; } - INIT_LIST_HEAD(&context->vma_private_list); - mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1855,6 +1853,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); + if (mlx5_lag_is_active(dev->mdev)) { + u8 port = mlx5_core_native_port_num(dev->mdev); + + atomic_set(&context->tx_port_affinity, + atomic_add_return( + 1, &dev->roce[port].tx_port_affinity)); + } + return &context->ibucontext; out_mdev: @@ -1884,6 +1890,13 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* All umem's must be destroyed before destroying the ucontext. */ + mutex_lock(&ibcontext->per_mm_list_lock); + WARN_ON(!list_empty(&ibcontext->per_mm_list)); + mutex_unlock(&ibcontext->per_mm_list_lock); +#endif + if (context->devx_uid) mlx5_ib_devx_destroy(dev, context); @@ -1929,94 +1942,9 @@ static int get_extended_index(unsigned long offset) return get_arg(offset) | ((offset >> 16) & 0xff) << 8; } -static void mlx5_ib_vma_open(struct vm_area_struct *area) -{ - /* vma_open is called when a new VMA is created on top of our VMA. This - * is done through either mremap flow or split_vma (usually due to - * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, - * as this VMA is strongly hardware related. Therefore we set the - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from - * calling us again and trying to do incorrect actions. We assume that - * the original VMA size is exactly a single page, and therefore all - * "splitting" operation will not happen to it. - */ - area->vm_ops = NULL; -} - -static void mlx5_ib_vma_close(struct vm_area_struct *area) -{ - struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; - - /* It's guaranteed that all VMAs opened on a FD are closed before the - * file itself is closed, therefore no sync is needed with the regular - * closing flow. (e.g. mlx5 ib_dealloc_ucontext) - * However need a sync with accessing the vma as part of - * mlx5_ib_disassociate_ucontext. - * The close operation is usually called under mm->mmap_sem except when - * process is exiting. - * The exiting case is handled explicitly as part of - * mlx5_ib_disassociate_ucontext. - */ - mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; - - /* setting the vma context pointer to null in the mlx5_ib driver's - * private data, to protect a race condition in - * mlx5_ib_disassociate_ucontext(). - */ - mlx5_ib_vma_priv_data->vma = NULL; - mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); - list_del(&mlx5_ib_vma_priv_data->list); - mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); - kfree(mlx5_ib_vma_priv_data); -} - -static const struct vm_operations_struct mlx5_ib_vm_ops = { - .open = mlx5_ib_vma_open, - .close = mlx5_ib_vma_close -}; - -static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, - struct mlx5_ib_ucontext *ctx) -{ - struct mlx5_ib_vma_private_data *vma_prv; - struct list_head *vma_head = &ctx->vma_private_list; - - vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); - if (!vma_prv) - return -ENOMEM; - - vma_prv->vma = vma; - vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; - vma->vm_private_data = vma_prv; - vma->vm_ops = &mlx5_ib_vm_ops; - - mutex_lock(&ctx->vma_private_list_mutex); - list_add(&vma_prv->list, vma_head); - mutex_unlock(&ctx->vma_private_list_mutex); - - return 0; -} static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { - struct vm_area_struct *vma; - struct mlx5_ib_vma_private_data *vma_private, *n; - struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); - - mutex_lock(&context->vma_private_list_mutex); - list_for_each_entry_safe(vma_private, n, &context->vma_private_list, - list) { - vma = vma_private->vma; - zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); - /* context going to be destroyed, should - * not access ops any more. - */ - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); - vma->vm_ops = NULL; - list_del(&vma_private->list); - kfree(vma_private); - } - mutex_unlock(&context->vma_private_list_mutex); } static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) @@ -2039,9 +1967,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) { - phys_addr_t pfn; - int err; - if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; @@ -2054,13 +1979,8 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (!dev->mdev->clock_info_page) return -EOPNOTSUPP; - pfn = page_to_pfn(dev->mdev->clock_info_page); - err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, - vma->vm_page_prot); - if (err) - return err; - - return mlx5_ib_set_vma_data(vma, context); + return rdma_user_mmap_page(&context->ibucontext, vma, + dev->mdev->clock_info_page, PAGE_SIZE); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, @@ -2150,21 +2070,15 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, pfn = uar_index2pfn(dev, uar_index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); - vma->vm_page_prot = prot; - err = io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot); + err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, + prot); if (err) { mlx5_ib_err(dev, - "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", + "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", err, mmap_cmd2str(cmd)); - err = -EAGAIN; goto err; } - err = mlx5_ib_set_vma_data(vma, context); - if (err) - goto err; - if (dyn_uar) bfregi->sys_pages[idx] = uar_index; return 0; @@ -2189,7 +2103,6 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) size_t map_size = vma->vm_end - vma->vm_start; u32 npages = map_size >> PAGE_SHIFT; phys_addr_t pfn; - pgprot_t prot; if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != page_idx + npages) @@ -2199,14 +2112,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> PAGE_SHIFT) + page_idx; - prot = pgprot_writecombine(vma->vm_page_prot); - vma->vm_page_prot = prot; - - if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size, - vma->vm_page_prot)) - return -EAGAIN; - - return mlx5_ib_set_vma_data(vma, mctx); + return rdma_user_mmap_io(context, vma, pfn, map_size, + pgprot_writecombine(vma->vm_page_prot)); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2481,20 +2388,50 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) offsetof(typeof(filter), field) -\ sizeof(filter.field)) -static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action) +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action) { - struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act); switch (maction->ib_action.type) { case IB_FLOW_ACTION_ESP: + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) + return -EINVAL; /* Currently only AES_GCM keymat is supported by the driver */ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? + action->action |= is_egress ? MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : MLX5_FLOW_CONTEXT_ACTION_DECRYPT; return 0; + case IB_FLOW_ACTION_UNSPECIFIED: + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + action->modify_id = maction->flow_action_raw.action_id; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_DECAP) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { + if (action->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + return -EINVAL; + action->action |= + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + action->reformat_id = + maction->flow_action_raw.action_id; + return 0; + } + /* fall through */ default: return -EOPNOTSUPP; } @@ -2831,7 +2768,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; break; case IB_FLOW_SPEC_ACTION_HANDLE: - ret = parse_flow_flow_action(ib_spec, flow_attr, action); + ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), + flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); if (ret) return ret; break; @@ -2912,7 +2850,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, * rules would be supported, always return VALID_SPEC_NA. */ if (!is_crypto) - return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; + return VALID_SPEC_NA; return is_crypto && is_ipsec && (!egress || (!is_drop && !flow_act->has_flow_tag)) ? @@ -3055,14 +2993,15 @@ enum flow_table_type { static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, struct mlx5_ib_flow_prio *prio, int priority, - int num_entries, int num_groups) + int num_entries, int num_groups, + u32 flags) { struct mlx5_flow_table *ft; ft = mlx5_create_auto_grouped_flow_table(ns, priority, num_entries, num_groups, - 0, 0); + 0, flags); if (IS_ERR(ft)) return ERR_CAST(ft); @@ -3082,26 +3021,43 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int max_table_size; int num_entries; int num_groups; + u32 flags = 0; int priority; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (ft_type == MLX5_IB_FT_TX) - priority = 0; - else if (flow_is_multicast_only(flow_attr) && - !dont_trap) + enum mlx5_flow_namespace_type fn_type; + + if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, dont_trap); - ns = mlx5_get_flow_namespace(dev->mdev, - ft_type == MLX5_IB_FT_TX ? - MLX5_FLOW_NAMESPACE_EGRESS : - MLX5_FLOW_NAMESPACE_BYPASS); + if (ft_type == MLX5_IB_FT_RX) { + fn_type = MLX5_FLOW_NAMESPACE_BYPASS; + prio = &dev->flow_db->prios[priority]; + if (!dev->rep && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (!dev->rep && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + fn_type = MLX5_FLOW_NAMESPACE_EGRESS; + prio = &dev->flow_db->egress_prios[priority]; + if (!dev->rep && + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; - prio = &dev->flow_db->prios[priority]; } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { ns = mlx5_get_flow_namespace(dev->mdev, @@ -3133,7 +3089,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = prio->flow_table; if (!ft) - return _get_prio(ns, prio, priority, num_entries, num_groups); + return _get_prio(ns, prio, priority, num_entries, num_groups, + flags); return prio; } @@ -3300,6 +3257,9 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); + if (dev->rep && is_egress) + return ERR_PTR(-EINVAL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); handler = kzalloc(sizeof(*handler), GFP_KERNEL); if (!handler || !spec) { @@ -3687,34 +3647,54 @@ free_ucmd: return ERR_PTR(err); } -static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, - int priority, bool mcast) +static struct mlx5_ib_flow_prio * +_get_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + bool mcast) { - int max_table_size; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; + int max_table_size; + u32 flags = 0; + int priority; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */ + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); if (max_table_size < MLX5_FS_MAX_ENTRIES) return ERR_PTR(-ENOMEM); if (mcast) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = ib_prio_to_core_prio(priority, false); + priority = ib_prio_to_core_prio(fs_matcher->priority, false); - ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); + ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); if (!ns) return ERR_PTR(-ENOTSUPP); - prio = &dev->flow_db->prios[priority]; + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + prio = &dev->flow_db->prios[priority]; + else + prio = &dev->flow_db->egress_prios[priority]; if (prio->flow_table) return prio; return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, - MLX5_FS_MAX_TYPES); + MLX5_FS_MAX_TYPES, flags); } static struct mlx5_ib_flow_handler * @@ -3722,10 +3702,10 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen) { struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft = ft_prio->flow_table; int err = 0; @@ -3744,9 +3724,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, fs_matcher->mask_len); spec->match_criteria_enable = fs_matcher->match_criteria_enable; - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; handler->rule = mlx5_add_flow_rules(ft, spec, - &flow_act, dst, 1); + flow_act, dst, 1); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -3808,12 +3787,12 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; - int priority = fs_matcher->priority; struct mlx5_ib_flow_handler *handler; bool mcast; int err; @@ -3831,7 +3810,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, mcast = raw_fs_is_multicast(fs_matcher, cmd_in); mutex_lock(&dev->flow_db->lock); - ft_prio = _get_flow_table(dev, priority, mcast); + ft_prio = _get_flow_table(dev, fs_matcher, mcast); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -3840,13 +3819,18 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { dst->type = dest_type; dst->tir_num = dest_id; - } else { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; dst->ft_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, - inlen); + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, + cmd_in, inlen); if (IS_ERR(handler)) { err = PTR_ERR(handler); @@ -4024,6 +4008,9 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) */ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); break; + case IB_FLOW_ACTION_UNSPECIFIED: + mlx5_ib_destroy_flow_action_raw(maction); + break; default: WARN_ON(true); break; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index f3dbd75a0a96..549234988bb4 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -57,7 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int entry; unsigned long page_shift = umem->page_shift; - if (umem->odp_data) { + if (umem->is_odp) { *ncont = ib_umem_page_count(umem); *count = *ncont << (page_shift - PAGE_SHIFT); *shift = page_shift; @@ -152,14 +152,13 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, struct scatterlist *sg; int entry; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - const bool odp = umem->odp_data != NULL; - - if (odp) { + if (umem->is_odp) { WARN_ON(shift != 0); WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); for (i = 0; i < num_pages; ++i) { - dma_addr_t pa = umem->odp_data->dma_list[offset + i]; + dma_addr_t pa = + to_ib_umem_odp(umem)->dma_list[offset + i]; pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8376408e2bc9..a28d04d4c9df 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -39,8 +39,10 @@ #include <rdma/ib_smi.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/srq.h> +#include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> @@ -114,13 +116,6 @@ enum { MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, }; -struct mlx5_ib_vma_private_data { - struct list_head list; - struct vm_area_struct *vma; - /* protect vma_private_list add/del */ - struct mutex *vma_private_list_mutex; -}; - struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -132,13 +127,12 @@ struct mlx5_ib_ucontext { u8 cqe_version; /* Transport Domain number */ u32 tdn; - struct list_head vma_private_list; - /* protect vma_private_list add/del */ - struct mutex vma_private_list_mutex; u64 lib_caps; DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); u16 devx_uid; + /* For RoCE LAG TX affinity */ + atomic_t tx_port_affinity; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -151,6 +145,12 @@ struct mlx5_ib_pd { u32 pdn; }; +enum { + MLX5_IB_FLOW_ACTION_MODIFY_HEADER, + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT, + MLX5_IB_FLOW_ACTION_DECAP, +}; + #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) #define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) @@ -180,6 +180,7 @@ struct mlx5_ib_flow_matcher { struct mlx5_ib_match_params matcher_mask; int mask_len; enum mlx5_ib_flow_type flow_type; + enum mlx5_flow_namespace_type ns_type; u16 priority; struct mlx5_core_dev *mdev; atomic_t usecnt; @@ -188,6 +189,7 @@ struct mlx5_ib_flow_matcher { struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; struct mlx5_flow_table *lag_demux_ft; @@ -699,7 +701,7 @@ struct mlx5_roce { rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; - atomic_t next_port; + atomic_t tx_port_affinity; enum ib_port_state last_port_state; struct mlx5_ib_dev *dev; u8 native_port_num; @@ -814,6 +816,11 @@ struct mlx5_ib_flow_action { u64 ib_flags; struct mlx5_accel_esp_xfrm *ctx; } esp_aes_gcm; + struct { + struct mlx5_ib_dev *dev; + u32 sub_type; + u32 action_id; + } flow_action_raw; }; }; @@ -858,6 +865,9 @@ to_mcounters(struct ib_counters *ibcntrs) return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs); } +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action); struct mlx5_ib_lb_state { /* protect the user_td */ struct mutex mutex; @@ -868,7 +878,7 @@ struct mlx5_ib_lb_state { struct mlx5_ib_dev { struct ib_device ib_dev; - const struct uverbs_object_tree_def *driver_trees[6]; + const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; @@ -1148,7 +1158,7 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, +void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, @@ -1243,9 +1253,11 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - void *cmd_in, int inlen, int dest_id, int dest_type); + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, + int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); +void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, @@ -1264,6 +1276,11 @@ mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) { return 0; } +static inline void +mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + return; +}; #endif static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9fb1d9cb9401..6aac3a107330 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -98,7 +98,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { - if (mr->umem->odp_data) { + if (mr->umem->is_odp) { /* * This barrier prevents the compiler from moving the * setting of umem->odp_data->private to point to our @@ -107,7 +107,7 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) * handle invalidations. */ smp_wmb(); - mr->umem->odp_data->private = mr; + to_ib_umem_odp(mr->umem)->private = mr; /* * Make sure we will see the new * umem->odp_data->private value in the invalidation @@ -1624,14 +1624,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) struct ib_umem *umem = mr->umem; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem && umem->odp_data) { + if (umem && umem->is_odp) { + struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); + /* Prevent new page faults from succeeding */ mr->live = 0; /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ - if (umem->odp_data->page_list) - mlx5_ib_invalidate_range(umem, ib_umem_start(umem), + if (umem_odp->page_list) + mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem), ib_umem_end(umem)); else mlx5_ib_free_implicit_mr(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d216e0d2921d..b04eb6775326 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -61,13 +61,21 @@ static int check_parent(struct ib_umem_odp *odp, return mr && mr->parent == parent && !odp->dying; } +struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) +{ + if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp)) + return NULL; + + return to_ib_umem_odp(mr->umem)->per_mm; +} + static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) { struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext *ctx = odp->umem->context; + struct ib_ucontext_per_mm *per_mm = odp->per_mm; struct rb_node *rb; - down_read(&ctx->umem_rwsem); + down_read(&per_mm->umem_rwsem); while (1) { rb = rb_next(&odp->interval_tree.rb); if (!rb) @@ -79,19 +87,19 @@ static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) not_found: odp = NULL; end: - up_read(&ctx->umem_rwsem); + up_read(&per_mm->umem_rwsem); return odp; } -static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, - u64 start, u64 length, +static struct ib_umem_odp *odp_lookup(u64 start, u64 length, struct mlx5_ib_mr *parent) { + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); struct ib_umem_odp *odp; struct rb_node *rb; - down_read(&ctx->umem_rwsem); - odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length); + down_read(&per_mm->umem_rwsem); + odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); if (!odp) goto end; @@ -102,13 +110,13 @@ static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, if (!rb) goto not_found; odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp->umem) > start + length) + if (ib_umem_start(&odp->umem) > start + length) goto not_found; } not_found: odp = NULL; end: - up_read(&ctx->umem_rwsem); + up_read(&per_mm->umem_rwsem); return odp; } @@ -116,7 +124,6 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags) { struct ib_pd *pd = mr->ibmr.pd; - struct ib_ucontext *ctx = pd->uobject->context; struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem_odp *odp; unsigned long va; @@ -131,13 +138,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } - odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, + nentries * MLX5_IMR_MTT_SIZE, mr); for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && odp->umem->address == va) { + if (odp && odp->umem.address == va) { struct mlx5_ib_mr *mtt = odp->private; pklm->key = cpu_to_be32(mtt->ibmr.lkey); @@ -153,13 +160,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, static void mr_leaf_free_action(struct work_struct *work) { struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT; + int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_release(odp->umem); + ib_umem_release(&odp->umem); if (imr->live) mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | @@ -170,22 +177,24 @@ static void mr_leaf_free_action(struct work_struct *work) wake_up(&imr->q_leaf_free); } -void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, +void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end) { struct mlx5_ib_mr *mr; const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + struct ib_umem *umem; int in_block = 0; u64 addr; - if (!umem || !umem->odp_data) { + if (!umem_odp) { pr_err("invalidation called on NULL umem or non-ODP umem\n"); return; } + umem = &umem_odp->umem; - mr = umem->odp_data->private; + mr = umem_odp->private; if (!mr || !mr->ibmr.pd) return; @@ -208,7 +217,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, * estimate the cost of another UMR vs. the cost of bigger * UMR. */ - if (umem->odp_data->dma_list[idx] & + if (umem_odp->dma_list[idx] & (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) { if (!in_block) { blk_start_idx = idx; @@ -237,13 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, * needed. */ - ib_umem_odp_unmap_dma_pages(umem, start, end); + ib_umem_odp_unmap_dma_pages(umem_odp, start, end); if (unlikely(!umem->npages && mr->parent && - !umem->odp_data->dying)) { - WRITE_ONCE(umem->odp_data->dying, 1); + !umem_odp->dying)) { + WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem->odp_data->work); + schedule_work(&umem_odp->work); } } @@ -366,16 +375,15 @@ fail: static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt) { - struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context; struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); struct ib_umem_odp *odp, *result = NULL; + struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); u64 addr = io_virt & MLX5_IMR_MTT_MASK; int nentries = 0, start_idx = 0, ret; struct mlx5_ib_mr *mtt; - struct ib_umem *umem; - mutex_lock(&mr->umem->odp_data->umem_mutex); - odp = odp_lookup(ctx, addr, 1, mr); + mutex_lock(&odp_mr->umem_mutex); + odp = odp_lookup(addr, 1, mr); mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", io_virt, bcnt, addr, odp); @@ -385,22 +393,23 @@ next_mr: if (nentries) nentries++; } else { - umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE); - if (IS_ERR(umem)) { - mutex_unlock(&mr->umem->odp_data->umem_mutex); - return ERR_CAST(umem); + odp = ib_alloc_odp_umem(odp_mr->per_mm, addr, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) { + mutex_unlock(&odp_mr->umem_mutex); + return ERR_CAST(odp); } - mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags); + mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0, + mr->access_flags); if (IS_ERR(mtt)) { - mutex_unlock(&mr->umem->odp_data->umem_mutex); - ib_umem_release(umem); + mutex_unlock(&odp_mr->umem_mutex); + ib_umem_release(&odp->umem); return ERR_CAST(mtt); } - odp = umem->odp_data; odp->private = mtt; - mtt->umem = umem; + mtt->umem = &odp->umem; mtt->mmkey.iova = addr; mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); @@ -417,7 +426,7 @@ next_mr: addr += MLX5_IMR_MTT_SIZE; if (unlikely(addr < io_virt + bcnt)) { odp = odp_next(odp); - if (odp && odp->umem->address != addr) + if (odp && odp->umem.address != addr) odp = NULL; goto next_mr; } @@ -432,7 +441,7 @@ next_mr: } } - mutex_unlock(&mr->umem->odp_data->umem_mutex); + mutex_unlock(&odp_mr->umem_mutex); return result; } @@ -460,36 +469,36 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, return imr; } -static int mr_leaf_free(struct ib_umem *umem, u64 start, - u64 end, void *cookie) +static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, + void *cookie) { - struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie; + struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; + struct ib_umem *umem = &umem_odp->umem; if (mr->parent != imr) return 0; - ib_umem_odp_unmap_dma_pages(umem, - ib_umem_start(umem), + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), ib_umem_end(umem)); - if (umem->odp_data->dying) + if (umem_odp->dying) return 0; - WRITE_ONCE(umem->odp_data->dying, 1); + WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&imr->num_leaf_free); - schedule_work(&umem->odp_data->work); + schedule_work(&umem_odp->work); return 0; } void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context; + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); - down_read(&ctx->umem_rwsem); - rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX, + down_read(&per_mm->umem_rwsem); + rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX, mr_leaf_free, true, imr); - up_read(&ctx->umem_rwsem); + up_read(&per_mm->umem_rwsem); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } @@ -497,6 +506,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped) { + struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); u64 access_mask = ODP_READ_ALLOWED_BIT; int npages = 0, page_shift, np; u64 start_idx, page_mask; @@ -505,7 +515,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, size_t size; int ret; - if (!mr->umem->odp_data->page_list) { + if (!odp_mr->page_list) { odp = implicit_mr_get_data(mr, io_virt, bcnt); if (IS_ERR(odp)) @@ -513,11 +523,11 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr = odp->private; } else { - odp = mr->umem->odp_data; + odp = odp_mr; } next_mr: - size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt); + size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt); page_shift = mr->umem->page_shift; page_mask = ~(BIT(page_shift) - 1); @@ -533,7 +543,7 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size, + ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size, access_mask, current_seq); if (ret < 0) @@ -542,7 +552,8 @@ next_mr: np = ret; mutex_lock(&odp->umem_mutex); - if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { + if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem), + current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already @@ -575,7 +586,7 @@ next_mr: io_virt += size; next = odp_next(odp); - if (unlikely(!next || next->umem->address != io_virt)) { + if (unlikely(!next || next->umem.address != io_virt)) { mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", io_virt, next); return -EAGAIN; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index fcaa5c4d6feb..1f318a47040c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2965,6 +2965,37 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return 0; } +static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, + struct mlx5_ib_pd *pd, + struct mlx5_ib_qp_base *qp_base, + u8 port_num) +{ + struct mlx5_ib_ucontext *ucontext = NULL; + unsigned int tx_port_affinity; + + if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context) + ucontext = to_mucontext(pd->ibpd.uobject->context); + + if (ucontext) { + tx_port_affinity = (unsigned int)atomic_add_return( + 1, &ucontext->tx_port_affinity) % + MLX5_MAX_PORTS + + 1; + mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n", + tx_port_affinity, qp_base->mqp.qpn, ucontext); + } else { + tx_port_affinity = + (unsigned int)atomic_add_return( + 1, &dev->roce[port_num].tx_port_affinity) % + MLX5_MAX_PORTS + + 1; + mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", + tx_port_affinity, qp_base->mqp.qpn); + } + + return tx_port_affinity; +} + static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state, @@ -3030,6 +3061,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!context) return -ENOMEM; + pd = get_pd(qp); context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { @@ -3058,9 +3090,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_XRC_TGT)) { if (mlx5_lag_is_active(dev->mdev)) { u8 p = mlx5_core_native_port_num(dev->mdev); - tx_affinity = (unsigned int)atomic_add_return(1, - &dev->roce[p].next_port) % - MLX5_MAX_PORTS + 1; + tx_affinity = get_tx_affinity(dev, pd, base, p); context->flags |= cpu_to_be32(tx_affinity << 24); } } @@ -3118,7 +3148,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, goto out; } - pd = get_pd(qp); get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, &recv_cq); @@ -3299,7 +3328,9 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new int req = IB_QP_STATE; int opt = 0; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + if (new_state == IB_QPS_RESET) { + return is_valid_mask(attr_mask, req, opt); + } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { req |= IB_QP_PKEY_INDEX | IB_QP_PORT; return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -4427,6 +4458,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, u8 next_fence = 0; u8 fence; + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); @@ -4436,13 +4473,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { - err = -EIO; - *bad_wr = wr; - nreq = 0; - goto out; - } - for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); @@ -4756,18 +4786,17 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int ind; int i; + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { - err = -EIO; - *bad_wr = wr; - nreq = 0; - goto out; - } - ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 42b68aa999fc..e00add6d78ec 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -456,9 +456,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) void __iomem *mmio_regs = NULL; u8 hw_rev; - assert(pcidev != NULL); - assert(ent != NULL); - printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n", DRV_VERSION, pci_name(pcidev)); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index bedaa02749fb..a895fe980d10 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -149,18 +149,9 @@ do { \ printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ } while (0) -#define assert(expr) \ -do { \ - if (!(expr)) { \ - printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ - #expr, __FILE__, __func__, __LINE__); \ - } \ -} while (0) - #define NES_EVENT_TIMEOUT 1200000 #else #define nes_debug(level, fmt, args...) no_printk(fmt, ##args) -#define assert(expr) do {} while (0) #define NES_EVENT_TIMEOUT 100000 #endif diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index bd0675d8f298..5517e392bc01 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1443,7 +1443,7 @@ static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_inde mdelay(1); nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - } while ((temp_phy_data2 == temp_phy_data)); + } while (temp_phy_data2 == temp_phy_data); /* wait for tracking */ counter = 0; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 61014e251555..16f33454c198 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -146,8 +146,6 @@ static int nes_netdev_open(struct net_device *netdev) struct list_head *list_pos, *list_temp; unsigned long flags; - assert(nesdev != NULL); - if (nesvnic->netdev_open == 1) return 0; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8cc3df24e04e..9d4d165014d9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1447,7 +1447,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, u64 pbl_base_addr, phy_prod_pair_addr; struct ib_ucontext *ib_ctx = NULL; struct qedr_srq_hwq_info *hw_srq; - struct qedr_ucontext *ctx = NULL; u32 page_cnt, page_size; struct qedr_srq *srq; int rc = 0; @@ -1473,7 +1472,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, if (udata && ibpd->uobject && ibpd->uobject->context) { ib_ctx = ibpd->uobject->context; - ctx = get_qedr_ucontext(ib_ctx); if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { DP_ERR(dev, diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 344e401915f7..a81905df2d0f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp) * qib_check_send_wqe - validate wr/wqe * @qp - The qp * @wqe - The built wqe + * @call_send - Determine if the send should be posted or scheduled * - * validate wr/wqe. This is called - * prior to inserting the wqe into - * the ring but after the wqe has been - * setup. - * - * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure + * Returns 0 on success, -EINVAL on failure */ int qib_check_send_wqe(struct rvt_qp *qp, - struct rvt_swqe *wqe) + struct rvt_swqe *wqe, bool *call_send) { struct rvt_ah *ah; - int ret = 0; switch (qp->ibqp.qp_type) { case IB_QPT_RC: case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; + if (wqe->length > qp->pmtu) + *call_send = false; break; case IB_QPT_SMI: case IB_QPT_GSI: @@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp, if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; /* progress hint */ - ret = 1; + *call_send = true; break; default: break; } - return ret; + return 0; } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 666613eef88f..3d7b744ae8fb 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -303,7 +303,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); -int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); +int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index 92dc66cc2d50..a3115709fb03 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -165,6 +165,5 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) { - if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) - debugfs_remove(qp_flow->dbgfs_dentry); + debugfs_remove(qp_flow->dbgfs_dentry); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 9dd39daa602b..49275a548751 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -54,18 +54,6 @@ static struct workqueue_struct *usnic_uiom_wq; ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \ (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0])) -static void usnic_uiom_reg_account(struct work_struct *work) -{ - struct usnic_uiom_reg *umem = container_of(work, - struct usnic_uiom_reg, work); - - down_write(&umem->mm->mmap_sem); - umem->mm->locked_vm -= umem->diff; - up_write(&umem->mm->mmap_sem); - mmput(umem->mm); - kfree(umem); -} - static int usnic_uiom_dma_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags, @@ -99,8 +87,9 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) } static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, - int dmasync, struct list_head *chunk_list) + int dmasync, struct usnic_uiom_reg *uiomr) { + struct list_head *chunk_list = &uiomr->chunk_list; struct page **page_list; struct scatterlist *sg; struct usnic_uiom_chunk *chunk; @@ -114,6 +103,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int flags; dma_addr_t pa; unsigned int gup_flags; + struct mm_struct *mm; /* * If the combination of the addr and size requested for this memory @@ -136,7 +126,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); + uiomr->owning_mm = mm = current->mm; + down_write(&mm->mmap_sem); locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -196,10 +187,12 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); - else - current->mm->pinned_vm = locked; + else { + mm->pinned_vm = locked; + mmgrab(uiomr->owning_mm); + } - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); free_page((unsigned long) page_list); return ret; } @@ -379,7 +372,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, uiomr->pd = pd; err = usnic_uiom_get_pages(addr, size, writable, dmasync, - &uiomr->chunk_list); + uiomr); if (err) { usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n", vpn_start, vpn_last, err); @@ -426,29 +419,39 @@ out_put_intervals: out_put_pages: usnic_uiom_put_pages(&uiomr->chunk_list, 0); spin_unlock(&pd->lock); + mmdrop(uiomr->owning_mm); out_free_uiomr: kfree(uiomr); return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, - struct ib_ucontext *ucontext) +static void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr) { - struct task_struct *task; - struct mm_struct *mm; - unsigned long diff; + mmdrop(uiomr->owning_mm); + kfree(uiomr); +} - __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); +static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr) +{ + return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; +} - task = get_pid_task(ucontext->tgid, PIDTYPE_PID); - if (!task) - goto out; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - goto out; +static void usnic_uiom_release_defer(struct work_struct *work) +{ + struct usnic_uiom_reg *uiomr = + container_of(work, struct usnic_uiom_reg, work); - diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; + down_write(&uiomr->owning_mm->mmap_sem); + uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr); + up_write(&uiomr->owning_mm->mmap_sem); + + __usnic_uiom_release_tail(uiomr); +} + +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *context) +{ + __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); /* * We may be called with the mm's mmap_sem already held. This @@ -456,25 +459,21 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, * the last reference to our file and calls our release * method. If there are memory regions to destroy, we'll end * up here and not be able to take the mmap_sem. In that case - * we defer the vm_locked accounting to the system workqueue. + * we defer the vm_locked accounting to a workqueue. */ - if (ucontext->closing) { - if (!down_write_trylock(&mm->mmap_sem)) { - INIT_WORK(&uiomr->work, usnic_uiom_reg_account); - uiomr->mm = mm; - uiomr->diff = diff; - + if (context->closing) { + if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) { + INIT_WORK(&uiomr->work, usnic_uiom_release_defer); queue_work(usnic_uiom_wq, &uiomr->work); return; } - } else - down_write(&mm->mmap_sem); + } else { + down_write(&uiomr->owning_mm->mmap_sem); + } + uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr); + up_write(&uiomr->owning_mm->mmap_sem); - mm->pinned_vm -= diff; - up_write(&mm->mmap_sem); - mmput(mm); -out: - kfree(uiomr); + __usnic_uiom_release_tail(uiomr); } struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 8c096acff123..b86a9731071b 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -71,8 +71,7 @@ struct usnic_uiom_reg { int writable; struct list_head chunk_list; struct work_struct work; - struct mm_struct *mm; - unsigned long diff; + struct mm_struct *owning_mm; }; struct usnic_uiom_chunk { diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5ce403c6cddb..a9b7d7ff32ee 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1718,7 +1718,7 @@ static inline int rvt_qp_is_avail( */ static int rvt_post_one_wr(struct rvt_qp *qp, const struct ib_send_wr *wr, - int *call_send) + bool *call_send) { struct rvt_swqe *wqe; u32 next; @@ -1825,11 +1825,9 @@ static int rvt_post_one_wr(struct rvt_qp *qp, /* general part of wqe valid - allow for driver checks */ if (rdi->driver_f.check_send_wqe) { - ret = rdi->driver_f.check_send_wqe(qp, wqe); + ret = rdi->driver_f.check_send_wqe(qp, wqe, call_send); if (ret < 0) goto bail_inval_free; - if (ret) - *call_send = ret; } log_pmtu = qp->log_pmtu; @@ -1897,7 +1895,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); unsigned long flags = 0; - int call_send; + bool call_send; unsigned nreq = 0; int err = 0; @@ -1930,7 +1928,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, bail: spin_unlock_irqrestore(&qp->s_hlock, flags); if (nreq) { - if (call_send) + /* + * Only call do_send if there is exactly one packet, and the + * driver said it was ok. + */ + if (nreq == 1 && call_send) rdi->driver_f.do_send(qp); else rdi->driver_f.schedule_send_no_lock(qp); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 83311dd07019..ea089cb091ad 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -191,6 +191,7 @@ static inline void reset_retry_counters(struct rxe_qp *qp) { qp->comp.retry_cnt = qp->attr.retry_cnt; qp->comp.rnr_retry = qp->attr.rnr_retry; + qp->comp.started_retry = 0; } static inline enum comp_state check_psn(struct rxe_qp *qp, @@ -253,6 +254,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + /* read retries of partial data may restart from + * read response first or response only. + */ + if ((pkt->psn == wqe->first_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) || + (wqe->first_psn == wqe->last_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY)) + break; + return COMPST_ERROR; } break; @@ -499,11 +511,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct rxe_send_wqe *wqe) { - qp->comp.opcode = -1; - - if (pkt) { - if (psn_compare(pkt->psn, qp->comp.psn) >= 0) - qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + if (pkt && wqe->state == wqe_state_pending) { + if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) { + qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK; + qp->comp.opcode = -1; + } if (qp->req.wait_psn) { qp->req.wait_psn = 0; @@ -676,6 +688,20 @@ int rxe_completer(void *arg) goto exit; } + /* if we've started a retry, don't start another + * retry sequence, unless this is a timeout. + */ + if (qp->comp.started_retry && + !qp->comp.timeout_retry) { + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; + } + + goto done; + } + if (qp->comp.retry_cnt > 0) { if (qp->comp.retry_cnt != 7) qp->comp.retry_cnt--; @@ -692,6 +718,7 @@ int rxe_completer(void *arg) rxe_counter_inc(rxe, RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; + qp->comp.started_retry = 1; rxe_run_task(&qp->req.task, 1); } @@ -701,7 +728,7 @@ int rxe_completer(void *arg) skb = NULL; } - goto exit; + goto done; } else { rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED); diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 87d14f7ef21b..8e305422adbb 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -144,8 +144,7 @@ void rxe_loopback(struct sk_buff *skb); int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, u32 *crc); +int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc); enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); struct device *rxe_dma_device(struct rxe_dev *rxe); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index dff605fdf60f..9d3916b93f23 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -573,33 +573,20 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, struct rxe_dev *rxe = to_rdev(pd->ibpd.device); int index = key >> 8; - if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { - mem = rxe_pool_get_index(&rxe->mr_pool, index); - if (!mem) - goto err1; - } else { - goto err1; + mem = rxe_pool_get_index(&rxe->mr_pool, index); + if (!mem) + return NULL; + + if (unlikely((type == lookup_local && mem->lkey != key) || + (type == lookup_remote && mem->rkey != key) || + mem->pd != pd || + (access && !(access & mem->access)) || + mem->state != RXE_MEM_STATE_VALID)) { + rxe_drop_ref(mem); + mem = NULL; } - if ((type == lookup_local && mem->lkey != key) || - (type == lookup_remote && mem->rkey != key)) - goto err2; - - if (mem->pd != pd) - goto err2; - - if (access && !(access & mem->access)) - goto err2; - - if (mem->state != RXE_MEM_STATE_VALID) - goto err2; - return mem; - -err2: - rxe_drop_ref(mem); -err1: - return NULL; } int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 8094cbaa54a9..ed823cccca48 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -182,19 +182,11 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, #endif -static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, +static struct dst_entry *rxe_find_route(struct net_device *ndev, struct rxe_qp *qp, struct rxe_av *av) { - const struct ib_gid_attr *attr; struct dst_entry *dst = NULL; - struct net_device *ndev; - - attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num, - av->grh.sgid_index); - if (IS_ERR(attr)) - return NULL; - ndev = attr->ndev; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); @@ -229,7 +221,6 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, sk_dst_set(qp->sk->sk, dst); } } - rdma_put_gid_attr(attr); return dst; } @@ -377,8 +368,8 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, struct rxe_av *av) +static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, + struct rxe_av *av) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; @@ -387,7 +378,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; - dst = rxe_find_route(rxe, qp, av); + dst = rxe_find_route(skb->dev, qp, av); if (!dst) { pr_err("Host not reachable\n"); return -EHOSTUNREACH; @@ -396,8 +387,8 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, if (!memcmp(saddr, daddr, sizeof(*daddr))) pkt->mask |= RXE_LOOPBACK_MASK; - prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), - htons(ROCE_V2_UDP_DPORT)); + prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), + cpu_to_be16(ROCE_V2_UDP_DPORT)); prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, av->grh.traffic_class, av->grh.hop_limit, df, xnet); @@ -406,15 +397,15 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return 0; } -static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, struct rxe_av *av) +static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, + struct rxe_av *av) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; - dst = rxe_find_route(rxe, qp, av); + dst = rxe_find_route(skb->dev, qp, av); if (!dst) { pr_err("Host not reachable\n"); return -EHOSTUNREACH; @@ -423,8 +414,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, if (!memcmp(saddr, daddr, sizeof(*daddr))) pkt->mask |= RXE_LOOPBACK_MASK; - prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), - htons(ROCE_V2_UDP_DPORT)); + prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), + cpu_to_be16(ROCE_V2_UDP_DPORT)); prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, av->grh.traffic_class, @@ -434,16 +425,15 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return 0; } -int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, u32 *crc) +int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc) { int err = 0; struct rxe_av *av = rxe_get_av(pkt); if (av->network_type == RDMA_NETWORK_IPV4) - err = prepare4(rxe, pkt, skb, av); + err = prepare4(pkt, skb, av); else if (av->network_type == RDMA_NETWORK_IPV6) - err = prepare6(rxe, pkt, skb, av); + err = prepare6(pkt, skb, av); *crc = rxe_icrc_hdr(pkt, skb); diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b4a8acc7bb7d..36b53fb94a49 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -207,7 +207,7 @@ int rxe_pool_init( kref_init(&pool->ref_cnt); - spin_lock_init(&pool->pool_lock); + rwlock_init(&pool->pool_lock); if (rxe_type_info[type].flags & RXE_POOL_INDEX) { err = rxe_pool_init_index(pool, @@ -222,7 +222,7 @@ int rxe_pool_init( pool->key_size = rxe_type_info[type].key_size; } - pool->state = rxe_pool_valid; + pool->state = RXE_POOL_STATE_VALID; out: return err; @@ -232,7 +232,7 @@ static void rxe_pool_release(struct kref *kref) { struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); - pool->state = rxe_pool_invalid; + pool->state = RXE_POOL_STATE_INVALID; kfree(pool->table); } @@ -245,12 +245,12 @@ int rxe_pool_cleanup(struct rxe_pool *pool) { unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); - pool->state = rxe_pool_invalid; + write_lock_irqsave(&pool->pool_lock, flags); + pool->state = RXE_POOL_STATE_INVALID; if (atomic_read(&pool->num_elem) > 0) pr_warn("%s pool destroyed with unfree'd elem\n", pool_name(pool)); - spin_unlock_irqrestore(&pool->pool_lock, flags); + write_unlock_irqrestore(&pool->pool_lock, flags); rxe_pool_put(pool); @@ -336,10 +336,10 @@ void rxe_add_key(void *arg, void *key) struct rxe_pool *pool = elem->pool; unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); + write_lock_irqsave(&pool->pool_lock, flags); memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); insert_key(pool, elem); - spin_unlock_irqrestore(&pool->pool_lock, flags); + write_unlock_irqrestore(&pool->pool_lock, flags); } void rxe_drop_key(void *arg) @@ -348,9 +348,9 @@ void rxe_drop_key(void *arg) struct rxe_pool *pool = elem->pool; unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); + write_lock_irqsave(&pool->pool_lock, flags); rb_erase(&elem->node, &pool->tree); - spin_unlock_irqrestore(&pool->pool_lock, flags); + write_unlock_irqrestore(&pool->pool_lock, flags); } void rxe_add_index(void *arg) @@ -359,10 +359,10 @@ void rxe_add_index(void *arg) struct rxe_pool *pool = elem->pool; unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); + write_lock_irqsave(&pool->pool_lock, flags); elem->index = alloc_index(pool); insert_index(pool, elem); - spin_unlock_irqrestore(&pool->pool_lock, flags); + write_unlock_irqrestore(&pool->pool_lock, flags); } void rxe_drop_index(void *arg) @@ -371,10 +371,10 @@ void rxe_drop_index(void *arg) struct rxe_pool *pool = elem->pool; unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); + write_lock_irqsave(&pool->pool_lock, flags); clear_bit(elem->index - pool->min_index, pool->table); rb_erase(&elem->node, &pool->tree); - spin_unlock_irqrestore(&pool->pool_lock, flags); + write_unlock_irqrestore(&pool->pool_lock, flags); } void *rxe_alloc(struct rxe_pool *pool) @@ -384,13 +384,13 @@ void *rxe_alloc(struct rxe_pool *pool) might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); - spin_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != rxe_pool_valid) { - spin_unlock_irqrestore(&pool->pool_lock, flags); + read_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != RXE_POOL_STATE_VALID) { + read_unlock_irqrestore(&pool->pool_lock, flags); return NULL; } kref_get(&pool->ref_cnt); - spin_unlock_irqrestore(&pool->pool_lock, flags); + read_unlock_irqrestore(&pool->pool_lock, flags); kref_get(&pool->rxe->ref_cnt); @@ -436,9 +436,9 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) struct rxe_pool_entry *elem = NULL; unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); + read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != rxe_pool_valid) + if (pool->state != RXE_POOL_STATE_VALID) goto out; node = pool->tree.rb_node; @@ -450,15 +450,14 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) node = node->rb_left; else if (elem->index < index) node = node->rb_right; - else + else { + kref_get(&elem->ref_cnt); break; + } } - if (node) - kref_get(&elem->ref_cnt); - out: - spin_unlock_irqrestore(&pool->pool_lock, flags); + read_unlock_irqrestore(&pool->pool_lock, flags); return node ? elem : NULL; } @@ -469,9 +468,9 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) int cmp; unsigned long flags; - spin_lock_irqsave(&pool->pool_lock, flags); + read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != rxe_pool_valid) + if (pool->state != RXE_POOL_STATE_VALID) goto out; node = pool->tree.rb_node; @@ -494,6 +493,6 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) kref_get(&elem->ref_cnt); out: - spin_unlock_irqrestore(&pool->pool_lock, flags); + read_unlock_irqrestore(&pool->pool_lock, flags); return node ? elem : NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 47df28e43acf..aa4ba307097b 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -74,8 +74,8 @@ struct rxe_type_info { extern struct rxe_type_info rxe_type_info[]; enum rxe_pool_state { - rxe_pool_invalid, - rxe_pool_valid, + RXE_POOL_STATE_INVALID, + RXE_POOL_STATE_VALID, }; struct rxe_pool_entry { @@ -90,7 +90,7 @@ struct rxe_pool_entry { struct rxe_pool { struct rxe_dev *rxe; - spinlock_t pool_lock; /* pool spinlock */ + rwlock_t pool_lock; /* protects pool add/del/search */ size_t elem_size; struct kref ref_cnt; void (*cleanup)(struct rxe_pool_entry *obj); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index c58452daffc7..6ff88c8250f6 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -227,6 +227,16 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return err; qp->sk->sk->sk_user_data = qp; + /* pick a source UDP port number for this QP based on + * the source QPN. this spreads traffic for different QPs + * across different NIC RX queues (while using a single + * flow for a given QP to maintain packet order). + * the port number must be in the Dynamic Ports range + * (0xc000 - 0xffff). + */ + qp->src_port = RXE_ROCE_V2_SPORT + + (hash_32_generic(qp_num(qp), 14) & 0x3fff); + qp->sq.max_wr = init->cap.max_send_wr; qp->sq.max_sge = init->cap.max_send_sge; qp->sq.max_inline = init->cap.max_inline_data; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index d30dbac24583..5c29a1bb575a 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -122,7 +122,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, set_bad_pkey_cntr(port); goto err1; } - } else if (qpn != 0) { + } else { if (unlikely(!pkey_match(pkey, port->pkey_tbl[qp->attr.pkey_index] ))) { @@ -134,7 +134,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, } if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && - qpn != 0 && pkt->mask) { + pkt->mask) { u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 8be27238a86e..6c361d70d7cd 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp) int npsn; int first = 1; - wqe = queue_head(qp->sq.queue); - npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; - qp->req.wqe_index = consumer_index(qp->sq.queue); qp->req.psn = qp->comp.psn; qp->req.opcode = -1; @@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp) if (first) { first = 0; - if (mask & WR_WRITE_OR_SEND_MASK) + if (mask & WR_WRITE_OR_SEND_MASK) { + npsn = (qp->comp.psn - wqe->first_psn) & + BTH_PSN_MASK; retry_first_write_send(qp, wqe, mask, npsn); + } - if (mask & WR_READ_MASK) + if (mask & WR_READ_MASK) { + npsn = (wqe->dma.length - wqe->dma.resid) / + qp->mtu; wqe->iova += npsn * qp->mtu; + } } wqe->state = wqe_state_posted; @@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (pkt->mask & RXE_RETH_MASK) { reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); - reth_set_len(pkt, wqe->dma.length); + reth_set_len(pkt, wqe->dma.resid); } if (pkt->mask & RXE_IMMDT_MASK) @@ -476,7 +479,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, u32 *p; int err; - err = rxe_prepare(rxe, pkt, skb, &crc); + err = rxe_prepare(pkt, skb, &crc); if (err) return err; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index aa5833318372..c962160292f4 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, if (ack->mask & RXE_ATMACK_MASK) atmack_set_orig(ack, qp->resp.atomic_orig); - err = rxe_prepare(rxe, ack, skb, &crc); + err = rxe_prepare(ack, skb, &crc); if (err) { kfree_skb(skb); return NULL; @@ -682,6 +682,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, rxe_advance_resp_resource(qp); res->type = RXE_READ_MASK; + res->replay = 0; res->read.va = qp->resp.va; res->read.va_org = qp->resp.va; @@ -752,7 +753,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, state = RESPST_DONE; } else { qp->resp.res = NULL; - qp->resp.opcode = -1; + if (!res->replay) + qp->resp.opcode = -1; if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; @@ -814,6 +816,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; @@ -1065,7 +1068,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { enum resp_states rc; - u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK; + u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK; if (pkt->mask & RXE_SEND_MASK || pkt->mask & RXE_WRITE_MASK) { @@ -1108,6 +1111,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, res->state = (pkt->psn == res->first_psn) ? rdatm_res_state_new : rdatm_res_state_replay; + res->replay = 1; /* Reset the resource, except length. */ res->read.va_org = iova; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index af1470d29391..82e670d6eeea 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -158,6 +158,7 @@ struct rxe_comp_info { int opcode; int timeout; int timeout_retry; + int started_retry; u32 retry_cnt; u32 rnr_retry; struct rxe_task task; @@ -171,6 +172,7 @@ enum rdatm_res_state { struct resp_res { int type; + int replay; u32 first_psn; u32 last_psn; u32 cur_psn; @@ -195,6 +197,7 @@ struct rxe_resp_info { enum rxe_qp_state state; u32 msn; u32 psn; + u32 ack_psn; int opcode; int drop_msg; int goto_error; @@ -248,6 +251,7 @@ struct rxe_qp { struct socket *sk; u32 dst_cookie; + u16 src_port; struct rxe_av pri_av; struct rxe_av alt_av; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index ea01b8dd2be6..3d5424f335cb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1027,12 +1027,14 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, skb_queue_head_init(&skqueue); + netif_tx_lock_bh(p->dev); spin_lock_irq(&priv->lock); set_bit(IPOIB_FLAG_OPER_UP, &p->flags); if (p->neigh) while ((skb = __skb_dequeue(&p->neigh->queue))) __skb_queue_tail(&skqueue, skb); spin_unlock_irq(&priv->lock); + netif_tx_unlock_bh(p->dev); while ((skb = __skb_dequeue(&skqueue))) { skb->dev = p->dev; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e3d28f9ad9c0..1df90f0d9e64 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -243,7 +243,8 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) + if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) || + new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -1880,6 +1881,8 @@ static int ipoib_parent_init(struct net_device *ndev) sizeof(union ib_gid)); SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); + priv->dev->dev_port = priv->port - 1; + /* Let's set this one too for backwards compatibility. */ priv->dev->dev_id = priv->port - 1; return 0; @@ -2384,6 +2387,35 @@ int ipoib_add_pkey_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_pkey); } +/* + * We erroneously exposed the iface's port number in the dev_id + * sysfs field long after dev_port was introduced for that purpose[1], + * and we need to stop everyone from relying on that. + * Let's overload the shower routine for the dev_id file here + * to gently bring the issue up. + * + * [1] https://www.spinics.net/lists/netdev/msg272123.html + */ +static ssize_t dev_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *ndev = to_net_dev(dev); + + if (ndev->dev_id == ndev->dev_port) + netdev_info_once(ndev, + "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", + current->comm); + + return sprintf(buf, "%#x\n", ndev->dev_id); +} +static DEVICE_ATTR_RO(dev_id); + +int ipoib_intercept_dev_id_attr(struct net_device *dev) +{ + device_remove_file(&dev->dev, &dev_attr_dev_id); + return device_create_file(&dev->dev, &dev_attr_dev_id); +} + static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { @@ -2425,6 +2457,8 @@ static struct net_device *ipoib_add_port(const char *format, */ ndev->priv_destructor = ipoib_intf_free; + if (ipoib_intercept_dev_id_attr(ndev)) + goto sysfs_failed; if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; if (ipoib_add_pkey_attr(ndev)) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index f37cbad022a2..447d21ea479a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2708,7 +2708,7 @@ static void srpt_queue_response(struct se_cmd *cmd) break; } - if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) + if (WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)) return; /* For read commands, transfer the data to the initiator. */ diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 648eb6743ed5..6edffeed9953 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -238,10 +238,6 @@ static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req, mmc_exit_request(mq->queue, req); } -/* - * We use BLK_MQ_F_BLOCKING and have only 1 hardware queue, which means requests - * will not be dispatched in parallel. - */ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -264,7 +260,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(q->queue_lock); - if (mq->recovery_needed) { + if (mq->recovery_needed || mq->busy) { spin_unlock_irq(q->queue_lock); return BLK_STS_RESOURCE; } @@ -291,6 +287,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, break; } + /* Parallel dispatch of requests is not supported at the moment */ + mq->busy = true; + mq->in_flight[issue_type] += 1; get_card = (mmc_tot_in_flight(mq) == 1); cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); @@ -333,9 +332,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, mq->in_flight[issue_type] -= 1; if (mmc_tot_in_flight(mq) == 0) put_card = true; + mq->busy = false; spin_unlock_irq(q->queue_lock); if (put_card) mmc_put_card(card, &mq->ctx); + } else { + WRITE_ONCE(mq->busy, false); } return ret; diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 17e59d50b496..9bf3c9245075 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -81,6 +81,7 @@ struct mmc_queue { unsigned int cqe_busy; #define MMC_CQE_DCMD_BUSY BIT(0) #define MMC_CQE_QUEUE_FULL BIT(1) + bool busy; bool use_cqe; bool recovery_needed; bool in_recovery; diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c index 294de177632c..61e4e2a213c9 100644 --- a/drivers/mmc/host/android-goldfish.c +++ b/drivers/mmc/host/android-goldfish.c @@ -217,7 +217,7 @@ static void goldfish_mmc_xfer_done(struct goldfish_mmc_host *host, * We don't really have DMA, so we need * to copy from our platform driver buffer */ - sg_copy_to_buffer(data->sg, 1, host->virt_base, + sg_copy_from_buffer(data->sg, 1, host->virt_base, data->sg->length); } host->data->bytes_xfered += data->sg->length; @@ -393,7 +393,7 @@ static void goldfish_mmc_prepare_data(struct goldfish_mmc_host *host, * We don't really have DMA, so we need to copy to our * platform driver buffer */ - sg_copy_from_buffer(data->sg, 1, host->virt_base, + sg_copy_to_buffer(data->sg, 1, host->virt_base, data->sg->length); } } diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 5aa2c9404e92..be53044086c7 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1976,7 +1976,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) do { value = atmci_readl(host, ATMCI_RDR); if (likely(offset + 4 <= sg->length)) { - sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset); + sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset); offset += 4; nbytes += 4; @@ -1993,7 +1993,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) } else { unsigned int remaining = sg->length - offset; - sg_pcopy_to_buffer(sg, 1, &value, remaining, offset); + sg_pcopy_from_buffer(sg, 1, &value, remaining, offset); nbytes += remaining; flush_dcache_page(sg_page(sg)); @@ -2003,7 +2003,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) goto done; offset = 4 - remaining; - sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining, + sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining, offset, 0); nbytes += offset; } @@ -2042,7 +2042,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) do { if (likely(offset + 4 <= sg->length)) { - sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset); + sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset); atmci_writel(host, ATMCI_TDR, value); offset += 4; @@ -2059,7 +2059,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) unsigned int remaining = sg->length - offset; value = 0; - sg_pcopy_from_buffer(sg, 1, &value, remaining, offset); + sg_pcopy_to_buffer(sg, 1, &value, remaining, offset); nbytes += remaining; host->sg = sg = sg_next(sg); @@ -2070,7 +2070,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) } offset = 4 - remaining; - sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining, + sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining, offset, 0); atmci_writel(host, ATMCI_TDR, value); nbytes += offset; diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 35cc0de6be67..ca0b43973769 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -45,14 +45,16 @@ /* DM_CM_RST */ #define RST_DTRANRST1 BIT(9) #define RST_DTRANRST0 BIT(8) -#define RST_RESERVED_BITS GENMASK_ULL(32, 0) +#define RST_RESERVED_BITS GENMASK_ULL(31, 0) /* DM_CM_INFO1 and DM_CM_INFO1_MASK */ #define INFO1_CLEAR 0 +#define INFO1_MASK_CLEAR GENMASK_ULL(31, 0) #define INFO1_DTRANEND1 BIT(17) #define INFO1_DTRANEND0 BIT(16) /* DM_CM_INFO2 and DM_CM_INFO2_MASK */ +#define INFO2_MASK_CLEAR GENMASK_ULL(31, 0) #define INFO2_DTRANERR1 BIT(17) #define INFO2_DTRANERR0 BIT(16) @@ -252,6 +254,12 @@ renesas_sdhi_internal_dmac_request_dma(struct tmio_mmc_host *host, { struct renesas_sdhi *priv = host_to_priv(host); + /* Disable DMAC interrupts, we don't use them */ + renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO1_MASK, + INFO1_MASK_CLEAR); + renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO2_MASK, + INFO2_MASK_CLEAR); + /* Each value is set to non-zero to assume "enabling" each DMA */ host->chan_rx = host->chan_tx = (void *)0xdeadbeaf; diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index ca18612c4201..67b2065e7a19 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1338,6 +1338,11 @@ int denali_init(struct denali_nand_info *denali) denali_enable_irq(denali); denali_reset_banks(denali); + if (!denali->max_banks) { + /* Error out earlier if no chip is found for some reasons. */ + ret = -ENODEV; + goto disable_irq; + } denali->active_bank = DENALI_INVALID_BANK; diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c index a3f04315c05c..427fcbc1b71c 100644 --- a/drivers/mtd/nand/raw/docg4.c +++ b/drivers/mtd/nand/raw/docg4.c @@ -1218,7 +1218,7 @@ static int docg4_resume(struct platform_device *pdev) return 0; } -static void __init init_mtd_structs(struct mtd_info *mtd) +static void init_mtd_structs(struct mtd_info *mtd) { /* initialize mtd and nand data structures */ @@ -1290,7 +1290,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd) } -static int __init read_id_reg(struct mtd_info *mtd) +static int read_id_reg(struct mtd_info *mtd) { struct nand_chip *nand = mtd_to_nand(mtd); struct docg4_priv *doc = nand_get_controller_data(nand); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 139d96c5a023..092c817f8f11 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -110,16 +110,14 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, struct tcf_exts *tc_exts) { const struct tc_action *tc_act; - LIST_HEAD(tc_actions); - int rc; + int i, rc; if (!tcf_exts_has_actions(tc_exts)) { netdev_info(bp->dev, "no actions"); return -EINVAL; } - tcf_exts_to_list(tc_exts, &tc_actions); - list_for_each_entry(tc_act, &tc_actions, list) { + tcf_exts_for_each_action(i, tc_act, tc_exts) { /* Drop action */ if (is_tcf_gact_shot(tc_act)) { actions->flags |= BNXT_TC_ACTION_FLAG_DROP; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dc09f9a8a49b..c6707ea2d751 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -482,11 +482,6 @@ static int macb_mii_probe(struct net_device *dev) if (np) { if (of_phy_is_fixed_link(np)) { - if (of_phy_register_fixed_link(np) < 0) { - dev_err(&bp->pdev->dev, - "broken fixed-link specification\n"); - return -ENODEV; - } bp->phy_node = of_node_get(np); } else { bp->phy_node = of_parse_phandle(np, "phy-handle", 0); @@ -569,7 +564,7 @@ static int macb_mii_init(struct macb *bp) { struct macb_platform_data *pdata; struct device_node *np; - int err; + int err = -ENXIO; /* Enable management port */ macb_writel(bp, NCR, MACB_BIT(MPE)); @@ -592,12 +587,23 @@ static int macb_mii_init(struct macb *bp) dev_set_drvdata(&bp->dev->dev, bp->mii_bus); np = bp->pdev->dev.of_node; - if (pdata) - bp->mii_bus->phy_mask = pdata->phy_mask; + if (np && of_phy_is_fixed_link(np)) { + if (of_phy_register_fixed_link(np) < 0) { + dev_err(&bp->pdev->dev, + "broken fixed-link specification %pOF\n", np); + goto err_out_free_mdiobus; + } + + err = mdiobus_register(bp->mii_bus); + } else { + if (pdata) + bp->mii_bus->phy_mask = pdata->phy_mask; + + err = of_mdiobus_register(bp->mii_bus, np); + } - err = of_mdiobus_register(bp->mii_bus, np); if (err) - goto err_out_free_mdiobus; + goto err_out_free_fixed_link; err = macb_mii_probe(bp->dev); if (err) @@ -607,6 +613,7 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); +err_out_free_fixed_link: if (np && of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); err_out_free_mdiobus: @@ -2028,14 +2035,17 @@ static void macb_reset_hw(struct macb *bp) { struct macb_queue *queue; unsigned int q; + u32 ctrl = macb_readl(bp, NCR); /* Disable RX and TX (XXX: Should we halt the transmission * more gracefully?) */ - macb_writel(bp, NCR, 0); + ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE)); /* Clear the stats registers (XXX: Update stats first?) */ - macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); + ctrl |= MACB_BIT(CLRSTAT); + + macb_writel(bp, NCR, ctrl); /* Clear all status flags */ macb_writel(bp, TSR, -1); @@ -2223,7 +2233,7 @@ static void macb_init_hw(struct macb *bp) } /* Enable TX and RX */ - macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); } /* The hash address register is 64 bits long and takes up two diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 623f73dd7738..c116f96956fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -417,10 +417,9 @@ static void cxgb4_process_flow_actions(struct net_device *in, struct ch_filter_specification *fs) { const struct tc_action *a; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { fs->action = FILTER_PASS; } else if (is_tcf_gact_shot(a)) { @@ -591,10 +590,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, bool act_redir = false; bool act_pedit = false; bool act_vlan = false; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { /* Do nothing */ } else if (is_tcf_gact_shot(a)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 18eb2aedd4cb..c7d2b4dc7568 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -93,14 +93,13 @@ static int fill_action_fields(struct adapter *adap, unsigned int num_actions = 0; const struct tc_action *a; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { /* Don't allow more than one action per rule. */ if (num_actions) return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index fa5b30f547f6..cad52bd331f7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -220,10 +220,10 @@ struct hnae_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; + u32 page_offset; + u32 length; /* length of the buffer */ - u16 length; /* length of the buffer */ + u16 reuse_flag; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 9f2b552aee33..02a0ba20fad5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -406,113 +406,13 @@ out_net_tx_busy: return NETDEV_TX_BUSY; } -/** - * hns_nic_get_headlen - determine size of header for RSC/LRO/GRO/FCOE - * @data: pointer to the start of the headers - * @max: total length of section to find headers in - * - * This function is meant to determine the length of headers that will - * be recognized by hardware for LRO, GRO, and RSC offloads. The main - * motivation of doing this is to only perform one pull for IPv4 TCP - * packets so that we can do basic things like calculating the gso_size - * based on the average data per packet. - **/ -static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, - unsigned int max_size) -{ - unsigned char *network; - u8 hlen; - - /* this should never happen, but better safe than sorry */ - if (max_size < ETH_HLEN) - return max_size; - - /* initialize network frame pointer */ - network = data; - - /* set first protocol and move network header forward */ - network += ETH_HLEN; - - /* handle any vlan tag if present */ - if (hnae_get_field(flag, HNS_RXD_VLAN_M, HNS_RXD_VLAN_S) - == HNS_RX_FLAG_VLAN_PRESENT) { - if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN)) - return max_size; - - network += VLAN_HLEN; - } - - /* handle L3 protocols */ - if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S) - == HNS_RX_FLAG_L3ID_IPV4) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct iphdr))) - return max_size; - - /* access ihl as a u8 to avoid unaligned access on ia64 */ - hlen = (network[0] & 0x0F) << 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct iphdr)) - return network - data; - - /* record next protocol if header is present */ - } else if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S) - == HNS_RX_FLAG_L3ID_IPV6) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct ipv6hdr))) - return max_size; - - /* record next protocol */ - hlen = sizeof(struct ipv6hdr); - } else { - return network - data; - } - - /* relocate pointer to start of L4 header */ - network += hlen; - - /* finally sort out TCP/UDP */ - if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S) - == HNS_RX_FLAG_L4ID_TCP) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct tcphdr))) - return max_size; - - /* access doff as a u8 to avoid unaligned access on ia64 */ - hlen = (network[12] & 0xF0) >> 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct tcphdr)) - return network - data; - - network += hlen; - } else if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S) - == HNS_RX_FLAG_L4ID_UDP) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct udphdr))) - return max_size; - - network += sizeof(struct udphdr); - } - - /* If everything has gone correctly network should be the - * data section of the packet and will be the end of the header. - * If not then it probably represents the end of the last recognized - * header. - */ - if ((typeof(max_size))(network - data) < max_size) - return network - data; - else - return max_size; -} - static void hns_nic_reuse_page(struct sk_buff *skb, int i, struct hnae_ring *ring, int pull_len, struct hnae_desc_cb *desc_cb) { struct hnae_desc *desc; - int truesize, size; + u32 truesize; + int size; int last_offset; bool twobufs; @@ -530,7 +430,7 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i, } skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); + size - pull_len, truesize); /* avoid re-using remote pages,flag default unreuse */ if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id())) @@ -695,7 +595,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, } else { ring->stats.seg_pkt_cnt++; - pull_len = hns_nic_get_headlen(va, bnum_flag, HNS_RX_HEAD_SIZE); + pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE); memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 3554dca7a680..955c4ab18b03 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2019,7 +2019,8 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, struct hns3_desc_cb *desc_cb) { struct hns3_desc *desc; - int truesize, size; + u32 truesize; + int size; int last_offset; bool twobufs; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index a02a96aee2a2..cb450d7ec8c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -284,11 +284,11 @@ struct hns3_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; - + u32 page_offset; u32 length; /* length of the buffer */ + u16 reuse_flag; + /* desc type, used by the ring user to mark the type of the priv data */ u16 type; }; diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index bdb3f8e65ed4..2569a168334c 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -624,14 +624,14 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->tx_ring = tx_old; e1000_free_all_rx_resources(adapter); e1000_free_all_tx_resources(adapter); - kfree(tx_old); - kfree(rx_old); adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); if (err) goto err_setup; } + kfree(tx_old); + kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); return 0; @@ -644,7 +644,8 @@ err_setup_rx: err_alloc_rx: kfree(txdr); err_alloc_tx: - e1000_up(adapter); + if (netif_running(adapter->netdev)) + e1000_up(adapter); err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index abcd096ede14..5ff6caa83948 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2013,7 +2013,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data) for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) i40e_add_stat_strings(&data, i40e_gstrings_pfc_stats, i); - WARN_ONCE(p - data != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN, + WARN_ONCE(data - p != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN, "stat strings count mismatch!"); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f2c622e78802..ac685ad4d877 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5122,15 +5122,17 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; + struct i40e_pf *pf = vsi->back; i40e_status ret; int i; - if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) + /* There is no need to reset BW when mqprio mode is on. */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - if (!vsi->mqprio_qopt.qopt.hw) { + if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "Failed to reset tx rate for vsi->seid %u\n", vsi->seid); return ret; @@ -5139,12 +5141,11 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; - ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data, - NULL); + ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL); if (ret) { - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "AQ command Config VSI BW allocation per TC failed = %d\n", - vsi->back->hw.aq.asq_last_status); + pf->hw.aq.asq_last_status); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index d8b5fff581e7..868f4a1d0f72 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -89,6 +89,13 @@ extern const char ice_drv_ver[]; #define ice_for_each_rxq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) +/* Macros for each allocated tx/rx ring whether used or not in a VSI */ +#define ice_for_each_alloc_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++) + +#define ice_for_each_alloc_rxq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++) + struct ice_tc_info { u16 qoffset; u16 qcount; @@ -189,9 +196,9 @@ struct ice_vsi { struct list_head tmp_sync_list; /* MAC filters to be synced */ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ - bool irqs_ready; - bool current_isup; /* Sync 'link up' logging */ - bool stat_offsets_loaded; + u8 irqs_ready; + u8 current_isup; /* Sync 'link up' logging */ + u8 stat_offsets_loaded; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -262,7 +269,7 @@ struct ice_pf { struct ice_hw_port_stats stats; struct ice_hw_port_stats stats_prev; struct ice_hw hw; - bool stat_prev_loaded; /* has previous stats been loaded */ + u8 stat_prev_loaded; /* has previous stats been loaded */ char int_name[ICE_INT_NAME_STR_LEN]; }; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 7541ec2270b3..a0614f472658 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -329,19 +329,19 @@ struct ice_aqc_vsi_props { /* VLAN section */ __le16 pvid; /* VLANS include priority bits */ u8 pvlan_reserved[2]; - u8 port_vlan_flags; -#define ICE_AQ_VSI_PVLAN_MODE_S 0 -#define ICE_AQ_VSI_PVLAN_MODE_M (0x3 << ICE_AQ_VSI_PVLAN_MODE_S) -#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED 0x1 -#define ICE_AQ_VSI_PVLAN_MODE_TAGGED 0x2 -#define ICE_AQ_VSI_PVLAN_MODE_ALL 0x3 + u8 vlan_flags; +#define ICE_AQ_VSI_VLAN_MODE_S 0 +#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S) +#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1 +#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2 +#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3 #define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2) -#define ICE_AQ_VSI_PVLAN_EMOD_S 3 -#define ICE_AQ_VSI_PVLAN_EMOD_M (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR (0x2 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_S 3 +#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) u8 pvlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ @@ -594,6 +594,7 @@ struct ice_sw_rule_lg_act { #define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S) #define ICE_LG_ACT_GENERIC_PRIORITY_S 22 #define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S) +#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX 7 /* Action = 7 - Set Stat count */ #define ICE_LG_ACT_STAT_COUNT 0x7 diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 71d032cc5fa7..661beea6af79 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -45,6 +45,9 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) /** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure + * + * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port + * configuration, flow director filters, etc.). */ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw) { @@ -1483,7 +1486,7 @@ enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up) struct ice_phy_info *phy_info; enum ice_status status = 0; - if (!pi) + if (!pi || !link_up) return ICE_ERR_PARAM; phy_info = &pi->phy; @@ -1619,20 +1622,23 @@ __ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, } /* LUT size is only valid for Global and PF table types */ - if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) { - flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG << - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) { + switch (lut_size) { + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128: + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512: flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) && - (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) { - flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else { + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K: + if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + break; + } + /* fall-through */ + default: status = ICE_ERR_PARAM; goto ice_aq_get_set_rss_lut_exit; } diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 7c511f144ed6..62be72fdc8f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -597,10 +597,14 @@ static enum ice_status ice_init_check_adminq(struct ice_hw *hw) return 0; init_ctrlq_free_rq: - ice_shutdown_rq(hw, cq); - ice_shutdown_sq(hw, cq); - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } return status; } @@ -706,10 +710,14 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) return; } - ice_shutdown_sq(hw, cq); - ice_shutdown_rq(hw, cq); - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } } /** @@ -1057,8 +1065,11 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, clean_rq_elem_out: /* Set pending if needed, unlock and return */ - if (pending) + if (pending) { + /* re-read HW head to calculate actual pending messages */ + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc)); + } clean_rq_elem_err: mutex_unlock(&cq->rq_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 1db304c01d10..c71a9b528d6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -26,7 +26,7 @@ static int ice_q_stats_len(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); - return ((np->vsi->num_txq + np->vsi->num_rxq) * + return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * (sizeof(struct ice_q_stats) / sizeof(u64))); } @@ -218,7 +218,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } - ice_for_each_txq(vsi, i) { + ice_for_each_alloc_txq(vsi, i) { snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_packets", i); p += ETH_GSTRING_LEN; @@ -226,7 +226,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } - ice_for_each_rxq(vsi, i) { + ice_for_each_alloc_rxq(vsi, i) { snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_packets", i); p += ETH_GSTRING_LEN; @@ -253,6 +253,24 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { case ETH_SS_STATS: + /* The number (and order) of strings reported *must* remain + * constant for a given netdevice. This function must not + * report a different number based on run time parameters + * (such as the number of queues in use, or the setting of + * a private ethtool flag). This is due to the nature of the + * ethtool stats API. + * + * User space programs such as ethtool must make 3 separate + * ioctl requests, one for size, one for the strings, and + * finally one for the stats. Since these cross into + * user space, changes to the number or size could result in + * undefined memory access or incorrect string<->value + * correlations for statistics. + * + * Even if it appears to be safe, changes to the size or + * order of strings will suffer from race conditions and are + * not safe. + */ return ICE_ALL_STATS_LEN(netdev); default: return -EOPNOTSUPP; @@ -280,18 +298,26 @@ ice_get_ethtool_stats(struct net_device *netdev, /* populate per queue stats */ rcu_read_lock(); - ice_for_each_txq(vsi, j) { + ice_for_each_alloc_txq(vsi, j) { ring = READ_ONCE(vsi->tx_rings[j]); - if (!ring) - continue; - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } } - ice_for_each_rxq(vsi, j) { + ice_for_each_alloc_rxq(vsi, j) { ring = READ_ONCE(vsi->rx_rings[j]); - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } } rcu_read_unlock(); @@ -519,7 +545,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) goto done; } - for (i = 0; i < vsi->num_txq; i++) { + for (i = 0; i < vsi->alloc_txq; i++) { /* clone ring and setup updated count */ tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_cnt; @@ -551,7 +577,7 @@ process_rx: goto done; } - for (i = 0; i < vsi->num_rxq; i++) { + for (i = 0; i < vsi->alloc_rxq; i++) { /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 499904874b3f..6076fc87df9d 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -121,10 +121,6 @@ #define PFINT_FW_CTL_CAUSE_ENA_S 30 #define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) #define PFINT_OICR 0x0016CA00 -#define PFINT_OICR_HLP_RDY_S 14 -#define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S) -#define PFINT_OICR_CPM_RDY_S 15 -#define PFINT_OICR_CPM_RDY_M BIT(PFINT_OICR_CPM_RDY_S) #define PFINT_OICR_ECC_ERR_S 16 #define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S) #define PFINT_OICR_MAL_DETECT_S 19 @@ -133,10 +129,6 @@ #define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S) #define PFINT_OICR_PCI_EXCEPTION_S 21 #define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S) -#define PFINT_OICR_GPIO_S 22 -#define PFINT_OICR_GPIO_M BIT(PFINT_OICR_GPIO_S) -#define PFINT_OICR_STORM_DETECT_S 24 -#define PFINT_OICR_STORM_DETECT_M BIT(PFINT_OICR_STORM_DETECT_S) #define PFINT_OICR_HMC_ERR_S 26 #define PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S) #define PFINT_OICR_PE_CRITERR_S 28 diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index d23a91665b46..068dbc740b76 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -265,6 +265,7 @@ enum ice_rx_flex_desc_status_error_0_bits { struct ice_rlan_ctx { u16 head; u16 cpuid; /* bigger than needed, see above for reason */ +#define ICE_RLAN_BASE_S 7 u64 base; u16 qlen; #define ICE_RLAN_CTX_DBUF_S 7 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5299caf55a7f..f1e80eed2fd6 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -901,7 +901,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) case ice_aqc_opc_get_link_status: if (ice_handle_link_event(pf)) dev_err(&pf->pdev->dev, - "Could not handle link event"); + "Could not handle link event\n"); break; default: dev_dbg(&pf->pdev->dev, @@ -917,13 +917,27 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) } /** + * ice_ctrlq_pending - check if there is a difference between ntc and ntu + * @hw: pointer to hardware info + * @cq: control queue information + * + * returns true if there are pending messages in a queue, false if there aren't + */ +static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u16 ntu; + + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + return cq->rq.next_to_clean != ntu; +} + +/** * ice_clean_adminq_subtask - clean the AdminQ rings * @pf: board private structure */ static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - u32 val; if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) return; @@ -933,9 +947,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - /* re-enable Admin queue interrupt causes */ - val = rd32(hw, PFINT_FW_CTL); - wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M)); + /* There might be a situation where new messages arrive to a control + * queue between processing the last message and clearing the + * EVENT_PENDING bit. So before exiting, check queue head again (using + * ice_ctrlq_pending) and process new messages if any. + */ + if (ice_ctrlq_pending(hw, &hw->adminq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN); ice_flush(hw); } @@ -1295,11 +1313,8 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) qcount = numq_tc; } - /* find higher power-of-2 of qcount */ - pow = ilog2(qcount); - - if (!is_power_of_2(qcount)) - pow++; + /* find the (rounded up) power-of-2 of qcount */ + pow = order_base_2(qcount); for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { @@ -1352,14 +1367,15 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* Allow all packets untagged/tagged */ - ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & - ICE_AQ_VSI_PVLAN_MODE_M) >> - ICE_AQ_VSI_PVLAN_MODE_S); - /* Show VLAN/UP from packets in Rx descriptors */ - ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH & - ICE_AQ_VSI_PVLAN_EMOD_M) >> - ICE_AQ_VSI_PVLAN_EMOD_S); + + /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy + * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all + * packets untagged/tagged. + */ + ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL & + ICE_AQ_VSI_VLAN_MODE_M) >> + ICE_AQ_VSI_VLAN_MODE_S); + /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); @@ -1688,15 +1704,12 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ rd32(hw, PFINT_OICR); /* read to clear */ - val = (PFINT_OICR_HLP_RDY_M | - PFINT_OICR_CPM_RDY_M | - PFINT_OICR_ECC_ERR_M | + val = (PFINT_OICR_ECC_ERR_M | PFINT_OICR_MAL_DETECT_M | PFINT_OICR_GRST_M | PFINT_OICR_PCI_EXCEPTION_M | - PFINT_OICR_GPIO_M | - PFINT_OICR_STORM_DETECT_M | - PFINT_OICR_HMC_ERR_M); + PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); @@ -2058,15 +2071,13 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) skip_req_irq: ice_ena_misc_vector(pf); - val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) | - PFINT_OICR_CTL_CAUSE_ENA_M; + val = ((pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | + PFINT_OICR_CTL_CAUSE_ENA_M); wr32(hw, PFINT_OICR_CTL, val); /* This enables Admin queue Interrupt causes */ - val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) | - PFINT_FW_CTL_CAUSE_ENA_M; + val = ((pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | + PFINT_FW_CTL_CAUSE_ENA_M); wr32(hw, PFINT_FW_CTL, val); itr_gran = hw->itr_gran_200; @@ -3246,8 +3257,10 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) ice_dis_msix(pf); - devm_kfree(&pf->pdev->dev, pf->irq_tracker); - pf->irq_tracker = NULL; + if (pf->irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; + } } /** @@ -3271,7 +3284,7 @@ static int ice_probe(struct pci_dev *pdev, err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); if (err) { - dev_err(&pdev->dev, "I/O map error %d\n", err); + dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err); return err; } @@ -3720,10 +3733,10 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) enum ice_status status; /* Here we are configuring the VSI to let the driver add VLAN tags by - * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN - * tag insertion happens in the Tx hot path, in ice_tx_map. + * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag + * insertion happens in the Tx hot path, in ice_tx_map. */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL; ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); ctxt.vsi_num = vsi->vsi_num; @@ -3735,7 +3748,7 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) return -EIO; } - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + vsi->info.vlan_flags = ctxt.info.vlan_flags; return 0; } @@ -3757,12 +3770,15 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) */ if (ena) { /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH; } else { /* Disable stripping. Leave tag in packet */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING; } + /* Allow all packets untagged/tagged */ + ctxt.info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL; + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); ctxt.vsi_num = vsi->vsi_num; @@ -3773,7 +3789,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) return -EIO; } - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + vsi->info.vlan_flags = ctxt.info.vlan_flags; return 0; } @@ -3986,7 +4002,7 @@ static int ice_setup_rx_ctx(struct ice_ring *ring) /* clear the context structure first */ memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - rlan_ctx.base = ring->dma >> 7; + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; rlan_ctx.qlen = ring->count; @@ -4098,11 +4114,12 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) { int err; - ice_set_rx_mode(vsi->netdev); - - err = ice_restore_vlan(vsi); - if (err) - return err; + if (vsi->netdev) { + ice_set_rx_mode(vsi->netdev); + err = ice_restore_vlan(vsi); + if (err) + return err; + } err = ice_vsi_cfg_txqs(vsi); if (!err) @@ -4868,7 +4885,7 @@ int ice_down(struct ice_vsi *vsi) */ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_txq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", @@ -4893,7 +4910,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) */ static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_rxq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", @@ -5235,7 +5252,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) u8 count = 0; if (new_mtu == netdev->mtu) { - netdev_warn(netdev, "mtu is already %d\n", netdev->mtu); + netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 92da0a626ce0..295a8cd87fc1 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -131,9 +131,8 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) * * This function will request NVM ownership. */ -static enum -ice_status ice_acquire_nvm(struct ice_hw *hw, - enum ice_aq_res_access_type access) +static enum ice_status +ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) { if (hw->nvm.blank_nvm_mode) return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2e6c1d92cc88..eeae199469b6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1576,8 +1576,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc, return status; } - if (owner == ICE_SCHED_NODE_OWNER_LAN) - vsi->max_lanq[tc] = new_numqs; + vsi->max_lanq[tc] = new_numqs; return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 723d15f1e90b..6b7ec2ae5ad6 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -645,14 +645,14 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->pdata.lg_act.act[1] = cpu_to_le32(act); - act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; + act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << + ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; /* Third action Marker value */ act |= ICE_LG_ACT_GENERIC; act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; - act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->pdata.lg_act.act[2] = cpu_to_le32(act); /* call the fill switch rule to fill the lookup tx rx structure */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 6f4a0d159dbf..9b8ec128ee31 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -17,7 +17,7 @@ struct ice_vsi_ctx { u16 vsis_unallocated; u16 flags; struct ice_aqc_vsi_props info; - bool alloc_from_pool; + u8 alloc_from_pool; }; enum ice_sw_fwd_act_type { @@ -94,8 +94,8 @@ struct ice_fltr_info { u8 qgrp_size; /* Rule creations populate these indicators basing on the switch type */ - bool lb_en; /* Indicate if packet can be looped back */ - bool lan_en; /* Indicate if packet can be forwarded to the uplink */ + u8 lb_en; /* Indicate if packet can be looped back */ + u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ }; /* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 567067b650c4..31bc998fe200 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -143,7 +143,7 @@ struct ice_ring { u16 next_to_use; u16 next_to_clean; - bool ring_active; /* is ring online or not */ + u8 ring_active; /* is ring online or not */ /* stats structs */ struct ice_q_stats stats; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 99c8a9a71b5e..97c366e0ca59 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -83,7 +83,7 @@ struct ice_link_status { u64 phy_type_low; u16 max_frame_size; u16 link_speed; - bool lse_ena; /* Link Status Event notification */ + u8 lse_ena; /* Link Status Event notification */ u8 link_info; u8 an_info; u8 ext_info; @@ -101,7 +101,7 @@ struct ice_phy_info { struct ice_link_status link_info_old; u64 phy_type_low; enum ice_media_type media_type; - bool get_link_info; + u8 get_link_info; }; /* Common HW capabilities for SW use */ @@ -167,7 +167,7 @@ struct ice_nvm_info { u32 oem_ver; /* OEM version info */ u16 sr_words; /* Shadow RAM size in words */ u16 ver; /* NVM package version */ - bool blank_nvm_mode; /* is NVM empty (no FW present) */ + u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; /* Max number of port to queue branches w.r.t topology */ @@ -181,7 +181,7 @@ struct ice_sched_node { struct ice_aqc_txsched_elem_data info; u32 agg_id; /* aggregator group id */ u16 vsi_id; - bool in_use; /* suspended or in use */ + u8 in_use; /* suspended or in use */ u8 tx_sched_layer; /* Logical Layer (1-9) */ u8 num_children; u8 tc_num; @@ -218,7 +218,7 @@ struct ice_sched_vsi_info { struct ice_sched_tx_policy { u16 max_num_vsis; u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS]; - bool rdma_ena; + u8 rdma_ena; }; struct ice_port_info { @@ -243,7 +243,7 @@ struct ice_port_info { struct list_head agg_list; /* lists all aggregator */ u8 lport; #define ICE_LPORT_MASK 0xff - bool is_vf; + u8 is_vf; }; struct ice_switch_info { @@ -287,7 +287,7 @@ struct ice_hw { u8 max_cgds; u8 sw_entry_point_layer; - bool evb_veb; /* true for VEB, false for VEPA */ + u8 evb_veb; /* true for VEB, false for VEPA */ struct ice_bus_info bus; struct ice_nvm_info nvm; struct ice_hw_dev_caps dev_caps; /* device capabilities */ @@ -318,7 +318,7 @@ struct ice_hw { u8 itr_gran_100; u8 itr_gran_50; u8 itr_gran_25; - bool ucast_shared; /* true if VSIs can share unicast addr */ + u8 ucast_shared; /* true if VSIs can share unicast addr */ }; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f92f7918112d..5acf3b743876 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1649,7 +1649,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) if (hw->phy.type == e1000_phy_m88) igb_phy_disable_receiver(adapter); - mdelay(500); + msleep(500); return 0; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d03c2f0d7592..a32c576c1e65 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3873,7 +3873,7 @@ static int igb_sw_init(struct igb_adapter *adapter) adapter->mac_table = kcalloc(hw->mac.rar_entry_count, sizeof(struct igb_mac_addr), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->mac_table) return -ENOMEM; @@ -3883,7 +3883,7 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->shadow_vfta) return -ENOMEM; @@ -5816,7 +5816,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_failed: - if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) + if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) && + !tx_ring->launchtime_enable) return; goto no_csum; } diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 43664adf7a3c..d3e72d0f66ef 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -771,14 +771,13 @@ ixgb_setup_rx_resources(struct ixgb_adapter *adapter) rxdr->size = rxdr->count * sizeof(struct ixgb_rx_desc); rxdr->size = ALIGN(rxdr->size, 4096); - rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, - GFP_KERNEL); + rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, + GFP_KERNEL); if (!rxdr->desc) { vfree(rxdr->buffer_info); return -ENOMEM; } - memset(rxdr->desc, 0, rxdr->size); rxdr->next_to_clean = 0; rxdr->next_to_use = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 94b3165ff543..ccd852ad62a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -192,7 +192,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, } /* alloc the udl from per cpu ddp pool */ - ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp); + ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_KERNEL, &ddp->udp); if (!ddp->udl) { e_err(drv, "failed allocated ddp context\n"); goto out_noddp_unmap; @@ -760,7 +760,7 @@ int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter) return 0; /* Extra buffer to be shared by all DDPs for HW work around */ - buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC); + buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_KERNEL); if (!buffer) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 447098005490..9a23d33a47ed 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6201,7 +6201,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, adapter->mac_table = kcalloc(hw->mac.num_rar_entries, sizeof(struct ixgbe_mac_addr), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->mac_table) return -ENOMEM; @@ -6620,8 +6620,18 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) struct ixgbe_adapter *adapter = netdev_priv(netdev); if (adapter->xdp_prog) { - e_warn(probe, "MTU cannot be changed while XDP program is loaded\n"); - return -EPERM; + int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *ring = adapter->rx_ring[i]; + + if (new_frame_size > ixgbe_rx_bufsz(ring)) { + e_warn(probe, "Requested MTU size is not supported with XDP\n"); + return -EINVAL; + } + } } /* @@ -8983,6 +8993,15 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) #ifdef CONFIG_IXGBE_DCB if (tc) { + if (adapter->xdp_prog) { + e_warn(probe, "DCB is not supported with XDP\n"); + + ixgbe_init_interrupt_scheme(adapter); + if (netif_running(dev)) + ixgbe_open(dev); + return -EINVAL; + } + netdev_set_num_tc(dev, tc); ixgbe_set_prio_tc_map(adapter); @@ -9171,14 +9190,12 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; - LIST_HEAD(actions); + int i; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - + tcf_exts_for_each_action(i, a, exts) { /* Drop action */ if (is_tcf_gact_shot(a)) { *action = IXGBE_FDIR_DROP_QUEUE; @@ -9936,6 +9953,11 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) int tcs = adapter->hw_tcs ? : 1; int pool, err; + if (adapter->xdp_prog) { + e_warn(probe, "L2FW offload is not supported with XDP\n"); + return ERR_PTR(-EINVAL); + } + /* The hardware supported by ixgbe only filters on the destination MAC * address. In order to avoid issues we only support offloading modes * where the hardware can actually provide the functionality. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 6f59933cdff7..3c6f01c41b78 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -53,6 +53,11 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, struct ixgbe_hw *hw = &adapter->hw; int i; + if (adapter->xdp_prog) { + e_warn(probe, "SRIOV is not supported with XDP\n"); + return -EINVAL; + } + /* Enable VMDq flag so device will be set in VM mode */ adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED | IXGBE_FLAG_VMDQ_ENABLED; @@ -688,8 +693,13 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; + u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask); u8 num_tcs = adapter->hw_tcs; + u32 reg_val; + u32 queue; + u32 word; /* remove VLAN filters beloning to this VF */ ixgbe_clear_vf_vlans(adapter, vf); @@ -726,6 +736,27 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) /* reset VF api back to unknown */ adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10; + + /* Restart each queue for given VF */ + for (queue = 0; queue < q_per_pool; queue++) { + unsigned int reg_idx = (vf * q_per_pool) + queue; + + reg_val = IXGBE_READ_REG(hw, IXGBE_PVFTXDCTL(reg_idx)); + + /* Re-enabling only configured queues */ + if (reg_val) { + reg_val |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val); + reg_val &= ~IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val); + } + } + + /* Clear VF's mailbox memory */ + for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0); + + IXGBE_WRITE_FLUSH(hw); } static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 44cfb2021145..41bcbb337e83 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2518,6 +2518,7 @@ enum { /* Translated register #defines */ #define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P))) #define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P))) +#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) #define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) #define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8cac8e9c8c63..25a9b8a8f63e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1978,14 +1978,15 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { + int k; + if (!is_tcf_pedit(a)) continue; nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); + for (k = 0; k < nkeys; k++) { + htype = tcf_pedit_htype(a, k); if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { modify_ip_header = true; @@ -2049,15 +2050,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, const struct tc_action *a; LIST_HEAD(actions); u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -2666,7 +2666,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); bool encap = false; u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; @@ -2674,8 +2674,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6070d1591d1e..930700413b1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1346,8 +1346,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(f->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(f->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct mlxsw_sp_port_mall_mirror_tc_entry *mirror; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 3ae930196741..3cdb7aca90b7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -414,6 +414,8 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ebd1b24ebaa5..8d211972c5e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -21,8 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { const struct tc_action *a; - LIST_HEAD(actions); - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return 0; @@ -32,8 +31,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_ok(a)) { err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3a96307f51b0..2ab9cf25a08a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6234,6 +6234,17 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_vr_put(mlxsw_sp, vr); } +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return; + mlxsw_sp_rif_destroy(rif); +} + static void mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0d8444aaba01..db715da7bab7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -127,6 +127,24 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); } +static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, + void *data) +{ + struct mlxsw_sp *mlxsw_sp = data; + + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + return 0; +} + +static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + netdev_walk_all_upper_dev_rcu(dev, + mlxsw_sp_bridge_device_upper_rif_destroy, + mlxsw_sp); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) @@ -165,6 +183,8 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, + bridge_device->dev); list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 0ba0356ec4e6..9044496803e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -796,11 +796,10 @@ int nfp_flower_compile_action(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err, tun_out_cnt, out_cnt; + int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; enum nfp_flower_tun_type tun_type; const struct tc_action *a; u32 csum_updated = 0; - LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; @@ -810,8 +809,7 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - tcf_exts_to_list(flow->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, flow->exts) { err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated); diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index d9ab5add27a8..34193c2f1699 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -407,7 +407,7 @@ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, if (i == QED_INIT_MAX_POLL_COUNT) { DP_ERR(p_hwfn, - "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n", + "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparison %08x)]\n", addr, le32_to_cpu(cmd->expected_val), val, le32_to_cpu(cmd->op_data)); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index d89a0e22f6e4..5d37ec7e9b0b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -48,7 +48,7 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -#define CHIP_MCP_RESP_ITER_US 10 +#define QED_MCP_RESP_ITER_US 10 #define QED_DRV_MB_MAX_RETRIES (500 * 1000) /* Account for 5 sec */ #define QED_MCP_RESET_RETRIES (50 * 1000) /* Account for 500 msec */ @@ -183,18 +183,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn) return 0; } +/* Maximum of 1 sec to wait for the SHMEM ready indication */ +#define QED_MCP_SHMEM_RDY_MAX_RETRIES 20 +#define QED_MCP_SHMEM_RDY_ITER_MS 50 + static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info = p_hwfn->mcp_info; + u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES; + u8 msec = QED_MCP_SHMEM_RDY_ITER_MS; u32 drv_mb_offsize, mfw_mb_offsize; u32 mcp_pf_id = MCP_PF_ID(p_hwfn); p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR); - if (!p_info->public_base) - return 0; + if (!p_info->public_base) { + DP_NOTICE(p_hwfn, + "The address of the MCP scratch-pad is not configured\n"); + return -EINVAL; + } p_info->public_base |= GRCBASE_MCP; + /* Get the MFW MB address and number of supported messages */ + mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, + SECTION_OFFSIZE_ADDR(p_info->public_base, + PUBLIC_MFW_MB)); + p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); + p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, + sup_msgs)); + + /* The driver can notify that there was an MCP reset, and might read the + * SHMEM values before the MFW has completed initializing them. + * To avoid this, the "sup_msgs" field in the MFW mailbox is used as a + * data ready indication. + */ + while (!p_info->mfw_mb_length && --cnt) { + msleep(msec); + p_info->mfw_mb_length = + (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, sup_msgs)); + } + + if (!cnt) { + DP_NOTICE(p_hwfn, + "Failed to get the SHMEM ready notification after %d msec\n", + QED_MCP_SHMEM_RDY_MAX_RETRIES * msec); + return -EBUSY; + } + /* Calculate the driver and MFW mailbox address */ drv_mb_offsize = qed_rd(p_hwfn, p_ptt, SECTION_OFFSIZE_ADDR(p_info->public_base, @@ -204,13 +243,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) "drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n", drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id); - /* Set the MFW MB address */ - mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, - SECTION_OFFSIZE_ADDR(p_info->public_base, - PUBLIC_MFW_MB)); - p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); - p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr); - /* Get the current driver mailbox sequence before sending * the first command */ @@ -285,9 +317,15 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn, int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0; + u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n"); + return -EBUSY; + } + /* Ensure that only a single thread is accessing the mailbox */ spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -413,14 +451,41 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, (p_mb_params->cmd | seq_num), p_mb_params->param); } +static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd) +{ + p_hwfn->mcp_info->b_block_cmd = block_cmd; + + DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n", + block_cmd ? "Block" : "Unblock"); +} + +static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2; + u32 delay = QED_MCP_RESP_ITER_US; + + cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + + DP_NOTICE(p_hwfn, + "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n", + cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2); +} + static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_mb_params *p_mb_params, - u32 max_retries, u32 delay) + u32 max_retries, u32 usecs) { + u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000); struct qed_mcp_cmd_elem *p_cmd_elem; - u32 cnt = 0; u16 seq_num; int rc = 0; @@ -443,7 +508,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, goto err; spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); - udelay(delay); + + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); } while (++cnt < max_retries); if (cnt >= max_retries) { @@ -472,7 +541,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, * The spinlock stays locked until the list element is removed. */ - udelay(delay); + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); if (p_cmd_elem->b_is_completed) @@ -491,11 +564,15 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "The MFW failed to respond to command 0x%08x [param 0x%08x].\n", p_mb_params->cmd, p_mb_params->param); + qed_mcp_print_cpu_info(p_hwfn, p_ptt); spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK)) + qed_mcp_cmd_set_blocking(p_hwfn, true); + return -EAGAIN; } @@ -507,7 +584,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n", p_mb_params->mcp_resp, p_mb_params->mcp_param, - (cnt * delay) / 1000, (cnt * delay) % 1000); + (cnt * usecs) / 1000, (cnt * usecs) % 1000); /* Clear the sequence number from the MFW response */ p_mb_params->mcp_resp &= FW_MSG_CODE_MASK; @@ -525,7 +602,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, { size_t union_data_size = sizeof(union drv_union_data); u32 max_retries = QED_DRV_MB_MAX_RETRIES; - u32 delay = CHIP_MCP_RESP_ITER_US; + u32 usecs = QED_MCP_RESP_ITER_US; /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { @@ -533,6 +610,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EBUSY; } + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n", + p_mb_params->cmd, p_mb_params->param); + return -EBUSY; + } + if (p_mb_params->data_src_size > union_data_size || p_mb_params->data_dst_size > union_data_size) { DP_ERR(p_hwfn, @@ -542,8 +626,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EINVAL; } + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) { + max_retries = DIV_ROUND_UP(max_retries, 1000); + usecs *= 1000; + } + return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries, - delay); + usecs); } int qed_mcp_cmd(struct qed_hwfn *p_hwfn, @@ -761,6 +850,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -982,7 +1072,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 wol_param, mcp_resp, mcp_param; + struct qed_mcp_mb_params mb_params; + u32 wol_param; switch (p_hwfn->cdev->wol_config) { case QED_OV_WOL_DISABLED: @@ -1000,8 +1091,12 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP; } - return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param, - &mcp_resp, &mcp_param); + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; + mb_params.param = wol_param; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; + + return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) @@ -2077,31 +2172,65 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } +/* A maximal 100 msec waiting time for the MCP to halt */ +#define QED_MCP_HALT_SLEEP_MS 10 +#define QED_MCP_HALT_MAX_RETRIES 10 + int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 resp = 0, param = 0; + u32 resp = 0, param = 0, cpu_state, cnt = 0; int rc; rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, ¶m); - if (rc) + if (rc) { DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + return rc; + } - return rc; + do { + msleep(QED_MCP_HALT_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) + break; + } while (++cnt < QED_MCP_HALT_MAX_RETRIES); + + if (cnt == QED_MCP_HALT_MAX_RETRIES) { + DP_NOTICE(p_hwfn, + "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state); + return -EBUSY; + } + + qed_mcp_cmd_set_blocking(p_hwfn, true); + + return 0; } +#define QED_MCP_RESUME_SLEEP_MS 10 + int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 value, cpu_mode; + u32 cpu_mode, cpu_state; qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff); - value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); - value &= ~MCP_REG_CPU_MODE_SOFT_HALT; - qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value); cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT; + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode); + msleep(QED_MCP_RESUME_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); - return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) { + DP_NOTICE(p_hwfn, + "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + cpu_mode, cpu_state); + return -EBUSY; + } + + qed_mcp_cmd_set_blocking(p_hwfn, false); + + return 0; } int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 047976d5c6e9..85e6b3989e7a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -635,11 +635,14 @@ struct qed_mcp_info { */ spinlock_t cmd_lock; + /* Flag to indicate whether sending a MFW mailbox command is blocked */ + bool b_block_cmd; + /* Spinlock used for syncing SW link-changes and link-changes * originating from attention context. */ spinlock_t link_lock; - bool block_mb_sending; + u32 public_base; u32 drv_mb_addr; u32 mfw_mb_addr; @@ -660,14 +663,20 @@ struct qed_mcp_info { }; struct qed_mcp_mb_params { - u32 cmd; - u32 param; - void *p_data_src; - u8 data_src_size; - void *p_data_dst; - u8 data_dst_size; - u32 mcp_resp; - u32 mcp_param; + u32 cmd; + u32 param; + void *p_data_src; + void *p_data_dst; + u8 data_src_size; + u8 data_dst_size; + u32 mcp_resp; + u32 mcp_param; + u32 flags; +#define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAG_AVOID_BLOCK (0x1 << 1) +#define QED_MB_FLAGS_IS_SET(params, flag) \ + ({ typeof(params) __params = (params); \ + (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) }; struct qed_drv_tlv_hdr { diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index d8ad2dcad8d5..f736f70956fd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -562,8 +562,10 @@ 0 #define MCP_REG_CPU_STATE \ 0xe05004UL +#define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL +#define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL #define PGLUE_B_REG_PF_BAR0_SIZE \ 0x2aae60UL #define PGLUE_B_REG_PF_BAR1_SIZE \ diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 9673d19308e6..b16ce7d93caf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2006,18 +2006,16 @@ unlock: static int qede_parse_actions(struct qede_dev *edev, struct tcf_exts *exts) { - int rc = -EINVAL, num_act = 0; + int rc = -EINVAL, num_act = 0, i; const struct tc_action *a; bool is_drop = false; - LIST_HEAD(actions); if (!tcf_exts_has_actions(exts)) { DP_NOTICE(edev, "No tc actions received\n"); return rc; } - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { num_act++; if (is_tcf_gact_shot(a)) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 353f1c129af1..059ba9429e51 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2384,26 +2384,20 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev, return status; } -static netdev_features_t qlge_fix_features(struct net_device *ndev, - netdev_features_t features) -{ - int err; - - /* Update the behavior of vlan accel in the adapter */ - err = qlge_update_hw_vlan_features(ndev, features); - if (err) - return err; - - return features; -} - static int qlge_set_features(struct net_device *ndev, netdev_features_t features) { netdev_features_t changed = ndev->features ^ features; + int err; + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + /* Update the behavior of vlan accel in the adapter */ + err = qlge_update_hw_vlan_features(ndev, features); + if (err) + return err; - if (changed & NETIF_F_HW_VLAN_CTAG_RX) qlge_vlan_mode(ndev, features); + } return 0; } @@ -4719,7 +4713,6 @@ static const struct net_device_ops qlge_netdev_ops = { .ndo_set_mac_address = qlge_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = qlge_tx_timeout, - .ndo_fix_features = qlge_fix_features, .ndo_set_features = qlge_set_features, .ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index b81f4faf7b10..1470fc12282b 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Renesas Ethernet AVB device driver * * Copyright (C) 2014-2015 Renesas Electronics Corporation @@ -5,10 +6,6 @@ * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com> * * Based on the SuperH Ethernet driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License version 2, - * as published by the Free Software Foundation. */ #ifndef __RAVB_H__ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c06f2df895c2..aff5516b781e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet AVB device driver * * Copyright (C) 2014-2015 Renesas Electronics Corporation @@ -5,10 +6,6 @@ * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com> * * Based on the SuperH Ethernet driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License version 2, - * as published by the Free Software Foundation. */ #include <linux/cache.h> diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5573199c4536..ad4433d59237 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* SuperH Ethernet device driver * * Copyright (C) 2014 Renesas Electronics Corporation @@ -5,18 +6,6 @@ * Copyright (C) 2008-2014 Renesas Solutions Corp. * Copyright (C) 2013-2017 Cogent Embedded, Inc. * Copyright (C) 2014 Codethink Limited - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". */ #include <linux/module.h> diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index f94be99cf400..0c18650bbfe6 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* SuperH Ethernet device driver * * Copyright (C) 2006-2012 Nobuhiro Iwamatsu * Copyright (C) 2008-2012 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". */ #ifndef __SH_ETH_H__ diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index edf20361ea5f..bf4acebb6bcd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -33,7 +33,7 @@ config DWMAC_DWC_QOS_ETH select PHYLIB select CRC32 select MII - depends on OF && COMMON_CLK && HAS_DMA + depends on OF && HAS_DMA help Support for chips using the snps,dwc-qos-ethernet.txt DT binding. @@ -57,7 +57,7 @@ config DWMAC_ANARION config DWMAC_IPQ806X tristate "QCA IPQ806x DWMAC support" default ARCH_QCOM - depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST) + depends on OF && (ARCH_QCOM || COMPILE_TEST) select MFD_SYSCON help Support for QCA IPQ806X DWMAC Ethernet. @@ -100,7 +100,7 @@ config DWMAC_OXNAS config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP - depends on OF && COMMON_CLK && (ARCH_ROCKCHIP || COMPILE_TEST) + depends on OF && (ARCH_ROCKCHIP || COMPILE_TEST) select MFD_SYSCON help Support for Ethernet controller on Rockchip RK3288 SoC. @@ -123,7 +123,7 @@ config DWMAC_SOCFPGA config DWMAC_STI tristate "STi GMAC support" default ARCH_STI - depends on OF && COMMON_CLK && (ARCH_STI || COMPILE_TEST) + depends on OF && (ARCH_STI || COMPILE_TEST) select MFD_SYSCON ---help--- Support for ethernet controller on STi SOCs. @@ -147,7 +147,7 @@ config DWMAC_STM32 config DWMAC_SUNXI tristate "Allwinner GMAC support" default ARCH_SUNXI - depends on OF && COMMON_CLK && (ARCH_SUNXI || COMPILE_TEST) + depends on OF && (ARCH_SUNXI || COMPILE_TEST) ---help--- Support for Allwinner A20/A31 GMAC ethernet controllers. diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1a96dd9c1091..531294f4978b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -61,7 +61,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, struct stmmac_tc_entry *action_entry = entry; const struct tc_action *act; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) @@ -69,8 +69,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, if (frag) action_entry = frag; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(act, &actions, list) { + tcf_exts_for_each_action(i, act, exts) { /* Accept */ if (is_tcf_gact_ok(act)) { action_entry->val.af = 1; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 507f68190cb1..1121a1ec407c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> +#include <linux/pci.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/in.h> @@ -2039,12 +2040,16 @@ static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct net_device_context *net_device_ctx; + struct device *pdev = vf_netdev->dev.parent; struct netvsc_device *netvsc_dev; int ret; if (vf_netdev->addr_len != ETH_ALEN) return NOTIFY_DONE; + if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev)) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 97742708460b..2cd71bdb6484 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5217,8 +5217,8 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } - if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && - udev->serial && !strcmp(udev->serial, "000001000000")) { + if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && + (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) { dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b9951d2067e..d668682f91df 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -316,6 +316,14 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, old_value = *dbbuf_db; *dbbuf_db = value; + /* + * Ensure that the doorbell is updated before reading the event + * index from memory. The controller needs to provide similar + * ordering to ensure the envent index is updated before reading + * the doorbell. + */ + mb(); + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) return false; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ebf3e7a6c49e..b5ec96abd048 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1210,7 +1210,7 @@ static int __init nvmet_init(void) error = nvmet_init_discovery(); if (error) - goto out; + goto out_free_work_queue; error = nvmet_init_configfs(); if (error) @@ -1219,6 +1219,8 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); +out_free_work_queue: + destroy_workqueue(buffered_io_wq); out: return error; } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 34712def81b1..5251689a1d9a 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -311,7 +311,7 @@ fcloop_tgt_lsrqst_done_work(struct work_struct *work) struct fcloop_tport *tport = tls_req->tport; struct nvmefc_ls_req *lsreq = tls_req->lsreq; - if (tport->remoteport) + if (!tport || tport->remoteport) lsreq->done(lsreq, tls_req->status); } @@ -329,6 +329,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; + tls_req->tport = NULL; schedule_work(&tls_req->work); return ret; } diff --git a/drivers/of/base.c b/drivers/of/base.c index 466e3c8582f0..9095b8290150 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -54,6 +54,28 @@ DEFINE_MUTEX(of_mutex); */ DEFINE_RAW_SPINLOCK(devtree_lock); +bool of_node_name_eq(const struct device_node *np, const char *name) +{ + const char *node_name; + size_t len; + + if (!np) + return false; + + node_name = kbasename(np->full_name); + len = strchrnul(node_name, '@') - node_name; + + return (strlen(name) == len) && (strncmp(node_name, name, len) == 0); +} + +bool of_node_name_prefix(const struct device_node *np, const char *prefix) +{ + if (!np) + return false; + + return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0; +} + int of_n_addr_cells(struct device_node *np) { u32 cells; @@ -720,6 +742,31 @@ struct device_node *of_get_next_available_child(const struct device_node *node, EXPORT_SYMBOL(of_get_next_available_child); /** + * of_get_compatible_child - Find compatible child node + * @parent: parent node + * @compatible: compatible string + * + * Lookup child node whose compatible property contains the given compatible + * string. + * + * Returns a node pointer with refcount incremented, use of_node_put() on it + * when done; or NULL if not found. + */ +struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + struct device_node *child; + + for_each_child_of_node(parent, child) { + if (of_device_is_compatible(child, compatible)) + break; + } + + return child; +} +EXPORT_SYMBOL(of_get_compatible_child); + +/** * of_get_child_by_name - Find the child node by name for a given parent * @node: parent node * @name: child name to look for. diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 977a8307fbb1..4f2816559205 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -260,10 +260,13 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz, mutex_lock(&tz->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED) { tz->polling_delay = data->polling_delay; - else + tz->passive_delay = data->passive_delay; + } else { tz->polling_delay = 0; + tz->passive_delay = 0; + } mutex_unlock(&tz->lock); diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index c866cc165960..450ed66edf58 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -1,16 +1,6 @@ -/* - * Copyright 2016 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright 2016 Freescale Semiconductor, Inc. #include <linux/module.h> #include <linux/platform_device.h> @@ -197,7 +187,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) int ret; struct qoriq_tmu_data *data; struct device_node *np = pdev->dev.of_node; - u32 site = 0; + u32 site; if (!np) { dev_err(&pdev->dev, "Device OF-Node is NULL"); @@ -233,8 +223,9 @@ static int qoriq_tmu_probe(struct platform_device *pdev) if (ret < 0) goto err_tmu; - data->tz = thermal_zone_of_sensor_register(&pdev->dev, data->sensor_id, - data, &tmu_tz_ops); + data->tz = devm_thermal_zone_of_sensor_register(&pdev->dev, + data->sensor_id, + data, &tmu_tz_ops); if (IS_ERR(data->tz)) { ret = PTR_ERR(data->tz); dev_err(&pdev->dev, @@ -243,7 +234,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) } /* Enable monitoring */ - site |= 0x1 << (15 - data->sensor_id); + site = 0x1 << (15 - data->sensor_id); tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr); return 0; @@ -261,8 +252,6 @@ static int qoriq_tmu_remove(struct platform_device *pdev) { struct qoriq_tmu_data *data = platform_get_drvdata(pdev); - thermal_zone_of_sensor_unregister(&pdev->dev, data->tz); - /* Disable monitoring */ tmu_write(data, TMR_DISABLE, &data->regs->tmr); diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index 766521eb7071..7aed5337bdd3 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -1,19 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car Gen3 THS thermal sensor driver * Based on rcar_thermal.c and work from Hien Dang and Khiem Nguyen. * * Copyright (C) 2016 Renesas Electronics Corporation. * Copyright (C) 2016 Sang Engineering - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * */ #include <linux/delay.h> #include <linux/err.h> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index e77e63070e99..78f932822d38 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car THS/TSC thermal sensor driver * * Copyright (C) 2012 Renesas Solutions Corp. * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include <linux/delay.h> #include <linux/err.h> @@ -660,6 +648,6 @@ static struct platform_driver rcar_thermal_driver = { }; module_platform_driver(rcar_thermal_driver); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("R-Car THS/TSC thermal sensor driver"); MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>"); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 96c1d8400822..b13c6b4b2c66 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -952,7 +952,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, list_for_each_entry_safe(node, n, &d->pending_list, node) { struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; if (msg->iova <= vq_msg->iova && - msg->iova + msg->size - 1 > vq_msg->iova && + msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { vhost_poll_queue(&node->vq->poll); list_del(&node->node); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index f2088838f690..5b471889d723 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev, } static DEVICE_ATTR_RO(modalias); +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", + xenbus_strstate(to_xenbus_device(dev)->state)); +} +static DEVICE_ATTR_RO(state); + static struct attribute *xenbus_dev_attrs[] = { &dev_attr_nodename.attr, &dev_attr_devtype.attr, &dev_attr_modalias.attr, + &dev_attr_state.attr, NULL, }; |