From e8dc73c9f9ea554b36093dea23e4ca3b586105d7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Feb 2020 15:26:12 -0600 Subject: xen: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200226212612.GA4663@embeddedor Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-pciback/pciback.h | 2 +- include/xen/interface/io/tpmif.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index ce1077e32466..7c95516a860f 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -52,7 +52,7 @@ struct xen_pcibk_dev_data { unsigned int ack_intr:1; /* .. and ACK-ing */ unsigned long handled; unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ - char irq_name[0]; /* xen-pcibk[000:04:00.0] */ + char irq_name[]; /* xen-pcibk[000:04:00.0] */ }; /* Used by XenBus and xen_pcibk_ops.c */ diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h index 28e7dcd75e82..f8aa8bac5196 100644 --- a/include/xen/interface/io/tpmif.h +++ b/include/xen/interface/io/tpmif.h @@ -46,7 +46,7 @@ struct vtpm_shared_page { uint8_t pad; uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ - uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */ + uint32_t extra_pages[]; /* grant IDs; length in nr_extra_pages */ }; #endif -- cgit v1.2.3-59-g8ed1b From 1b6a51e86cce38cf4d48ce9c242120283ae2f603 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 3 Mar 2020 14:14:22 -0800 Subject: xenbus: req->body should be updated before req->state The req->body should be updated before req->state is updated and the order should be guaranteed by a barrier. Otherwise, read_reply() might return req->body = NULL. Below is sample callstack when the issue is reproduced on purpose by reordering the updates of req->body and req->state and adding delay in code between updates of req->state and req->body. [ 22.356105] general protection fault: 0000 [#1] SMP PTI [ 22.361185] CPU: 2 PID: 52 Comm: xenwatch Not tainted 5.5.0xen+ #6 [ 22.366727] Hardware name: Xen HVM domU, BIOS ... [ 22.372245] RIP: 0010:_parse_integer_fixup_radix+0x6/0x60 ... ... [ 22.392163] RSP: 0018:ffffb2d64023fdf0 EFLAGS: 00010246 [ 22.395933] RAX: 0000000000000000 RBX: 75746e7562755f6d RCX: 0000000000000000 [ 22.400871] RDX: 0000000000000000 RSI: ffffb2d64023fdfc RDI: 75746e7562755f6d [ 22.405874] RBP: 0000000000000000 R08: 00000000000001e8 R09: 0000000000cdcdcd [ 22.410945] R10: ffffb2d6402ffe00 R11: ffff9d95395eaeb0 R12: ffff9d9535935000 [ 22.417613] R13: ffff9d9526d4a000 R14: ffff9d9526f4f340 R15: ffff9d9537654000 [ 22.423726] FS: 0000000000000000(0000) GS:ffff9d953bc80000(0000) knlGS:0000000000000000 [ 22.429898] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 22.434342] CR2: 000000c4206a9000 CR3: 00000001ea3fc002 CR4: 00000000001606e0 [ 22.439645] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 22.444941] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 22.450342] Call Trace: [ 22.452509] simple_strtoull+0x27/0x70 [ 22.455572] xenbus_transaction_start+0x31/0x50 [ 22.459104] netback_changed+0x76c/0xcc1 [xen_netfront] [ 22.463279] ? find_watch+0x40/0x40 [ 22.466156] xenwatch_thread+0xb4/0x150 [ 22.469309] ? wait_woken+0x80/0x80 [ 22.472198] kthread+0x10e/0x130 [ 22.474925] ? kthread_park+0x80/0x80 [ 22.477946] ret_from_fork+0x35/0x40 [ 22.480968] Modules linked in: xen_kbdfront xen_fbfront(+) xen_netfront xen_blkfront [ 22.486783] ---[ end trace a9222030a747c3f7 ]--- [ 22.490424] RIP: 0010:_parse_integer_fixup_radix+0x6/0x60 The virt_rmb() is added in the 'true' path of test_reply(). The "while" is changed to "do while" so that test_reply() is used as a read memory barrier. Signed-off-by: Dongli Zhang Link: https://lore.kernel.org/r/20200303221423.21962-1-dongli.zhang@oracle.com Reviewed-by: Julien Grall Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_comms.c | 2 ++ drivers/xen/xenbus/xenbus_xs.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index d239fc3c5e3d..852ed161fc2a 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -313,6 +313,8 @@ static int process_msg(void) req->msg.type = state.msg.type; req->msg.len = state.msg.len; req->body = state.body; + /* write body, then update state */ + virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); } else diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index ddc18da61834..3a06eb699f33 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -191,8 +191,11 @@ static bool xenbus_ok(void) static bool test_reply(struct xb_req_data *req) { - if (req->state == xb_req_state_got_reply || !xenbus_ok()) + if (req->state == xb_req_state_got_reply || !xenbus_ok()) { + /* read req->state before all other fields */ + virt_rmb(); return true; + } /* Make sure to reread req->state each time. */ barrier(); @@ -202,7 +205,7 @@ static bool test_reply(struct xb_req_data *req) static void *read_reply(struct xb_req_data *req) { - while (req->state != xb_req_state_got_reply) { + do { wait_event(req->wq, test_reply(req)); if (!xenbus_ok()) @@ -216,7 +219,7 @@ static void *read_reply(struct xb_req_data *req) if (req->err) return ERR_PTR(req->err); - } + } while (req->state != xb_req_state_got_reply); return req->body; } -- cgit v1.2.3-59-g8ed1b From 8130b9d5b5abf26f9927b487c15319a187775f34 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 3 Mar 2020 14:14:23 -0800 Subject: xenbus: req->err should be updated before req->state This patch adds the barrier to guarantee that req->err is always updated before req->state. Otherwise, read_reply() would not return ERR_PTR(req->err) but req->body, when process_writes()->xb_write() is failed. Signed-off-by: Dongli Zhang Link: https://lore.kernel.org/r/20200303221423.21962-2-dongli.zhang@oracle.com Reviewed-by: Julien Grall Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_comms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 852ed161fc2a..eb5151fc8efa 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -397,6 +397,8 @@ static int process_writes(void) if (state.req->state == xb_req_state_aborted) kfree(state.req); else { + /* write err, then update state */ + virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } -- cgit v1.2.3-59-g8ed1b From 2f69a110e7bba3ec6bc089a2f736ca0941d887ed Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 5 Mar 2020 11:03:23 +0100 Subject: xen/xenbus: fix locking Commit 060eabe8fbe726 ("xenbus/backend: Protect xenbus callback with lock") introduced a bug by holding a lock while calling a function which might schedule. Fix that by using a semaphore instead. Fixes: 060eabe8fbe726 ("xenbus/backend: Protect xenbus callback with lock") Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20200305100323.16736-1-jgross@suse.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_probe.c | 10 +++++----- drivers/xen/xenbus/xenbus_probe_backend.c | 5 +++-- include/xen/xenbus.h | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 66975da4f3b6..8c4d05b687b7 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -239,9 +239,9 @@ int xenbus_dev_probe(struct device *_dev) goto fail; } - spin_lock(&dev->reclaim_lock); + down(&dev->reclaim_sem); err = drv->probe(dev, id); - spin_unlock(&dev->reclaim_lock); + up(&dev->reclaim_sem); if (err) goto fail_put; @@ -271,9 +271,9 @@ int xenbus_dev_remove(struct device *_dev) free_otherend_watch(dev); if (drv->remove) { - spin_lock(&dev->reclaim_lock); + down(&dev->reclaim_sem); drv->remove(dev); - spin_unlock(&dev->reclaim_lock); + up(&dev->reclaim_sem); } module_put(drv->driver.owner); @@ -473,7 +473,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, goto fail; dev_set_name(&xendev->dev, "%s", devname); - spin_lock_init(&xendev->reclaim_lock); + sema_init(&xendev->reclaim_sem, 1); /* Register with generic device framework. */ err = device_register(&xendev->dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 791f6fe01e91..9b2fbe69bccc 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -257,10 +258,10 @@ static int backend_reclaim_memory(struct device *dev, void *data) drv = to_xenbus_driver(dev->driver); if (drv && drv->reclaim_memory) { xdev = to_xenbus_device(dev); - if (!spin_trylock(&xdev->reclaim_lock)) + if (down_trylock(&xdev->reclaim_sem)) return 0; drv->reclaim_memory(xdev); - spin_unlock(&xdev->reclaim_lock); + up(&xdev->reclaim_sem); } return 0; } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 89a889585ba0..850a43bd69d3 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,7 @@ struct xenbus_device { enum xenbus_state state; struct completion down; struct work_struct work; - spinlock_t reclaim_lock; + struct semaphore reclaim_sem; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) -- cgit v1.2.3-59-g8ed1b From 4ab50af63d2eb5da5c1571f8518948514f535782 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 5 Mar 2020 16:51:29 +0100 Subject: xen/blkfront: fix ring info addressing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0265d6e8ddb890 ("xen/blkfront: limit allocated memory size to actual use case") made struct blkfront_ring_info size dynamic. This is fine when running with only one queue, but with multiple queues the addressing of the single queues has to be adapted as the structs are allocated in an array. Fixes: 0265d6e8ddb890 ("xen/blkfront: limit allocated memory size to actual use case") Reported-by: Sander Eikelenboom Tested-by: Sander Eikelenboom Signed-off-by: Juergen Gross Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20200305155129.28326-1-jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/block/xen-blkfront.c | 80 +++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index e2ad6bba2281..9df516a56bb2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -213,6 +213,7 @@ struct blkfront_info struct blk_mq_tag_set tag_set; struct blkfront_ring_info *rinfo; unsigned int nr_rings; + unsigned int rinfo_size; /* Save uncomplete reqs and bios for migration. */ struct list_head requests; struct bio_list bio_list; @@ -259,6 +260,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo); static void blkfront_gather_backend_features(struct blkfront_info *info); static int negotiate_mq(struct blkfront_info *info); +#define for_each_rinfo(info, ptr, idx) \ + for ((ptr) = (info)->rinfo, (idx) = 0; \ + (idx) < (info)->nr_rings; \ + (idx)++, (ptr) = (void *)(ptr) + (info)->rinfo_size) + +static inline struct blkfront_ring_info * +get_rinfo(const struct blkfront_info *info, unsigned int i) +{ + BUG_ON(i >= info->nr_rings); + return (void *)info->rinfo + i * info->rinfo_size; +} + static int get_id_from_freelist(struct blkfront_ring_info *rinfo) { unsigned long free = rinfo->shadow_free; @@ -883,8 +896,7 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, struct blkfront_info *info = hctx->queue->queuedata; struct blkfront_ring_info *rinfo = NULL; - BUG_ON(info->nr_rings <= qid); - rinfo = &info->rinfo[qid]; + rinfo = get_rinfo(info, qid); blk_mq_start_request(qd->rq); spin_lock_irqsave(&rinfo->ring_lock, flags); if (RING_FULL(&rinfo->ring)) @@ -1181,6 +1193,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, static void xlvbd_release_gendisk(struct blkfront_info *info) { unsigned int minor, nr_minors, i; + struct blkfront_ring_info *rinfo; if (info->rq == NULL) return; @@ -1188,9 +1201,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) /* No more blkif_request(). */ blk_mq_stop_hw_queues(info->rq); - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; - + for_each_rinfo(info, rinfo, i) { /* No more gnttab callback work. */ gnttab_cancel_free_callback(&rinfo->callback); @@ -1339,6 +1350,7 @@ free_shadow: static void blkif_free(struct blkfront_info *info, int suspend) { unsigned int i; + struct blkfront_ring_info *rinfo; /* Prevent new requests being issued until we fix things up. */ info->connected = suspend ? @@ -1347,8 +1359,8 @@ static void blkif_free(struct blkfront_info *info, int suspend) if (info->rq) blk_mq_stop_hw_queues(info->rq); - for (i = 0; i < info->nr_rings; i++) - blkif_free_ring(&info->rinfo[i]); + for_each_rinfo(info, rinfo, i) + blkif_free_ring(rinfo); kvfree(info->rinfo); info->rinfo = NULL; @@ -1775,6 +1787,7 @@ static int talk_to_blkback(struct xenbus_device *dev, int err; unsigned int i, max_page_order; unsigned int ring_page_order; + struct blkfront_ring_info *rinfo; if (!info) return -ENODEV; @@ -1788,9 +1801,7 @@ static int talk_to_blkback(struct xenbus_device *dev, if (err) goto destroy_blkring; - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; - + for_each_rinfo(info, rinfo, i) { /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, rinfo); if (err) @@ -1815,7 +1826,7 @@ again: /* We already got the number of queues/rings in _probe */ if (info->nr_rings == 1) { - err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename); + err = write_per_ring_nodes(xbt, info->rinfo, dev->nodename); if (err) goto destroy_blkring; } else { @@ -1837,10 +1848,10 @@ again: goto abort_transaction; } - for (i = 0; i < info->nr_rings; i++) { + for_each_rinfo(info, rinfo, i) { memset(path, 0, pathsize); snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i); - err = write_per_ring_nodes(xbt, &info->rinfo[i], path); + err = write_per_ring_nodes(xbt, rinfo, path); if (err) { kfree(path); goto destroy_blkring; @@ -1868,9 +1879,8 @@ again: goto destroy_blkring; } - for (i = 0; i < info->nr_rings; i++) { + for_each_rinfo(info, rinfo, i) { unsigned int j; - struct blkfront_ring_info *rinfo = &info->rinfo[i]; for (j = 0; j < BLK_RING_SIZE(info); j++) rinfo->shadow[j].req.u.rw.id = j + 1; @@ -1900,6 +1910,7 @@ static int negotiate_mq(struct blkfront_info *info) { unsigned int backend_max_queues; unsigned int i; + struct blkfront_ring_info *rinfo; BUG_ON(info->nr_rings); @@ -1911,20 +1922,16 @@ static int negotiate_mq(struct blkfront_info *info) if (!info->nr_rings) info->nr_rings = 1; - info->rinfo = kvcalloc(info->nr_rings, - struct_size(info->rinfo, shadow, - BLK_RING_SIZE(info)), - GFP_KERNEL); + info->rinfo_size = struct_size(info->rinfo, shadow, + BLK_RING_SIZE(info)); + info->rinfo = kvcalloc(info->nr_rings, info->rinfo_size, GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); info->nr_rings = 0; return -ENOMEM; } - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[i]; + for_each_rinfo(info, rinfo, i) { INIT_LIST_HEAD(&rinfo->indirect_pages); INIT_LIST_HEAD(&rinfo->grants); rinfo->dev_info = info; @@ -2017,6 +2024,7 @@ static int blkif_recover(struct blkfront_info *info) int rc; struct bio *bio; unsigned int segs; + struct blkfront_ring_info *rinfo; blkfront_gather_backend_features(info); /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ @@ -2024,9 +2032,7 @@ static int blkif_recover(struct blkfront_info *info) segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); - for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; - + for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); if (rc) return rc; @@ -2036,10 +2042,7 @@ static int blkif_recover(struct blkfront_info *info) /* Now safe for us to use the shared ring */ info->connected = BLKIF_STATE_CONNECTED; - for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[r_index]; + for_each_rinfo(info, rinfo, r_index) { /* Kick any other new requests queued since we resumed */ kick_pending_request_queues(rinfo); } @@ -2072,13 +2075,13 @@ static int blkfront_resume(struct xenbus_device *dev) struct blkfront_info *info = dev_get_drvdata(&dev->dev); int err = 0; unsigned int i, j; + struct blkfront_ring_info *rinfo; dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); bio_list_init(&info->bio_list); INIT_LIST_HEAD(&info->requests); - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; + for_each_rinfo(info, rinfo, i) { struct bio_list merge_bio; struct blk_shadow *shadow = rinfo->shadow; @@ -2337,6 +2340,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned int binfo; char *envp[] = { "RESIZE=1", NULL }; int err, i; + struct blkfront_ring_info *rinfo; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -2394,8 +2398,8 @@ static void blkfront_connect(struct blkfront_info *info) "physical-sector-size", sector_size); blkfront_gather_backend_features(info); - for (i = 0; i < info->nr_rings; i++) { - err = blkfront_setup_indirect(&info->rinfo[i]); + for_each_rinfo(info, rinfo, i) { + err = blkfront_setup_indirect(rinfo); if (err) { xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", info->xbdev->otherend); @@ -2416,8 +2420,8 @@ static void blkfront_connect(struct blkfront_info *info) /* Kick pending requests. */ info->connected = BLKIF_STATE_CONNECTED; - for (i = 0; i < info->nr_rings; i++) - kick_pending_request_queues(&info->rinfo[i]); + for_each_rinfo(info, rinfo, i) + kick_pending_request_queues(rinfo); device_add_disk(&info->xbdev->dev, info->gd, NULL); @@ -2652,9 +2656,9 @@ static void purge_persistent_grants(struct blkfront_info *info) { unsigned int i; unsigned long flags; + struct blkfront_ring_info *rinfo; - for (i = 0; i < info->nr_rings; i++) { - struct blkfront_ring_info *rinfo = &info->rinfo[i]; + for_each_rinfo(info, rinfo, i) { struct grant *gnt_list_entry, *tmp; spin_lock_irqsave(&rinfo->ring_lock, flags); -- cgit v1.2.3-59-g8ed1b