aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 10:43:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 10:43:59 -0800
commitce8a79d5601aab94c02ed4539c48e8605422ac94 (patch)
tree7830a97a475d57284640c8e2d3516521722708b6 /drivers
parentMerge tag 'for-6.2/io_uring-next-2022-12-08' of git://git.kernel.dk/linux (diff)
parentblktrace: Fix output non-blktrace event when blk_classic option enabled (diff)
downloadwireguard-linux-ce8a79d5601aab94c02ed4539c48e8605422ac94.tar.xz
wireguard-linux-ce8a79d5601aab94c02ed4539c48e8605422ac94.zip
Merge tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull requests via Christoph: - Support some passthrough commands without CAP_SYS_ADMIN (Kanchan Joshi) - Refactor PCIe probing and reset (Christoph Hellwig) - Various fabrics authentication fixes and improvements (Sagi Grimberg) - Avoid fallback to sequential scan due to transient issues (Uday Shankar) - Implement support for the DEAC bit in Write Zeroes (Christoph Hellwig) - Allow overriding the IEEE OUI and firmware revision in configfs for nvmet (Aleksandr Miloserdov) - Force reconnect when number of queue changes in nvmet (Daniel Wagner) - Minor fixes and improvements (Uros Bizjak, Joel Granados, Sagi Grimberg, Christoph Hellwig, Christophe JAILLET) - Fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni) - Use the common tagset helpers in nvme-pci driver (Christoph Hellwig) - Cleanup the nvme-pci removal path (Christoph Hellwig) - Use kstrtobool() instead of strtobool (Christophe JAILLET) - Allow unprivileged passthrough of Identify Controller (Joel Granados) - Support io stats on the mpath device (Sagi Grimberg) - Minor nvmet cleanup (Sagi Grimberg) - MD pull requests via Song: - Code cleanups (Christoph) - Various fixes - Floppy pull request from Denis: - Fix a memory leak in the init error path (Yuan) - Series fixing some batch wakeup issues with sbitmap (Gabriel) - Removal of the pktcdvd driver that was deprecated more than 5 years ago, and subsequent removal of the devnode callback in struct block_device_operations as no users are now left (Greg) - Fix for partition read on an exclusively opened bdev (Jan) - Series of elevator API cleanups (Jinlong, Christoph) - Series of fixes and cleanups for blk-iocost (Kemeng) - Series of fixes and cleanups for blk-throttle (Kemeng) - Series adding concurrent support for sync queues in BFQ (Yu) - Series bringing drbd a bit closer to the out-of-tree maintained version (Christian, Joel, Lars, Philipp) - Misc drbd fixes (Wang) - blk-wbt fixes and tweaks for enable/disable (Yu) - Fixes for mq-deadline for zoned devices (Damien) - Add support for read-only and offline zones for null_blk (Shin'ichiro) - Series fixing the delayed holder tracking, as used by DM (Yu, Christoph) - Series enabling bio alloc caching for IRQ based IO (Pavel) - Series enabling userspace peer-to-peer DMA (Logan) - BFQ waker fixes (Khazhismel) - Series fixing elevator refcount issues (Christoph, Jinlong) - Series cleaning up references around queue destruction (Christoph) - Series doing quiesce by tagset, enabling cleanups in drivers (Christoph, Chao) - Series untangling the queue kobject and queue references (Christoph) - Misc fixes and cleanups (Bart, David, Dawei, Jinlong, Kemeng, Ye, Yang, Waiman, Shin'ichiro, Randy, Pankaj, Christoph) * tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux: (247 commits) blktrace: Fix output non-blktrace event when blk_classic option enabled block: sed-opal: Don't include <linux/kernel.h> sed-opal: allow using IOC_OPAL_SAVE for locking too blk-cgroup: Fix typo in comment block: remove bio_set_op_attrs nvmet: don't open-code NVME_NS_ATTR_RO enumeration nvme-pci: use the tagset alloc/free helpers nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers nvme: consolidate setting the tagset flags nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set block: bio_copy_data_iter nvme-pci: split out a nvme_pci_ctrl_is_dead helper nvme-pci: return early on ctrl state mismatch in nvme_reset_work nvme-pci: rename nvme_disable_io_queues nvme-pci: cleanup nvme_suspend_queue nvme-pci: remove nvme_pci_disable nvme-pci: remove nvme_disable_admin_queue nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl nvme: use nvme_wait_ready in nvme_shutdown_ctrl ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/Kconfig43
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/drbd/Kconfig2
-rw-r--r--drivers/block/drbd/Makefile2
-rw-r--r--drivers/block/drbd/drbd_actlog.c8
-rw-r--r--drivers/block/drbd/drbd_bitmap.c62
-rw-r--r--drivers/block/drbd/drbd_debugfs.c2
-rw-r--r--drivers/block/drbd/drbd_debugfs.h2
-rw-r--r--drivers/block/drbd/drbd_int.h78
-rw-r--r--drivers/block/drbd/drbd_interval.c2
-rw-r--r--drivers/block/drbd/drbd_interval.h2
-rw-r--r--drivers/block/drbd/drbd_main.c21
-rw-r--r--drivers/block/drbd/drbd_nl.c27
-rw-r--r--drivers/block/drbd/drbd_nla.c2
-rw-r--r--drivers/block/drbd/drbd_nla.h2
-rw-r--r--drivers/block/drbd/drbd_polymorph_printk.h141
-rw-r--r--drivers/block/drbd/drbd_proc.c2
-rw-r--r--drivers/block/drbd/drbd_protocol.h2
-rw-r--r--drivers/block/drbd/drbd_receiver.c99
-rw-r--r--drivers/block/drbd/drbd_req.c8
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_state.c2
-rw-r--r--drivers/block/drbd/drbd_state.h2
-rw-r--r--drivers/block/drbd/drbd_state_change.h2
-rw-r--r--drivers/block/drbd/drbd_strings.c2
-rw-r--r--drivers/block/drbd/drbd_strings.h2
-rw-r--r--drivers/block/drbd/drbd_vli.h2
-rw-r--r--drivers/block/drbd/drbd_worker.c18
-rw-r--r--drivers/block/floppy.c4
-rw-r--r--drivers/block/null_blk/main.c22
-rw-r--r--drivers/block/null_blk/null_blk.h8
-rw-r--r--drivers/block/null_blk/zoned.c95
-rw-r--r--drivers/block/pktcdvd.c2944
-rw-r--r--drivers/block/virtio_blk.c8
-rw-r--r--drivers/block/xen-blkfront.c1
-rw-r--r--drivers/md/bcache/movinggc.c2
-rw-r--r--drivers/md/bcache/request.c2
-rw-r--r--drivers/md/bcache/writeback.c4
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm-thin.c2
-rw-r--r--drivers/md/dm.c138
-rw-r--r--drivers/md/md-bitmap.c47
-rw-r--r--drivers/md/md.c323
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid0.c1
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid10.c20
-rw-r--r--drivers/md/raid5-cache.c10
-rw-r--r--drivers/md/raid5-ppl.c5
-rw-r--r--drivers/nvme/host/apple.c30
-rw-r--r--drivers/nvme/host/auth.c258
-rw-r--r--drivers/nvme/host/core.c319
-rw-r--r--drivers/nvme/host/fc.c59
-rw-r--r--drivers/nvme/host/ioctl.c118
-rw-r--r--drivers/nvme/host/multipath.c26
-rw-r--r--drivers/nvme/host/nvme.h69
-rw-r--r--drivers/nvme/host/pci.c606
-rw-r--r--drivers/nvme/host/rdma.c42
-rw-r--r--drivers/nvme/host/tcp.c45
-rw-r--r--drivers/nvme/target/admin-cmd.c11
-rw-r--r--drivers/nvme/target/configfs.c138
-rw-r--r--drivers/nvme/target/core.c44
-rw-r--r--drivers/nvme/target/io-cmd-file.c16
-rw-r--r--drivers/nvme/target/loop.c16
-rw-r--r--drivers/nvme/target/nvmet.h6
-rw-r--r--drivers/pci/p2pdma.c124
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/scsi/scsi_scan.c1
-rw-r--r--drivers/ufs/core/ufshcd.c2
69 files changed, 1841 insertions, 4283 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a41145d52de9..a2184b428493 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -285,49 +285,6 @@ config BLK_DEV_RAM_SIZE
The default value is 4096 kilobytes. Only change this if you know
what you are doing.
-config CDROM_PKTCDVD
- tristate "Packet writing on CD/DVD media (DEPRECATED)"
- depends on !UML
- depends on SCSI
- select CDROM
- help
- Note: This driver is deprecated and will be removed from the
- kernel in the near future!
-
- If you have a CDROM/DVD drive that supports packet writing, say
- Y to include support. It should work with any MMC/Mt Fuji
- compliant ATAPI or SCSI drive, which is just about any newer
- DVD/CD writer.
-
- Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
- is possible.
- DVD-RW disks must be in restricted overwrite mode.
-
- See the file <file:Documentation/cdrom/packet-writing.rst>
- for further information on the use of this driver.
-
- To compile this driver as a module, choose M here: the
- module will be called pktcdvd.
-
-config CDROM_PKTCDVD_BUFFERS
- int "Free buffers for data gathering"
- depends on CDROM_PKTCDVD
- default "8"
- help
- This controls the maximum number of active concurrent packets. More
- concurrent packets can increase write performance, but also require
- more memory. Each concurrent packet will require approximately 64Kb
- of non-swappable kernel memory, memory which will be allocated when
- a disc is opened for writing.
-
-config CDROM_PKTCDVD_WCACHE
- bool "Enable write caching"
- depends on CDROM_PKTCDVD
- help
- If enabled, write caching will be set for the CD-R/W device. For now
- this option is dangerous unless the CD-RW media is known good, as we
- don't do deferred write error handling yet.
-
config ATA_OVER_ETH
tristate "ATA over Ethernet support"
depends on NET
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 101612cba303..962ee65d8ca3 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_N64CART) += n64cart.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
-obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index cbacddc55a1d..6fb4e38fca88 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
#
# DRBD device driver configuration
#
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8bd534697d1b..c93e462130ff 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-only
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e27478ae579c..429255876800 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_actlog.c
@@ -868,9 +868,9 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
nr_sectors = get_capacity(device->vdisk);
esector = sector + (size >> 9) - 1;
- if (!expect(sector < nr_sectors))
+ if (!expect(device, sector < nr_sectors))
goto out;
- if (!expect(esector < nr_sectors))
+ if (!expect(device, esector < nr_sectors))
esector = nr_sectors - 1;
lbnr = BM_SECT_TO_BIT(nr_sectors-1);
@@ -1143,7 +1143,7 @@ void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (!bm_ext) {
spin_unlock_irqrestore(&device->al_lock, flags);
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
return;
}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 7d9db33363de..289876ffbc31 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_bitmap.c
@@ -113,7 +113,7 @@ struct drbd_bitmap {
static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{
struct drbd_bitmap *b = device->bitmap;
- if (!__ratelimit(&drbd_ratelimit_state))
+ if (!drbd_ratelimit())
return;
drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n",
current->comm, task_pid_nr(current),
@@ -448,7 +448,7 @@ int drbd_bm_init(struct drbd_device *device)
sector_t drbd_bm_capacity(struct drbd_device *device)
{
- if (!expect(device->bitmap))
+ if (!expect(device, device->bitmap))
return 0;
return device->bitmap->bm_dev_capacity;
}
@@ -457,7 +457,7 @@ sector_t drbd_bm_capacity(struct drbd_device *device)
*/
void drbd_bm_cleanup(struct drbd_device *device)
{
- if (!expect(device->bitmap))
+ if (!expect(device, device->bitmap))
return;
bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
bm_vk_free(device->bitmap->bm_pages);
@@ -636,7 +636,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
int err = 0;
bool growing;
- if (!expect(b))
+ if (!expect(device, b))
return -ENOMEM;
drbd_bm_lock(device, "resize", BM_LOCKED_MASK);
@@ -757,9 +757,9 @@ unsigned long _drbd_bm_total_weight(struct drbd_device *device)
unsigned long s;
unsigned long flags;
- if (!expect(b))
+ if (!expect(device, b))
return 0;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
@@ -783,9 +783,9 @@ unsigned long drbd_bm_total_weight(struct drbd_device *device)
size_t drbd_bm_words(struct drbd_device *device)
{
struct drbd_bitmap *b = device->bitmap;
- if (!expect(b))
+ if (!expect(device, b))
return 0;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return 0;
return b->bm_words;
@@ -794,7 +794,7 @@ size_t drbd_bm_words(struct drbd_device *device)
unsigned long drbd_bm_bits(struct drbd_device *device)
{
struct drbd_bitmap *b = device->bitmap;
- if (!expect(b))
+ if (!expect(device, b))
return 0;
return b->bm_bits;
@@ -816,9 +816,9 @@ void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
end = offset + number;
- if (!expect(b))
+ if (!expect(device, b))
return;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return;
if (number == 0)
return;
@@ -863,9 +863,9 @@ void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
end = offset + number;
- if (!expect(b))
+ if (!expect(device, b))
return;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return;
spin_lock_irq(&b->bm_lock);
@@ -894,9 +894,9 @@ void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
void drbd_bm_set_all(struct drbd_device *device)
{
struct drbd_bitmap *b = device->bitmap;
- if (!expect(b))
+ if (!expect(device, b))
return;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return;
spin_lock_irq(&b->bm_lock);
@@ -910,9 +910,9 @@ void drbd_bm_set_all(struct drbd_device *device)
void drbd_bm_clear_all(struct drbd_device *device)
{
struct drbd_bitmap *b = device->bitmap;
- if (!expect(b))
+ if (!expect(device, b))
return;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return;
spin_lock_irq(&b->bm_lock);
@@ -952,7 +952,7 @@ static void drbd_bm_endio(struct bio *bio)
bm_set_page_io_err(b->bm_pages[idx]);
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
bio->bi_status, idx);
} else {
@@ -1013,7 +1013,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
else
len = PAGE_SIZE;
} else {
- if (__ratelimit(&drbd_ratelimit_state)) {
+ if (drbd_ratelimit()) {
drbd_err(device, "Invalid offset during on-disk bitmap access: "
"page idx %u, sector %llu\n", page_nr, on_disk_sector);
}
@@ -1332,9 +1332,9 @@ static unsigned long bm_find_next(struct drbd_device *device,
struct drbd_bitmap *b = device->bitmap;
unsigned long i = DRBD_END_OF_BITMAP;
- if (!expect(b))
+ if (!expect(device, b))
return i;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return i;
spin_lock_irq(&b->bm_lock);
@@ -1436,9 +1436,9 @@ static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
struct drbd_bitmap *b = device->bitmap;
int c = 0;
- if (!expect(b))
+ if (!expect(device, b))
return 1;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
@@ -1582,9 +1582,9 @@ int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
unsigned long *p_addr;
int i;
- if (!expect(b))
+ if (!expect(device, b))
return 0;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
@@ -1619,9 +1619,9 @@ int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const
* robust in case we screwed up elsewhere, in that case pretend there
* was one dirty bit in the requested area, so we won't try to do a
* local read there (no bitmap probably implies no disk) */
- if (!expect(b))
+ if (!expect(device, b))
return 1;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return 1;
spin_lock_irqsave(&b->bm_lock, flags);
@@ -1635,7 +1635,7 @@ int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const
bm_unmap(p_addr);
p_addr = bm_map_pidx(b, idx);
}
- if (expect(bitnr < b->bm_bits))
+ if (expect(device, bitnr < b->bm_bits))
c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
else
drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
@@ -1668,9 +1668,9 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
unsigned long flags;
unsigned long *p_addr, *bm;
- if (!expect(b))
+ if (!expect(device, b))
return 0;
- if (!expect(b->bm_pages))
+ if (!expect(device, b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index b3b9cd5628fd..a72c096aa5b1 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "drbd debugfs: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h
index 58e31cef0844..ee3d66eb40c6 100644
--- a/drivers/block/drbd/drbd_debugfs.h
+++ b/drivers/block/drbd/drbd_debugfs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/debugfs.h>
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4d661282ff41..ae713338aa46 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
drbd_int.h
@@ -37,6 +37,7 @@
#include "drbd_strings.h"
#include "drbd_state.h"
#include "drbd_protocol.h"
+#include "drbd_polymorph_printk.h"
#ifdef __CHECKER__
# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
@@ -75,71 +76,6 @@ extern int drbd_proc_details;
struct drbd_device;
struct drbd_connection;
-#define __drbd_printk_device(level, device, fmt, args...) \
- dev_printk(level, disk_to_dev((device)->vdisk), fmt, ## args)
-#define __drbd_printk_peer_device(level, peer_device, fmt, args...) \
- dev_printk(level, disk_to_dev((peer_device)->device->vdisk), fmt, ## args)
-#define __drbd_printk_resource(level, resource, fmt, args...) \
- printk(level "drbd %s: " fmt, (resource)->name, ## args)
-#define __drbd_printk_connection(level, connection, fmt, args...) \
- printk(level "drbd %s: " fmt, (connection)->resource->name, ## args)
-
-void drbd_printk_with_wrong_object_type(void);
-
-#define __drbd_printk_if_same_type(obj, type, func, level, fmt, args...) \
- (__builtin_types_compatible_p(typeof(obj), type) || \
- __builtin_types_compatible_p(typeof(obj), const type)), \
- func(level, (const type)(obj), fmt, ## args)
-
-#define drbd_printk(level, obj, fmt, args...) \
- __builtin_choose_expr( \
- __drbd_printk_if_same_type(obj, struct drbd_device *, \
- __drbd_printk_device, level, fmt, ## args), \
- __builtin_choose_expr( \
- __drbd_printk_if_same_type(obj, struct drbd_resource *, \
- __drbd_printk_resource, level, fmt, ## args), \
- __builtin_choose_expr( \
- __drbd_printk_if_same_type(obj, struct drbd_connection *, \
- __drbd_printk_connection, level, fmt, ## args), \
- __builtin_choose_expr( \
- __drbd_printk_if_same_type(obj, struct drbd_peer_device *, \
- __drbd_printk_peer_device, level, fmt, ## args), \
- drbd_printk_with_wrong_object_type()))))
-
-#define drbd_dbg(obj, fmt, args...) \
- drbd_printk(KERN_DEBUG, obj, fmt, ## args)
-#define drbd_alert(obj, fmt, args...) \
- drbd_printk(KERN_ALERT, obj, fmt, ## args)
-#define drbd_err(obj, fmt, args...) \
- drbd_printk(KERN_ERR, obj, fmt, ## args)
-#define drbd_warn(obj, fmt, args...) \
- drbd_printk(KERN_WARNING, obj, fmt, ## args)
-#define drbd_info(obj, fmt, args...) \
- drbd_printk(KERN_INFO, obj, fmt, ## args)
-#define drbd_emerg(obj, fmt, args...) \
- drbd_printk(KERN_EMERG, obj, fmt, ## args)
-
-#define dynamic_drbd_dbg(device, fmt, args...) \
- dynamic_dev_dbg(disk_to_dev(device->vdisk), fmt, ## args)
-
-#define D_ASSERT(device, exp) do { \
- if (!(exp)) \
- drbd_err(device, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__); \
- } while (0)
-
-/**
- * expect - Make an assertion
- *
- * Unlike the assert macro, this macro returns a boolean result.
- */
-#define expect(exp) ({ \
- bool _bool = (exp); \
- if (!_bool) \
- drbd_err(device, "ASSERTION %s FAILED in %s\n", \
- #exp, __func__); \
- _bool; \
- })
-
/* Defines to control fault insertion */
enum {
DRBD_FAULT_MD_WR = 0, /* meta data write */
@@ -395,6 +331,7 @@ struct drbd_peer_request {
struct drbd_peer_device *peer_device;
struct drbd_epoch *epoch; /* for writes */
struct page *pages;
+ blk_opf_t opf;
atomic_t pending_bios;
struct drbd_interval i;
/* see comments on ee flag bits below */
@@ -406,6 +343,10 @@ struct drbd_peer_request {
};
};
+/* Equivalent to bio_op and req_op. */
+#define peer_req_op(peer_req) \
+ ((peer_req)->opf & REQ_OP_MASK)
+
/* ee flag bits.
* While corresponding bios are in flight, the only modification will be
* set_bit WAS_ERROR, which has to be atomic.
@@ -1545,8 +1486,7 @@ extern void drbd_send_acks_wf(struct work_struct *ws);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool throttle_if_app_is_waiting);
-extern int drbd_submit_peer_request(struct drbd_device *,
- struct drbd_peer_request *, blk_opf_t, int);
+extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req);
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
sector_t, unsigned int,
@@ -1718,7 +1658,7 @@ static inline void __drbd_chk_io_error_(struct drbd_device *device,
switch (ep) {
case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Local IO failed in %s.\n", where);
if (device->state.disk > D_INCONSISTENT)
_drbd_set_state(_NS(device, disk, D_INCONSISTENT), CS_HARD, NULL);
diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
index f07b4378388b..5024ffd6143d 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
#include <asm/bug.h>
#include <linux/rbtree_augmented.h>
#include "drbd_interval.h"
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index b8c2dee5edc8..366489b72fe9 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_INTERVAL_H
#define __DRBD_INTERVAL_H
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0aa1dde07a98..2f16e1bfb6e7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd.c
@@ -1259,7 +1259,7 @@ static int _drbd_send_bitmap(struct drbd_device *device)
struct bm_xfer_ctx c;
int err;
- if (!expect(device->bitmap))
+ if (!expect(device, device->bitmap))
return false;
if (get_ldev(device)) {
@@ -2217,7 +2217,8 @@ void drbd_destroy_device(struct kref *kref)
kref_put(&peer_device->connection->kref, drbd_destroy_connection);
kfree(peer_device);
}
- memset(device, 0xfd, sizeof(*device));
+ if (device->submit.wq)
+ destroy_workqueue(device->submit.wq);
kfree(device);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2249,9 +2250,9 @@ static void do_retry(struct work_struct *ws)
bool expected;
expected =
- expect(atomic_read(&req->completion_ref) == 0) &&
- expect(req->rq_state & RQ_POSTPONED) &&
- expect((req->rq_state & RQ_LOCAL_PENDING) == 0 ||
+ expect(device, atomic_read(&req->completion_ref) == 0) &&
+ expect(device, req->rq_state & RQ_POSTPONED) &&
+ expect(device, (req->rq_state & RQ_LOCAL_PENDING) == 0 ||
(req->rq_state & RQ_LOCAL_ABORTED) != 0);
if (!expected)
@@ -2309,7 +2310,6 @@ void drbd_destroy_resource(struct kref *kref)
idr_destroy(&resource->devices);
free_cpumask_var(resource->cpu_mask);
kfree(resource->name);
- memset(resource, 0xf2, sizeof(*resource));
kfree(resource);
}
@@ -2650,7 +2650,6 @@ void drbd_destroy_connection(struct kref *kref)
drbd_free_socket(&connection->data);
kfree(connection->int_dig_in);
kfree(connection->int_dig_vv);
- memset(connection, 0xfc, sizeof(*connection));
kfree(connection);
kref_put(&resource->kref, drbd_destroy_resource);
}
@@ -2774,7 +2773,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
err = add_disk(disk);
if (err)
- goto out_idr_remove_from_resource;
+ goto out_destroy_workqueue;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2788,6 +2787,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
+out_destroy_workqueue:
+ destroy_workqueue(device->submit.wq);
out_idr_remove_from_resource:
for_each_connection_safe(connection, n, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
@@ -3766,7 +3767,7 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type)
if (ret) {
drbd_fault_count++;
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_warn(device, "***Simulating %s failure\n",
_drbd_fault_str(type));
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 864c98e74875..60757ac31701 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_nl.c
@@ -1210,6 +1210,7 @@ static void decide_on_discard_support(struct drbd_device *device,
struct drbd_connection *connection =
first_peer_device(device)->connection;
struct request_queue *q = device->rq_queue;
+ unsigned int max_discard_sectors;
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
goto not_supported;
@@ -1230,15 +1231,14 @@ static void decide_on_discard_support(struct drbd_device *device,
* topology on all peers.
*/
blk_queue_discard_granularity(q, 512);
- q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
- q->limits.max_write_zeroes_sectors =
- drbd_max_discard_sectors(connection);
+ max_discard_sectors = drbd_max_discard_sectors(connection);
+ blk_queue_max_discard_sectors(q, max_discard_sectors);
+ blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
return;
not_supported:
blk_queue_discard_granularity(q, 0);
- q->limits.max_discard_sectors = 0;
- q->limits.max_write_zeroes_sectors = 0;
+ blk_queue_max_discard_sectors(q, 0);
}
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
@@ -1256,6 +1256,18 @@ static void fixup_write_zeroes(struct drbd_device *device, struct request_queue
q->limits.max_write_zeroes_sectors = 0;
}
+static void fixup_discard_support(struct drbd_device *device, struct request_queue *q)
+{
+ unsigned int max_discard = device->rq_queue->limits.max_discard_sectors;
+ unsigned int discard_granularity =
+ device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT;
+
+ if (discard_granularity > max_discard) {
+ blk_queue_discard_granularity(q, 0);
+ blk_queue_max_discard_sectors(q, 0);
+ }
+}
+
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
unsigned int max_bio_size, struct o_qlim *o)
{
@@ -1288,6 +1300,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
disk_update_readahead(device->vdisk);
}
fixup_write_zeroes(device, q);
+ fixup_discard_support(device, q);
}
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
@@ -1530,7 +1543,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
goto fail_unlock;
}
- if (!expect(new_disk_conf->resync_rate >= 1))
+ if (!expect(device, new_disk_conf->resync_rate >= 1))
new_disk_conf->resync_rate = 1;
sanitize_disk_conf(device, new_disk_conf, device->ldev);
diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c
index 6a09b0b98018..df0d241d3f6a 100644
--- a/drivers/block/drbd/drbd_nla.c
+++ b/drivers/block/drbd/drbd_nla.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <net/netlink.h>
#include <linux/drbd_genl_api.h>
diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h
index f5eaffb6474e..d3555df0d353 100644
--- a/drivers/block/drbd/drbd_nla.h
+++ b/drivers/block/drbd/drbd_nla.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_NLA_H
#define __DRBD_NLA_H
diff --git a/drivers/block/drbd/drbd_polymorph_printk.h b/drivers/block/drbd/drbd_polymorph_printk.h
new file mode 100644
index 000000000000..8e0082d139ba
--- /dev/null
+++ b/drivers/block/drbd/drbd_polymorph_printk.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef DRBD_POLYMORPH_PRINTK_H
+#define DRBD_POLYMORPH_PRINTK_H
+
+#if !defined(CONFIG_DYNAMIC_DEBUG)
+#undef DEFINE_DYNAMIC_DEBUG_METADATA
+#undef __dynamic_pr_debug
+#undef DYNAMIC_DEBUG_BRANCH
+#define DEFINE_DYNAMIC_DEBUG_METADATA(D, F) const char *D = F; ((void)D)
+#define __dynamic_pr_debug(D, F, args...) do { (void)(D); if (0) printk(F, ## args); } while (0)
+#define DYNAMIC_DEBUG_BRANCH(D) false
+#endif
+
+
+#define __drbd_printk_drbd_device_prep(device) \
+ const struct drbd_device *__d = (device); \
+ const struct drbd_resource *__r = __d->resource
+#define __drbd_printk_drbd_device_fmt(fmt) "drbd %s/%u drbd%u: " fmt
+#define __drbd_printk_drbd_device_args() __r->name, __d->vnr, __d->minor
+#define __drbd_printk_drbd_device_unprep()
+
+#define __drbd_printk_drbd_peer_device_prep(peer_device) \
+ const struct drbd_device *__d; \
+ const struct drbd_resource *__r; \
+ __d = (peer_device)->device; \
+ __r = __d->resource
+#define __drbd_printk_drbd_peer_device_fmt(fmt) \
+ "drbd %s/%u drbd%u: " fmt
+#define __drbd_printk_drbd_peer_device_args() \
+ __r->name, __d->vnr, __d->minor
+#define __drbd_printk_drbd_peer_device_unprep()
+
+#define __drbd_printk_drbd_resource_prep(resource) \
+ const struct drbd_resource *__r = resource
+#define __drbd_printk_drbd_resource_fmt(fmt) "drbd %s: " fmt
+#define __drbd_printk_drbd_resource_args() __r->name
+#define __drbd_printk_drbd_resource_unprep(resource)
+
+#define __drbd_printk_drbd_connection_prep(connection) \
+ const struct drbd_connection *__c = (connection); \
+ const struct drbd_resource *__r = __c->resource
+#define __drbd_printk_drbd_connection_fmt(fmt) \
+ "drbd %s: " fmt
+#define __drbd_printk_drbd_connection_args() \
+ __r->name
+#define __drbd_printk_drbd_connection_unprep()
+
+void drbd_printk_with_wrong_object_type(void);
+void drbd_dyn_dbg_with_wrong_object_type(void);
+
+#define __drbd_printk_choose_cond(obj, struct_name) \
+ (__builtin_types_compatible_p(typeof(obj), struct struct_name *) || \
+ __builtin_types_compatible_p(typeof(obj), const struct struct_name *))
+#define __drbd_printk_if_same_type(obj, struct_name, level, fmt, args...) \
+ __drbd_printk_choose_cond(obj, struct_name), \
+({ \
+ __drbd_printk_ ## struct_name ## _prep((const struct struct_name *)(obj)); \
+ printk(level __drbd_printk_ ## struct_name ## _fmt(fmt), \
+ __drbd_printk_ ## struct_name ## _args(), ## args); \
+ __drbd_printk_ ## struct_name ## _unprep(); \
+})
+
+#define drbd_printk(level, obj, fmt, args...) \
+ __builtin_choose_expr( \
+ __drbd_printk_if_same_type(obj, drbd_device, level, fmt, ## args), \
+ __builtin_choose_expr( \
+ __drbd_printk_if_same_type(obj, drbd_resource, level, fmt, ## args), \
+ __builtin_choose_expr( \
+ __drbd_printk_if_same_type(obj, drbd_connection, level, fmt, ## args), \
+ __builtin_choose_expr( \
+ __drbd_printk_if_same_type(obj, drbd_peer_device, level, fmt, ## args), \
+ drbd_printk_with_wrong_object_type()))))
+
+#define __drbd_dyn_dbg_if_same_type(obj, struct_name, fmt, args...) \
+ __drbd_printk_choose_cond(obj, struct_name), \
+({ \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor)) { \
+ __drbd_printk_ ## struct_name ## _prep((const struct struct_name *)(obj)); \
+ __dynamic_pr_debug(&descriptor, __drbd_printk_ ## struct_name ## _fmt(fmt), \
+ __drbd_printk_ ## struct_name ## _args(), ## args); \
+ __drbd_printk_ ## struct_name ## _unprep(); \
+ } \
+})
+
+#define dynamic_drbd_dbg(obj, fmt, args...) \
+ __builtin_choose_expr( \
+ __drbd_dyn_dbg_if_same_type(obj, drbd_device, fmt, ## args), \
+ __builtin_choose_expr( \
+ __drbd_dyn_dbg_if_same_type(obj, drbd_resource, fmt, ## args), \
+ __builtin_choose_expr( \
+ __drbd_dyn_dbg_if_same_type(obj, drbd_connection, fmt, ## args), \
+ __builtin_choose_expr( \
+ __drbd_dyn_dbg_if_same_type(obj, drbd_peer_device, fmt, ## args), \
+ drbd_dyn_dbg_with_wrong_object_type()))))
+
+#define drbd_emerg(device, fmt, args...) \
+ drbd_printk(KERN_EMERG, device, fmt, ## args)
+#define drbd_alert(device, fmt, args...) \
+ drbd_printk(KERN_ALERT, device, fmt, ## args)
+#define drbd_crit(device, fmt, args...) \
+ drbd_printk(KERN_CRIT, device, fmt, ## args)
+#define drbd_err(device, fmt, args...) \
+ drbd_printk(KERN_ERR, device, fmt, ## args)
+#define drbd_warn(device, fmt, args...) \
+ drbd_printk(KERN_WARNING, device, fmt, ## args)
+#define drbd_notice(device, fmt, args...) \
+ drbd_printk(KERN_NOTICE, device, fmt, ## args)
+#define drbd_info(device, fmt, args...) \
+ drbd_printk(KERN_INFO, device, fmt, ## args)
+
+
+#define drbd_ratelimit() \
+({ \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ __ratelimit(&_rs); \
+})
+
+#define D_ASSERT(x, exp) \
+ do { \
+ if (!(exp)) \
+ drbd_err(x, "ASSERTION %s FAILED in %s\n", \
+ #exp, __func__); \
+ } while (0)
+
+/**
+ * expect - Make an assertion
+ *
+ * Unlike the assert macro, this macro returns a boolean result.
+ */
+#define expect(x, exp) ({ \
+ bool _bool = (exp); \
+ if (!_bool && drbd_ratelimit()) \
+ drbd_err(x, "ASSERTION %s FAILED in %s\n", \
+ #exp, __func__); \
+ _bool; \
+ })
+
+#endif
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 3c0193de2498..2227fb0db1ce 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_proc.c
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index a882b65ab5d2..56bbca9d7700 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_PROTOCOL_H
#define __DRBD_PROTOCOL_H
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 9ace76156e4b..0e58a3187345 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_receiver.c
@@ -413,7 +413,7 @@ void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *
drbd_free_pages(device, peer_req->pages, is_net);
D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
- if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
+ if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
drbd_al_complete_io(device, &peer_req->i);
}
@@ -1603,9 +1603,19 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru
drbd_endio_write_sec_final(peer_req);
}
+static int peer_request_fault_type(struct drbd_peer_request *peer_req)
+{
+ if (peer_req_op(peer_req) == REQ_OP_READ) {
+ return peer_req->flags & EE_APPLICATION ?
+ DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD;
+ } else {
+ return peer_req->flags & EE_APPLICATION ?
+ DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR;
+ }
+}
+
/**
* drbd_submit_peer_request()
- * @device: DRBD device.
* @peer_req: peer request
*
* May spread the pages to multiple bios,
@@ -1619,10 +1629,9 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru
* on certain Xen deployments.
*/
/* TODO allocate from our own bio_set. */
-int drbd_submit_peer_request(struct drbd_device *device,
- struct drbd_peer_request *peer_req,
- const blk_opf_t opf, const int fault_type)
+int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
{
+ struct drbd_device *device = peer_req->peer_device->device;
struct bio *bios = NULL;
struct bio *bio;
struct page *page = peer_req->pages;
@@ -1667,7 +1676,18 @@ int drbd_submit_peer_request(struct drbd_device *device,
* generated bio, but a bio allocated on behalf of the peer.
*/
next_bio:
- bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO);
+ /* _DISCARD, _WRITE_ZEROES handled above.
+ * REQ_OP_FLUSH (empty flush) not expected,
+ * should have been mapped to a "drbd protocol barrier".
+ * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
+ */
+ if (!(peer_req_op(peer_req) == REQ_OP_WRITE ||
+ peer_req_op(peer_req) == REQ_OP_READ)) {
+ drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
+ return -EINVAL;
+ }
+
+ bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
bio->bi_private = peer_req;
@@ -1697,7 +1717,7 @@ next_bio:
bios = bios->bi_next;
bio->bi_next = NULL;
- drbd_submit_bio_noacct(device, fault_type, bio);
+ drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio);
} while (bios);
return 0;
}
@@ -1853,21 +1873,21 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
/* assume request_size == data_size, but special case trim. */
ds = data_size;
if (trim) {
- if (!expect(data_size == 0))
+ if (!expect(peer_device, data_size == 0))
return NULL;
ds = be32_to_cpu(trim->size);
} else if (zeroes) {
- if (!expect(data_size == 0))
+ if (!expect(peer_device, data_size == 0))
return NULL;
ds = be32_to_cpu(zeroes->size);
}
- if (!expect(IS_ALIGNED(ds, 512)))
+ if (!expect(peer_device, IS_ALIGNED(ds, 512)))
return NULL;
if (trim || zeroes) {
- if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
+ if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
return NULL;
- } else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
+ } else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE))
return NULL;
/* even though we trust out peer,
@@ -2051,6 +2071,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
* respective _drbd_clear_done_ee */
peer_req->w.cb = e_end_resync_block;
+ peer_req->opf = REQ_OP_WRITE;
peer_req->submit_jif = jiffies;
spin_lock_irq(&device->resource->req_lock);
@@ -2058,8 +2079,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE,
- DRBD_FAULT_RS_WR) == 0)
+ if (drbd_submit_peer_request(peer_req) == 0)
return 0;
/* don't care for the reason here */
@@ -2145,7 +2165,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet
* or in drbd_peer_request_endio. */
err = recv_resync_read(peer_device, sector, pi);
} else {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Can not write resync data to local disk.\n");
err = drbd_drain_block(peer_device, pi->size);
@@ -2375,16 +2395,6 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
return ret;
}
-/* see also bio_flags_to_wire()
- * DRBD_REQ_*, because we need to semantically map the flags to data packet
- * flags and back. We may replicate to other kernel versions. */
-static blk_opf_t wire_flags_to_bio_flags(u32 dpf)
-{
- return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
- (dpf & DP_FUA ? REQ_FUA : 0) |
- (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
-}
-
static enum req_op wire_flags_to_bio_op(u32 dpf)
{
if (dpf & DP_ZEROES)
@@ -2395,6 +2405,15 @@ static enum req_op wire_flags_to_bio_op(u32 dpf)
return REQ_OP_WRITE;
}
+/* see also bio_flags_to_wire() */
+static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
+{
+ return wire_flags_to_bio_op(dpf) |
+ (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+ (dpf & DP_FUA ? REQ_FUA : 0) |
+ (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
+}
+
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
unsigned int size)
{
@@ -2538,8 +2557,6 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
struct drbd_peer_request *peer_req;
struct p_data *p = pi->data;
u32 peer_seq = be32_to_cpu(p->seq_num);
- enum req_op op;
- blk_opf_t op_flags;
u32 dp_flags;
int err, tp;
@@ -2578,11 +2595,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
peer_req->flags |= EE_APPLICATION;
dp_flags = be32_to_cpu(p->dp_flags);
- op = wire_flags_to_bio_op(dp_flags);
- op_flags = wire_flags_to_bio_flags(dp_flags);
+ peer_req->opf = wire_flags_to_bio(connection, dp_flags);
if (pi->cmd == P_TRIM) {
D_ASSERT(peer_device, peer_req->i.size > 0);
- D_ASSERT(peer_device, op == REQ_OP_DISCARD);
+ D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD);
D_ASSERT(peer_device, peer_req->pages == NULL);
/* need to play safe: an older DRBD sender
* may mean zero-out while sending P_TRIM. */
@@ -2590,7 +2606,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
peer_req->flags |= EE_ZEROOUT;
} else if (pi->cmd == P_ZEROES) {
D_ASSERT(peer_device, peer_req->i.size > 0);
- D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
+ D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES);
D_ASSERT(peer_device, peer_req->pages == NULL);
/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
if (dp_flags & DP_DISCARD)
@@ -2677,8 +2693,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
}
- err = drbd_submit_peer_request(device, peer_req, op | op_flags,
- DRBD_FAULT_DT_WR);
+ err = drbd_submit_peer_request(peer_req);
if (!err)
return 0;
@@ -2789,7 +2804,6 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
struct drbd_peer_request *peer_req;
struct digest_info *di = NULL;
int size, verb;
- unsigned int fault_type;
struct p_block_req *p = pi->data;
peer_device = conn_peer_device(connection, pi->vnr);
@@ -2832,7 +2846,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
default:
BUG();
}
- if (verb && __ratelimit(&drbd_ratelimit_state))
+ if (verb && drbd_ratelimit())
drbd_err(device, "Can not satisfy peer's read request, "
"no local data.\n");
@@ -2849,11 +2863,11 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
put_ldev(device);
return -ENOMEM;
}
+ peer_req->opf = REQ_OP_READ;
switch (pi->cmd) {
case P_DATA_REQUEST:
peer_req->w.cb = w_e_end_data_req;
- fault_type = DRBD_FAULT_DT_RD;
/* application IO, don't drbd_rs_begin_io */
peer_req->flags |= EE_APPLICATION;
goto submit;
@@ -2867,14 +2881,12 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
fallthrough;
case P_RS_DATA_REQUEST:
peer_req->w.cb = w_e_end_rsdata_req;
- fault_type = DRBD_FAULT_RS_RD;
/* used in the sector offset progress display */
device->bm_resync_fo = BM_SECT_TO_BIT(sector);
break;
case P_OV_REPLY:
case P_CSUM_RS_REQUEST:
- fault_type = DRBD_FAULT_RS_RD;
di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
if (!di)
goto out_free_e;
@@ -2923,7 +2935,6 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
(unsigned long long)sector);
}
peer_req->w.cb = w_e_end_ov_req;
- fault_type = DRBD_FAULT_RS_RD;
break;
default:
@@ -2975,8 +2986,7 @@ submit_for_resync:
submit:
update_receiver_timing_details(connection, drbd_submit_peer_request);
inc_unacked(device);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ,
- fault_type) == 0)
+ if (drbd_submit_peer_request(peer_req) == 0)
return 0;
/* don't care for the reason here */
@@ -4947,7 +4957,6 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
if (get_ldev(device)) {
struct drbd_peer_request *peer_req;
- const enum req_op op = REQ_OP_WRITE_ZEROES;
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
size, 0, GFP_NOIO);
@@ -4957,6 +4966,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
}
peer_req->w.cb = e_end_resync_block;
+ peer_req->opf = REQ_OP_DISCARD;
peer_req->submit_jif = jiffies;
peer_req->flags |= EE_TRIM;
@@ -4965,8 +4975,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
- err = drbd_submit_peer_request(device, peer_req, op,
- DRBD_FAULT_RS_WR);
+ err = drbd_submit_peer_request(peer_req);
if (err) {
spin_lock_irq(&device->resource->req_lock);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 7f9bcc82fc9c..eb14ec8ec04c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_req.c
@@ -144,7 +144,7 @@ void drbd_req_destroy(struct kref *kref)
if (get_ldev_if_state(device, D_FAILED)) {
drbd_al_complete_io(device, &req->i);
put_ldev(device);
- } else if (__ratelimit(&drbd_ratelimit_state)) {
+ } else if (drbd_ratelimit()) {
drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), "
"but my Disk seems to have failed :(\n",
(unsigned long long) req->i.sector, req->i.size);
@@ -518,7 +518,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
{
- if (!__ratelimit(&drbd_ratelimit_state))
+ if (!drbd_ratelimit())
return;
drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n",
@@ -1402,7 +1402,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
submit_private_bio = true;
} else if (no_remote) {
nodata:
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
(unsigned long long)req->i.sector, req->i.size >> 9);
/* A write may have been queued for send_oos, however.
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 6237fa1dcb0e..b4017b5c3fbc 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
drbd_req.h
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 3f7bf9f2d874..75d13ea0024f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_state.c
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index f87371e55e68..cbaeb8018dbf 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef DRBD_STATE_H
#define DRBD_STATE_H
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
index d5b0479bc9a6..9d78d8e3912e 100644
--- a/drivers/block/drbd/drbd_state_change.h
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef DRBD_STATE_CHANGE_H
#define DRBD_STATE_CHANGE_H
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index fc01307607ea..0a06f744b096 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd.h
diff --git a/drivers/block/drbd/drbd_strings.h b/drivers/block/drbd/drbd_strings.h
index 87b94a27358a..0201f6590f6a 100644
--- a/drivers/block/drbd/drbd_strings.h
+++ b/drivers/block/drbd/drbd_strings.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __DRBD_STRINGS_H
#define __DRBD_STRINGS_H
diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
index 01e3babc5277..1ee81e3c2152 100644
--- a/drivers/block/drbd/drbd_vli.h
+++ b/drivers/block/drbd/drbd_vli.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
-*- linux-c -*-
drbd_receiver.c
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0bb1a900c2d5..f46738040d6b 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_worker.c
@@ -176,7 +176,7 @@ void drbd_peer_request_endio(struct bio *bio)
bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
bio_op(bio) == REQ_OP_DISCARD;
- if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
+ if (bio->bi_status && drbd_ratelimit())
drbd_warn(device, "%s: error=%d s=%llus\n",
is_write ? (is_discard ? "discard" : "write")
: "read", bio->bi_status,
@@ -240,7 +240,7 @@ void drbd_request_endio(struct bio *bio)
* though we still will complain noisily about it.
*/
if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
if (!bio->bi_status)
@@ -400,13 +400,13 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
goto defer;
peer_req->w.cb = w_e_send_csum;
+ peer_req->opf = REQ_OP_READ;
spin_lock_irq(&device->resource->req_lock);
list_add_tail(&peer_req->w.list, &device->read_ee);
spin_unlock_irq(&device->resource->req_lock);
atomic_add(size >> 9, &device->rs_sect_ev);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ,
- DRBD_FAULT_RS_RD) == 0)
+ if (drbd_submit_peer_request(peer_req) == 0)
return 0;
/* If it failed because of ENOMEM, retry should help. If it failed
@@ -1062,7 +1062,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel)
if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
} else {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Sending NegDReply. sector=%llus.\n",
(unsigned long long)peer_req->i.sector);
@@ -1135,13 +1135,13 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
else
err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
} else {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Not sending RSDataReply, "
"partner DISKLESS!\n");
err = 0;
}
} else {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
(unsigned long long)peer_req->i.sector);
@@ -1212,7 +1212,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
}
} else {
err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index ccad3d7b3ddd..487840e3564d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4593,8 +4593,10 @@ static int __init do_floppy_init(void)
goto out_put_disk;
err = floppy_alloc_disk(drive, 0);
- if (err)
+ if (err) {
+ blk_mq_free_tag_set(&tag_sets[drive]);
goto out_put_disk;
+ }
timer_setup(&motor_off_timer[drive], motor_off_callback, 0);
}
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 1f154f92f4c2..7d28e3aa406c 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -523,6 +523,24 @@ out:
}
CONFIGFS_ATTR(nullb_device_, badblocks);
+static ssize_t nullb_device_zone_readonly_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ return zone_cond_store(dev, page, count, BLK_ZONE_COND_READONLY);
+}
+CONFIGFS_ATTR_WO(nullb_device_, zone_readonly);
+
+static ssize_t nullb_device_zone_offline_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ return zone_cond_store(dev, page, count, BLK_ZONE_COND_OFFLINE);
+}
+CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
+
static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_size,
&nullb_device_attr_completion_nsec,
@@ -549,6 +567,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
+ &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_offline,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tag_bitmap,
@@ -614,7 +634,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
"zone_capacity,zone_max_active,zone_max_open,"
- "zone_nr_conv,zone_size\n");
+ "zone_nr_conv,zone_offline,zone_readonly,zone_size\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 94ff68052b1e..eb5972c50be8 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -151,6 +151,8 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, sector_t nr_sectors);
size_t null_zone_valid_read_len(struct nullb *nullb,
sector_t sector, unsigned int len);
+ssize_t zone_cond_store(struct nullb_device *dev, const char *page,
+ size_t count, enum blk_zone_cond cond);
#else
static inline int null_init_zoned_dev(struct nullb_device *dev,
struct request_queue *q)
@@ -174,6 +176,12 @@ static inline size_t null_zone_valid_read_len(struct nullb *nullb,
{
return len;
}
+static inline ssize_t zone_cond_store(struct nullb_device *dev,
+ const char *page, size_t count,
+ enum blk_zone_cond cond)
+{
+ return -EOPNOTSUPP;
+}
#define null_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 55a69e48ef8b..635ce0648133 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -384,8 +384,10 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
null_lock_zone(dev, zone);
- if (zone->cond == BLK_ZONE_COND_FULL) {
- /* Cannot write to a full zone */
+ if (zone->cond == BLK_ZONE_COND_FULL ||
+ zone->cond == BLK_ZONE_COND_READONLY ||
+ zone->cond == BLK_ZONE_COND_OFFLINE) {
+ /* Cannot write to the zone */
ret = BLK_STS_IOERR;
goto unlock;
}
@@ -613,7 +615,9 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op,
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
zone = &dev->zones[i];
null_lock_zone(dev, zone);
- if (zone->cond != BLK_ZONE_COND_EMPTY) {
+ if (zone->cond != BLK_ZONE_COND_EMPTY &&
+ zone->cond != BLK_ZONE_COND_READONLY &&
+ zone->cond != BLK_ZONE_COND_OFFLINE) {
null_reset_zone(dev, zone);
trace_nullb_zone_op(cmd, i, zone->cond);
}
@@ -627,6 +631,12 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op,
null_lock_zone(dev, zone);
+ if (zone->cond == BLK_ZONE_COND_READONLY ||
+ zone->cond == BLK_ZONE_COND_OFFLINE) {
+ ret = BLK_STS_IOERR;
+ goto unlock;
+ }
+
switch (op) {
case REQ_OP_ZONE_RESET:
ret = null_reset_zone(dev, zone);
@@ -648,6 +658,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op,
if (ret == BLK_STS_OK)
trace_nullb_zone_op(cmd, zone_no, zone->cond);
+unlock:
null_unlock_zone(dev, zone);
return ret;
@@ -674,6 +685,8 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
default:
dev = cmd->nq->dev;
zone = &dev->zones[null_zone_no(dev, sector)];
+ if (zone->cond == BLK_ZONE_COND_OFFLINE)
+ return BLK_STS_IOERR;
null_lock_zone(dev, zone);
sts = null_process_cmd(cmd, op, sector, nr_sectors);
@@ -681,3 +694,79 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
return sts;
}
}
+
+/*
+ * Set a zone in the read-only or offline condition.
+ */
+static void null_set_zone_cond(struct nullb_device *dev,
+ struct nullb_zone *zone, enum blk_zone_cond cond)
+{
+ if (WARN_ON_ONCE(cond != BLK_ZONE_COND_READONLY &&
+ cond != BLK_ZONE_COND_OFFLINE))
+ return;
+
+ null_lock_zone(dev, zone);
+
+ /*
+ * If the read-only condition is requested again to zones already in
+ * read-only condition, restore back normal empty condition. Do the same
+ * if the offline condition is requested for offline zones. Otherwise,
+ * set the specified zone condition to the zones. Finish the zones
+ * beforehand to free up zone resources.
+ */
+ if (zone->cond == cond) {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ if (dev->memory_backed)
+ null_handle_discard(dev, zone->start, zone->len);
+ } else {
+ if (zone->cond != BLK_ZONE_COND_READONLY &&
+ zone->cond != BLK_ZONE_COND_OFFLINE)
+ null_finish_zone(dev, zone);
+ zone->cond = cond;
+ zone->wp = (sector_t)-1;
+ }
+
+ null_unlock_zone(dev, zone);
+}
+
+/*
+ * Identify a zone from the sector written to configfs file. Then set zone
+ * condition to the zone.
+ */
+ssize_t zone_cond_store(struct nullb_device *dev, const char *page,
+ size_t count, enum blk_zone_cond cond)
+{
+ unsigned long long sector;
+ unsigned int zone_no;
+ int ret;
+
+ if (!dev->zoned) {
+ pr_err("null_blk device is not zoned\n");
+ return -EINVAL;
+ }
+
+ if (!dev->zones) {
+ pr_err("null_blk device is not yet powered\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtoull(page, 0, &sector);
+ if (ret < 0)
+ return ret;
+
+ zone_no = null_zone_no(dev, sector);
+ if (zone_no >= dev->nr_zones) {
+ pr_err("Sector out of range\n");
+ return -EINVAL;
+ }
+
+ if (dev->zones[zone_no].type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ pr_err("Can not change condition of conventional zones\n");
+ return -EINVAL;
+ }
+
+ null_set_zone_cond(dev, &dev->zones[zone_no], cond);
+
+ return count;
+}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
deleted file mode 100644
index 4cea3b08087e..000000000000
--- a/drivers/block/pktcdvd.c
+++ /dev/null
@@ -1,2944 +0,0 @@
-/*
- * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
- * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
- * Copyright (C) 2006 Thomas Maier <balagi@justmail.de>
- *
- * May be copied or modified under the terms of the GNU General Public
- * License. See linux/COPYING for more information.
- *
- * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and
- * DVD-RAM devices.
- *
- * Theory of operation:
- *
- * At the lowest level, there is the standard driver for the CD/DVD device,
- * such as drivers/scsi/sr.c. This driver can handle read and write requests,
- * but it doesn't know anything about the special restrictions that apply to
- * packet writing. One restriction is that write requests must be aligned to
- * packet boundaries on the physical media, and the size of a write request
- * must be equal to the packet size. Another restriction is that a
- * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read
- * command, if the previous command was a write.
- *
- * The purpose of the packet writing driver is to hide these restrictions from
- * higher layers, such as file systems, and present a block device that can be
- * randomly read and written using 2kB-sized blocks.
- *
- * The lowest layer in the packet writing driver is the packet I/O scheduler.
- * Its data is defined by the struct packet_iosched and includes two bio
- * queues with pending read and write requests. These queues are processed
- * by the pkt_iosched_process_queue() function. The write requests in this
- * queue are already properly aligned and sized. This layer is responsible for
- * issuing the flush cache commands and scheduling the I/O in a good order.
- *
- * The next layer transforms unaligned write requests to aligned writes. This
- * transformation requires reading missing pieces of data from the underlying
- * block device, assembling the pieces to full packets and queuing them to the
- * packet I/O scheduler.
- *
- * At the top layer there is a custom ->submit_bio function that forwards
- * read requests directly to the iosched queue and puts write requests in the
- * unaligned write queue. A kernel thread performs the necessary read
- * gathering to convert the unaligned writes to aligned writes and then feeds
- * them to the packet I/O scheduler.
- *
- *************************************************************************/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/pktcdvd.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/compat.h>
-#include <linux/kthread.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/file.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/miscdevice.h>
-#include <linux/freezer.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/backing-dev.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_ioctl.h>
-#include <scsi/scsi.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/nospec.h>
-#include <linux/uaccess.h>
-
-#define DRIVER_NAME "pktcdvd"
-
-#define pkt_err(pd, fmt, ...) \
- pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
-#define pkt_notice(pd, fmt, ...) \
- pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
-#define pkt_info(pd, fmt, ...) \
- pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
-
-#define pkt_dbg(level, pd, fmt, ...) \
-do { \
- if (level == 2 && PACKET_DEBUG >= 2) \
- pr_notice("%s: %s():" fmt, \
- pd->name, __func__, ##__VA_ARGS__); \
- else if (level == 1 && PACKET_DEBUG >= 1) \
- pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \
-} while (0)
-
-#define MAX_SPEED 0xffff
-
-static DEFINE_MUTEX(pktcdvd_mutex);
-static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
-static struct proc_dir_entry *pkt_proc;
-static int pktdev_major;
-static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
-static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
-static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
-static mempool_t psd_pool;
-static struct bio_set pkt_bio_set;
-
-static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
-static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
-
-/* forward declaration */
-static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
-static int pkt_remove_dev(dev_t pkt_dev);
-static int pkt_seq_show(struct seq_file *m, void *p);
-
-static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
-{
- return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
-}
-
-/**********************************************************
- * sysfs interface for pktcdvd
- * by (C) 2006 Thomas Maier <balagi@justmail.de>
-
- /sys/class/pktcdvd/pktcdvd[0-7]/
- stat/reset
- stat/packets_started
- stat/packets_finished
- stat/kb_written
- stat/kb_read
- stat/kb_read_gather
- write_queue/size
- write_queue/congestion_off
- write_queue/congestion_on
- **********************************************************/
-
-static ssize_t packets_started_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started);
-}
-static DEVICE_ATTR_RO(packets_started);
-
-static ssize_t packets_finished_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended);
-}
-static DEVICE_ATTR_RO(packets_finished);
-
-static ssize_t kb_written_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1);
-}
-static DEVICE_ATTR_RO(kb_written);
-
-static ssize_t kb_read_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1);
-}
-static DEVICE_ATTR_RO(kb_read);
-
-static ssize_t kb_read_gather_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1);
-}
-static DEVICE_ATTR_RO(kb_read_gather);
-
-static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
-
- if (len > 0) {
- pd->stats.pkt_started = 0;
- pd->stats.pkt_ended = 0;
- pd->stats.secs_w = 0;
- pd->stats.secs_rg = 0;
- pd->stats.secs_r = 0;
- }
- return len;
-}
-static DEVICE_ATTR_WO(reset);
-
-static struct attribute *pkt_stat_attrs[] = {
- &dev_attr_packets_finished.attr,
- &dev_attr_packets_started.attr,
- &dev_attr_kb_read.attr,
- &dev_attr_kb_written.attr,
- &dev_attr_kb_read_gather.attr,
- &dev_attr_reset.attr,
- NULL,
-};
-
-static const struct attribute_group pkt_stat_group = {
- .name = "stat",
- .attrs = pkt_stat_attrs,
-};
-
-static ssize_t size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int n;
-
- spin_lock(&pd->lock);
- n = sysfs_emit(buf, "%d\n", pd->bio_queue_size);
- spin_unlock(&pd->lock);
- return n;
-}
-static DEVICE_ATTR_RO(size);
-
-static void init_write_congestion_marks(int* lo, int* hi)
-{
- if (*hi > 0) {
- *hi = max(*hi, 500);
- *hi = min(*hi, 1000000);
- if (*lo <= 0)
- *lo = *hi - 100;
- else {
- *lo = min(*lo, *hi - 100);
- *lo = max(*lo, 100);
- }
- } else {
- *hi = -1;
- *lo = -1;
- }
-}
-
-static ssize_t congestion_off_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int n;
-
- spin_lock(&pd->lock);
- n = sysfs_emit(buf, "%d\n", pd->write_congestion_off);
- spin_unlock(&pd->lock);
- return n;
-}
-
-static ssize_t congestion_off_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int val;
-
- if (sscanf(buf, "%d", &val) == 1) {
- spin_lock(&pd->lock);
- pd->write_congestion_off = val;
- init_write_congestion_marks(&pd->write_congestion_off,
- &pd->write_congestion_on);
- spin_unlock(&pd->lock);
- }
- return len;
-}
-static DEVICE_ATTR_RW(congestion_off);
-
-static ssize_t congestion_on_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int n;
-
- spin_lock(&pd->lock);
- n = sysfs_emit(buf, "%d\n", pd->write_congestion_on);
- spin_unlock(&pd->lock);
- return n;
-}
-
-static ssize_t congestion_on_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
-{
- struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int val;
-
- if (sscanf(buf, "%d", &val) == 1) {
- spin_lock(&pd->lock);
- pd->write_congestion_on = val;
- init_write_congestion_marks(&pd->write_congestion_off,
- &pd->write_congestion_on);
- spin_unlock(&pd->lock);
- }
- return len;
-}
-static DEVICE_ATTR_RW(congestion_on);
-
-static struct attribute *pkt_wq_attrs[] = {
- &dev_attr_congestion_on.attr,
- &dev_attr_congestion_off.attr,
- &dev_attr_size.attr,
- NULL,
-};
-
-static const struct attribute_group pkt_wq_group = {
- .name = "write_queue",
- .attrs = pkt_wq_attrs,
-};
-
-static const struct attribute_group *pkt_groups[] = {
- &pkt_stat_group,
- &pkt_wq_group,
- NULL,
-};
-
-static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
-{
- if (class_pktcdvd) {
- pd->dev = device_create_with_groups(class_pktcdvd, NULL,
- MKDEV(0, 0), pd, pkt_groups,
- "%s", pd->name);
- if (IS_ERR(pd->dev))
- pd->dev = NULL;
- }
-}
-
-static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
-{
- if (class_pktcdvd)
- device_unregister(pd->dev);
-}
-
-
-/********************************************************************
- /sys/class/pktcdvd/
- add map block device
- remove unmap packet dev
- device_map show mappings
- *******************************************************************/
-
-static void class_pktcdvd_release(struct class *cls)
-{
- kfree(cls);
-}
-
-static ssize_t device_map_show(struct class *c, struct class_attribute *attr,
- char *data)
-{
- int n = 0;
- int idx;
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- for (idx = 0; idx < MAX_WRITERS; idx++) {
- struct pktcdvd_device *pd = pkt_devs[idx];
- if (!pd)
- continue;
- n += sprintf(data+n, "%s %u:%u %u:%u\n",
- pd->name,
- MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
- MAJOR(pd->bdev->bd_dev),
- MINOR(pd->bdev->bd_dev));
- }
- mutex_unlock(&ctl_mutex);
- return n;
-}
-static CLASS_ATTR_RO(device_map);
-
-static ssize_t add_store(struct class *c, struct class_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned int major, minor;
-
- if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
- /* pkt_setup_dev() expects caller to hold reference to self */
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
- pkt_setup_dev(MKDEV(major, minor), NULL);
-
- module_put(THIS_MODULE);
-
- return count;
- }
-
- return -EINVAL;
-}
-static CLASS_ATTR_WO(add);
-
-static ssize_t remove_store(struct class *c, struct class_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned int major, minor;
- if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
- pkt_remove_dev(MKDEV(major, minor));
- return count;
- }
- return -EINVAL;
-}
-static CLASS_ATTR_WO(remove);
-
-static struct attribute *class_pktcdvd_attrs[] = {
- &class_attr_add.attr,
- &class_attr_remove.attr,
- &class_attr_device_map.attr,
- NULL,
-};
-ATTRIBUTE_GROUPS(class_pktcdvd);
-
-static int pkt_sysfs_init(void)
-{
- int ret = 0;
-
- /*
- * create control files in sysfs
- * /sys/class/pktcdvd/...
- */
- class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL);
- if (!class_pktcdvd)
- return -ENOMEM;
- class_pktcdvd->name = DRIVER_NAME;
- class_pktcdvd->owner = THIS_MODULE;
- class_pktcdvd->class_release = class_pktcdvd_release;
- class_pktcdvd->class_groups = class_pktcdvd_groups;
- ret = class_register(class_pktcdvd);
- if (ret) {
- kfree(class_pktcdvd);
- class_pktcdvd = NULL;
- pr_err("failed to create class pktcdvd\n");
- return ret;
- }
- return 0;
-}
-
-static void pkt_sysfs_cleanup(void)
-{
- if (class_pktcdvd)
- class_destroy(class_pktcdvd);
- class_pktcdvd = NULL;
-}
-
-/********************************************************************
- entries in debugfs
-
- /sys/kernel/debug/pktcdvd[0-7]/
- info
-
- *******************************************************************/
-
-static int pkt_debugfs_seq_show(struct seq_file *m, void *p)
-{
- return pkt_seq_show(m, p);
-}
-
-static int pkt_debugfs_fops_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pkt_debugfs_seq_show, inode->i_private);
-}
-
-static const struct file_operations debug_fops = {
- .open = pkt_debugfs_fops_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .owner = THIS_MODULE,
-};
-
-static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
-{
- if (!pkt_debugfs_root)
- return;
- pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
- if (!pd->dfs_d_root)
- return;
-
- pd->dfs_f_info = debugfs_create_file("info", 0444,
- pd->dfs_d_root, pd, &debug_fops);
-}
-
-static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
-{
- if (!pkt_debugfs_root)
- return;
- debugfs_remove(pd->dfs_f_info);
- debugfs_remove(pd->dfs_d_root);
- pd->dfs_f_info = NULL;
- pd->dfs_d_root = NULL;
-}
-
-static void pkt_debugfs_init(void)
-{
- pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
-}
-
-static void pkt_debugfs_cleanup(void)
-{
- debugfs_remove(pkt_debugfs_root);
- pkt_debugfs_root = NULL;
-}
-
-/* ----------------------------------------------------------*/
-
-
-static void pkt_bio_finished(struct pktcdvd_device *pd)
-{
- BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
- if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
- pkt_dbg(2, pd, "queue empty\n");
- atomic_set(&pd->iosched.attention, 1);
- wake_up(&pd->wqueue);
- }
-}
-
-/*
- * Allocate a packet_data struct
- */
-static struct packet_data *pkt_alloc_packet_data(int frames)
-{
- int i;
- struct packet_data *pkt;
-
- pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL);
- if (!pkt)
- goto no_pkt;
-
- pkt->frames = frames;
- pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
- if (!pkt->w_bio)
- goto no_bio;
-
- for (i = 0; i < frames / FRAMES_PER_PAGE; i++) {
- pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
- if (!pkt->pages[i])
- goto no_page;
- }
-
- spin_lock_init(&pkt->lock);
- bio_list_init(&pkt->orig_bios);
-
- for (i = 0; i < frames; i++) {
- pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
- if (!pkt->r_bios[i])
- goto no_rd_bio;
- }
-
- return pkt;
-
-no_rd_bio:
- for (i = 0; i < frames; i++)
- kfree(pkt->r_bios[i]);
-no_page:
- for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
- if (pkt->pages[i])
- __free_page(pkt->pages[i]);
- kfree(pkt->w_bio);
-no_bio:
- kfree(pkt);
-no_pkt:
- return NULL;
-}
-
-/*
- * Free a packet_data struct
- */
-static void pkt_free_packet_data(struct packet_data *pkt)
-{
- int i;
-
- for (i = 0; i < pkt->frames; i++)
- kfree(pkt->r_bios[i]);
- for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
- __free_page(pkt->pages[i]);
- kfree(pkt->w_bio);
- kfree(pkt);
-}
-
-static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
-{
- struct packet_data *pkt, *next;
-
- BUG_ON(!list_empty(&pd->cdrw.pkt_active_list));
-
- list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
- pkt_free_packet_data(pkt);
- }
- INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
-}
-
-static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
-{
- struct packet_data *pkt;
-
- BUG_ON(!list_empty(&pd->cdrw.pkt_free_list));
-
- while (nr_packets > 0) {
- pkt = pkt_alloc_packet_data(pd->settings.size >> 2);
- if (!pkt) {
- pkt_shrink_pktlist(pd);
- return 0;
- }
- pkt->id = nr_packets;
- pkt->pd = pd;
- list_add(&pkt->list, &pd->cdrw.pkt_free_list);
- nr_packets--;
- }
- return 1;
-}
-
-static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
-{
- struct rb_node *n = rb_next(&node->rb_node);
- if (!n)
- return NULL;
- return rb_entry(n, struct pkt_rb_node, rb_node);
-}
-
-static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
-{
- rb_erase(&node->rb_node, &pd->bio_queue);
- mempool_free(node, &pd->rb_pool);
- pd->bio_queue_size--;
- BUG_ON(pd->bio_queue_size < 0);
-}
-
-/*
- * Find the first node in the pd->bio_queue rb tree with a starting sector >= s.
- */
-static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s)
-{
- struct rb_node *n = pd->bio_queue.rb_node;
- struct rb_node *next;
- struct pkt_rb_node *tmp;
-
- if (!n) {
- BUG_ON(pd->bio_queue_size > 0);
- return NULL;
- }
-
- for (;;) {
- tmp = rb_entry(n, struct pkt_rb_node, rb_node);
- if (s <= tmp->bio->bi_iter.bi_sector)
- next = n->rb_left;
- else
- next = n->rb_right;
- if (!next)
- break;
- n = next;
- }
-
- if (s > tmp->bio->bi_iter.bi_sector) {
- tmp = pkt_rbtree_next(tmp);
- if (!tmp)
- return NULL;
- }
- BUG_ON(s > tmp->bio->bi_iter.bi_sector);
- return tmp;
-}
-
-/*
- * Insert a node into the pd->bio_queue rb tree.
- */
-static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
-{
- struct rb_node **p = &pd->bio_queue.rb_node;
- struct rb_node *parent = NULL;
- sector_t s = node->bio->bi_iter.bi_sector;
- struct pkt_rb_node *tmp;
-
- while (*p) {
- parent = *p;
- tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
- if (s < tmp->bio->bi_iter.bi_sector)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
- rb_link_node(&node->rb_node, parent, p);
- rb_insert_color(&node->rb_node, &pd->bio_queue);
- pd->bio_queue_size++;
-}
-
-/*
- * Send a packet_command to the underlying block device and
- * wait for completion.
- */
-static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
-{
- struct request_queue *q = bdev_get_queue(pd->bdev);
- struct scsi_cmnd *scmd;
- struct request *rq;
- int ret = 0;
-
- rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
- REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
- scmd = blk_mq_rq_to_pdu(rq);
-
- if (cgc->buflen) {
- ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
- GFP_NOIO);
- if (ret)
- goto out;
- }
-
- scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
- memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE);
-
- rq->timeout = 60*HZ;
- if (cgc->quiet)
- rq->rq_flags |= RQF_QUIET;
-
- blk_execute_rq(rq, false);
- if (scmd->result)
- ret = -EIO;
-out:
- blk_mq_free_request(rq);
- return ret;
-}
-
-static const char *sense_key_string(__u8 index)
-{
- static const char * const info[] = {
- "No sense", "Recovered error", "Not ready",
- "Medium error", "Hardware error", "Illegal request",
- "Unit attention", "Data protect", "Blank check",
- };
-
- return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
-}
-
-/*
- * A generic sense dump / resolve mechanism should be implemented across
- * all ATAPI + SCSI devices.
- */
-static void pkt_dump_sense(struct pktcdvd_device *pd,
- struct packet_command *cgc)
-{
- struct scsi_sense_hdr *sshdr = cgc->sshdr;
-
- if (sshdr)
- pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
- CDROM_PACKET_SIZE, cgc->cmd,
- sshdr->sense_key, sshdr->asc, sshdr->ascq,
- sense_key_string(sshdr->sense_key));
- else
- pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
-}
-
-/*
- * flush the drive cache to media
- */
-static int pkt_flush_cache(struct pktcdvd_device *pd)
-{
- struct packet_command cgc;
-
- init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
- cgc.cmd[0] = GPCMD_FLUSH_CACHE;
- cgc.quiet = 1;
-
- /*
- * the IMMED bit -- we default to not setting it, although that
- * would allow a much faster close, this is safer
- */
-#if 0
- cgc.cmd[1] = 1 << 1;
-#endif
- return pkt_generic_packet(pd, &cgc);
-}
-
-/*
- * speed is given as the normal factor, e.g. 4 for 4x
- */
-static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
- unsigned write_speed, unsigned read_speed)
-{
- struct packet_command cgc;
- struct scsi_sense_hdr sshdr;
- int ret;
-
- init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
- cgc.sshdr = &sshdr;
- cgc.cmd[0] = GPCMD_SET_SPEED;
- cgc.cmd[2] = (read_speed >> 8) & 0xff;
- cgc.cmd[3] = read_speed & 0xff;
- cgc.cmd[4] = (write_speed >> 8) & 0xff;
- cgc.cmd[5] = write_speed & 0xff;
-
- ret = pkt_generic_packet(pd, &cgc);
- if (ret)
- pkt_dump_sense(pd, &cgc);
-
- return ret;
-}
-
-/*
- * Queue a bio for processing by the low-level CD device. Must be called
- * from process context.
- */
-static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
-{
- spin_lock(&pd->iosched.lock);
- if (bio_data_dir(bio) == READ)
- bio_list_add(&pd->iosched.read_queue, bio);
- else
- bio_list_add(&pd->iosched.write_queue, bio);
- spin_unlock(&pd->iosched.lock);
-
- atomic_set(&pd->iosched.attention, 1);
- wake_up(&pd->wqueue);
-}
-
-/*
- * Process the queued read/write requests. This function handles special
- * requirements for CDRW drives:
- * - A cache flush command must be inserted before a read request if the
- * previous request was a write.
- * - Switching between reading and writing is slow, so don't do it more often
- * than necessary.
- * - Optimize for throughput at the expense of latency. This means that streaming
- * writes will never be interrupted by a read, but if the drive has to seek
- * before the next write, switch to reading instead if there are any pending
- * read requests.
- * - Set the read speed according to current usage pattern. When only reading
- * from the device, it's best to use the highest possible read speed, but
- * when switching often between reading and writing, it's better to have the
- * same read and write speeds.
- */
-static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
-{
-
- if (atomic_read(&pd->iosched.attention) == 0)
- return;
- atomic_set(&pd->iosched.attention, 0);
-
- for (;;) {
- struct bio *bio;
- int reads_queued, writes_queued;
-
- spin_lock(&pd->iosched.lock);
- reads_queued = !bio_list_empty(&pd->iosched.read_queue);
- writes_queued = !bio_list_empty(&pd->iosched.write_queue);
- spin_unlock(&pd->iosched.lock);
-
- if (!reads_queued && !writes_queued)
- break;
-
- if (pd->iosched.writing) {
- int need_write_seek = 1;
- spin_lock(&pd->iosched.lock);
- bio = bio_list_peek(&pd->iosched.write_queue);
- spin_unlock(&pd->iosched.lock);
- if (bio && (bio->bi_iter.bi_sector ==
- pd->iosched.last_write))
- need_write_seek = 0;
- if (need_write_seek && reads_queued) {
- if (atomic_read(&pd->cdrw.pending_bios) > 0) {
- pkt_dbg(2, pd, "write, waiting\n");
- break;
- }
- pkt_flush_cache(pd);
- pd->iosched.writing = 0;
- }
- } else {
- if (!reads_queued && writes_queued) {
- if (atomic_read(&pd->cdrw.pending_bios) > 0) {
- pkt_dbg(2, pd, "read, waiting\n");
- break;
- }
- pd->iosched.writing = 1;
- }
- }
-
- spin_lock(&pd->iosched.lock);
- if (pd->iosched.writing)
- bio = bio_list_pop(&pd->iosched.write_queue);
- else
- bio = bio_list_pop(&pd->iosched.read_queue);
- spin_unlock(&pd->iosched.lock);
-
- if (!bio)
- continue;
-
- if (bio_data_dir(bio) == READ)
- pd->iosched.successive_reads +=
- bio->bi_iter.bi_size >> 10;
- else {
- pd->iosched.successive_reads = 0;
- pd->iosched.last_write = bio_end_sector(bio);
- }
- if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
- if (pd->read_speed == pd->write_speed) {
- pd->read_speed = MAX_SPEED;
- pkt_set_speed(pd, pd->write_speed, pd->read_speed);
- }
- } else {
- if (pd->read_speed != pd->write_speed) {
- pd->read_speed = pd->write_speed;
- pkt_set_speed(pd, pd->write_speed, pd->read_speed);
- }
- }
-
- atomic_inc(&pd->cdrw.pending_bios);
- submit_bio_noacct(bio);
- }
-}
-
-/*
- * Special care is needed if the underlying block device has a small
- * max_phys_segments value.
- */
-static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
-{
- if ((pd->settings.size << 9) / CD_FRAMESIZE
- <= queue_max_segments(q)) {
- /*
- * The cdrom device can handle one segment/frame
- */
- clear_bit(PACKET_MERGE_SEGS, &pd->flags);
- return 0;
- } else if ((pd->settings.size << 9) / PAGE_SIZE
- <= queue_max_segments(q)) {
- /*
- * We can handle this case at the expense of some extra memory
- * copies during write operations
- */
- set_bit(PACKET_MERGE_SEGS, &pd->flags);
- return 0;
- } else {
- pkt_err(pd, "cdrom max_phys_segments too small\n");
- return -EIO;
- }
-}
-
-static void pkt_end_io_read(struct bio *bio)
-{
- struct packet_data *pkt = bio->bi_private;
- struct pktcdvd_device *pd = pkt->pd;
- BUG_ON(!pd);
-
- pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
- bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
-
- if (bio->bi_status)
- atomic_inc(&pkt->io_errors);
- bio_uninit(bio);
- if (atomic_dec_and_test(&pkt->io_wait)) {
- atomic_inc(&pkt->run_sm);
- wake_up(&pd->wqueue);
- }
- pkt_bio_finished(pd);
-}
-
-static void pkt_end_io_packet_write(struct bio *bio)
-{
- struct packet_data *pkt = bio->bi_private;
- struct pktcdvd_device *pd = pkt->pd;
- BUG_ON(!pd);
-
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
-
- pd->stats.pkt_ended++;
-
- bio_uninit(bio);
- pkt_bio_finished(pd);
- atomic_dec(&pkt->io_wait);
- atomic_inc(&pkt->run_sm);
- wake_up(&pd->wqueue);
-}
-
-/*
- * Schedule reads for the holes in a packet
- */
-static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
-{
- int frames_read = 0;
- struct bio *bio;
- int f;
- char written[PACKET_MAX_SIZE];
-
- BUG_ON(bio_list_empty(&pkt->orig_bios));
-
- atomic_set(&pkt->io_wait, 0);
- atomic_set(&pkt->io_errors, 0);
-
- /*
- * Figure out which frames we need to read before we can write.
- */
- memset(written, 0, sizeof(written));
- spin_lock(&pkt->lock);
- bio_list_for_each(bio, &pkt->orig_bios) {
- int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
- (CD_FRAMESIZE >> 9);
- int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
- pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
- BUG_ON(first_frame < 0);
- BUG_ON(first_frame + num_frames > pkt->frames);
- for (f = first_frame; f < first_frame + num_frames; f++)
- written[f] = 1;
- }
- spin_unlock(&pkt->lock);
-
- if (pkt->cache_valid) {
- pkt_dbg(2, pd, "zone %llx cached\n",
- (unsigned long long)pkt->sector);
- goto out_account;
- }
-
- /*
- * Schedule reads for missing parts of the packet.
- */
- for (f = 0; f < pkt->frames; f++) {
- int p, offset;
-
- if (written[f])
- continue;
-
- bio = pkt->r_bios[f];
- bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
- bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
- bio->bi_end_io = pkt_end_io_read;
- bio->bi_private = pkt;
-
- p = (f * CD_FRAMESIZE) / PAGE_SIZE;
- offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
- pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
- f, pkt->pages[p], offset);
- if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
- BUG();
-
- atomic_inc(&pkt->io_wait);
- pkt_queue_bio(pd, bio);
- frames_read++;
- }
-
-out_account:
- pkt_dbg(2, pd, "need %d frames for zone %llx\n",
- frames_read, (unsigned long long)pkt->sector);
- pd->stats.pkt_started++;
- pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
-}
-
-/*
- * Find a packet matching zone, or the least recently used packet if
- * there is no match.
- */
-static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone)
-{
- struct packet_data *pkt;
-
- list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) {
- if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) {
- list_del_init(&pkt->list);
- if (pkt->sector != zone)
- pkt->cache_valid = 0;
- return pkt;
- }
- }
- BUG();
- return NULL;
-}
-
-static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt)
-{
- if (pkt->cache_valid) {
- list_add(&pkt->list, &pd->cdrw.pkt_free_list);
- } else {
- list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list);
- }
-}
-
-static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
-{
-#if PACKET_DEBUG > 1
- static const char *state_name[] = {
- "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
- };
- enum packet_data_state old_state = pkt->state;
- pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
- pkt->id, (unsigned long long)pkt->sector,
- state_name[old_state], state_name[state]);
-#endif
- pkt->state = state;
-}
-
-/*
- * Scan the work queue to see if we can start a new packet.
- * returns non-zero if any work was done.
- */
-static int pkt_handle_queue(struct pktcdvd_device *pd)
-{
- struct packet_data *pkt, *p;
- struct bio *bio = NULL;
- sector_t zone = 0; /* Suppress gcc warning */
- struct pkt_rb_node *node, *first_node;
- struct rb_node *n;
-
- atomic_set(&pd->scan_queue, 0);
-
- if (list_empty(&pd->cdrw.pkt_free_list)) {
- pkt_dbg(2, pd, "no pkt\n");
- return 0;
- }
-
- /*
- * Try to find a zone we are not already working on.
- */
- spin_lock(&pd->lock);
- first_node = pkt_rbtree_find(pd, pd->current_sector);
- if (!first_node) {
- n = rb_first(&pd->bio_queue);
- if (n)
- first_node = rb_entry(n, struct pkt_rb_node, rb_node);
- }
- node = first_node;
- while (node) {
- bio = node->bio;
- zone = get_zone(bio->bi_iter.bi_sector, pd);
- list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
- if (p->sector == zone) {
- bio = NULL;
- goto try_next_bio;
- }
- }
- break;
-try_next_bio:
- node = pkt_rbtree_next(node);
- if (!node) {
- n = rb_first(&pd->bio_queue);
- if (n)
- node = rb_entry(n, struct pkt_rb_node, rb_node);
- }
- if (node == first_node)
- node = NULL;
- }
- spin_unlock(&pd->lock);
- if (!bio) {
- pkt_dbg(2, pd, "no bio\n");
- return 0;
- }
-
- pkt = pkt_get_packet_data(pd, zone);
-
- pd->current_sector = zone + pd->settings.size;
- pkt->sector = zone;
- BUG_ON(pkt->frames != pd->settings.size >> 2);
- pkt->write_size = 0;
-
- /*
- * Scan work queue for bios in the same zone and link them
- * to this packet.
- */
- spin_lock(&pd->lock);
- pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
- while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
- bio = node->bio;
- pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
- get_zone(bio->bi_iter.bi_sector, pd));
- if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
- break;
- pkt_rbtree_erase(pd, node);
- spin_lock(&pkt->lock);
- bio_list_add(&pkt->orig_bios, bio);
- pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
- spin_unlock(&pkt->lock);
- }
- /* check write congestion marks, and if bio_queue_size is
- * below, wake up any waiters
- */
- if (pd->congested &&
- pd->bio_queue_size <= pd->write_congestion_off) {
- pd->congested = false;
- wake_up_var(&pd->congested);
- }
- spin_unlock(&pd->lock);
-
- pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
- pkt_set_state(pkt, PACKET_WAITING_STATE);
- atomic_set(&pkt->run_sm, 1);
-
- spin_lock(&pd->cdrw.active_list_lock);
- list_add(&pkt->list, &pd->cdrw.pkt_active_list);
- spin_unlock(&pd->cdrw.active_list_lock);
-
- return 1;
-}
-
-/**
- * bio_list_copy_data - copy contents of data buffers from one chain of bios to
- * another
- * @src: source bio list
- * @dst: destination bio list
- *
- * Stops when it reaches the end of either the @src list or @dst list - that is,
- * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
- * bios).
- */
-static void bio_list_copy_data(struct bio *dst, struct bio *src)
-{
- struct bvec_iter src_iter = src->bi_iter;
- struct bvec_iter dst_iter = dst->bi_iter;
-
- while (1) {
- if (!src_iter.bi_size) {
- src = src->bi_next;
- if (!src)
- break;
-
- src_iter = src->bi_iter;
- }
-
- if (!dst_iter.bi_size) {
- dst = dst->bi_next;
- if (!dst)
- break;
-
- dst_iter = dst->bi_iter;
- }
-
- bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
- }
-}
-
-/*
- * Assemble a bio to write one packet and queue the bio for processing
- * by the underlying block device.
- */
-static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
-{
- int f;
-
- bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
- REQ_OP_WRITE);
- pkt->w_bio->bi_iter.bi_sector = pkt->sector;
- pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
- pkt->w_bio->bi_private = pkt;
-
- /* XXX: locking? */
- for (f = 0; f < pkt->frames; f++) {
- struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
- unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
-
- if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset))
- BUG();
- }
- pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
-
- /*
- * Fill-in bvec with data from orig_bios.
- */
- spin_lock(&pkt->lock);
- bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
-
- pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
- spin_unlock(&pkt->lock);
-
- pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
- pkt->write_size, (unsigned long long)pkt->sector);
-
- if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames))
- pkt->cache_valid = 1;
- else
- pkt->cache_valid = 0;
-
- /* Start the write request */
- atomic_set(&pkt->io_wait, 1);
- pkt_queue_bio(pd, pkt->w_bio);
-}
-
-static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
-{
- struct bio *bio;
-
- if (status)
- pkt->cache_valid = 0;
-
- /* Finish all bios corresponding to this packet */
- while ((bio = bio_list_pop(&pkt->orig_bios))) {
- bio->bi_status = status;
- bio_endio(bio);
- }
-}
-
-static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
-{
- pkt_dbg(2, pd, "pkt %d\n", pkt->id);
-
- for (;;) {
- switch (pkt->state) {
- case PACKET_WAITING_STATE:
- if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
- return;
-
- pkt->sleep_time = 0;
- pkt_gather_data(pd, pkt);
- pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
- break;
-
- case PACKET_READ_WAIT_STATE:
- if (atomic_read(&pkt->io_wait) > 0)
- return;
-
- if (atomic_read(&pkt->io_errors) > 0) {
- pkt_set_state(pkt, PACKET_RECOVERY_STATE);
- } else {
- pkt_start_write(pd, pkt);
- }
- break;
-
- case PACKET_WRITE_WAIT_STATE:
- if (atomic_read(&pkt->io_wait) > 0)
- return;
-
- if (!pkt->w_bio->bi_status) {
- pkt_set_state(pkt, PACKET_FINISHED_STATE);
- } else {
- pkt_set_state(pkt, PACKET_RECOVERY_STATE);
- }
- break;
-
- case PACKET_RECOVERY_STATE:
- pkt_dbg(2, pd, "No recovery possible\n");
- pkt_set_state(pkt, PACKET_FINISHED_STATE);
- break;
-
- case PACKET_FINISHED_STATE:
- pkt_finish_packet(pkt, pkt->w_bio->bi_status);
- return;
-
- default:
- BUG();
- break;
- }
- }
-}
-
-static void pkt_handle_packets(struct pktcdvd_device *pd)
-{
- struct packet_data *pkt, *next;
-
- /*
- * Run state machine for active packets
- */
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- if (atomic_read(&pkt->run_sm) > 0) {
- atomic_set(&pkt->run_sm, 0);
- pkt_run_state_machine(pd, pkt);
- }
- }
-
- /*
- * Move no longer active packets to the free list
- */
- spin_lock(&pd->cdrw.active_list_lock);
- list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
- if (pkt->state == PACKET_FINISHED_STATE) {
- list_del(&pkt->list);
- pkt_put_packet_data(pd, pkt);
- pkt_set_state(pkt, PACKET_IDLE_STATE);
- atomic_set(&pd->scan_queue, 1);
- }
- }
- spin_unlock(&pd->cdrw.active_list_lock);
-}
-
-static void pkt_count_states(struct pktcdvd_device *pd, int *states)
-{
- struct packet_data *pkt;
- int i;
-
- for (i = 0; i < PACKET_NUM_STATES; i++)
- states[i] = 0;
-
- spin_lock(&pd->cdrw.active_list_lock);
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- states[pkt->state]++;
- }
- spin_unlock(&pd->cdrw.active_list_lock);
-}
-
-/*
- * kcdrwd is woken up when writes have been queued for one of our
- * registered devices
- */
-static int kcdrwd(void *foobar)
-{
- struct pktcdvd_device *pd = foobar;
- struct packet_data *pkt;
- long min_sleep_time, residue;
-
- set_user_nice(current, MIN_NICE);
- set_freezable();
-
- for (;;) {
- DECLARE_WAITQUEUE(wait, current);
-
- /*
- * Wait until there is something to do
- */
- add_wait_queue(&pd->wqueue, &wait);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- /* Check if we need to run pkt_handle_queue */
- if (atomic_read(&pd->scan_queue) > 0)
- goto work_to_do;
-
- /* Check if we need to run the state machine for some packet */
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- if (atomic_read(&pkt->run_sm) > 0)
- goto work_to_do;
- }
-
- /* Check if we need to process the iosched queues */
- if (atomic_read(&pd->iosched.attention) != 0)
- goto work_to_do;
-
- /* Otherwise, go to sleep */
- if (PACKET_DEBUG > 1) {
- int states[PACKET_NUM_STATES];
- pkt_count_states(pd, states);
- pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
- states[0], states[1], states[2],
- states[3], states[4], states[5]);
- }
-
- min_sleep_time = MAX_SCHEDULE_TIMEOUT;
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- if (pkt->sleep_time && pkt->sleep_time < min_sleep_time)
- min_sleep_time = pkt->sleep_time;
- }
-
- pkt_dbg(2, pd, "sleeping\n");
- residue = schedule_timeout(min_sleep_time);
- pkt_dbg(2, pd, "wake up\n");
-
- /* make swsusp happy with our thread */
- try_to_freeze();
-
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- if (!pkt->sleep_time)
- continue;
- pkt->sleep_time -= min_sleep_time - residue;
- if (pkt->sleep_time <= 0) {
- pkt->sleep_time = 0;
- atomic_inc(&pkt->run_sm);
- }
- }
-
- if (kthread_should_stop())
- break;
- }
-work_to_do:
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&pd->wqueue, &wait);
-
- if (kthread_should_stop())
- break;
-
- /*
- * if pkt_handle_queue returns true, we can queue
- * another request.
- */
- while (pkt_handle_queue(pd))
- ;
-
- /*
- * Handle packet state machine
- */
- pkt_handle_packets(pd);
-
- /*
- * Handle iosched queues
- */
- pkt_iosched_process_queue(pd);
- }
-
- return 0;
-}
-
-static void pkt_print_settings(struct pktcdvd_device *pd)
-{
- pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
- pd->settings.fp ? "Fixed" : "Variable",
- pd->settings.size >> 2,
- pd->settings.block_mode == 8 ? '1' : '2');
-}
-
-static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
-{
- memset(cgc->cmd, 0, sizeof(cgc->cmd));
-
- cgc->cmd[0] = GPCMD_MODE_SENSE_10;
- cgc->cmd[2] = page_code | (page_control << 6);
- cgc->cmd[7] = cgc->buflen >> 8;
- cgc->cmd[8] = cgc->buflen & 0xff;
- cgc->data_direction = CGC_DATA_READ;
- return pkt_generic_packet(pd, cgc);
-}
-
-static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc)
-{
- memset(cgc->cmd, 0, sizeof(cgc->cmd));
- memset(cgc->buffer, 0, 2);
- cgc->cmd[0] = GPCMD_MODE_SELECT_10;
- cgc->cmd[1] = 0x10; /* PF */
- cgc->cmd[7] = cgc->buflen >> 8;
- cgc->cmd[8] = cgc->buflen & 0xff;
- cgc->data_direction = CGC_DATA_WRITE;
- return pkt_generic_packet(pd, cgc);
-}
-
-static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di)
-{
- struct packet_command cgc;
- int ret;
-
- /* set up command and get the disc info */
- init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_DISC_INFO;
- cgc.cmd[8] = cgc.buflen = 2;
- cgc.quiet = 1;
-
- ret = pkt_generic_packet(pd, &cgc);
- if (ret)
- return ret;
-
- /* not all drives have the same disc_info length, so requeue
- * packet with the length the drive tells us it can supply
- */
- cgc.buflen = be16_to_cpu(di->disc_information_length) +
- sizeof(di->disc_information_length);
-
- if (cgc.buflen > sizeof(disc_information))
- cgc.buflen = sizeof(disc_information);
-
- cgc.cmd[8] = cgc.buflen;
- return pkt_generic_packet(pd, &cgc);
-}
-
-static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti)
-{
- struct packet_command cgc;
- int ret;
-
- init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
- cgc.cmd[1] = type & 3;
- cgc.cmd[4] = (track & 0xff00) >> 8;
- cgc.cmd[5] = track & 0xff;
- cgc.cmd[8] = 8;
- cgc.quiet = 1;
-
- ret = pkt_generic_packet(pd, &cgc);
- if (ret)
- return ret;
-
- cgc.buflen = be16_to_cpu(ti->track_information_length) +
- sizeof(ti->track_information_length);
-
- if (cgc.buflen > sizeof(track_information))
- cgc.buflen = sizeof(track_information);
-
- cgc.cmd[8] = cgc.buflen;
- return pkt_generic_packet(pd, &cgc);
-}
-
-static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
- long *last_written)
-{
- disc_information di;
- track_information ti;
- __u32 last_track;
- int ret;
-
- ret = pkt_get_disc_info(pd, &di);
- if (ret)
- return ret;
-
- last_track = (di.last_track_msb << 8) | di.last_track_lsb;
- ret = pkt_get_track_info(pd, last_track, 1, &ti);
- if (ret)
- return ret;
-
- /* if this track is blank, try the previous. */
- if (ti.blank) {
- last_track--;
- ret = pkt_get_track_info(pd, last_track, 1, &ti);
- if (ret)
- return ret;
- }
-
- /* if last recorded field is valid, return it. */
- if (ti.lra_v) {
- *last_written = be32_to_cpu(ti.last_rec_address);
- } else {
- /* make it up instead */
- *last_written = be32_to_cpu(ti.track_start) +
- be32_to_cpu(ti.track_size);
- if (ti.free_blocks)
- *last_written -= (be32_to_cpu(ti.free_blocks) + 7);
- }
- return 0;
-}
-
-/*
- * write mode select package based on pd->settings
- */
-static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
-{
- struct packet_command cgc;
- struct scsi_sense_hdr sshdr;
- write_param_page *wp;
- char buffer[128];
- int ret, size;
-
- /* doesn't apply to DVD+RW or DVD-RAM */
- if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12))
- return 0;
-
- memset(buffer, 0, sizeof(buffer));
- init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
- cgc.sshdr = &sshdr;
- ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
- if (ret) {
- pkt_dump_sense(pd, &cgc);
- return ret;
- }
-
- size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
- pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
- if (size > sizeof(buffer))
- size = sizeof(buffer);
-
- /*
- * now get it all
- */
- init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
- cgc.sshdr = &sshdr;
- ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
- if (ret) {
- pkt_dump_sense(pd, &cgc);
- return ret;
- }
-
- /*
- * write page is offset header + block descriptor length
- */
- wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset];
-
- wp->fp = pd->settings.fp;
- wp->track_mode = pd->settings.track_mode;
- wp->write_type = pd->settings.write_type;
- wp->data_block_type = pd->settings.block_mode;
-
- wp->multi_session = 0;
-
-#ifdef PACKET_USE_LS
- wp->link_size = 7;
- wp->ls_v = 1;
-#endif
-
- if (wp->data_block_type == PACKET_BLOCK_MODE1) {
- wp->session_format = 0;
- wp->subhdr2 = 0x20;
- } else if (wp->data_block_type == PACKET_BLOCK_MODE2) {
- wp->session_format = 0x20;
- wp->subhdr2 = 8;
-#if 0
- wp->mcn[0] = 0x80;
- memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1);
-#endif
- } else {
- /*
- * paranoia
- */
- pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
- return 1;
- }
- wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
-
- cgc.buflen = cgc.cmd[8] = size;
- ret = pkt_mode_select(pd, &cgc);
- if (ret) {
- pkt_dump_sense(pd, &cgc);
- return ret;
- }
-
- pkt_print_settings(pd);
- return 0;
-}
-
-/*
- * 1 -- we can write to this track, 0 -- we can't
- */
-static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
-{
- switch (pd->mmc3_profile) {
- case 0x1a: /* DVD+RW */
- case 0x12: /* DVD-RAM */
- /* The track is always writable on DVD+RW/DVD-RAM */
- return 1;
- default:
- break;
- }
-
- if (!ti->packet || !ti->fp)
- return 0;
-
- /*
- * "good" settings as per Mt Fuji.
- */
- if (ti->rt == 0 && ti->blank == 0)
- return 1;
-
- if (ti->rt == 0 && ti->blank == 1)
- return 1;
-
- if (ti->rt == 1 && ti->blank == 0)
- return 1;
-
- pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
- return 0;
-}
-
-/*
- * 1 -- we can write to this disc, 0 -- we can't
- */
-static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
-{
- switch (pd->mmc3_profile) {
- case 0x0a: /* CD-RW */
- case 0xffff: /* MMC3 not supported */
- break;
- case 0x1a: /* DVD+RW */
- case 0x13: /* DVD-RW */
- case 0x12: /* DVD-RAM */
- return 1;
- default:
- pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
- pd->mmc3_profile);
- return 0;
- }
-
- /*
- * for disc type 0xff we should probably reserve a new track.
- * but i'm not sure, should we leave this to user apps? probably.
- */
- if (di->disc_type == 0xff) {
- pkt_notice(pd, "unknown disc - no track?\n");
- return 0;
- }
-
- if (di->disc_type != 0x20 && di->disc_type != 0) {
- pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
- return 0;
- }
-
- if (di->erasable == 0) {
- pkt_notice(pd, "disc not erasable\n");
- return 0;
- }
-
- if (di->border_status == PACKET_SESSION_RESERVED) {
- pkt_err(pd, "can't write to last track (reserved)\n");
- return 0;
- }
-
- return 1;
-}
-
-static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
-{
- struct packet_command cgc;
- unsigned char buf[12];
- disc_information di;
- track_information ti;
- int ret, track;
-
- init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
- cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
- cgc.cmd[8] = 8;
- ret = pkt_generic_packet(pd, &cgc);
- pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
-
- memset(&di, 0, sizeof(disc_information));
- memset(&ti, 0, sizeof(track_information));
-
- ret = pkt_get_disc_info(pd, &di);
- if (ret) {
- pkt_err(pd, "failed get_disc\n");
- return ret;
- }
-
- if (!pkt_writable_disc(pd, &di))
- return -EROFS;
-
- pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR;
-
- track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
- ret = pkt_get_track_info(pd, track, 1, &ti);
- if (ret) {
- pkt_err(pd, "failed get_track\n");
- return ret;
- }
-
- if (!pkt_writable_track(pd, &ti)) {
- pkt_err(pd, "can't write to this track\n");
- return -EROFS;
- }
-
- /*
- * we keep packet size in 512 byte units, makes it easier to
- * deal with request calculations.
- */
- pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
- if (pd->settings.size == 0) {
- pkt_notice(pd, "detected zero packet size!\n");
- return -ENXIO;
- }
- if (pd->settings.size > PACKET_MAX_SECTORS) {
- pkt_err(pd, "packet size is too big\n");
- return -EROFS;
- }
- pd->settings.fp = ti.fp;
- pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1);
-
- if (ti.nwa_v) {
- pd->nwa = be32_to_cpu(ti.next_writable);
- set_bit(PACKET_NWA_VALID, &pd->flags);
- }
-
- /*
- * in theory we could use lra on -RW media as well and just zero
- * blocks that haven't been written yet, but in practice that
- * is just a no-go. we'll use that for -R, naturally.
- */
- if (ti.lra_v) {
- pd->lra = be32_to_cpu(ti.last_rec_address);
- set_bit(PACKET_LRA_VALID, &pd->flags);
- } else {
- pd->lra = 0xffffffff;
- set_bit(PACKET_LRA_VALID, &pd->flags);
- }
-
- /*
- * fine for now
- */
- pd->settings.link_loss = 7;
- pd->settings.write_type = 0; /* packet */
- pd->settings.track_mode = ti.track_mode;
-
- /*
- * mode1 or mode2 disc
- */
- switch (ti.data_mode) {
- case PACKET_MODE1:
- pd->settings.block_mode = PACKET_BLOCK_MODE1;
- break;
- case PACKET_MODE2:
- pd->settings.block_mode = PACKET_BLOCK_MODE2;
- break;
- default:
- pkt_err(pd, "unknown data mode\n");
- return -EROFS;
- }
- return 0;
-}
-
-/*
- * enable/disable write caching on drive
- */
-static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
- int set)
-{
- struct packet_command cgc;
- struct scsi_sense_hdr sshdr;
- unsigned char buf[64];
- int ret;
-
- init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
- cgc.sshdr = &sshdr;
- cgc.buflen = pd->mode_offset + 12;
-
- /*
- * caching mode page might not be there, so quiet this command
- */
- cgc.quiet = 1;
-
- ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0);
- if (ret)
- return ret;
-
- buf[pd->mode_offset + 10] |= (!!set << 2);
-
- cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
- ret = pkt_mode_select(pd, &cgc);
- if (ret) {
- pkt_err(pd, "write caching control failed\n");
- pkt_dump_sense(pd, &cgc);
- } else if (!ret && set)
- pkt_notice(pd, "enabled write caching\n");
- return ret;
-}
-
-static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag)
-{
- struct packet_command cgc;
-
- init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
- cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
- cgc.cmd[4] = lockflag ? 1 : 0;
- return pkt_generic_packet(pd, &cgc);
-}
-
-/*
- * Returns drive maximum write speed
- */
-static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
- unsigned *write_speed)
-{
- struct packet_command cgc;
- struct scsi_sense_hdr sshdr;
- unsigned char buf[256+18];
- unsigned char *cap_buf;
- int ret, offset;
-
- cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
- init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
- cgc.sshdr = &sshdr;
-
- ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
- if (ret) {
- cgc.buflen = pd->mode_offset + cap_buf[1] + 2 +
- sizeof(struct mode_page_header);
- ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
- if (ret) {
- pkt_dump_sense(pd, &cgc);
- return ret;
- }
- }
-
- offset = 20; /* Obsoleted field, used by older drives */
- if (cap_buf[1] >= 28)
- offset = 28; /* Current write speed selected */
- if (cap_buf[1] >= 30) {
- /* If the drive reports at least one "Logical Unit Write
- * Speed Performance Descriptor Block", use the information
- * in the first block. (contains the highest speed)
- */
- int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
- if (num_spdb > 0)
- offset = 34;
- }
-
- *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
- return 0;
-}
-
-/* These tables from cdrecord - I don't have orange book */
-/* standard speed CD-RW (1-4x) */
-static char clv_to_speed[16] = {
- /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
- 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-/* high speed CD-RW (-10x) */
-static char hs_clv_to_speed[16] = {
- /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
- 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-/* ultra high speed CD-RW */
-static char us_clv_to_speed[16] = {
- /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
- 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
-};
-
-/*
- * reads the maximum media speed from ATIP
- */
-static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
- unsigned *speed)
-{
- struct packet_command cgc;
- struct scsi_sense_hdr sshdr;
- unsigned char buf[64];
- unsigned int size, st, sp;
- int ret;
-
- init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
- cgc.sshdr = &sshdr;
- cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
- cgc.cmd[1] = 2;
- cgc.cmd[2] = 4; /* READ ATIP */
- cgc.cmd[8] = 2;
- ret = pkt_generic_packet(pd, &cgc);
- if (ret) {
- pkt_dump_sense(pd, &cgc);
- return ret;
- }
- size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
- if (size > sizeof(buf))
- size = sizeof(buf);
-
- init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
- cgc.sshdr = &sshdr;
- cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
- cgc.cmd[1] = 2;
- cgc.cmd[2] = 4;
- cgc.cmd[8] = size;
- ret = pkt_generic_packet(pd, &cgc);
- if (ret) {
- pkt_dump_sense(pd, &cgc);
- return ret;
- }
-
- if (!(buf[6] & 0x40)) {
- pkt_notice(pd, "disc type is not CD-RW\n");
- return 1;
- }
- if (!(buf[6] & 0x4)) {
- pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
- return 1;
- }
-
- st = (buf[6] >> 3) & 0x7; /* disc sub-type */
-
- sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
-
- /* Info from cdrecord */
- switch (st) {
- case 0: /* standard speed */
- *speed = clv_to_speed[sp];
- break;
- case 1: /* high speed */
- *speed = hs_clv_to_speed[sp];
- break;
- case 2: /* ultra high speed */
- *speed = us_clv_to_speed[sp];
- break;
- default:
- pkt_notice(pd, "unknown disc sub-type %d\n", st);
- return 1;
- }
- if (*speed) {
- pkt_info(pd, "maximum media speed: %d\n", *speed);
- return 0;
- } else {
- pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
- return 1;
- }
-}
-
-static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
-{
- struct packet_command cgc;
- struct scsi_sense_hdr sshdr;
- int ret;
-
- pkt_dbg(2, pd, "Performing OPC\n");
-
- init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
- cgc.sshdr = &sshdr;
- cgc.timeout = 60*HZ;
- cgc.cmd[0] = GPCMD_SEND_OPC;
- cgc.cmd[1] = 1;
- ret = pkt_generic_packet(pd, &cgc);
- if (ret)
- pkt_dump_sense(pd, &cgc);
- return ret;
-}
-
-static int pkt_open_write(struct pktcdvd_device *pd)
-{
- int ret;
- unsigned int write_speed, media_write_speed, read_speed;
-
- ret = pkt_probe_settings(pd);
- if (ret) {
- pkt_dbg(2, pd, "failed probe\n");
- return ret;
- }
-
- ret = pkt_set_write_settings(pd);
- if (ret) {
- pkt_dbg(1, pd, "failed saving write settings\n");
- return -EIO;
- }
-
- pkt_write_caching(pd, USE_WCACHING);
-
- ret = pkt_get_max_speed(pd, &write_speed);
- if (ret)
- write_speed = 16 * 177;
- switch (pd->mmc3_profile) {
- case 0x13: /* DVD-RW */
- case 0x1a: /* DVD+RW */
- case 0x12: /* DVD-RAM */
- pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
- break;
- default:
- ret = pkt_media_speed(pd, &media_write_speed);
- if (ret)
- media_write_speed = 16;
- write_speed = min(write_speed, media_write_speed * 177);
- pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
- break;
- }
- read_speed = write_speed;
-
- ret = pkt_set_speed(pd, write_speed, read_speed);
- if (ret) {
- pkt_dbg(1, pd, "couldn't set write speed\n");
- return -EIO;
- }
- pd->write_speed = write_speed;
- pd->read_speed = read_speed;
-
- ret = pkt_perform_opc(pd);
- if (ret) {
- pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
- }
-
- return 0;
-}
-
-/*
- * called at open time.
- */
-static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
-{
- int ret;
- long lba;
- struct request_queue *q;
- struct block_device *bdev;
-
- /*
- * We need to re-open the cdrom device without O_NONBLOCK to be able
- * to read/write from/to it. It is already opened in O_NONBLOCK mode
- * so open should not fail.
- */
- bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd);
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
- goto out;
- }
-
- ret = pkt_get_last_written(pd, &lba);
- if (ret) {
- pkt_err(pd, "pkt_get_last_written failed\n");
- goto out_putdev;
- }
-
- set_capacity(pd->disk, lba << 2);
- set_capacity_and_notify(pd->bdev->bd_disk, lba << 2);
-
- q = bdev_get_queue(pd->bdev);
- if (write) {
- ret = pkt_open_write(pd);
- if (ret)
- goto out_putdev;
- /*
- * Some CDRW drives can not handle writes larger than one packet,
- * even if the size is a multiple of the packet size.
- */
- blk_queue_max_hw_sectors(q, pd->settings.size);
- set_bit(PACKET_WRITABLE, &pd->flags);
- } else {
- pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
- clear_bit(PACKET_WRITABLE, &pd->flags);
- }
-
- ret = pkt_set_segment_merging(pd, q);
- if (ret)
- goto out_putdev;
-
- if (write) {
- if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
- pkt_err(pd, "not enough memory for buffers\n");
- ret = -ENOMEM;
- goto out_putdev;
- }
- pkt_info(pd, "%lukB available on disc\n", lba << 1);
- }
-
- return 0;
-
-out_putdev:
- blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
-out:
- return ret;
-}
-
-/*
- * called when the device is closed. makes sure that the device flushes
- * the internal cache before we close.
- */
-static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
-{
- if (flush && pkt_flush_cache(pd))
- pkt_dbg(1, pd, "not flushing cache\n");
-
- pkt_lock_door(pd, 0);
-
- pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
- blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
-
- pkt_shrink_pktlist(pd);
-}
-
-static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor)
-{
- if (dev_minor >= MAX_WRITERS)
- return NULL;
-
- dev_minor = array_index_nospec(dev_minor, MAX_WRITERS);
- return pkt_devs[dev_minor];
-}
-
-static int pkt_open(struct block_device *bdev, fmode_t mode)
-{
- struct pktcdvd_device *pd = NULL;
- int ret;
-
- mutex_lock(&pktcdvd_mutex);
- mutex_lock(&ctl_mutex);
- pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
- if (!pd) {
- ret = -ENODEV;
- goto out;
- }
- BUG_ON(pd->refcnt < 0);
-
- pd->refcnt++;
- if (pd->refcnt > 1) {
- if ((mode & FMODE_WRITE) &&
- !test_bit(PACKET_WRITABLE, &pd->flags)) {
- ret = -EBUSY;
- goto out_dec;
- }
- } else {
- ret = pkt_open_dev(pd, mode & FMODE_WRITE);
- if (ret)
- goto out_dec;
- /*
- * needed here as well, since ext2 (among others) may change
- * the blocksize at mount time
- */
- set_blocksize(bdev, CD_FRAMESIZE);
- }
-
- mutex_unlock(&ctl_mutex);
- mutex_unlock(&pktcdvd_mutex);
- return 0;
-
-out_dec:
- pd->refcnt--;
-out:
- mutex_unlock(&ctl_mutex);
- mutex_unlock(&pktcdvd_mutex);
- return ret;
-}
-
-static void pkt_close(struct gendisk *disk, fmode_t mode)
-{
- struct pktcdvd_device *pd = disk->private_data;
-
- mutex_lock(&pktcdvd_mutex);
- mutex_lock(&ctl_mutex);
- pd->refcnt--;
- BUG_ON(pd->refcnt < 0);
- if (pd->refcnt == 0) {
- int flush = test_bit(PACKET_WRITABLE, &pd->flags);
- pkt_release_dev(pd, flush);
- }
- mutex_unlock(&ctl_mutex);
- mutex_unlock(&pktcdvd_mutex);
-}
-
-
-static void pkt_end_io_read_cloned(struct bio *bio)
-{
- struct packet_stacked_data *psd = bio->bi_private;
- struct pktcdvd_device *pd = psd->pd;
-
- psd->bio->bi_status = bio->bi_status;
- bio_put(bio);
- bio_endio(psd->bio);
- mempool_free(psd, &psd_pool);
- pkt_bio_finished(pd);
-}
-
-static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
-{
- struct bio *cloned_bio =
- bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
- struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
-
- psd->pd = pd;
- psd->bio = bio;
- cloned_bio->bi_private = psd;
- cloned_bio->bi_end_io = pkt_end_io_read_cloned;
- pd->stats.secs_r += bio_sectors(bio);
- pkt_queue_bio(pd, cloned_bio);
-}
-
-static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
-{
- struct pktcdvd_device *pd = q->queuedata;
- sector_t zone;
- struct packet_data *pkt;
- int was_empty, blocked_bio;
- struct pkt_rb_node *node;
-
- zone = get_zone(bio->bi_iter.bi_sector, pd);
-
- /*
- * If we find a matching packet in state WAITING or READ_WAIT, we can
- * just append this bio to that packet.
- */
- spin_lock(&pd->cdrw.active_list_lock);
- blocked_bio = 0;
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- if (pkt->sector == zone) {
- spin_lock(&pkt->lock);
- if ((pkt->state == PACKET_WAITING_STATE) ||
- (pkt->state == PACKET_READ_WAIT_STATE)) {
- bio_list_add(&pkt->orig_bios, bio);
- pkt->write_size +=
- bio->bi_iter.bi_size / CD_FRAMESIZE;
- if ((pkt->write_size >= pkt->frames) &&
- (pkt->state == PACKET_WAITING_STATE)) {
- atomic_inc(&pkt->run_sm);
- wake_up(&pd->wqueue);
- }
- spin_unlock(&pkt->lock);
- spin_unlock(&pd->cdrw.active_list_lock);
- return;
- } else {
- blocked_bio = 1;
- }
- spin_unlock(&pkt->lock);
- }
- }
- spin_unlock(&pd->cdrw.active_list_lock);
-
- /*
- * Test if there is enough room left in the bio work queue
- * (queue size >= congestion on mark).
- * If not, wait till the work queue size is below the congestion off mark.
- */
- spin_lock(&pd->lock);
- if (pd->write_congestion_on > 0
- && pd->bio_queue_size >= pd->write_congestion_on) {
- struct wait_bit_queue_entry wqe;
-
- init_wait_var_entry(&wqe, &pd->congested, 0);
- for (;;) {
- prepare_to_wait_event(__var_waitqueue(&pd->congested),
- &wqe.wq_entry,
- TASK_UNINTERRUPTIBLE);
- if (pd->bio_queue_size <= pd->write_congestion_off)
- break;
- pd->congested = true;
- spin_unlock(&pd->lock);
- schedule();
- spin_lock(&pd->lock);
- }
- }
- spin_unlock(&pd->lock);
-
- /*
- * No matching packet found. Store the bio in the work queue.
- */
- node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
- node->bio = bio;
- spin_lock(&pd->lock);
- BUG_ON(pd->bio_queue_size < 0);
- was_empty = (pd->bio_queue_size == 0);
- pkt_rbtree_insert(pd, node);
- spin_unlock(&pd->lock);
-
- /*
- * Wake up the worker thread.
- */
- atomic_set(&pd->scan_queue, 1);
- if (was_empty) {
- /* This wake_up is required for correct operation */
- wake_up(&pd->wqueue);
- } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) {
- /*
- * This wake up is not required for correct operation,
- * but improves performance in some cases.
- */
- wake_up(&pd->wqueue);
- }
-}
-
-static void pkt_submit_bio(struct bio *bio)
-{
- struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
- struct bio *split;
-
- bio = bio_split_to_limits(bio);
-
- pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
- (unsigned long long)bio->bi_iter.bi_sector,
- (unsigned long long)bio_end_sector(bio));
-
- /*
- * Clone READ bios so we can have our own bi_end_io callback.
- */
- if (bio_data_dir(bio) == READ) {
- pkt_make_request_read(pd, bio);
- return;
- }
-
- if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
- pkt_notice(pd, "WRITE for ro device (%llu)\n",
- (unsigned long long)bio->bi_iter.bi_sector);
- goto end_io;
- }
-
- if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
- pkt_err(pd, "wrong bio size\n");
- goto end_io;
- }
-
- do {
- sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
- sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
-
- if (last_zone != zone) {
- BUG_ON(last_zone != zone + pd->settings.size);
-
- split = bio_split(bio, last_zone -
- bio->bi_iter.bi_sector,
- GFP_NOIO, &pkt_bio_set);
- bio_chain(split, bio);
- } else {
- split = bio;
- }
-
- pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
- } while (split != bio);
-
- return;
-end_io:
- bio_io_error(bio);
-}
-
-static void pkt_init_queue(struct pktcdvd_device *pd)
-{
- struct request_queue *q = pd->disk->queue;
-
- blk_queue_logical_block_size(q, CD_FRAMESIZE);
- blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
- q->queuedata = pd;
-}
-
-static int pkt_seq_show(struct seq_file *m, void *p)
-{
- struct pktcdvd_device *pd = m->private;
- char *msg;
- int states[PACKET_NUM_STATES];
-
- seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev);
-
- seq_printf(m, "\nSettings:\n");
- seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
-
- if (pd->settings.write_type == 0)
- msg = "Packet";
- else
- msg = "Unknown";
- seq_printf(m, "\twrite type:\t\t%s\n", msg);
-
- seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
- seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
-
- seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
-
- if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
- msg = "Mode 1";
- else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
- msg = "Mode 2";
- else
- msg = "Unknown";
- seq_printf(m, "\tblock mode:\t\t%s\n", msg);
-
- seq_printf(m, "\nStatistics:\n");
- seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
- seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
- seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
- seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
- seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
-
- seq_printf(m, "\nMisc:\n");
- seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
- seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
- seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
- seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
- seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
- seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
-
- seq_printf(m, "\nQueue state:\n");
- seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
- seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
- seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
-
- pkt_count_states(pd, states);
- seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
- states[0], states[1], states[2], states[3], states[4], states[5]);
-
- seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
- pd->write_congestion_off,
- pd->write_congestion_on);
- return 0;
-}
-
-static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
-{
- int i;
- struct block_device *bdev;
- struct scsi_device *sdev;
-
- if (pd->pkt_dev == dev) {
- pkt_err(pd, "recursive setup not allowed\n");
- return -EBUSY;
- }
- for (i = 0; i < MAX_WRITERS; i++) {
- struct pktcdvd_device *pd2 = pkt_devs[i];
- if (!pd2)
- continue;
- if (pd2->bdev->bd_dev == dev) {
- pkt_err(pd, "%pg already setup\n", pd2->bdev);
- return -EBUSY;
- }
- if (pd2->pkt_dev == dev) {
- pkt_err(pd, "can't chain pktcdvd devices\n");
- return -EBUSY;
- }
- }
-
- bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- sdev = scsi_device_from_queue(bdev->bd_disk->queue);
- if (!sdev) {
- blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
- return -EINVAL;
- }
- put_device(&sdev->sdev_gendev);
-
- /* This is safe, since we have a reference from open(). */
- __module_get(THIS_MODULE);
-
- pd->bdev = bdev;
- set_blocksize(bdev, CD_FRAMESIZE);
-
- pkt_init_queue(pd);
-
- atomic_set(&pd->cdrw.pending_bios, 0);
- pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
- if (IS_ERR(pd->cdrw.thread)) {
- pkt_err(pd, "can't start kernel thread\n");
- goto out_mem;
- }
-
- proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
- pkt_dbg(1, pd, "writer mapped to %pg\n", bdev);
- return 0;
-
-out_mem:
- blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
- /* This is safe: open() is still holding a reference. */
- module_put(THIS_MODULE);
- return -ENOMEM;
-}
-
-static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
-{
- struct pktcdvd_device *pd = bdev->bd_disk->private_data;
- int ret;
-
- pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
- cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
-
- mutex_lock(&pktcdvd_mutex);
- switch (cmd) {
- case CDROMEJECT:
- /*
- * The door gets locked when the device is opened, so we
- * have to unlock it or else the eject command fails.
- */
- if (pd->refcnt == 1)
- pkt_lock_door(pd, 0);
- fallthrough;
- /*
- * forward selected CDROM ioctls to CD-ROM, for UDF
- */
- case CDROMMULTISESSION:
- case CDROMREADTOCENTRY:
- case CDROM_LAST_WRITTEN:
- case CDROM_SEND_PACKET:
- case SCSI_IOCTL_SEND_COMMAND:
- if (!bdev->bd_disk->fops->ioctl)
- ret = -ENOTTY;
- else
- ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
- break;
- default:
- pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
- ret = -ENOTTY;
- }
- mutex_unlock(&pktcdvd_mutex);
-
- return ret;
-}
-
-static unsigned int pkt_check_events(struct gendisk *disk,
- unsigned int clearing)
-{
- struct pktcdvd_device *pd = disk->private_data;
- struct gendisk *attached_disk;
-
- if (!pd)
- return 0;
- if (!pd->bdev)
- return 0;
- attached_disk = pd->bdev->bd_disk;
- if (!attached_disk || !attached_disk->fops->check_events)
- return 0;
- return attached_disk->fops->check_events(attached_disk, clearing);
-}
-
-static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
-}
-
-static const struct block_device_operations pktcdvd_ops = {
- .owner = THIS_MODULE,
- .submit_bio = pkt_submit_bio,
- .open = pkt_open,
- .release = pkt_close,
- .ioctl = pkt_ioctl,
- .compat_ioctl = blkdev_compat_ptr_ioctl,
- .check_events = pkt_check_events,
- .devnode = pkt_devnode,
-};
-
-/*
- * Set up mapping from pktcdvd device to CD-ROM device.
- */
-static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
-{
- int idx;
- int ret = -ENOMEM;
- struct pktcdvd_device *pd;
- struct gendisk *disk;
-
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
- for (idx = 0; idx < MAX_WRITERS; idx++)
- if (!pkt_devs[idx])
- break;
- if (idx == MAX_WRITERS) {
- pr_err("max %d writers supported\n", MAX_WRITERS);
- ret = -EBUSY;
- goto out_mutex;
- }
-
- pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL);
- if (!pd)
- goto out_mutex;
-
- ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
- sizeof(struct pkt_rb_node));
- if (ret)
- goto out_mem;
-
- INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
- INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
- spin_lock_init(&pd->cdrw.active_list_lock);
-
- spin_lock_init(&pd->lock);
- spin_lock_init(&pd->iosched.lock);
- bio_list_init(&pd->iosched.read_queue);
- bio_list_init(&pd->iosched.write_queue);
- sprintf(pd->name, DRIVER_NAME"%d", idx);
- init_waitqueue_head(&pd->wqueue);
- pd->bio_queue = RB_ROOT;
-
- pd->write_congestion_on = write_congestion_on;
- pd->write_congestion_off = write_congestion_off;
-
- ret = -ENOMEM;
- disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
- goto out_mem;
- pd->disk = disk;
- disk->major = pktdev_major;
- disk->first_minor = idx;
- disk->minors = 1;
- disk->fops = &pktcdvd_ops;
- disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
- strcpy(disk->disk_name, pd->name);
- disk->private_data = pd;
-
- pd->pkt_dev = MKDEV(pktdev_major, idx);
- ret = pkt_new_dev(pd, dev);
- if (ret)
- goto out_mem2;
-
- /* inherit events of the host device */
- disk->events = pd->bdev->bd_disk->events;
-
- ret = add_disk(disk);
- if (ret)
- goto out_mem2;
-
- pkt_sysfs_dev_new(pd);
- pkt_debugfs_dev_new(pd);
-
- pkt_devs[idx] = pd;
- if (pkt_dev)
- *pkt_dev = pd->pkt_dev;
-
- mutex_unlock(&ctl_mutex);
- return 0;
-
-out_mem2:
- put_disk(disk);
-out_mem:
- mempool_exit(&pd->rb_pool);
- kfree(pd);
-out_mutex:
- mutex_unlock(&ctl_mutex);
- pr_err("setup of pktcdvd device failed\n");
- return ret;
-}
-
-/*
- * Tear down mapping from pktcdvd device to CD-ROM device.
- */
-static int pkt_remove_dev(dev_t pkt_dev)
-{
- struct pktcdvd_device *pd;
- int idx;
- int ret = 0;
-
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
- for (idx = 0; idx < MAX_WRITERS; idx++) {
- pd = pkt_devs[idx];
- if (pd && (pd->pkt_dev == pkt_dev))
- break;
- }
- if (idx == MAX_WRITERS) {
- pr_debug("dev not setup\n");
- ret = -ENXIO;
- goto out;
- }
-
- if (pd->refcnt > 0) {
- ret = -EBUSY;
- goto out;
- }
- if (!IS_ERR(pd->cdrw.thread))
- kthread_stop(pd->cdrw.thread);
-
- pkt_devs[idx] = NULL;
-
- pkt_debugfs_dev_remove(pd);
- pkt_sysfs_dev_remove(pd);
-
- blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
-
- remove_proc_entry(pd->name, pkt_proc);
- pkt_dbg(1, pd, "writer unmapped\n");
-
- del_gendisk(pd->disk);
- put_disk(pd->disk);
-
- mempool_exit(&pd->rb_pool);
- kfree(pd);
-
- /* This is safe: open() is still holding a reference. */
- module_put(THIS_MODULE);
-
-out:
- mutex_unlock(&ctl_mutex);
- return ret;
-}
-
-static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
-{
- struct pktcdvd_device *pd;
-
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
- pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
- if (pd) {
- ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
- ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
- } else {
- ctrl_cmd->dev = 0;
- ctrl_cmd->pkt_dev = 0;
- }
- ctrl_cmd->num_devices = MAX_WRITERS;
-
- mutex_unlock(&ctl_mutex);
-}
-
-static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- struct pkt_ctrl_command ctrl_cmd;
- int ret = 0;
- dev_t pkt_dev = 0;
-
- if (cmd != PACKET_CTRL_CMD)
- return -ENOTTY;
-
- if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command)))
- return -EFAULT;
-
- switch (ctrl_cmd.command) {
- case PKT_CTRL_CMD_SETUP:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
- ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
- break;
- case PKT_CTRL_CMD_TEARDOWN:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
- break;
- case PKT_CTRL_CMD_STATUS:
- pkt_get_status(&ctrl_cmd);
- break;
- default:
- return -ENOTTY;
- }
-
- if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command)))
- return -EFAULT;
- return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
-}
-#endif
-
-static const struct file_operations pkt_ctl_fops = {
- .open = nonseekable_open,
- .unlocked_ioctl = pkt_ctl_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = pkt_ctl_compat_ioctl,
-#endif
- .owner = THIS_MODULE,
- .llseek = no_llseek,
-};
-
-static struct miscdevice pkt_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = DRIVER_NAME,
- .nodename = "pktcdvd/control",
- .fops = &pkt_ctl_fops
-};
-
-static int __init pkt_init(void)
-{
- int ret;
-
- mutex_init(&ctl_mutex);
-
- ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
- sizeof(struct packet_stacked_data));
- if (ret)
- return ret;
- ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
- if (ret) {
- mempool_exit(&psd_pool);
- return ret;
- }
-
- ret = register_blkdev(pktdev_major, DRIVER_NAME);
- if (ret < 0) {
- pr_err("unable to register block device\n");
- goto out2;
- }
- if (!pktdev_major)
- pktdev_major = ret;
-
- ret = pkt_sysfs_init();
- if (ret)
- goto out;
-
- pkt_debugfs_init();
-
- ret = misc_register(&pkt_misc);
- if (ret) {
- pr_err("unable to register misc device\n");
- goto out_misc;
- }
-
- pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL);
-
- return 0;
-
-out_misc:
- pkt_debugfs_cleanup();
- pkt_sysfs_cleanup();
-out:
- unregister_blkdev(pktdev_major, DRIVER_NAME);
-out2:
- mempool_exit(&psd_pool);
- bioset_exit(&pkt_bio_set);
- return ret;
-}
-
-static void __exit pkt_exit(void)
-{
- remove_proc_entry("driver/"DRIVER_NAME, NULL);
- misc_deregister(&pkt_misc);
-
- pkt_debugfs_cleanup();
- pkt_sysfs_cleanup();
-
- unregister_blkdev(pktdev_major, DRIVER_NAME);
- mempool_exit(&psd_pool);
- bioset_exit(&pkt_bio_set);
-}
-
-MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
-MODULE_AUTHOR("Jens Axboe <axboe@suse.de>");
-MODULE_LICENSE("GPL");
-
-module_init(pkt_init);
-module_exit(pkt_exit);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 19da5defd734..68bd2f7961b3 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -512,7 +512,7 @@ static void virtblk_free_disk(struct gendisk *disk)
{
struct virtio_blk *vblk = disk->private_data;
- ida_simple_remove(&vd_index_ida, vblk->index);
+ ida_free(&vd_index_ida, vblk->index);
mutex_destroy(&vblk->vdev_mutex);
kfree(vblk);
}
@@ -902,8 +902,8 @@ static int virtblk_probe(struct virtio_device *vdev)
return -EINVAL;
}
- err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
- GFP_KERNEL);
+ err = ida_alloc_range(&vd_index_ida, 0,
+ minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
if (err < 0)
goto out;
index = err;
@@ -1163,7 +1163,7 @@ out_free_vq:
out_free_vblk:
kfree(vblk);
out_free_index:
- ida_simple_remove(&vd_index_ida, index);
+ ida_free(&vd_index_ida, index);
out:
return err;
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 35b9bcad9db9..b28489290323 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2129,7 +2129,6 @@ static void blkfront_closing(struct blkfront_info *info)
if (info->rq && info->gd) {
blk_mq_stop_hw_queues(info->rq);
blk_mark_disk_dead(info->gd);
- set_capacity(info->gd, 0);
}
for_each_rinfo(info, rinfo, i) {
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 99499d1f6e66..9f32901fdad1 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -160,7 +160,7 @@ static void read_moving(struct cache_set *c)
moving_init(io);
bio = &io->bio.bio;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
bio->bi_end_io = read_moving_endio;
if (bch_bio_alloc_pages(bio, GFP_KERNEL))
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 32e21ba64357..67a2e29e0b40 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -244,7 +244,7 @@ static void bch_data_insert_start(struct closure *cl)
trace_bcache_cache_insert(k);
bch_keylist_push(&op->insert_keys);
- bio_set_op_attrs(n, REQ_OP_WRITE, 0);
+ n->bi_opf = REQ_OP_WRITE;
bch_submit_bbio(n, op->c, k, 0);
} while (n != bio);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 0285b676e983..d4a5fc0650bb 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -434,7 +434,7 @@ static void write_dirty(struct closure *cl)
*/
if (KEY_DIRTY(&w->key)) {
dirty_init(w);
- bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
+ io->bio.bi_opf = REQ_OP_WRITE;
io->bio.bi_iter.bi_sector = KEY_START(&w->key);
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
@@ -547,7 +547,7 @@ static void read_dirty(struct cached_dev *dc)
io->sequence = sequence++;
dirty_init(w);
- bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
+ io->bio.bi_opf = REQ_OP_READ;
io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
bio_set_dev(&io->bio, dc->disk.c->cache->bdev);
io->bio.bi_end_io = read_dirty_endio;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 078da18bb86d..8541d5688f3a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1215,7 +1215,7 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
struct dm_keyslot_evict_args *args = data;
int err;
- err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key);
+ err = blk_crypto_evict_key(dev->bdev, args->key);
if (!args->err)
args->err = err;
/* Always try to evict the key from all devices. */
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e76c96c760a9..c2b5a537f5b8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -410,7 +410,7 @@ static void end_discard(struct discard_op *op, int r)
* need to wait for the chain to complete.
*/
bio_chain(op->bio, op->parent_bio);
- bio_set_op_attrs(op->bio, REQ_OP_DISCARD, 0);
+ op->bio->bi_opf = REQ_OP_DISCARD;
submit_bio(op->bio);
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 95a1ee3d314e..e1ea3a7bd9d9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -732,28 +732,48 @@ static char *_dm_claim_ptr = "I belong to device-mapper";
/*
* Open a table device so we can use it as a map destination.
*/
-static int open_table_device(struct table_device *td, dev_t dev,
- struct mapped_device *md)
+static struct table_device *open_table_device(struct mapped_device *md,
+ dev_t dev, fmode_t mode)
{
+ struct table_device *td;
struct block_device *bdev;
u64 part_off;
int r;
- BUG_ON(td->dm_dev.bdev);
+ td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
+ if (!td)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&td->count, 1);
- bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr);
+ if (IS_ERR(bdev)) {
+ r = PTR_ERR(bdev);
+ goto out_free_td;
+ }
- r = bd_link_disk_holder(bdev, dm_disk(md));
- if (r) {
- blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
- return r;
+ /*
+ * We can be called before the dm disk is added. In that case we can't
+ * register the holder relation here. It will be done once add_disk was
+ * called.
+ */
+ if (md->disk->slave_dir) {
+ r = bd_link_disk_holder(bdev, md->disk);
+ if (r)
+ goto out_blkdev_put;
}
+ td->dm_dev.mode = mode;
td->dm_dev.bdev = bdev;
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL);
- return 0;
+ format_dev_t(td->dm_dev.name, dev);
+ list_add(&td->list, &md->table_devices);
+ return td;
+
+out_blkdev_put:
+ blkdev_put(bdev, mode | FMODE_EXCL);
+out_free_td:
+ kfree(td);
+ return ERR_PTR(r);
}
/*
@@ -761,14 +781,12 @@ static int open_table_device(struct table_device *td, dev_t dev,
*/
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
- if (!td->dm_dev.bdev)
- return;
-
- bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
+ if (md->disk->slave_dir)
+ bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
put_dax(td->dm_dev.dax_dev);
- td->dm_dev.bdev = NULL;
- td->dm_dev.dax_dev = NULL;
+ list_del(&td->list);
+ kfree(td);
}
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
@@ -786,31 +804,16 @@ static struct table_device *find_table_device(struct list_head *l, dev_t dev,
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
struct dm_dev **result)
{
- int r;
struct table_device *td;
mutex_lock(&md->table_devices_lock);
td = find_table_device(&md->table_devices, dev, mode);
if (!td) {
- td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
- if (!td) {
+ td = open_table_device(md, dev, mode);
+ if (IS_ERR(td)) {
mutex_unlock(&md->table_devices_lock);
- return -ENOMEM;
+ return PTR_ERR(td);
}
-
- td->dm_dev.mode = mode;
- td->dm_dev.bdev = NULL;
-
- if ((r = open_table_device(td, dev, md))) {
- mutex_unlock(&md->table_devices_lock);
- kfree(td);
- return r;
- }
-
- format_dev_t(td->dm_dev.name, dev);
-
- refcount_set(&td->count, 1);
- list_add(&td->list, &md->table_devices);
} else {
refcount_inc(&td->count);
}
@@ -825,27 +828,11 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
struct table_device *td = container_of(d, struct table_device, dm_dev);
mutex_lock(&md->table_devices_lock);
- if (refcount_dec_and_test(&td->count)) {
+ if (refcount_dec_and_test(&td->count))
close_table_device(td, md);
- list_del(&td->list);
- kfree(td);
- }
mutex_unlock(&md->table_devices_lock);
}
-static void free_table_devices(struct list_head *devices)
-{
- struct list_head *tmp, *next;
-
- list_for_each_safe(tmp, next, devices) {
- struct table_device *td = list_entry(tmp, struct table_device, list);
-
- DMWARN("dm_destroy: %s still exists with %d references",
- td->dm_dev.name, refcount_read(&td->count));
- kfree(td);
- }
-}
-
/*
* Get the geometry associated with a dm device
*/
@@ -1972,8 +1959,21 @@ static void cleanup_mapped_device(struct mapped_device *md)
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
if (dm_get_md_type(md) != DM_TYPE_NONE) {
+ struct table_device *td;
+
dm_sysfs_exit(md);
+ list_for_each_entry(td, &md->table_devices, list) {
+ bd_unlink_disk_holder(td->dm_dev.bdev,
+ md->disk);
+ }
+
+ /*
+ * Hold lock to make sure del_gendisk() won't concurrent
+ * with open/close_table_device().
+ */
+ mutex_lock(&md->table_devices_lock);
del_gendisk(md->disk);
+ mutex_unlock(&md->table_devices_lock);
}
dm_queue_destroy_crypto_profile(md->queue);
put_disk(md->disk);
@@ -2122,7 +2122,7 @@ static void free_dev(struct mapped_device *md)
cleanup_mapped_device(md);
- free_table_devices(&md->table_devices);
+ WARN_ON_ONCE(!list_empty(&md->table_devices));
dm_stats_cleanup(&md->stats);
free_minor(minor);
@@ -2305,6 +2305,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
enum dm_queue_mode type = dm_table_get_type(t);
struct queue_limits limits;
+ struct table_device *td;
int r;
switch (type) {
@@ -2333,17 +2334,40 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
if (r)
return r;
+ /*
+ * Hold lock to make sure add_disk() and del_gendisk() won't concurrent
+ * with open_table_device() and close_table_device().
+ */
+ mutex_lock(&md->table_devices_lock);
r = add_disk(md->disk);
+ mutex_unlock(&md->table_devices_lock);
if (r)
return r;
- r = dm_sysfs_init(md);
- if (r) {
- del_gendisk(md->disk);
- return r;
+ /*
+ * Register the holder relationship for devices added before the disk
+ * was live.
+ */
+ list_for_each_entry(td, &md->table_devices, list) {
+ r = bd_link_disk_holder(td->dm_dev.bdev, md->disk);
+ if (r)
+ goto out_undo_holders;
}
+
+ r = dm_sysfs_init(md);
+ if (r)
+ goto out_undo_holders;
+
md->type = type;
return 0;
+
+out_undo_holders:
+ list_for_each_entry_continue_reverse(td, &md->table_devices, list)
+ bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
+ mutex_lock(&md->table_devices_lock);
+ del_gendisk(md->disk);
+ mutex_unlock(&md->table_devices_lock);
+ return r;
}
struct mapped_device *dm_get_md(dev_t dev)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bf6dffadbe6f..e7cc6ba1b657 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap)
sb = kmap_atomic(bitmap->storage.sb_page);
pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
- pr_debug(" version: %d\n", le32_to_cpu(sb->version));
+ pr_debug(" version: %u\n", le32_to_cpu(sb->version));
pr_debug(" uuid: %08x.%08x.%08x.%08x\n",
le32_to_cpu(*(__le32 *)(sb->uuid+0)),
le32_to_cpu(*(__le32 *)(sb->uuid+4)),
@@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap)
pr_debug("events cleared: %llu\n",
(unsigned long long) le64_to_cpu(sb->events_cleared));
pr_debug(" state: %08x\n", le32_to_cpu(sb->state));
- pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize));
- pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
+ pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize));
+ pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
pr_debug(" sync size: %llu KB\n",
(unsigned long long)le64_to_cpu(sb->sync_size)/2);
- pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
+ pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
kunmap_atomic(sb);
}
@@ -2105,7 +2105,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bytes = DIV_ROUND_UP(chunks, 8);
if (!bitmap->mddev->bitmap_info.external)
bytes += sizeof(bitmap_super_t);
- } while (bytes > (space << 9));
+ } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
+ (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
} else
chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
@@ -2150,7 +2151,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bitmap->counts.missing_pages = pages;
bitmap->counts.chunkshift = chunkshift;
bitmap->counts.chunks = chunks;
- bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
+ bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
BITMAP_BLOCK_SHIFT);
blocks = min(old_counts.chunks << old_counts.chunkshift,
@@ -2176,8 +2177,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bitmap->counts.missing_pages = old_counts.pages;
bitmap->counts.chunkshift = old_counts.chunkshift;
bitmap->counts.chunks = old_counts.chunks;
- bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
- BITMAP_BLOCK_SHIFT);
+ bitmap->mddev->bitmap_info.chunksize =
+ 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
blocks = old_counts.chunks << old_counts.chunkshift;
pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
break;
@@ -2195,20 +2196,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
if (set) {
bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
- if (*bmc_new == 0) {
- /* need to set on-disk bits too. */
- sector_t end = block + new_blocks;
- sector_t start = block >> chunkshift;
- start <<= chunkshift;
- while (start < end) {
- md_bitmap_file_set_bit(bitmap, block);
- start += 1 << chunkshift;
+ if (bmc_new) {
+ if (*bmc_new == 0) {
+ /* need to set on-disk bits too. */
+ sector_t end = block + new_blocks;
+ sector_t start = block >> chunkshift;
+
+ start <<= chunkshift;
+ while (start < end) {
+ md_bitmap_file_set_bit(bitmap, block);
+ start += 1 << chunkshift;
+ }
+ *bmc_new = 2;
+ md_bitmap_count_page(&bitmap->counts, block, 1);
+ md_bitmap_set_pending(&bitmap->counts, block);
}
- *bmc_new = 2;
- md_bitmap_count_page(&bitmap->counts, block, 1);
- md_bitmap_set_pending(&bitmap->counts, block);
+ *bmc_new |= NEEDED_MASK;
}
- *bmc_new |= NEEDED_MASK;
if (new_blocks < old_blocks)
old_blocks = new_blocks;
}
@@ -2534,6 +2538,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
if (csize < 512 ||
!is_power_of_2(csize))
return -EINVAL;
+ if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
+ sizeof(((bitmap_super_t *)0)->chunksize))))
+ return -EOVERFLOW;
mddev->bitmap_info.chunksize = csize;
return len;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a467b492d4ad..775f1dde190a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
+enum md_ro_state {
+ MD_RDWR,
+ MD_RDONLY,
+ MD_AUTO_READ,
+ MD_MAX_STATE
+};
+
+static bool md_is_rdwr(struct mddev *mddev)
+{
+ return (mddev->ro == MD_RDWR);
+}
+
/*
* Default number of read corrections we'll attempt on an rdev
* before ejecting it from the array. We divide the read error
@@ -444,7 +456,7 @@ static void md_submit_bio(struct bio *bio)
bio = bio_split_to_limits(bio);
- if (mddev->ro == 1 && unlikely(rw == WRITE)) {
+ if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
@@ -509,13 +521,14 @@ static void md_end_flush(struct bio *bio)
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
+ bio_put(bio);
+
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) {
/* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work);
}
- bio_put(bio);
}
static void md_submit_flush_data(struct work_struct *ws);
@@ -913,10 +926,12 @@ static void super_written(struct bio *bio)
} else
clear_bit(LastDev, &rdev->flags);
+ bio_put(bio);
+
+ rdev_dec_pending(rdev, mddev);
+
if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait);
- rdev_dec_pending(rdev, mddev);
- bio_put(bio);
}
void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
@@ -2453,7 +2468,22 @@ static void rdev_delayed_delete(struct work_struct *ws)
kobject_put(&rdev->kobj);
}
-static void unbind_rdev_from_array(struct md_rdev *rdev)
+void md_autodetect_dev(dev_t dev);
+
+static void export_rdev(struct md_rdev *rdev)
+{
+ pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
+ md_rdev_clear(rdev);
+#ifndef MODULE
+ if (test_bit(AutoDetected, &rdev->flags))
+ md_autodetect_dev(rdev->bdev->bd_dev);
+#endif
+ blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ rdev->bdev = NULL;
+ kobject_put(&rdev->kobj);
+}
+
+static void md_kick_rdev_from_array(struct md_rdev *rdev)
{
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
@@ -2476,56 +2506,8 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
INIT_WORK(&rdev->del_work, rdev_delayed_delete);
kobject_get(&rdev->kobj);
queue_work(md_rdev_misc_wq, &rdev->del_work);
-}
-
-/*
- * prevent the device from being mounted, repartitioned or
- * otherwise reused by a RAID array (or any other kernel
- * subsystem), by bd_claiming the device.
- */
-static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
-{
- int err = 0;
- struct block_device *bdev;
-
- bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- shared ? (struct md_rdev *)lock_rdev : rdev);
- if (IS_ERR(bdev)) {
- pr_warn("md: could not open device unknown-block(%u,%u).\n",
- MAJOR(dev), MINOR(dev));
- return PTR_ERR(bdev);
- }
- rdev->bdev = bdev;
- return err;
-}
-
-static void unlock_rdev(struct md_rdev *rdev)
-{
- struct block_device *bdev = rdev->bdev;
- rdev->bdev = NULL;
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void md_autodetect_dev(dev_t dev);
-
-static void export_rdev(struct md_rdev *rdev)
-{
- pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
- md_rdev_clear(rdev);
-#ifndef MODULE
- if (test_bit(AutoDetected, &rdev->flags))
- md_autodetect_dev(rdev->bdev->bd_dev);
-#endif
- unlock_rdev(rdev);
- kobject_put(&rdev->kobj);
-}
-
-void md_kick_rdev_from_array(struct md_rdev *rdev)
-{
- unbind_rdev_from_array(rdev);
export_rdev(rdev);
}
-EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
static void export_array(struct mddev *mddev)
{
@@ -2639,7 +2621,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
int any_badblocks_changed = 0;
int ret = -1;
- if (mddev->ro) {
+ if (!md_is_rdwr(mddev)) {
if (force_change)
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return;
@@ -3660,9 +3642,10 @@ EXPORT_SYMBOL_GPL(md_rdev_init);
*/
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
- int err;
+ static struct md_rdev *claim_rdev; /* just for claiming the bdev */
struct md_rdev *rdev;
sector_t size;
+ int err;
rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
if (!rdev)
@@ -3670,14 +3653,20 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
err = md_rdev_init(rdev);
if (err)
- goto abort_free;
+ goto out_free_rdev;
err = alloc_disk_sb(rdev);
if (err)
- goto abort_free;
+ goto out_clear_rdev;
- err = lock_rdev(rdev, newdev, super_format == -2);
- if (err)
- goto abort_free;
+ rdev->bdev = blkdev_get_by_dev(newdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+ super_format == -2 ? claim_rdev : rdev);
+ if (IS_ERR(rdev->bdev)) {
+ pr_warn("md: could not open device unknown-block(%u,%u).\n",
+ MAJOR(newdev), MINOR(newdev));
+ err = PTR_ERR(rdev->bdev);
+ goto out_clear_rdev;
+ }
kobject_init(&rdev->kobj, &rdev_ktype);
@@ -3686,7 +3675,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
pr_warn("md: %pg has zero or unknown size, marking faulty!\n",
rdev->bdev);
err = -EINVAL;
- goto abort_free;
+ goto out_blkdev_put;
}
if (super_format >= 0) {
@@ -3696,21 +3685,22 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
pr_warn("md: %pg does not have a valid v%d.%d superblock, not importing!\n",
rdev->bdev,
super_format, super_minor);
- goto abort_free;
+ goto out_blkdev_put;
}
if (err < 0) {
pr_warn("md: could not read %pg's sb, not importing!\n",
rdev->bdev);
- goto abort_free;
+ goto out_blkdev_put;
}
}
return rdev;
-abort_free:
- if (rdev->bdev)
- unlock_rdev(rdev);
+out_blkdev_put:
+ blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+out_clear_rdev:
md_rdev_clear(rdev);
+out_free_rdev:
kfree(rdev);
return ERR_PTR(err);
}
@@ -3901,7 +3891,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock;
}
rv = -EROFS;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
goto out_unlock;
/* request to change the personality. Need to ensure:
@@ -4107,7 +4097,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) {
if (mddev->pers->check_reshape == NULL)
err = -EBUSY;
- else if (mddev->ro)
+ else if (!md_is_rdwr(mddev))
err = -EROFS;
else {
mddev->new_layout = n;
@@ -4216,7 +4206,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) {
if (mddev->pers->check_reshape == NULL)
err = -EBUSY;
- else if (mddev->ro)
+ else if (!md_is_rdwr(mddev))
err = -EROFS;
else {
mddev->new_chunk_sectors = n >> 9;
@@ -4339,13 +4329,13 @@ array_state_show(struct mddev *mddev, char *page)
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
switch(mddev->ro) {
- case 1:
+ case MD_RDONLY:
st = readonly;
break;
- case 2:
+ case MD_AUTO_READ:
st = read_auto;
break;
- case 0:
+ case MD_RDWR:
spin_lock(&mddev->lock);
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
st = write_pending;
@@ -4381,7 +4371,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
int err = 0;
enum array_state st = match_word(buf, array_states);
- if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
+ if (mddev->pers && (st == active || st == clean) &&
+ mddev->ro != MD_RDONLY) {
/* don't take reconfig_mutex when toggling between
* clean and active
*/
@@ -4425,23 +4416,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers)
err = md_set_readonly(mddev, NULL);
else {
- mddev->ro = 1;
+ mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev);
}
break;
case read_auto:
if (mddev->pers) {
- if (mddev->ro == 0)
+ if (md_is_rdwr(mddev))
err = md_set_readonly(mddev, NULL);
- else if (mddev->ro == 1)
+ else if (mddev->ro == MD_RDONLY)
err = restart_array(mddev);
if (err == 0) {
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
set_disk_ro(mddev->gendisk, 0);
}
} else {
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
err = do_md_run(mddev);
}
break;
@@ -4466,7 +4457,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
wake_up(&mddev->sb_wait);
err = 0;
} else {
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev);
}
@@ -4765,7 +4756,7 @@ action_show(struct mddev *mddev, char *page)
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
type = "frozen";
else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
+ (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
type = "reshape";
else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
@@ -4851,11 +4842,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
}
- if (mddev->ro == 2) {
+ if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5083,8 +5074,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock;
err = -EBUSY;
- if (max < mddev->resync_max &&
- mddev->ro == 0 &&
+ if (max < mddev->resync_max && md_is_rdwr(mddev) &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
goto out_unlock;
@@ -5813,8 +5803,8 @@ int md_run(struct mddev *mddev)
continue;
sync_blockdev(rdev->bdev);
invalidate_bdev(rdev->bdev);
- if (mddev->ro != 1 && rdev_read_only(rdev)) {
- mddev->ro = 1;
+ if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
+ mddev->ro = MD_RDONLY;
if (mddev->gendisk)
set_disk_ro(mddev->gendisk, 1);
}
@@ -5917,8 +5907,8 @@ int md_run(struct mddev *mddev)
mddev->ok_start_degraded = start_dirty_degraded;
- if (start_readonly && mddev->ro == 0)
- mddev->ro = 2; /* read-only, but switch on first write */
+ if (start_readonly && md_is_rdwr(mddev))
+ mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
err = pers->run(mddev);
if (err)
@@ -5996,8 +5986,8 @@ int md_run(struct mddev *mddev)
mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
- } else if (mddev->ro == 2) /* auto-readonly not meaningful */
- mddev->ro = 0;
+ } else if (mddev->ro == MD_AUTO_READ)
+ mddev->ro = MD_RDWR;
atomic_set(&mddev->max_corr_read_errors,
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
@@ -6015,7 +6005,7 @@ int md_run(struct mddev *mddev)
if (rdev->raid_disk >= 0)
sysfs_link_rdev(mddev, rdev); /* failure here is OK */
- if (mddev->degraded && !mddev->ro)
+ if (mddev->degraded && md_is_rdwr(mddev))
/* This ensures that recovering status is reported immediately
* via sysfs - until a lack of spares is confirmed.
*/
@@ -6105,7 +6095,7 @@ static int restart_array(struct mddev *mddev)
return -ENXIO;
if (!mddev->pers)
return -EINVAL;
- if (!mddev->ro)
+ if (md_is_rdwr(mddev))
return -EBUSY;
rcu_read_lock();
@@ -6124,7 +6114,7 @@ static int restart_array(struct mddev *mddev)
return -EROFS;
mddev->safemode = 0;
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_disk_ro(disk, 0);
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */
@@ -6151,7 +6141,7 @@ static void md_clean(struct mddev *mddev)
mddev->clevel[0] = 0;
mddev->flags = 0;
mddev->sb_flags = 0;
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
mddev->chunk_sectors = 0;
mddev->ctime = mddev->utime = 0;
@@ -6203,7 +6193,7 @@ static void __md_stop_writes(struct mddev *mddev)
}
md_bitmap_flush(mddev);
- if (mddev->ro == 0 &&
+ if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
mddev->sb_flags)) {
/* mark array as shutdown cleanly */
@@ -6312,9 +6302,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
__md_stop_writes(mddev);
err = -ENXIO;
- if (mddev->ro==1)
+ if (mddev->ro == MD_RDONLY)
goto out;
- mddev->ro = 1;
+ mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -6371,7 +6361,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
return -EBUSY;
}
if (mddev->pers) {
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
set_disk_ro(disk, 0);
__md_stop_writes(mddev);
@@ -6388,8 +6378,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
- if (mddev->ro)
- mddev->ro = 0;
+ if (!md_is_rdwr(mddev))
+ mddev->ro = MD_RDWR;
} else
mutex_unlock(&mddev->open_mutex);
/*
@@ -7204,7 +7194,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->sync_thread)
return -EBUSY;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return -EROFS;
rdev_for_each(rdev, mddev) {
@@ -7234,7 +7224,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
/* change the number of raid disks */
if (mddev->pers->check_reshape == NULL)
return -EINVAL;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return -EROFS;
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
@@ -7464,6 +7454,40 @@ static inline bool md_ioctl_valid(unsigned int cmd)
}
}
+static int __md_set_array_info(struct mddev *mddev, void __user *argp)
+{
+ mdu_array_info_t info;
+ int err;
+
+ if (!argp)
+ memset(&info, 0, sizeof(info));
+ else if (copy_from_user(&info, argp, sizeof(info)))
+ return -EFAULT;
+
+ if (mddev->pers) {
+ err = update_array_info(mddev, &info);
+ if (err)
+ pr_warn("md: couldn't update array info. %d\n", err);
+ return err;
+ }
+
+ if (!list_empty(&mddev->disks)) {
+ pr_warn("md: array %s already has disks!\n", mdname(mddev));
+ return -EBUSY;
+ }
+
+ if (mddev->raid_disks) {
+ pr_warn("md: array %s already initialised!\n", mdname(mddev));
+ return -EBUSY;
+ }
+
+ err = md_set_array_info(mddev, &info);
+ if (err)
+ pr_warn("md: couldn't set array info. %d\n", err);
+
+ return err;
+}
+
static int md_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -7569,36 +7593,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
if (cmd == SET_ARRAY_INFO) {
- mdu_array_info_t info;
- if (!arg)
- memset(&info, 0, sizeof(info));
- else if (copy_from_user(&info, argp, sizeof(info))) {
- err = -EFAULT;
- goto unlock;
- }
- if (mddev->pers) {
- err = update_array_info(mddev, &info);
- if (err) {
- pr_warn("md: couldn't update array info. %d\n", err);
- goto unlock;
- }
- goto unlock;
- }
- if (!list_empty(&mddev->disks)) {
- pr_warn("md: array %s already has disks!\n", mdname(mddev));
- err = -EBUSY;
- goto unlock;
- }
- if (mddev->raid_disks) {
- pr_warn("md: array %s already initialised!\n", mdname(mddev));
- err = -EBUSY;
- goto unlock;
- }
- err = md_set_array_info(mddev, &info);
- if (err) {
- pr_warn("md: couldn't set array info. %d\n", err);
- goto unlock;
- }
+ err = __md_set_array_info(mddev, argp);
goto unlock;
}
@@ -7658,26 +7653,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
* The remaining ioctls are changing the state of the
* superblock, so we do not allow them on read-only arrays.
*/
- if (mddev->ro && mddev->pers) {
- if (mddev->ro == 2) {
- mddev->ro = 0;
- sysfs_notify_dirent_safe(mddev->sysfs_state);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- /* mddev_unlock will wake thread */
- /* If a device failed while we were read-only, we
- * need to make sure the metadata is updated now.
- */
- if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
- mddev_unlock(mddev);
- wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
- mddev_lock_nointr(mddev);
- }
- } else {
+ if (!md_is_rdwr(mddev) && mddev->pers) {
+ if (mddev->ro != MD_AUTO_READ) {
err = -EROFS;
goto unlock;
}
+ mddev->ro = MD_RDWR;
+ sysfs_notify_dirent_safe(mddev->sysfs_state);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ /* mddev_unlock will wake thread */
+ /* If a device failed while we were read-only, we
+ * need to make sure the metadata is updated now.
+ */
+ if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
+ mddev_unlock(mddev);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+ mddev_lock_nointr(mddev);
+ }
}
switch (cmd) {
@@ -7763,11 +7757,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro)
* Transitioning to read-auto need only happen for arrays that call
* md_write_start and which are not ready for writes yet.
*/
- if (!ro && mddev->ro == 1 && mddev->pers) {
+ if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
err = restart_array(mddev);
if (err)
goto out_unlock;
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
}
out_unlock:
@@ -8241,9 +8235,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%s : %sactive", mdname(mddev),
mddev->pers ? "" : "in");
if (mddev->pers) {
- if (mddev->ro==1)
+ if (mddev->ro == MD_RDONLY)
seq_printf(seq, " (read-only)");
- if (mddev->ro==2)
+ if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
}
@@ -8502,10 +8496,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (bio_data_dir(bi) != WRITE)
return true;
- BUG_ON(mddev->ro == 1);
- if (mddev->ro == 2) {
+ BUG_ON(mddev->ro == MD_RDONLY);
+ if (mddev->ro == MD_AUTO_READ) {
/* need to switch to read/write */
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
@@ -8556,7 +8550,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi)
{
if (bio_data_dir(bi) != WRITE)
return;
- WARN_ON_ONCE(mddev->in_sync || mddev->ro);
+ WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
percpu_ref_get(&mddev->writes_pending);
}
EXPORT_SYMBOL(md_write_inc);
@@ -8661,7 +8655,7 @@ void md_allow_write(struct mddev *mddev)
{
if (!mddev->pers)
return;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return;
if (!mddev->pers->sync_request)
return;
@@ -8709,7 +8703,7 @@ void md_do_sync(struct md_thread *thread)
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return;
- if (mddev->ro) {/* never try to sync a read-only array */
+ if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return;
}
@@ -9178,9 +9172,9 @@ static int remove_and_add_spares(struct mddev *mddev,
if (test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(Journal, &rdev->flags)) {
- if (mddev->ro &&
- ! (rdev->saved_raid_disk >= 0 &&
- !test_bit(Bitmap_sync, &rdev->flags)))
+ if (!md_is_rdwr(mddev) &&
+ !(rdev->saved_raid_disk >= 0 &&
+ !test_bit(Bitmap_sync, &rdev->flags)))
continue;
rdev->recovery_offset = 0;
@@ -9278,7 +9272,8 @@ void md_check_recovery(struct mddev *mddev)
flush_signals(current);
}
- if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ if (!md_is_rdwr(mddev) &&
+ !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return;
if ( ! (
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@@ -9297,7 +9292,7 @@ void md_check_recovery(struct mddev *mddev)
if (!mddev->external && mddev->safemode == 1)
mddev->safemode = 0;
- if (mddev->ro) {
+ if (!md_is_rdwr(mddev)) {
struct md_rdev *rdev;
if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b4e2d8b87b61..554a9026669a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -782,7 +782,6 @@ extern void mddev_resume(struct mddev *mddev);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
-extern void md_kick_rdev_from_array(struct md_rdev * rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 857c49399c28..b536befd8898 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -398,7 +398,6 @@ static int raid0_run(struct mddev *mddev)
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
- blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
blk_queue_io_opt(mddev->queue,
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 05d8438cfec8..68a9e2d9985b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1321,7 +1321,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
read_bio->bi_end_io = raid1_end_read_request;
- bio_set_op_attrs(read_bio, op, do_sync);
+ read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &mirror->rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@@ -2254,7 +2254,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
continue;
}
- bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
+ wbio->bi_opf = REQ_OP_WRITE;
if (test_bit(FailFast, &conf->mirrors[i].rdev->flags))
wbio->bi_opf |= MD_FAILFAST;
@@ -2419,7 +2419,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
GFP_NOIO, &mddev->bio_set);
}
- bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
+ wbio->bi_opf = REQ_OP_WRITE;
wbio->bi_iter.bi_sector = r1_bio->sector;
wbio->bi_iter.bi_size = r1_bio->sectors << 9;
@@ -2770,7 +2770,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (i < conf->raid_disks)
still_degraded = 1;
} else if (!test_bit(In_sync, &rdev->flags)) {
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
bio->bi_end_io = end_sync_write;
write_targets ++;
} else {
@@ -2797,7 +2797,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (disk < 0)
disk = i;
}
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
bio->bi_end_io = end_sync_read;
read_targets++;
} else if (!test_bit(WriteErrorSeen, &rdev->flags) &&
@@ -2809,7 +2809,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
* if we are doing resync or repair. Otherwise, leave
* this device alone for this sync request.
*/
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
bio->bi_end_io = end_sync_write;
write_targets++;
}
@@ -3159,6 +3159,7 @@ static int raid1_run(struct mddev *mddev)
* RAID1 needs at least one disk in active
*/
if (conf->raid_disks - mddev->degraded < 1) {
+ md_unregister_thread(&conf->thread);
ret = -EINVAL;
goto abort;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3aa8b6e11d58..6c66357f92f5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1254,7 +1254,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
read_bio->bi_end_io = raid10_end_read_request;
- bio_set_op_attrs(read_bio, op, do_sync);
+ read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &rdev->flags) &&
test_bit(R10BIO_FailFast, &r10_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@@ -1301,7 +1301,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
mbio->bi_end_io = raid10_end_write_request;
- bio_set_op_attrs(mbio, op, do_sync | do_fua);
+ mbio->bi_opf = op | do_sync | do_fua;
if (!replacement && test_bit(FailFast,
&conf->mirrors[devnum].rdev->flags)
&& enough(conf, devnum))
@@ -2933,7 +2933,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
wbio->bi_iter.bi_sector = wsector +
choose_data_offset(r10_bio, rdev);
- bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
+ wbio->bi_opf = REQ_OP_WRITE;
if (submit_bio_wait(wbio) < 0)
/* Failure! */
@@ -3542,7 +3542,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_next = biolist;
biolist = bio;
bio->bi_end_io = end_sync_read;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
from_addr = r10_bio->devs[j].addr;
@@ -3567,7 +3567,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_next = biolist;
biolist = bio;
bio->bi_end_io = end_sync_write;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
bio->bi_iter.bi_sector = to_addr
+ mrdev->data_offset;
bio_set_dev(bio, mrdev->bdev);
@@ -3588,7 +3588,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_next = biolist;
biolist = bio;
bio->bi_end_io = end_sync_write;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
bio->bi_iter.bi_sector = to_addr +
mreplace->data_offset;
bio_set_dev(bio, mreplace->bdev);
@@ -3742,7 +3742,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_next = biolist;
biolist = bio;
bio->bi_end_io = end_sync_read;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
bio->bi_iter.bi_sector = sector + rdev->data_offset;
@@ -3764,7 +3764,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_next = biolist;
biolist = bio;
bio->bi_end_io = end_sync_write;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
bio->bi_iter.bi_sector = sector + rdev->data_offset;
@@ -4145,8 +4145,6 @@ static int raid10_run(struct mddev *mddev)
conf->thread = NULL;
if (mddev->queue) {
- blk_queue_max_discard_sectors(mddev->queue,
- UINT_MAX);
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
raid10_set_io_opt(conf);
@@ -4972,7 +4970,7 @@ read_more:
b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
rdev2->new_data_offset;
b->bi_end_io = end_reshape_write;
- bio_set_op_attrs(b, REQ_OP_WRITE, 0);
+ b->bi_opf = REQ_OP_WRITE;
b->bi_next = blist;
blist = b;
}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 832d8566e165..46182b955aef 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1565,11 +1565,12 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
if (!log)
return;
+
+ target = READ_ONCE(log->reclaim_target);
do {
- target = log->reclaim_target;
if (new < target)
return;
- } while (cmpxchg(&log->reclaim_target, target, new) != target);
+ } while (!try_cmpxchg(&log->reclaim_target, &target, new));
md_wakeup_thread(log->reclaim_thread);
}
@@ -3061,7 +3062,6 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
- struct request_queue *q = bdev_get_queue(rdev->bdev);
struct r5l_log *log;
int ret;
@@ -3090,9 +3090,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log)
return -ENOMEM;
log->rdev = rdev;
-
- log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0;
-
+ log->need_cache_flush = bdev_write_cache(rdev->bdev);
log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
sizeof(rdev->mddev->uuid));
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 31b9157bc9ae..e495939bb3e0 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1301,8 +1301,6 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
{
- struct request_queue *q;
-
if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
PPL_HEADER_SIZE) * 2) {
log->use_multippl = true;
@@ -1316,8 +1314,7 @@ static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
}
log->next_io_sector = rdev->ppl.sector;
- q = bdev_get_queue(rdev->bdev);
- if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (bdev_write_cache(rdev->bdev))
log->wb_cache_on = true;
}
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index ff8b083dc5c6..e36aeb50b4ed 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -763,7 +763,7 @@ static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
goto out_free_cmd;
}
- blk_mq_start_request(req);
+ nvme_start_request(req);
apple_nvme_submit_cmd(q, cmnd);
return BLK_STS_OK;
@@ -821,7 +821,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
if (!dead && shutdown && freeze)
nvme_wait_freeze_timeout(&anv->ctrl, NVME_IO_TIMEOUT);
- nvme_stop_queues(&anv->ctrl);
+ nvme_quiesce_io_queues(&anv->ctrl);
if (!dead) {
if (READ_ONCE(anv->ioq.enabled)) {
@@ -829,15 +829,13 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
apple_nvme_remove_cq(anv);
}
- if (shutdown)
- nvme_shutdown_ctrl(&anv->ctrl);
- nvme_disable_ctrl(&anv->ctrl);
+ nvme_disable_ctrl(&anv->ctrl, shutdown);
}
WRITE_ONCE(anv->ioq.enabled, false);
WRITE_ONCE(anv->adminq.enabled, false);
mb(); /* ensure that nvme_queue_rq() sees that enabled is cleared */
- nvme_stop_admin_queue(&anv->ctrl);
+ nvme_quiesce_admin_queue(&anv->ctrl);
/* last chance to complete any requests before nvme_cancel_request */
spin_lock_irqsave(&anv->lock, flags);
@@ -854,8 +852,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
* deadlocking blk-mq hot-cpu notifier.
*/
if (shutdown) {
- nvme_start_queues(&anv->ctrl);
- nvme_start_admin_queue(&anv->ctrl);
+ nvme_unquiesce_io_queues(&anv->ctrl);
+ nvme_unquiesce_admin_queue(&anv->ctrl);
}
}
@@ -1093,7 +1091,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
dev_dbg(anv->dev, "Starting admin queue");
apple_nvme_init_queue(&anv->adminq);
- nvme_start_admin_queue(&anv->ctrl);
+ nvme_unquiesce_admin_queue(&anv->ctrl);
if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(anv->ctrl.device,
@@ -1102,7 +1100,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
goto out;
}
- ret = nvme_init_ctrl_finish(&anv->ctrl);
+ ret = nvme_init_ctrl_finish(&anv->ctrl, false);
if (ret)
goto out;
@@ -1127,7 +1125,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
anv->ctrl.queue_count = nr_io_queues + 1;
- nvme_start_queues(&anv->ctrl);
+ nvme_unquiesce_io_queues(&anv->ctrl);
nvme_wait_freeze(&anv->ctrl);
blk_mq_update_nr_hw_queues(&anv->tagset, 1);
nvme_unfreeze(&anv->ctrl);
@@ -1153,7 +1151,7 @@ out:
nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_DELETING);
nvme_get_ctrl(&anv->ctrl);
apple_nvme_disable(anv, false);
- nvme_kill_queues(&anv->ctrl);
+ nvme_mark_namespaces_dead(&anv->ctrl);
if (!queue_work(nvme_wq, &anv->remove_work))
nvme_put_ctrl(&anv->ctrl);
}
@@ -1507,14 +1505,6 @@ static int apple_nvme_probe(struct platform_device *pdev)
goto put_dev;
}
- if (!blk_get_queue(anv->ctrl.admin_q)) {
- nvme_start_admin_queue(&anv->ctrl);
- blk_mq_destroy_queue(anv->ctrl.admin_q);
- anv->ctrl.admin_q = NULL;
- ret = -ENODEV;
- goto put_dev;
- }
-
nvme_reset_ctrl(&anv->ctrl);
async_schedule(apple_nvme_async_probe, anv);
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index c8a6db7c4498..bb0abbe4491c 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -13,6 +13,10 @@
#include "fabrics.h"
#include <linux/nvme-auth.h>
+#define CHAP_BUF_SIZE 4096
+static struct kmem_cache *nvme_chap_buf_cache;
+static mempool_t *nvme_chap_buf_pool;
+
struct nvme_dhchap_queue_context {
struct list_head entry;
struct work_struct auth_work;
@@ -20,7 +24,6 @@ struct nvme_dhchap_queue_context {
struct crypto_shash *shash_tfm;
struct crypto_kpp *dh_tfm;
void *buf;
- size_t buf_size;
int qid;
int error;
u32 s1;
@@ -47,6 +50,12 @@ struct nvme_dhchap_queue_context {
#define nvme_auth_queue_from_qid(ctrl, qid) \
(qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q
+static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl)
+{
+ return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues +
+ ctrl->opts->nr_poll_queues + 1;
+}
+
static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
void *data, size_t data_len, bool auth_send)
{
@@ -112,7 +121,7 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl,
struct nvmf_auth_dhchap_negotiate_data *data = chap->buf;
size_t size = sizeof(*data) + sizeof(union nvmf_auth_protocol);
- if (chap->buf_size < size) {
+ if (size > CHAP_BUF_SIZE) {
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
return -EINVAL;
}
@@ -147,7 +156,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
const char *gid_name = nvme_auth_dhgroup_name(data->dhgid);
const char *hmac_name, *kpp_name;
- if (chap->buf_size < size) {
+ if (size > CHAP_BUF_SIZE) {
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
return NVME_SC_INVALID_FIELD;
}
@@ -197,12 +206,6 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
return NVME_SC_AUTH_REQUIRED;
}
- /* Reset host response if the hash had been changed */
- if (chap->hash_id != data->hashid) {
- kfree(chap->host_response);
- chap->host_response = NULL;
- }
-
chap->hash_id = data->hashid;
chap->hash_len = data->hl;
dev_dbg(ctrl->device, "qid %d: selected hash %s\n",
@@ -219,14 +222,6 @@ select_kpp:
return NVME_SC_AUTH_REQUIRED;
}
- /* Clear host and controller key to avoid accidental reuse */
- kfree_sensitive(chap->host_key);
- chap->host_key = NULL;
- chap->host_key_len = 0;
- kfree_sensitive(chap->ctrl_key);
- chap->ctrl_key = NULL;
- chap->ctrl_key_len = 0;
-
if (chap->dhgroup_id == data->dhgid &&
(data->dhgid == NVME_AUTH_DHGROUP_NULL || chap->dh_tfm)) {
dev_dbg(ctrl->device,
@@ -302,7 +297,7 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl,
if (chap->host_key_len)
size += chap->host_key_len;
- if (chap->buf_size < size) {
+ if (size > CHAP_BUF_SIZE) {
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
return -EINVAL;
}
@@ -344,10 +339,10 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl,
struct nvmf_auth_dhchap_success1_data *data = chap->buf;
size_t size = sizeof(*data);
- if (ctrl->ctrl_key)
+ if (chap->ctrl_key)
size += chap->hash_len;
- if (chap->buf_size < size) {
+ if (size > CHAP_BUF_SIZE) {
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
return NVME_SC_INVALID_FIELD;
}
@@ -521,6 +516,7 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl,
ret = PTR_ERR(ctrl_response);
return ret;
}
+
ret = crypto_shash_setkey(chap->shash_tfm,
ctrl_response, ctrl->ctrl_key->len);
if (ret) {
@@ -621,9 +617,6 @@ static int nvme_auth_dhchap_exponential(struct nvme_ctrl *ctrl,
if (ret) {
dev_dbg(ctrl->device,
"failed to generate public key, error %d\n", ret);
- kfree(chap->host_key);
- chap->host_key = NULL;
- chap->host_key_len = 0;
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
return ret;
}
@@ -643,9 +636,6 @@ gen_sesskey:
if (ret) {
dev_dbg(ctrl->device,
"failed to generate shared secret, error %d\n", ret);
- kfree_sensitive(chap->sess_key);
- chap->sess_key = NULL;
- chap->sess_key_len = 0;
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
return ret;
}
@@ -654,7 +644,7 @@ gen_sesskey:
return 0;
}
-static void __nvme_auth_reset(struct nvme_dhchap_queue_context *chap)
+static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap)
{
kfree_sensitive(chap->host_response);
chap->host_response = NULL;
@@ -674,24 +664,20 @@ static void __nvme_auth_reset(struct nvme_dhchap_queue_context *chap)
chap->transaction = 0;
memset(chap->c1, 0, sizeof(chap->c1));
memset(chap->c2, 0, sizeof(chap->c2));
+ mempool_free(chap->buf, nvme_chap_buf_pool);
+ chap->buf = NULL;
}
-static void __nvme_auth_free(struct nvme_dhchap_queue_context *chap)
+static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap)
{
- __nvme_auth_reset(chap);
+ nvme_auth_reset_dhchap(chap);
if (chap->shash_tfm)
crypto_free_shash(chap->shash_tfm);
if (chap->dh_tfm)
crypto_free_kpp(chap->dh_tfm);
- kfree_sensitive(chap->ctrl_key);
- kfree_sensitive(chap->host_key);
- kfree_sensitive(chap->sess_key);
- kfree_sensitive(chap->host_response);
- kfree(chap->buf);
- kfree(chap);
}
-static void __nvme_auth_work(struct work_struct *work)
+static void nvme_queue_auth_work(struct work_struct *work)
{
struct nvme_dhchap_queue_context *chap =
container_of(work, struct nvme_dhchap_queue_context, auth_work);
@@ -699,6 +685,16 @@ static void __nvme_auth_work(struct work_struct *work)
size_t tl;
int ret = 0;
+ /*
+ * Allocate a large enough buffer for the entire negotiation:
+ * 4k is enough to ffdhe8192.
+ */
+ chap->buf = mempool_alloc(nvme_chap_buf_pool, GFP_KERNEL);
+ if (!chap->buf) {
+ chap->error = -ENOMEM;
+ return;
+ }
+
chap->transaction = ctrl->transaction++;
/* DH-HMAC-CHAP Step 1: send negotiate */
@@ -720,8 +716,9 @@ static void __nvme_auth_work(struct work_struct *work)
dev_dbg(ctrl->device, "%s: qid %d receive challenge\n",
__func__, chap->qid);
- memset(chap->buf, 0, chap->buf_size);
- ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, chap->buf_size, false);
+ memset(chap->buf, 0, CHAP_BUF_SIZE);
+ ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, CHAP_BUF_SIZE,
+ false);
if (ret) {
dev_warn(ctrl->device,
"qid %d failed to receive challenge, %s %d\n",
@@ -757,11 +754,14 @@ static void __nvme_auth_work(struct work_struct *work)
dev_dbg(ctrl->device, "%s: qid %d host response\n",
__func__, chap->qid);
+ mutex_lock(&ctrl->dhchap_auth_mutex);
ret = nvme_auth_dhchap_setup_host_response(ctrl, chap);
if (ret) {
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
chap->error = ret;
goto fail2;
}
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
/* DH-HMAC-CHAP Step 3: send reply */
dev_dbg(ctrl->device, "%s: qid %d send reply\n",
@@ -783,8 +783,9 @@ static void __nvme_auth_work(struct work_struct *work)
dev_dbg(ctrl->device, "%s: qid %d receive success1\n",
__func__, chap->qid);
- memset(chap->buf, 0, chap->buf_size);
- ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, chap->buf_size, false);
+ memset(chap->buf, 0, CHAP_BUF_SIZE);
+ ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, CHAP_BUF_SIZE,
+ false);
if (ret) {
dev_warn(ctrl->device,
"qid %d failed to receive success1, %s %d\n",
@@ -801,16 +802,19 @@ static void __nvme_auth_work(struct work_struct *work)
return;
}
+ mutex_lock(&ctrl->dhchap_auth_mutex);
if (ctrl->ctrl_key) {
dev_dbg(ctrl->device,
"%s: qid %d controller response\n",
__func__, chap->qid);
ret = nvme_auth_dhchap_setup_ctrl_response(ctrl, chap);
if (ret) {
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
chap->error = ret;
goto fail2;
}
}
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
ret = nvme_auth_process_dhchap_success1(ctrl, chap);
if (ret) {
@@ -819,7 +823,7 @@ static void __nvme_auth_work(struct work_struct *work)
goto fail2;
}
- if (ctrl->ctrl_key) {
+ if (chap->ctrl_key) {
/* DH-HMAC-CHAP Step 5: send success2 */
dev_dbg(ctrl->device, "%s: qid %d send success2\n",
__func__, chap->qid);
@@ -860,42 +864,8 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
return -ENOKEY;
}
- mutex_lock(&ctrl->dhchap_auth_mutex);
- /* Check if the context is already queued */
- list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
- WARN_ON(!chap->buf);
- if (chap->qid == qid) {
- dev_dbg(ctrl->device, "qid %d: re-using context\n", qid);
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- flush_work(&chap->auth_work);
- __nvme_auth_reset(chap);
- queue_work(nvme_wq, &chap->auth_work);
- return 0;
- }
- }
- chap = kzalloc(sizeof(*chap), GFP_KERNEL);
- if (!chap) {
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- return -ENOMEM;
- }
- chap->qid = (qid == NVME_QID_ANY) ? 0 : qid;
- chap->ctrl = ctrl;
-
- /*
- * Allocate a large enough buffer for the entire negotiation:
- * 4k should be enough to ffdhe8192.
- */
- chap->buf_size = 4096;
- chap->buf = kzalloc(chap->buf_size, GFP_KERNEL);
- if (!chap->buf) {
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- kfree(chap);
- return -ENOMEM;
- }
-
- INIT_WORK(&chap->auth_work, __nvme_auth_work);
- list_add(&chap->entry, &ctrl->dhchap_auth_list);
- mutex_unlock(&ctrl->dhchap_auth_mutex);
+ chap = &ctrl->dhchap_ctxs[qid];
+ cancel_work_sync(&chap->auth_work);
queue_work(nvme_wq, &chap->auth_work);
return 0;
}
@@ -906,40 +876,28 @@ int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
struct nvme_dhchap_queue_context *chap;
int ret;
- mutex_lock(&ctrl->dhchap_auth_mutex);
- list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
- if (chap->qid != qid)
- continue;
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- flush_work(&chap->auth_work);
- ret = chap->error;
- return ret;
- }
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- return -ENXIO;
+ chap = &ctrl->dhchap_ctxs[qid];
+ flush_work(&chap->auth_work);
+ ret = chap->error;
+ /* clear sensitive info */
+ nvme_auth_reset_dhchap(chap);
+ return ret;
}
EXPORT_SYMBOL_GPL(nvme_auth_wait);
-void nvme_auth_reset(struct nvme_ctrl *ctrl)
-{
- struct nvme_dhchap_queue_context *chap;
-
- mutex_lock(&ctrl->dhchap_auth_mutex);
- list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- flush_work(&chap->auth_work);
- __nvme_auth_reset(chap);
- }
- mutex_unlock(&ctrl->dhchap_auth_mutex);
-}
-EXPORT_SYMBOL_GPL(nvme_auth_reset);
-
-static void nvme_dhchap_auth_work(struct work_struct *work)
+static void nvme_ctrl_auth_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, dhchap_auth_work);
int ret, q;
+ /*
+ * If the ctrl is no connected, bail as reconnect will handle
+ * authentication.
+ */
+ if (ctrl->state != NVME_CTRL_LIVE)
+ return;
+
/* Authenticate admin queue first */
ret = nvme_auth_negotiate(ctrl, 0);
if (ret) {
@@ -968,43 +926,75 @@ static void nvme_dhchap_auth_work(struct work_struct *work)
* Failure is a soft-state; credentials remain valid until
* the controller terminates the connection.
*/
+ for (q = 1; q < ctrl->queue_count; q++) {
+ ret = nvme_auth_wait(ctrl, q);
+ if (ret)
+ dev_warn(ctrl->device,
+ "qid %d: authentication failed\n", q);
+ }
}
-void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
+int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
{
- INIT_LIST_HEAD(&ctrl->dhchap_auth_list);
- INIT_WORK(&ctrl->dhchap_auth_work, nvme_dhchap_auth_work);
+ struct nvme_dhchap_queue_context *chap;
+ int i, ret;
+
mutex_init(&ctrl->dhchap_auth_mutex);
+ INIT_WORK(&ctrl->dhchap_auth_work, nvme_ctrl_auth_work);
if (!ctrl->opts)
- return;
- nvme_auth_generate_key(ctrl->opts->dhchap_secret, &ctrl->host_key);
- nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret, &ctrl->ctrl_key);
+ return 0;
+ ret = nvme_auth_generate_key(ctrl->opts->dhchap_secret,
+ &ctrl->host_key);
+ if (ret)
+ return ret;
+ ret = nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret,
+ &ctrl->ctrl_key);
+ if (ret)
+ goto err_free_dhchap_secret;
+
+ if (!ctrl->opts->dhchap_secret && !ctrl->opts->dhchap_ctrl_secret)
+ return ret;
+
+ ctrl->dhchap_ctxs = kvcalloc(ctrl_max_dhchaps(ctrl),
+ sizeof(*chap), GFP_KERNEL);
+ if (!ctrl->dhchap_ctxs) {
+ ret = -ENOMEM;
+ goto err_free_dhchap_ctrl_secret;
+ }
+
+ for (i = 0; i < ctrl_max_dhchaps(ctrl); i++) {
+ chap = &ctrl->dhchap_ctxs[i];
+ chap->qid = i;
+ chap->ctrl = ctrl;
+ INIT_WORK(&chap->auth_work, nvme_queue_auth_work);
+ }
+
+ return 0;
+err_free_dhchap_ctrl_secret:
+ nvme_auth_free_key(ctrl->ctrl_key);
+ ctrl->ctrl_key = NULL;
+err_free_dhchap_secret:
+ nvme_auth_free_key(ctrl->host_key);
+ ctrl->host_key = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(nvme_auth_init_ctrl);
void nvme_auth_stop(struct nvme_ctrl *ctrl)
{
- struct nvme_dhchap_queue_context *chap = NULL, *tmp;
-
cancel_work_sync(&ctrl->dhchap_auth_work);
- mutex_lock(&ctrl->dhchap_auth_mutex);
- list_for_each_entry_safe(chap, tmp, &ctrl->dhchap_auth_list, entry)
- cancel_work_sync(&chap->auth_work);
- mutex_unlock(&ctrl->dhchap_auth_mutex);
}
EXPORT_SYMBOL_GPL(nvme_auth_stop);
void nvme_auth_free(struct nvme_ctrl *ctrl)
{
- struct nvme_dhchap_queue_context *chap = NULL, *tmp;
+ int i;
- mutex_lock(&ctrl->dhchap_auth_mutex);
- list_for_each_entry_safe(chap, tmp, &ctrl->dhchap_auth_list, entry) {
- list_del_init(&chap->entry);
- flush_work(&chap->auth_work);
- __nvme_auth_free(chap);
+ if (ctrl->dhchap_ctxs) {
+ for (i = 0; i < ctrl_max_dhchaps(ctrl); i++)
+ nvme_auth_free_dhchap(&ctrl->dhchap_ctxs[i]);
+ kfree(ctrl->dhchap_ctxs);
}
- mutex_unlock(&ctrl->dhchap_auth_mutex);
if (ctrl->host_key) {
nvme_auth_free_key(ctrl->host_key);
ctrl->host_key = NULL;
@@ -1015,3 +1005,27 @@ void nvme_auth_free(struct nvme_ctrl *ctrl)
}
}
EXPORT_SYMBOL_GPL(nvme_auth_free);
+
+int __init nvme_init_auth(void)
+{
+ nvme_chap_buf_cache = kmem_cache_create("nvme-chap-buf-cache",
+ CHAP_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!nvme_chap_buf_cache)
+ return -ENOMEM;
+
+ nvme_chap_buf_pool = mempool_create(16, mempool_alloc_slab,
+ mempool_free_slab, nvme_chap_buf_cache);
+ if (!nvme_chap_buf_pool)
+ goto err_destroy_chap_buf_cache;
+
+ return 0;
+err_destroy_chap_buf_cache:
+ kmem_cache_destroy(nvme_chap_buf_cache);
+ return -ENOMEM;
+}
+
+void __exit nvme_exit_auth(void)
+{
+ mempool_destroy(nvme_chap_buf_pool);
+ kmem_cache_destroy(nvme_chap_buf_cache);
+}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7e3893d06bab..e26b085a007a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -384,6 +384,8 @@ static inline void nvme_end_req(struct request *req)
nvme_log_error(req);
nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
+ if (req->cmd_flags & REQ_NVME_MPATH)
+ nvme_mpath_end_request(req);
blk_mq_end_request(req, status);
}
@@ -851,8 +853,11 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+ cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
+
if (nvme_ns_has_pi(ns)) {
- cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+ cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE1:
@@ -1118,11 +1123,12 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock);
}
- if (effects & NVME_CMD_EFFECTS_CCC)
- nvme_init_ctrl_finish(ctrl);
+ if (effects & NVME_CMD_EFFECTS_CCC) {
+ dev_info(ctrl->device,
+"controller capabilities changed, reset may be required to take effect.\n");
+ }
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
@@ -2003,6 +2009,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
}
}
+ /*
+ * Only set the DEAC bit if the device guarantees that reads from
+ * deallocated data return zeroes. While the DEAC bit does not
+ * require that, it must be a no-op if reads from deallocated data
+ * do not return zeroes.
+ */
+ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+ ns->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2179,7 +2193,7 @@ const struct pr_ops nvme_pr_ops = {
};
#ifdef CONFIG_BLK_SED_OPAL
-int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
+static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send)
{
struct nvme_ctrl *ctrl = data;
@@ -2196,7 +2210,23 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
NVME_QID_ANY, 1, 0);
}
-EXPORT_SYMBOL_GPL(nvme_sec_submit);
+
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+ if (ctrl->oacs & NVME_CTRL_OACS_SEC_SUPP) {
+ if (!ctrl->opal_dev)
+ ctrl->opal_dev = init_opal_dev(ctrl, &nvme_sec_submit);
+ else if (was_suspended)
+ opal_unlock_from_suspend(ctrl->opal_dev);
+ } else {
+ free_opal_dev(ctrl->opal_dev);
+ ctrl->opal_dev = NULL;
+ }
+}
+#else
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+}
#endif /* CONFIG_BLK_SED_OPAL */
#ifdef CONFIG_BLK_DEV_ZONED
@@ -2221,16 +2251,17 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops,
};
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 mask, u32 val,
+ u32 timeout, const char *op)
{
- unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
- u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+ unsigned long timeout_jiffies = jiffies + timeout * HZ;
+ u32 csts;
int ret;
while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
if (csts == ~0)
return -ENODEV;
- if ((csts & NVME_CSTS_RDY) == bit)
+ if ((csts & mask) == val)
break;
usleep_range(1000, 2000);
@@ -2239,7 +2270,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
if (time_after(jiffies, timeout_jiffies)) {
dev_err(ctrl->device,
"Device not ready; aborting %s, CSTS=0x%x\n",
- enabled ? "initialisation" : "reset", csts);
+ op, csts);
return -ENODEV;
}
}
@@ -2247,27 +2278,29 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
return ret;
}
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit. The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
int ret;
ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
- ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+ if (shutdown)
+ ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+ else
+ ctrl->ctrl_config &= ~NVME_CC_ENABLE;
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
+ if (shutdown) {
+ return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK,
+ NVME_CSTS_SHST_CMPLT,
+ ctrl->shutdown_timeout, "shutdown");
+ }
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
-
- return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
+ return nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0,
+ (NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2, "reset");
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
@@ -2332,41 +2365,11 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
- return nvme_wait_ready(ctrl, timeout, true);
+ return nvme_wait_ready(ctrl, NVME_CSTS_RDY, NVME_CSTS_RDY,
+ (timeout + 1) / 2, "initialisation");
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
-int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
-{
- unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
- u32 csts;
- int ret;
-
- ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
- ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
-
- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
- if (ret)
- return ret;
-
- while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
- if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
- break;
-
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(ctrl->device,
- "Device shutdown incomplete; abort shutdown\n");
- return -ENODEV;
- }
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
-
static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
{
__le64 ts;
@@ -3049,7 +3052,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id)
- return 0;
+ return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_CS_CTRL;
@@ -3229,7 +3232,7 @@ out_free:
* register in our nvme_ctrl structure. This should be called as soon as
* the admin queue is fully up and running.
*/
-int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
{
int ret;
@@ -3260,6 +3263,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
+ nvme_configure_opal(ctrl, was_suspended);
+
if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
/*
* Do not return errors unless we are in a controller reset,
@@ -3745,15 +3750,19 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
memcpy(dhchap_secret, buf, count);
nvme_auth_stop(ctrl);
if (strcmp(dhchap_secret, opts->dhchap_secret)) {
+ struct nvme_dhchap_key *key, *host_key;
int ret;
- ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key);
+ ret = nvme_auth_generate_key(dhchap_secret, &key);
if (ret)
return ret;
kfree(opts->dhchap_secret);
opts->dhchap_secret = dhchap_secret;
- /* Key has changed; re-authentication with new key */
- nvme_auth_reset(ctrl);
+ host_key = ctrl->host_key;
+ mutex_lock(&ctrl->dhchap_auth_mutex);
+ ctrl->host_key = key;
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
+ nvme_auth_free_key(host_key);
}
/* Start re-authentication */
dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3795,15 +3804,19 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
memcpy(dhchap_secret, buf, count);
nvme_auth_stop(ctrl);
if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
+ struct nvme_dhchap_key *key, *ctrl_key;
int ret;
- ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key);
+ ret = nvme_auth_generate_key(dhchap_secret, &key);
if (ret)
return ret;
kfree(opts->dhchap_ctrl_secret);
opts->dhchap_ctrl_secret = dhchap_secret;
- /* Key has changed; re-authentication with new key */
- nvme_auth_reset(ctrl);
+ ctrl_key = ctrl->ctrl_key;
+ mutex_lock(&ctrl->dhchap_auth_mutex);
+ ctrl->ctrl_key = key;
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
+ nvme_auth_free_key(ctrl_key);
}
/* Start re-authentication */
dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3875,10 +3888,11 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
-static const struct attribute_group nvme_dev_attrs_group = {
+const struct attribute_group nvme_dev_attrs_group = {
.attrs = nvme_dev_attrs,
.is_visible = nvme_dev_attrs_are_visible,
};
+EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
static const struct attribute_group *nvme_dev_attr_groups[] = {
&nvme_dev_attrs_group,
@@ -4333,10 +4347,6 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
{
int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
- if (test_bit(NVME_NS_DEAD, &ns->flags))
- goto out;
-
- ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
dev_err(ns->ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
@@ -4407,7 +4417,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
down_write(&ctrl->namespaces_rwsem);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
- if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
+ if (ns->head->ns_id > nsid)
list_move_tail(&ns->list, &rm_list);
}
up_write(&ctrl->namespaces_rwsem);
@@ -4424,9 +4434,6 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
u32 prev = 0;
int ret = 0, i;
- if (nvme_ctrl_limited_cns(ctrl))
- return -EOPNOTSUPP;
-
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;
@@ -4534,8 +4541,18 @@ static void nvme_scan_work(struct work_struct *work)
}
mutex_lock(&ctrl->scan_lock);
- if (nvme_scan_ns_list(ctrl) != 0)
+ if (nvme_ctrl_limited_cns(ctrl)) {
nvme_scan_ns_sequential(ctrl);
+ } else {
+ /*
+ * Fall back to sequential scan if DNR is set to handle broken
+ * devices which should support Identify NS List (as per the VS
+ * they report) but don't actually support it.
+ */
+ ret = nvme_scan_ns_list(ctrl);
+ if (ret > 0 && ret & NVME_SC_DNR)
+ nvme_scan_ns_sequential(ctrl);
+ }
mutex_unlock(&ctrl->scan_lock);
}
@@ -4565,8 +4582,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
* removing the namespaces' disks; fail all the queues now to avoid
* potentially having to clean up the failed sync later.
*/
- if (ctrl->state == NVME_CTRL_DEAD)
- nvme_kill_queues(ctrl);
+ if (ctrl->state == NVME_CTRL_DEAD) {
+ nvme_mark_namespaces_dead(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
+ }
/* this is a no-op when called from the controller reset handler */
nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
@@ -4692,7 +4711,7 @@ static void nvme_fw_act_work(struct work_struct *work)
fw_act_timeout = jiffies +
msecs_to_jiffies(admin_timeout * 1000);
- nvme_stop_queues(ctrl);
+ nvme_quiesce_io_queues(ctrl);
while (nvme_ctrl_pp_status(ctrl)) {
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
@@ -4706,7 +4725,7 @@ static void nvme_fw_act_work(struct work_struct *work)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
/* read FW slot information to clear the AER */
nvme_get_fw_slot_info(ctrl);
@@ -4811,8 +4830,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
EXPORT_SYMBOL_GPL(nvme_complete_async_event);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
- const struct blk_mq_ops *ops, unsigned int flags,
- unsigned int cmd_size)
+ const struct blk_mq_ops *ops, unsigned int cmd_size)
{
int ret;
@@ -4822,7 +4840,9 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ctrl->ops->flags & NVME_F_FABRICS)
set->reserved_tags = NVMF_RESERVED_TAGS;
set->numa_node = ctrl->numa_node;
- set->flags = flags;
+ set->flags = BLK_MQ_F_NO_SCHED;
+ if (ctrl->ops->flags & NVME_F_BLOCKING)
+ set->flags |= BLK_MQ_F_BLOCKING;
set->cmd_size = cmd_size;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
@@ -4850,6 +4870,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
out_cleanup_admin_q:
blk_mq_destroy_queue(ctrl->admin_q);
+ blk_put_queue(ctrl->admin_q);
out_free_tagset:
blk_mq_free_tag_set(ctrl->admin_tagset);
return ret;
@@ -4859,14 +4880,17 @@ EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set);
void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl)
{
blk_mq_destroy_queue(ctrl->admin_q);
- if (ctrl->ops->flags & NVME_F_FABRICS)
+ blk_put_queue(ctrl->admin_q);
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
blk_mq_destroy_queue(ctrl->fabrics_q);
+ blk_put_queue(ctrl->fabrics_q);
+ }
blk_mq_free_tag_set(ctrl->admin_tagset);
}
EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set);
int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
- const struct blk_mq_ops *ops, unsigned int flags,
+ const struct blk_mq_ops *ops, unsigned int nr_maps,
unsigned int cmd_size)
{
int ret;
@@ -4874,15 +4898,23 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
memset(set, 0, sizeof(*set));
set->ops = ops;
set->queue_depth = ctrl->sqsize + 1;
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ /*
+ * Some Apple controllers requires tags to be unique across admin and
+ * the (only) I/O queue, so reserve the first 32 tags of the I/O queue.
+ */
+ if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
+ set->reserved_tags = NVME_AQ_DEPTH;
+ else if (ctrl->ops->flags & NVME_F_FABRICS)
+ set->reserved_tags = NVMF_RESERVED_TAGS;
set->numa_node = ctrl->numa_node;
- set->flags = flags;
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ if (ctrl->ops->flags & NVME_F_BLOCKING)
+ set->flags |= BLK_MQ_F_BLOCKING;
set->cmd_size = cmd_size,
set->driver_data = ctrl;
set->nr_hw_queues = ctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
- if (ops->map_queues)
- set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+ set->nr_maps = nr_maps;
ret = blk_mq_alloc_tag_set(set);
if (ret)
return ret;
@@ -4893,6 +4925,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
+ blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE,
+ ctrl->connect_q);
}
ctrl->tagset = set;
@@ -4906,8 +4940,10 @@ EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set);
void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl)
{
- if (ctrl->ops->flags & NVME_F_FABRICS)
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
blk_mq_destroy_queue(ctrl->connect_q);
+ blk_put_queue(ctrl->connect_q);
+ }
blk_mq_free_tag_set(ctrl->tagset);
}
EXPORT_SYMBOL_GPL(nvme_remove_io_tag_set);
@@ -4943,7 +4979,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
nvme_mpath_update(ctrl);
}
@@ -4988,6 +5024,7 @@ static void nvme_free_ctrl(struct device *dev)
nvme_auth_stop(ctrl);
nvme_auth_free(ctrl);
__free_page(ctrl->discard_page);
+ free_opal_dev(ctrl->opal_dev);
if (subsys) {
mutex_lock(&nvme_subsystems_lock);
@@ -5053,7 +5090,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->instance);
ctrl->device->class = nvme_class;
ctrl->device->parent = ctrl->dev;
- ctrl->device->groups = nvme_dev_attr_groups;
+ if (ops->dev_attr_groups)
+ ctrl->device->groups = ops->dev_attr_groups;
+ else
+ ctrl->device->groups = nvme_dev_attr_groups;
ctrl->device->release = nvme_free_ctrl;
dev_set_drvdata(ctrl->device, ctrl);
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
@@ -5077,9 +5117,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
nvme_mpath_init_ctrl(ctrl);
- nvme_auth_init_ctrl(ctrl);
+ ret = nvme_auth_init_ctrl(ctrl);
+ if (ret)
+ goto out_free_cdev;
return 0;
+out_free_cdev:
+ cdev_device_del(&ctrl->cdev, ctrl->device);
out_free_name:
nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
@@ -5092,62 +5136,17 @@ out:
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
-static void nvme_start_ns_queue(struct nvme_ns *ns)
-{
- if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags))
- blk_mq_unquiesce_queue(ns->queue);
-}
-
-static void nvme_stop_ns_queue(struct nvme_ns *ns)
-{
- if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
- blk_mq_quiesce_queue(ns->queue);
- else
- blk_mq_wait_quiesce_done(ns->queue);
-}
-
-/*
- * Prepare a queue for teardown.
- *
- * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
- * the capacity to 0 after that to avoid blocking dispatchers that may be
- * holding bd_butex. This will end buffered writers dirtying pages that can't
- * be synced.
- */
-static void nvme_set_queue_dying(struct nvme_ns *ns)
-{
- if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- return;
-
- blk_mark_disk_dead(ns->disk);
- nvme_start_ns_queue(ns);
-
- set_capacity_and_notify(ns->disk, 0);
-}
-
-/**
- * nvme_kill_queues(): Ends all namespace queues
- * @ctrl: the dead controller that needs to end
- *
- * Call this function when the driver determines it is unable to get the
- * controller in a state capable of servicing IO.
- */
-void nvme_kill_queues(struct nvme_ctrl *ctrl)
+/* let I/O to all namespaces fail in preparation for surprise removal */
+void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
down_read(&ctrl->namespaces_rwsem);
-
- /* Forcibly unquiesce queues to avoid blocking dispatch */
- if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
- nvme_start_admin_queue(ctrl);
-
list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_set_queue_dying(ns);
-
+ blk_mark_disk_dead(ns->disk);
up_read(&ctrl->namespaces_rwsem);
}
-EXPORT_SYMBOL_GPL(nvme_kill_queues);
+EXPORT_SYMBOL_GPL(nvme_mark_namespaces_dead);
void nvme_unfreeze(struct nvme_ctrl *ctrl)
{
@@ -5197,43 +5196,41 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_freeze);
-void nvme_stop_queues(struct nvme_ctrl *ctrl)
+void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
-
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_stop_ns_queue(ns);
- up_read(&ctrl->namespaces_rwsem);
+ if (!ctrl->tagset)
+ return;
+ if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+ blk_mq_quiesce_tagset(ctrl->tagset);
+ else
+ blk_mq_wait_quiesce_done(ctrl->tagset);
}
-EXPORT_SYMBOL_GPL(nvme_stop_queues);
+EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues);
-void nvme_start_queues(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
-
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_start_ns_queue(ns);
- up_read(&ctrl->namespaces_rwsem);
+ if (!ctrl->tagset)
+ return;
+ if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+ blk_mq_unquiesce_tagset(ctrl->tagset);
}
-EXPORT_SYMBOL_GPL(nvme_start_queues);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_io_queues);
-void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl)
{
if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
blk_mq_quiesce_queue(ctrl->admin_q);
else
- blk_mq_wait_quiesce_done(ctrl->admin_q);
+ blk_mq_wait_quiesce_done(ctrl->admin_q->tag_set);
}
-EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_quiesce_admin_queue);
-void nvme_start_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl)
{
if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
blk_mq_unquiesce_queue(ctrl->admin_q);
}
-EXPORT_SYMBOL_GPL(nvme_start_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
{
@@ -5344,8 +5341,13 @@ static int __init nvme_core_init(void)
goto unregister_generic_ns;
}
+ result = nvme_init_auth();
+ if (result)
+ goto destroy_ns_chr;
return 0;
+destroy_ns_chr:
+ class_destroy(nvme_ns_chr_class);
unregister_generic_ns:
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
destroy_subsys_class:
@@ -5366,6 +5368,7 @@ out:
static void __exit nvme_core_exit(void)
{
+ nvme_exit_auth();
class_destroy(nvme_ns_chr_class);
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5d57a042dbca..4564f16a0b20 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1475,6 +1475,8 @@ nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
fc_dma_unmap_single(lport->dev, lsop->rspdma,
sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+ kfree(lsop->rspbuf);
+ kfree(lsop->rqstbuf);
kfree(lsop);
nvme_fc_rport_put(rport);
@@ -1699,6 +1701,15 @@ restart:
spin_unlock_irqrestore(&rport->lock, flags);
}
+static
+void nvme_fc_rcv_ls_req_err_msg(struct nvme_fc_lport *lport,
+ struct fcnvme_ls_rqst_w0 *w0)
+{
+ dev_info(lport->dev, "RCV %s LS failed: No memory\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+}
+
/**
* nvme_fc_rcv_ls_req - transport entry point called by an LLDD
* upon the reception of a NVME LS request.
@@ -1751,20 +1762,20 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
goto out_put;
}
- lsop = kzalloc(sizeof(*lsop) +
- sizeof(union nvmefc_ls_requests) +
- sizeof(union nvmefc_ls_responses),
- GFP_KERNEL);
+ lsop = kzalloc(sizeof(*lsop), GFP_KERNEL);
if (!lsop) {
- dev_info(lport->dev,
- "RCV %s LS failed: No memory\n",
- (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
- nvmefc_ls_names[w0->ls_cmd] : "");
+ nvme_fc_rcv_ls_req_err_msg(lport, w0);
ret = -ENOMEM;
goto out_put;
}
- lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
- lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
+
+ lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL);
+ lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL);
+ if (!lsop->rqstbuf || !lsop->rspbuf) {
+ nvme_fc_rcv_ls_req_err_msg(lport, w0);
+ ret = -ENOMEM;
+ goto out_free;
+ }
lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
sizeof(*lsop->rspbuf),
@@ -1801,6 +1812,8 @@ out_unmap:
fc_dma_unmap_single(lport->dev, lsop->rspdma,
sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
out_free:
+ kfree(lsop->rspbuf);
+ kfree(lsop->rqstbuf);
kfree(lsop);
out_put:
nvme_fc_rport_put(rport);
@@ -2391,7 +2404,7 @@ nvme_fc_ctrl_free(struct kref *ref)
list_del(&ctrl->ctrl_list);
spin_unlock_irqrestore(&ctrl->rport->lock, flags);
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
nvme_remove_admin_tag_set(&ctrl->ctrl);
kfree(ctrl->queues);
@@ -2492,20 +2505,20 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
* (but with error status).
*/
if (ctrl->ctrl.queue_count > 1) {
- nvme_stop_queues(&ctrl->ctrl);
+ nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
if (start_queues)
- nvme_start_queues(&ctrl->ctrl);
+ nvme_unquiesce_io_queues(&ctrl->ctrl);
}
/*
* Other transports, which don't have link-level contexts bound
* to sqe's, would try to gracefully shutdown the controller by
* writing the registers for shutdown and polling (call
- * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
+ * nvme_disable_ctrl()). Given a bunch of i/o was potentially
* just aborted and we will wait on those contexts, and given
* there was no indication of how live the controlelr is on the
* link, don't send more io to create more contexts for the
@@ -2516,13 +2529,13 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
/*
* clean up the admin queue. Same thing as above.
*/
- nvme_stop_admin_queue(&ctrl->ctrl);
+ nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
if (start_queues)
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
}
static void
@@ -2732,7 +2745,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
atomic_set(&op->state, FCPOP_STATE_ACTIVE);
if (!(op->flags & FCOP_FLAGS_AEN))
- blk_mq_start_request(op->rq);
+ nvme_start_request(op->rq);
cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
@@ -2903,7 +2916,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
nvme_fc_init_io_queues(ctrl);
ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
- &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE,
+ &nvme_fc_mq_ops, 1,
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
ctrl->lport->ops->fcprqst_priv_sz));
if (ret)
@@ -3104,9 +3117,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
(ilog2(SZ_4K) - 9);
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
- ret = nvme_init_ctrl_finish(&ctrl->ctrl);
+ ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
goto out_disconnect_admin_queue;
@@ -3250,10 +3263,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
nvme_fc_free_queue(&ctrl->queues[0]);
/* re-enable the admin_q so anything new can fast fail */
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
/* resume the io queues so that things will fast fail */
- nvme_start_queues(&ctrl->ctrl);
+ nvme_unquiesce_io_queues(&ctrl->ctrl);
nvme_fc_ctlr_inactive_on_rport(ctrl);
}
@@ -3509,7 +3522,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
nvme_fc_init_queue(ctrl, 0);
ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
- &nvme_fc_admin_mq_ops, BLK_MQ_F_NO_SCHED,
+ &nvme_fc_admin_mq_ops,
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
ctrl->lport->ops->fcprqst_priv_sz));
if (ret)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 81f5550b670d..9ddda571f046 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -8,6 +8,50 @@
#include <linux/io_uring.h>
#include "nvme.h"
+static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
+ fmode_t mode)
+{
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ /*
+ * Do not allow unprivileged processes to send vendor specific or fabrics
+ * commands as we can't be sure about their effects.
+ */
+ if (c->common.opcode >= nvme_cmd_vendor_start ||
+ c->common.opcode == nvme_fabrics_command)
+ return false;
+
+ /*
+ * Do not allow unprivileged passthrough of admin commands except
+ * for a subset of identify commands that contain information required
+ * to form proper I/O commands in userspace and do not expose any
+ * potentially sensitive information.
+ */
+ if (!ns) {
+ if (c->common.opcode == nvme_admin_identify) {
+ switch (c->identify.cns) {
+ case NVME_ID_CNS_NS:
+ case NVME_ID_CNS_CS_NS:
+ case NVME_ID_CNS_NS_CS_INDEP:
+ case NVME_ID_CNS_CS_CTRL:
+ case NVME_ID_CNS_CTRL:
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /*
+ * Only allow I/O commands that transfer data to the controller if the
+ * special file is open for writing, but always allow I/O commands that
+ * transfer data from the controller.
+ */
+ if (nvme_is_write(c))
+ return mode & FMODE_WRITE;
+ return true;
+}
+
/*
* Convert integer values from ioctl structures to user pointers, silently
* ignoring the upper bits in the compat case to match behaviour of 32-bit
@@ -261,7 +305,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
+ struct nvme_passthru_cmd __user *ucmd, fmode_t mode)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -269,8 +313,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u64 result;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -291,6 +333,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+ if (!nvme_cmd_allowed(ns, &c, mode))
+ return -EACCES;
+
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -308,15 +353,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd, bool vec)
+ struct nvme_passthru_cmd64 __user *ucmd, bool vec,
+ fmode_t mode)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
unsigned timeout = 0;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -337,6 +381,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+ if (!nvme_cmd_allowed(ns, &c, mode))
+ return -EACCES;
+
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -483,9 +530,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
void *meta = NULL;
int ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
c.common.opcode = READ_ONCE(cmd->opcode);
c.common.flags = READ_ONCE(cmd->flags);
if (c.common.flags)
@@ -507,6 +551,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+ if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode))
+ return -EACCES;
+
d.metadata = READ_ONCE(cmd->metadata);
d.addr = READ_ONCE(cmd->addr);
d.data_len = READ_ONCE(cmd->data_len);
@@ -570,13 +617,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
- void __user *argp)
+ void __user *argp, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false);
+ return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -601,14 +648,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp)
+ void __user *argp, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp);
+ return nvme_user_cmd(ns->ctrl, ns, argp, mode);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -620,19 +667,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, argp);
case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp, false);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
case NVME_IOCTL_IO64_CMD_VEC:
- return nvme_user_cmd64(ns->ctrl, ns, argp, true);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
default:
return -ENOTTY;
}
}
-static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
+static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg,
+ fmode_t mode)
{
- if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
- return nvme_ns_ioctl(ns, cmd, arg);
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode);
+ return nvme_ns_ioctl(ns, cmd, arg, mode);
}
int nvme_ioctl(struct block_device *bdev, fmode_t mode,
@@ -640,7 +688,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
- return __nvme_ioctl(ns, cmd, (void __user *)arg);
+ return __nvme_ioctl(ns, cmd, (void __user *)arg, mode);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -648,7 +696,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
- return __nvme_ioctl(ns, cmd, (void __user *)arg);
+ return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -716,7 +764,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+ void __user *argp, struct nvme_ns_head *head, int srcu_idx,
+ fmode_t mode)
__releases(&head->srcu)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -724,7 +773,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl);
srcu_read_unlock(&head->srcu, srcu_idx);
- ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
nvme_put_ctrl(ctrl);
return ret;
@@ -749,9 +798,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
* deadlock when deleting namespaces using the passthrough interface.
*/
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ mode);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -773,9 +823,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
goto out_unlock;
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ file->f_mode);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -849,7 +900,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
return ret;
}
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
+ fmode_t mode)
{
struct nvme_ns *ns;
int ret;
@@ -873,7 +925,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp);
+ ret = nvme_user_cmd(ctrl, ns, argp, mode);
nvme_put_ns(ns);
return ret;
@@ -890,11 +942,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, file->f_mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false);
+ return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode);
case NVME_IOCTL_IO_CMD:
- return nvme_dev_user_cmd(ctrl, argp);
+ return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
case NVME_IOCTL_RESET:
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 7e025b8948cb..c03093b6813c 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -114,6 +114,31 @@ void nvme_failover_req(struct request *req)
kblockd_schedule_work(&ns->head->requeue_work);
}
+void nvme_mpath_start_request(struct request *rq)
+{
+ struct nvme_ns *ns = rq->q->queuedata;
+ struct gendisk *disk = ns->head->disk;
+
+ if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
+ return;
+
+ nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
+ nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
+ blk_rq_bytes(rq) >> SECTOR_SHIFT,
+ req_op(rq), jiffies);
+}
+EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
+
+void nvme_mpath_end_request(struct request *rq)
+{
+ struct nvme_ns *ns = rq->q->queuedata;
+
+ if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
+ return;
+ bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
+ nvme_req(rq)->start_time);
+}
+
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
@@ -506,6 +531,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue);
/*
* This assumes all controllers that refer to a namespace either
* support poll queues or not. That is not a strict guarantee,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a29877217ee6..6bbb73ef8b25 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -162,6 +162,9 @@ struct nvme_request {
u8 retries;
u8 flags;
u16 status;
+#ifdef CONFIG_NVME_MULTIPATH
+ unsigned long start_time;
+#endif
struct nvme_ctrl *ctrl;
};
@@ -173,6 +176,7 @@ struct nvme_request {
enum {
NVME_REQ_CANCELLED = (1 << 0),
NVME_REQ_USERCMD = (1 << 1),
+ NVME_MPATH_IO_STATS = (1 << 2),
};
static inline struct nvme_request *nvme_req(struct request *req)
@@ -237,6 +241,7 @@ enum nvme_ctrl_flags {
NVME_CTRL_FAILFAST_EXPIRED = 0,
NVME_CTRL_ADMIN_Q_STOPPED = 1,
NVME_CTRL_STARTED_ONCE = 2,
+ NVME_CTRL_STOPPED = 3,
};
struct nvme_ctrl {
@@ -336,8 +341,8 @@ struct nvme_ctrl {
#ifdef CONFIG_NVME_AUTH
struct work_struct dhchap_auth_work;
- struct list_head dhchap_auth_list;
struct mutex dhchap_auth_mutex;
+ struct nvme_dhchap_queue_context *dhchap_ctxs;
struct nvme_dhchap_key *host_key;
struct nvme_dhchap_key *ctrl_key;
u16 transaction;
@@ -454,6 +459,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
+ NVME_NS_DEAC, /* DEAC bit in Write Zeores supported */
};
struct nvme_ns {
@@ -483,11 +489,9 @@ struct nvme_ns {
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
-#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
#define NVME_NS_READY 4
-#define NVME_NS_STOPPED 5
struct cdev cdev;
struct device cdev_device;
@@ -508,6 +512,9 @@ struct nvme_ctrl_ops {
unsigned int flags;
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
+#define NVME_F_BLOCKING (1 << 2)
+
+ const struct attribute_group **dev_attr_groups;
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
@@ -728,37 +735,32 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl);
void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state);
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
-int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
-int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl);
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
- const struct blk_mq_ops *ops, unsigned int flags,
- unsigned int cmd_size);
+ const struct blk_mq_ops *ops, unsigned int cmd_size);
void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl);
int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
- const struct blk_mq_ops *ops, unsigned int flags,
+ const struct blk_mq_ops *ops, unsigned int nr_maps,
unsigned int cmd_size);
void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
-int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
- bool send);
-
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
volatile union nvme_result *res);
-void nvme_stop_queues(struct nvme_ctrl *ctrl);
-void nvme_start_queues(struct nvme_ctrl *ctrl);
-void nvme_stop_admin_queue(struct nvme_ctrl *ctrl);
-void nvme_start_admin_queue(struct nvme_ctrl *ctrl);
-void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl);
+void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl);
+void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl);
+void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl);
+void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_sync_io_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
@@ -857,6 +859,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
+extern const struct attribute_group nvme_dev_attrs_group;
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
#ifdef CONFIG_NVME_MULTIPATH
@@ -883,6 +886,8 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
+void nvme_mpath_start_request(struct request *rq);
+void nvme_mpath_end_request(struct request *rq);
static inline void nvme_trace_bio_complete(struct request *req)
{
@@ -968,6 +973,12 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
}
+static inline void nvme_mpath_start_request(struct request *rq)
+{
+}
+static inline void nvme_mpath_end_request(struct request *rq)
+{
+}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_revalidate_zones(struct nvme_ns *ns);
@@ -1013,20 +1024,38 @@ static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
}
#endif
+static inline void nvme_start_request(struct request *rq)
+{
+ if (rq->cmd_flags & REQ_NVME_MPATH)
+ nvme_mpath_start_request(rq);
+ blk_mq_start_request(rq);
+}
+
static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
{
return ctrl->sgls & ((1 << 0) | (1 << 1));
}
#ifdef CONFIG_NVME_AUTH
-void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
+int __init nvme_init_auth(void);
+void __exit nvme_exit_auth(void);
+int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
void nvme_auth_stop(struct nvme_ctrl *ctrl);
int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
-void nvme_auth_reset(struct nvme_ctrl *ctrl);
void nvme_auth_free(struct nvme_ctrl *ctrl);
#else
-static inline void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) {};
+static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
+{
+ return 0;
+}
+static inline int __init nvme_init_auth(void)
+{
+ return 0;
+}
+static inline void __exit nvme_exit_auth(void)
+{
+}
static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {};
static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 488ad7dabeb8..f0f8027644bb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/kstrtox.h>
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -108,7 +109,7 @@ struct nvme_dev;
struct nvme_queue;
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
-static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
+static void nvme_delete_io_queues(struct nvme_dev *dev);
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -130,7 +131,6 @@ struct nvme_dev {
u32 db_stride;
void __iomem *bar;
unsigned long bar_mapped_size;
- struct work_struct remove_work;
struct mutex shutdown_lock;
bool subsystem;
u64 cmb_size;
@@ -158,8 +158,6 @@ struct nvme_dev {
unsigned int nr_allocated_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
-
- bool attrs_added;
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
@@ -241,10 +239,13 @@ static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
return dev->nr_allocated_queues * 8 * dev->db_stride;
}
-static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
+static void nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
unsigned int mem_size = nvme_dbbuf_size(dev);
+ if (!(dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP))
+ return;
+
if (dev->dbbuf_dbs) {
/*
* Clear the dbbuf memory so the driver doesn't observe stale
@@ -252,25 +253,27 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
*/
memset(dev->dbbuf_dbs, 0, mem_size);
memset(dev->dbbuf_eis, 0, mem_size);
- return 0;
+ return;
}
dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
&dev->dbbuf_dbs_dma_addr,
GFP_KERNEL);
if (!dev->dbbuf_dbs)
- return -ENOMEM;
+ goto fail;
dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
&dev->dbbuf_eis_dma_addr,
GFP_KERNEL);
- if (!dev->dbbuf_eis) {
- dma_free_coherent(dev->dev, mem_size,
- dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
- dev->dbbuf_dbs = NULL;
- return -ENOMEM;
- }
+ if (!dev->dbbuf_eis)
+ goto fail_free_dbbuf_dbs;
+ return;
- return 0;
+fail_free_dbbuf_dbs:
+ dma_free_coherent(dev->dev, mem_size, dev->dbbuf_dbs,
+ dev->dbbuf_dbs_dma_addr);
+ dev->dbbuf_dbs = NULL;
+fail:
+ dev_warn(dev->dev, "unable to allocate dma for dbbuf\n");
}
static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
@@ -392,18 +395,10 @@ static int nvme_pci_npages_sgl(void)
PAGE_SIZE);
}
-static size_t nvme_pci_iod_alloc_size(void)
-{
- size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
-
- return sizeof(__le64 *) * npages +
- sizeof(struct scatterlist) * NVME_MAX_SEGS;
-}
-
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
- struct nvme_dev *dev = data;
+ struct nvme_dev *dev = to_nvme_dev(data);
struct nvme_queue *nvmeq = &dev->queues[0];
WARN_ON(hctx_idx != 0);
@@ -416,7 +411,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
- struct nvme_dev *dev = data;
+ struct nvme_dev *dev = to_nvme_dev(data);
struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
@@ -428,7 +423,7 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
unsigned int numa_node)
{
- struct nvme_dev *dev = set->driver_data;
+ struct nvme_dev *dev = to_nvme_dev(set->driver_data);
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
nvme_req(req)->ctrl = &dev->ctrl;
@@ -447,7 +442,7 @@ static int queue_irq_offset(struct nvme_dev *dev)
static void nvme_pci_map_queues(struct blk_mq_tag_set *set)
{
- struct nvme_dev *dev = set->driver_data;
+ struct nvme_dev *dev = to_nvme_dev(set->driver_data);
int i, qoff, offset;
offset = queue_irq_offset(dev);
@@ -914,7 +909,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
goto out_unmap_data;
}
- blk_mq_start_request(req);
+ nvme_start_request(req);
return BLK_STS_OK;
out_unmap_data:
nvme_unmap_data(dev, req);
@@ -1474,24 +1469,21 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
}
}
-/**
- * nvme_suspend_queue - put queue into suspended state
- * @nvmeq: queue to suspend
- */
-static int nvme_suspend_queue(struct nvme_queue *nvmeq)
+static void nvme_suspend_queue(struct nvme_dev *dev, unsigned int qid)
{
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+
if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
- return 1;
+ return;
/* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
mb();
nvmeq->dev->online_queues--;
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
- nvme_stop_admin_queue(&nvmeq->dev->ctrl);
+ nvme_quiesce_admin_queue(&nvmeq->dev->ctrl);
if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
- pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
- return 0;
+ pci_free_irq(to_pci_dev(dev->dev), nvmeq->cq_vector, nvmeq);
}
static void nvme_suspend_io_queues(struct nvme_dev *dev)
@@ -1499,19 +1491,7 @@ static void nvme_suspend_io_queues(struct nvme_dev *dev)
int i;
for (i = dev->ctrl.queue_count - 1; i > 0; i--)
- nvme_suspend_queue(&dev->queues[i]);
-}
-
-static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
-{
- struct nvme_queue *nvmeq = &dev->queues[0];
-
- if (shutdown)
- nvme_shutdown_ctrl(&dev->ctrl);
- else
- nvme_disable_ctrl(&dev->ctrl);
-
- nvme_poll_irqdisable(nvmeq);
+ nvme_suspend_queue(dev, i);
}
/*
@@ -1748,44 +1728,11 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
- nvme_start_admin_queue(&dev->ctrl);
- blk_mq_destroy_queue(dev->ctrl.admin_q);
- blk_mq_free_tag_set(&dev->admin_tagset);
+ nvme_unquiesce_admin_queue(&dev->ctrl);
+ nvme_remove_admin_tag_set(&dev->ctrl);
}
}
-static int nvme_pci_alloc_admin_tag_set(struct nvme_dev *dev)
-{
- struct blk_mq_tag_set *set = &dev->admin_tagset;
-
- set->ops = &nvme_mq_admin_ops;
- set->nr_hw_queues = 1;
-
- set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
- set->timeout = NVME_ADMIN_TIMEOUT;
- set->numa_node = dev->ctrl.numa_node;
- set->cmd_size = sizeof(struct nvme_iod);
- set->flags = BLK_MQ_F_NO_SCHED;
- set->driver_data = dev;
-
- if (blk_mq_alloc_tag_set(set))
- return -ENOMEM;
- dev->ctrl.admin_tagset = set;
-
- dev->ctrl.admin_q = blk_mq_init_queue(set);
- if (IS_ERR(dev->ctrl.admin_q)) {
- blk_mq_free_tag_set(set);
- dev->ctrl.admin_q = NULL;
- return -ENOMEM;
- }
- if (!blk_get_queue(dev->ctrl.admin_q)) {
- nvme_dev_remove_admin(dev);
- dev->ctrl.admin_q = NULL;
- return -ENODEV;
- }
- return 0;
-}
-
static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
@@ -1829,7 +1776,14 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(&dev->ctrl);
+ /*
+ * If the device has been passed off to us in an enabled state, just
+ * clear the enabled bit. The spec says we should set the 'shutdown
+ * notification bits', but doing so may cause the device to complete
+ * commands to the admin queue ... and we don't know what memory that
+ * might be pointing at!
+ */
+ result = nvme_disable_ctrl(&dev->ctrl, false);
if (result < 0)
return result;
@@ -2112,6 +2066,9 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
u32 enable_bits = NVME_HOST_MEM_ENABLE;
int ret;
+ if (!dev->ctrl.hmpre)
+ return 0;
+
preferred = min(preferred, max);
if (min > max) {
dev_warn(dev->ctrl.device,
@@ -2192,7 +2149,7 @@ static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
bool new;
int ret;
- if (strtobool(buf, &new) < 0)
+ if (kstrtobool(buf, &new) < 0)
return -EINVAL;
if (new == ndev->hmb)
@@ -2240,11 +2197,17 @@ static struct attribute *nvme_pci_attrs[] = {
NULL,
};
-static const struct attribute_group nvme_pci_attr_group = {
+static const struct attribute_group nvme_pci_dev_attrs_group = {
.attrs = nvme_pci_attrs,
.is_visible = nvme_pci_attrs_are_visible,
};
+static const struct attribute_group *nvme_pci_dev_attr_groups[] = {
+ &nvme_dev_attrs_group,
+ &nvme_pci_dev_attrs_group,
+ NULL,
+};
+
/*
* nirqs is the number of interrupts available for write and read
* queues. The core already reserved an interrupt for the admin queue.
@@ -2319,12 +2282,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
}
-static void nvme_disable_io_queues(struct nvme_dev *dev)
-{
- if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
- __nvme_disable_io_queues(dev, nvme_admin_delete_cq);
-}
-
static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
{
/*
@@ -2432,7 +2389,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (dev->online_queues - 1 < dev->max_qid) {
nr_io_queues = dev->online_queues - 1;
- nvme_disable_io_queues(dev);
+ nvme_delete_io_queues(dev);
result = nvme_setup_io_queues_trylock(dev);
if (result)
return result;
@@ -2495,7 +2452,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
return 0;
}
-static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
+static bool __nvme_delete_io_queues(struct nvme_dev *dev, u8 opcode)
{
int nr_queues = dev->online_queues - 1, sent = 0;
unsigned long timeout;
@@ -2523,40 +2480,19 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
return true;
}
-static void nvme_pci_alloc_tag_set(struct nvme_dev *dev)
+static void nvme_delete_io_queues(struct nvme_dev *dev)
{
- struct blk_mq_tag_set * set = &dev->tagset;
- int ret;
+ if (__nvme_delete_io_queues(dev, nvme_admin_delete_sq))
+ __nvme_delete_io_queues(dev, nvme_admin_delete_cq);
+}
- set->ops = &nvme_mq_ops;
- set->nr_hw_queues = dev->online_queues - 1;
- set->nr_maps = 1;
- if (dev->io_queues[HCTX_TYPE_READ])
- set->nr_maps = 2;
+static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev)
+{
if (dev->io_queues[HCTX_TYPE_POLL])
- set->nr_maps = 3;
- set->timeout = NVME_IO_TIMEOUT;
- set->numa_node = dev->ctrl.numa_node;
- set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
- set->cmd_size = sizeof(struct nvme_iod);
- set->flags = BLK_MQ_F_SHOULD_MERGE;
- set->driver_data = dev;
-
- /*
- * Some Apple controllers requires tags to be unique
- * across admin and IO queue, so reserve the first 32
- * tags of the IO queue.
- */
- if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
- set->reserved_tags = NVME_AQ_DEPTH;
-
- ret = blk_mq_alloc_tag_set(set);
- if (ret) {
- dev_warn(dev->ctrl.device,
- "IO queues tagset allocation failed %d\n", ret);
- return;
- }
- dev->ctrl.tagset = set;
+ return 3;
+ if (dev->io_queues[HCTX_TYPE_READ])
+ return 2;
+ return 1;
}
static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
@@ -2647,7 +2583,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
- return 0;
+
+ return nvme_pci_configure_admin_queue(dev);
disable:
pci_disable_device(pdev);
@@ -2661,57 +2598,53 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
pci_release_mem_regions(to_pci_dev(dev->dev));
}
-static void nvme_pci_disable(struct nvme_dev *dev)
+static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ u32 csts;
- pci_free_irq_vectors(pdev);
+ if (!pci_is_enabled(pdev) || !pci_device_is_present(pdev))
+ return true;
+ if (pdev->error_state != pci_channel_io_normal)
+ return true;
- if (pci_is_enabled(pdev)) {
- pci_disable_pcie_error_reporting(pdev);
- pci_disable_device(pdev);
- }
+ csts = readl(dev->bar + NVME_REG_CSTS);
+ return (csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY);
}
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- bool dead = true, freeze = false;
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ bool dead;
mutex_lock(&dev->shutdown_lock);
- if (pci_is_enabled(pdev)) {
- u32 csts;
-
- if (pci_device_is_present(pdev))
- csts = readl(dev->bar + NVME_REG_CSTS);
- else
- csts = ~0;
-
- if (dev->ctrl.state == NVME_CTRL_LIVE ||
- dev->ctrl.state == NVME_CTRL_RESETTING) {
- freeze = true;
+ dead = nvme_pci_ctrl_is_dead(dev);
+ if (dev->ctrl.state == NVME_CTRL_LIVE ||
+ dev->ctrl.state == NVME_CTRL_RESETTING) {
+ if (pci_is_enabled(pdev))
nvme_start_freeze(&dev->ctrl);
- }
- dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
- pdev->error_state != pci_channel_io_normal);
+ /*
+ * Give the controller a chance to complete all entered requests
+ * if doing a safe shutdown.
+ */
+ if (!dead && shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
}
- /*
- * Give the controller a chance to complete all entered requests if
- * doing a safe shutdown.
- */
- if (!dead && shutdown && freeze)
- nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
-
- nvme_stop_queues(&dev->ctrl);
+ nvme_quiesce_io_queues(&dev->ctrl);
if (!dead && dev->ctrl.queue_count > 0) {
- nvme_disable_io_queues(dev);
- nvme_disable_admin_queue(dev, shutdown);
+ nvme_delete_io_queues(dev);
+ nvme_disable_ctrl(&dev->ctrl, shutdown);
+ nvme_poll_irqdisable(&dev->queues[0]);
}
nvme_suspend_io_queues(dev);
- nvme_suspend_queue(&dev->queues[0]);
- nvme_pci_disable(dev);
+ nvme_suspend_queue(dev, 0);
+ pci_free_irq_vectors(pdev);
+ if (pci_is_enabled(pdev)) {
+ pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+ }
nvme_reap_pending_cqes(dev);
nvme_cancel_tagset(&dev->ctrl);
@@ -2723,9 +2656,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* deadlocking blk-mq hot-cpu notifier.
*/
if (shutdown) {
- nvme_start_queues(&dev->ctrl);
+ nvme_unquiesce_io_queues(&dev->ctrl);
if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
- nvme_start_admin_queue(&dev->ctrl);
+ nvme_unquiesce_admin_queue(&dev->ctrl);
}
mutex_unlock(&dev->shutdown_lock);
}
@@ -2762,42 +2695,40 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool);
}
+static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
+{
+ size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
+ size_t alloc_size = sizeof(__le64 *) * npages +
+ sizeof(struct scatterlist) * NVME_MAX_SEGS;
+
+ WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+ dev->iod_mempool = mempool_create_node(1,
+ mempool_kmalloc, mempool_kfree,
+ (void *)alloc_size, GFP_KERNEL,
+ dev_to_node(dev->dev));
+ if (!dev->iod_mempool)
+ return -ENOMEM;
+ return 0;
+}
+
static void nvme_free_tagset(struct nvme_dev *dev)
{
if (dev->tagset.tags)
- blk_mq_free_tag_set(&dev->tagset);
+ nvme_remove_io_tag_set(&dev->ctrl);
dev->ctrl.tagset = NULL;
}
+/* pairs with nvme_pci_alloc_dev */
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
- nvme_dbbuf_dma_free(dev);
nvme_free_tagset(dev);
- if (dev->ctrl.admin_q)
- blk_put_queue(dev->ctrl.admin_q);
- free_opal_dev(dev->ctrl.opal_dev);
- mempool_destroy(dev->iod_mempool);
put_device(dev->dev);
kfree(dev->queues);
kfree(dev);
}
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
-{
- /*
- * Set state to deleting now to avoid blocking nvme_wait_reset(), which
- * may be holding this pci_dev's device lock.
- */
- nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
- nvme_get_ctrl(&dev->ctrl);
- nvme_dev_disable(dev, false);
- nvme_kill_queues(&dev->ctrl);
- if (!queue_work(nvme_wq, &dev->remove_work))
- nvme_put_ctrl(&dev->ctrl);
-}
-
static void nvme_reset_work(struct work_struct *work)
{
struct nvme_dev *dev =
@@ -2808,8 +2739,7 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.state != NVME_CTRL_RESETTING) {
dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
dev->ctrl.state);
- result = -ENODEV;
- goto out;
+ return;
}
/*
@@ -2824,34 +2754,7 @@ static void nvme_reset_work(struct work_struct *work)
result = nvme_pci_enable(dev);
if (result)
goto out_unlock;
-
- result = nvme_pci_configure_admin_queue(dev);
- if (result)
- goto out_unlock;
-
- if (!dev->ctrl.admin_q) {
- result = nvme_pci_alloc_admin_tag_set(dev);
- if (result)
- goto out_unlock;
- } else {
- nvme_start_admin_queue(&dev->ctrl);
- }
-
- dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
-
- /*
- * Limit the max command size to prevent iod->sg allocations going
- * over a single page.
- */
- dev->ctrl.max_hw_sectors = min_t(u32,
- NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
- dev->ctrl.max_segments = NVME_MAX_SEGS;
-
- /*
- * Don't limit the IOMMU merged segment size.
- */
- dma_set_max_seg_size(dev->dev, 0xffffffff);
-
+ nvme_unquiesce_admin_queue(&dev->ctrl);
mutex_unlock(&dev->shutdown_lock);
/*
@@ -2865,62 +2768,37 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
- /*
- * We do not support an SGL for metadata (yet), so we are limited to a
- * single integrity segment for the separate metadata pointer.
- */
- dev->ctrl.max_integrity_segments = 1;
-
- result = nvme_init_ctrl_finish(&dev->ctrl);
+ result = nvme_init_ctrl_finish(&dev->ctrl, was_suspend);
if (result)
goto out;
- if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
- if (!dev->ctrl.opal_dev)
- dev->ctrl.opal_dev =
- init_opal_dev(&dev->ctrl, &nvme_sec_submit);
- else if (was_suspend)
- opal_unlock_from_suspend(dev->ctrl.opal_dev);
- } else {
- free_opal_dev(dev->ctrl.opal_dev);
- dev->ctrl.opal_dev = NULL;
- }
-
- if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
- result = nvme_dbbuf_dma_alloc(dev);
- if (result)
- dev_warn(dev->dev,
- "unable to allocate dma for dbbuf\n");
- }
+ nvme_dbbuf_dma_alloc(dev);
- if (dev->ctrl.hmpre) {
- result = nvme_setup_host_mem(dev);
- if (result < 0)
- goto out;
- }
+ result = nvme_setup_host_mem(dev);
+ if (result < 0)
+ goto out;
result = nvme_setup_io_queues(dev);
if (result)
goto out;
/*
- * Keep the controller around but remove all namespaces if we don't have
- * any working I/O queue.
+ * Freeze and update the number of I/O queues as thos might have
+ * changed. If there are no I/O queues left after this reset, keep the
+ * controller around but remove all namespaces.
*/
- if (dev->online_queues < 2) {
- dev_warn(dev->ctrl.device, "IO queues not created\n");
- nvme_kill_queues(&dev->ctrl);
- nvme_remove_namespaces(&dev->ctrl);
- nvme_free_tagset(dev);
- } else {
- nvme_start_queues(&dev->ctrl);
+ if (dev->online_queues > 1) {
+ nvme_unquiesce_io_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- if (!dev->ctrl.tagset)
- nvme_pci_alloc_tag_set(dev);
- else
- nvme_pci_update_nr_queues(dev);
+ nvme_pci_update_nr_queues(dev);
nvme_dbbuf_set(dev);
nvme_unfreeze(&dev->ctrl);
+ } else {
+ dev_warn(dev->ctrl.device, "IO queues lost\n");
+ nvme_mark_namespaces_dead(&dev->ctrl);
+ nvme_unquiesce_io_queues(&dev->ctrl);
+ nvme_remove_namespaces(&dev->ctrl);
+ nvme_free_tagset(dev);
}
/*
@@ -2934,30 +2812,22 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
- if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
- &nvme_pci_attr_group))
- dev->attrs_added = true;
-
nvme_start_ctrl(&dev->ctrl);
return;
out_unlock:
mutex_unlock(&dev->shutdown_lock);
out:
- if (result)
- dev_warn(dev->ctrl.device,
- "Removing after probe failure status: %d\n", result);
- nvme_remove_dead_ctrl(dev);
-}
-
-static void nvme_remove_dead_ctrl_work(struct work_struct *work)
-{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
- struct pci_dev *pdev = to_pci_dev(dev->dev);
-
- if (pci_get_drvdata(pdev))
- device_release_driver(&pdev->dev);
- nvme_put_ctrl(&dev->ctrl);
+ /*
+ * Set state to deleting now to avoid blocking nvme_wait_reset(), which
+ * may be holding this pci_dev's device lock.
+ */
+ dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n",
+ result);
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+ nvme_dev_disable(dev, true);
+ nvme_mark_namespaces_dead(&dev->ctrl);
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
}
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
@@ -3010,6 +2880,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
.flags = NVME_F_METADATA_SUPPORTED,
+ .dev_attr_groups = nvme_pci_dev_attr_groups,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
@@ -3079,29 +2950,22 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0;
}
-static void nvme_async_probe(void *data, async_cookie_t cookie)
+static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
+ const struct pci_device_id *id)
{
- struct nvme_dev *dev = data;
-
- flush_work(&dev->ctrl.reset_work);
- flush_work(&dev->ctrl.scan_work);
- nvme_put_ctrl(&dev->ctrl);
-}
-
-static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
- int node, result = -ENOMEM;
- struct nvme_dev *dev;
unsigned long quirks = id->driver_data;
- size_t alloc_size;
+ int node = dev_to_node(&pdev->dev);
+ struct nvme_dev *dev;
+ int ret = -ENOMEM;
- node = dev_to_node(&pdev->dev);
if (node == NUMA_NO_NODE)
set_dev_node(&pdev->dev, first_memory_node);
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
if (!dev)
- return -ENOMEM;
+ return NULL;
+ INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
+ mutex_init(&dev->shutdown_lock);
dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues;
@@ -3109,25 +2973,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev->queues = kcalloc_node(dev->nr_allocated_queues,
sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues)
- goto free;
+ goto out_free_dev;
dev->dev = get_device(&pdev->dev);
- pci_set_drvdata(pdev, dev);
-
- result = nvme_dev_map(dev);
- if (result)
- goto put_pci;
-
- INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
- INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
- mutex_init(&dev->shutdown_lock);
-
- result = nvme_setup_prp_pools(dev);
- if (result)
- goto unmap;
quirks |= check_vendor_combination_bug(pdev);
-
if (!noacpi && acpi_storage_d3(&pdev->dev)) {
/*
* Some systems use a bios work around to ask for D3 on
@@ -3137,46 +2987,131 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
"platform quirk: setting simple suspend\n");
quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
}
+ ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+ quirks);
+ if (ret)
+ goto out_put_device;
+
+ dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1);
+ dma_set_max_seg_size(&pdev->dev, 0xffffffff);
/*
- * Double check that our mempool alloc size will cover the biggest
- * command we support.
+ * Limit the max command size to prevent iod->sg allocations going
+ * over a single page.
*/
- alloc_size = nvme_pci_iod_alloc_size();
- WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+ dev->ctrl.max_hw_sectors = min_t(u32,
+ NVME_MAX_KB_SZ << 1, dma_max_mapping_size(&pdev->dev) >> 9);
+ dev->ctrl.max_segments = NVME_MAX_SEGS;
- dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
- mempool_kfree,
- (void *) alloc_size,
- GFP_KERNEL, node);
- if (!dev->iod_mempool) {
- result = -ENOMEM;
- goto release_pools;
- }
+ /*
+ * There is no support for SGLs for metadata (yet), so we are limited to
+ * a single integrity segment for the separate metadata pointer.
+ */
+ dev->ctrl.max_integrity_segments = 1;
+ return dev;
+
+out_put_device:
+ put_device(dev->dev);
+ kfree(dev->queues);
+out_free_dev:
+ kfree(dev);
+ return ERR_PTR(ret);
+}
- result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
- quirks);
+static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct nvme_dev *dev;
+ int result = -ENOMEM;
+
+ dev = nvme_pci_alloc_dev(pdev, id);
+ if (!dev)
+ return -ENOMEM;
+
+ result = nvme_dev_map(dev);
if (result)
- goto release_mempool;
+ goto out_uninit_ctrl;
+
+ result = nvme_setup_prp_pools(dev);
+ if (result)
+ goto out_dev_unmap;
+
+ result = nvme_pci_alloc_iod_mempool(dev);
+ if (result)
+ goto out_release_prp_pools;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- nvme_reset_ctrl(&dev->ctrl);
- async_schedule(nvme_async_probe, dev);
+ result = nvme_pci_enable(dev);
+ if (result)
+ goto out_release_iod_mempool;
+
+ result = nvme_alloc_admin_tag_set(&dev->ctrl, &dev->admin_tagset,
+ &nvme_mq_admin_ops, sizeof(struct nvme_iod));
+ if (result)
+ goto out_disable;
+
+ /*
+ * Mark the controller as connecting before sending admin commands to
+ * allow the timeout handler to do the right thing.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller CONNECTING\n");
+ result = -EBUSY;
+ goto out_disable;
+ }
+
+ result = nvme_init_ctrl_finish(&dev->ctrl, false);
+ if (result)
+ goto out_disable;
+
+ nvme_dbbuf_dma_alloc(dev);
+
+ result = nvme_setup_host_mem(dev);
+ if (result < 0)
+ goto out_disable;
+
+ result = nvme_setup_io_queues(dev);
+ if (result)
+ goto out_disable;
+ if (dev->online_queues > 1) {
+ nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops,
+ nvme_pci_nr_maps(dev), sizeof(struct nvme_iod));
+ nvme_dbbuf_set(dev);
+ }
+
+ if (!dev->ctrl.tagset)
+ dev_warn(dev->ctrl.device, "IO queues not created\n");
+
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller live state\n");
+ result = -ENODEV;
+ goto out_disable;
+ }
+
+ pci_set_drvdata(pdev, dev);
+
+ nvme_start_ctrl(&dev->ctrl);
+ nvme_put_ctrl(&dev->ctrl);
return 0;
- release_mempool:
+out_disable:
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+ nvme_dev_disable(dev, true);
+ nvme_free_host_mem(dev);
+ nvme_dev_remove_admin(dev);
+ nvme_dbbuf_dma_free(dev);
+ nvme_free_queues(dev, 0);
+out_release_iod_mempool:
mempool_destroy(dev->iod_mempool);
- release_pools:
+out_release_prp_pools:
nvme_release_prp_pools(dev);
- unmap:
+out_dev_unmap:
nvme_dev_unmap(dev);
- put_pci:
- put_device(dev->dev);
- free:
- kfree(dev->queues);
- kfree(dev);
+out_uninit_ctrl:
+ nvme_uninit_ctrl(&dev->ctrl);
return result;
}
@@ -3208,13 +3143,6 @@ static void nvme_shutdown(struct pci_dev *pdev)
nvme_disable_prepare_reset(dev, true);
}
-static void nvme_remove_attrs(struct nvme_dev *dev)
-{
- if (dev->attrs_added)
- sysfs_remove_group(&dev->ctrl.device->kobj,
- &nvme_pci_attr_group);
-}
-
/*
* The driver's remove may be called on a device in a partially initialized
* state. This function must not have any dependencies on the device state in
@@ -3236,10 +3164,11 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
- nvme_remove_attrs(dev);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
+ nvme_dbbuf_dma_free(dev);
nvme_free_queues(dev, 0);
+ mempool_destroy(dev->iod_mempool);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_uninit_ctrl(&dev->ctrl);
@@ -3576,11 +3505,12 @@ static struct pci_driver nvme_driver = {
.probe = nvme_probe,
.remove = nvme_remove,
.shutdown = nvme_shutdown,
-#ifdef CONFIG_PM_SLEEP
.driver = {
- .pm = &nvme_dev_pm_ops,
- },
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+#ifdef CONFIG_PM_SLEEP
+ .pm = &nvme_dev_pm_ops,
#endif
+ },
.sriov_configure = pci_sriov_configure_simple,
.err_handler = &nvme_err_handler,
};
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6e079abb22ee..bbad26b82b56 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -798,7 +798,9 @@ static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
NVME_RDMA_METADATA_SGL_SIZE;
return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
- &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size);
+ &nvme_rdma_mq_ops,
+ ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
+ cmd_size);
}
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
@@ -846,7 +848,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (new) {
error = nvme_alloc_admin_tag_set(&ctrl->ctrl,
&ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops,
- BLK_MQ_F_NO_SCHED,
sizeof(struct nvme_rdma_request) +
NVME_RDMA_DATA_SGL_SIZE);
if (error)
@@ -869,16 +870,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
else
ctrl->ctrl.max_integrity_segments = 0;
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
- error = nvme_init_ctrl_finish(&ctrl->ctrl);
+ error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
if (error)
goto out_quiesce_queue;
return 0;
out_quiesce_queue:
- nvme_stop_admin_queue(&ctrl->ctrl);
+ nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
out_stop_queue:
nvme_rdma_stop_queue(&ctrl->queues[0]);
@@ -922,7 +923,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
goto out_cleanup_tagset;
if (!new) {
- nvme_start_queues(&ctrl->ctrl);
+ nvme_unquiesce_io_queues(&ctrl->ctrl);
if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
/*
* If we timed out waiting for freeze we are likely to
@@ -949,7 +950,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
return 0;
out_wait_freeze_timed_out:
- nvme_stop_queues(&ctrl->ctrl);
+ nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
out_cleanup_tagset:
@@ -964,12 +965,12 @@ out_free_io_queues:
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
- nvme_stop_admin_queue(&ctrl->ctrl);
+ nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_cancel_admin_tagset(&ctrl->ctrl);
if (remove) {
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
nvme_remove_admin_tag_set(&ctrl->ctrl);
}
nvme_rdma_destroy_admin_queue(ctrl);
@@ -980,12 +981,12 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
{
if (ctrl->ctrl.queue_count > 1) {
nvme_start_freeze(&ctrl->ctrl);
- nvme_stop_queues(&ctrl->ctrl);
+ nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
nvme_cancel_tagset(&ctrl->ctrl);
if (remove) {
- nvme_start_queues(&ctrl->ctrl);
+ nvme_unquiesce_io_queues(&ctrl->ctrl);
nvme_remove_io_tag_set(&ctrl->ctrl);
}
nvme_rdma_free_io_queues(ctrl);
@@ -1106,7 +1107,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
destroy_io:
if (ctrl->ctrl.queue_count > 1) {
- nvme_stop_queues(&ctrl->ctrl);
+ nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
nvme_cancel_tagset(&ctrl->ctrl);
@@ -1115,7 +1116,7 @@ destroy_io:
nvme_rdma_free_io_queues(ctrl);
}
destroy_admin:
- nvme_stop_admin_queue(&ctrl->ctrl);
+ nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_cancel_admin_tagset(&ctrl->ctrl);
@@ -1153,13 +1154,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work);
- nvme_auth_stop(&ctrl->ctrl);
nvme_stop_keep_alive(&ctrl->ctrl);
flush_work(&ctrl->ctrl.async_event_work);
nvme_rdma_teardown_io_queues(ctrl, false);
- nvme_start_queues(&ctrl->ctrl);
+ nvme_unquiesce_io_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false);
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
+ nvme_auth_stop(&ctrl->ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
@@ -2040,7 +2041,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret)
goto unmap_qe;
- blk_mq_start_request(rq);
+ nvme_start_request(rq);
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
queue->pi_support &&
@@ -2207,11 +2208,8 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
nvme_rdma_teardown_io_queues(ctrl, shutdown);
- nvme_stop_admin_queue(&ctrl->ctrl);
- if (shutdown)
- nvme_shutdown_ctrl(&ctrl->ctrl);
- else
- nvme_disable_ctrl(&ctrl->ctrl);
+ nvme_quiesce_admin_queue(&ctrl->ctrl);
+ nvme_disable_ctrl(&ctrl->ctrl, shutdown);
nvme_rdma_teardown_admin_queue(ctrl, shutdown);
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3d13f6f08388..b69b89166b6b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1867,7 +1867,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
if (new) {
ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
&nvme_tcp_mq_ops,
- BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING,
+ ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
sizeof(struct nvme_tcp_request));
if (ret)
goto out_free_io_queues;
@@ -1884,7 +1884,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
goto out_cleanup_connect_q;
if (!new) {
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
/*
* If we timed out waiting for freeze we are likely to
@@ -1911,7 +1911,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
return 0;
out_wait_freeze_timed_out:
- nvme_stop_queues(ctrl);
+ nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
out_cleanup_connect_q:
@@ -1942,7 +1942,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (new) {
error = nvme_alloc_admin_tag_set(ctrl,
&to_tcp_ctrl(ctrl)->admin_tag_set,
- &nvme_tcp_admin_mq_ops, BLK_MQ_F_BLOCKING,
+ &nvme_tcp_admin_mq_ops,
sizeof(struct nvme_tcp_request));
if (error)
goto out_free_queue;
@@ -1956,16 +1956,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (error)
goto out_stop_queue;
- nvme_start_admin_queue(ctrl);
+ nvme_unquiesce_admin_queue(ctrl);
- error = nvme_init_ctrl_finish(ctrl);
+ error = nvme_init_ctrl_finish(ctrl, false);
if (error)
goto out_quiesce_queue;
return 0;
out_quiesce_queue:
- nvme_stop_admin_queue(ctrl);
+ nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
out_stop_queue:
nvme_tcp_stop_queue(ctrl, 0);
@@ -1981,12 +1981,12 @@ out_free_queue:
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove)
{
- nvme_stop_admin_queue(ctrl);
+ nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
nvme_cancel_admin_tagset(ctrl);
if (remove)
- nvme_start_admin_queue(ctrl);
+ nvme_unquiesce_admin_queue(ctrl);
nvme_tcp_destroy_admin_queue(ctrl, remove);
}
@@ -1995,14 +1995,14 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
{
if (ctrl->queue_count <= 1)
return;
- nvme_stop_admin_queue(ctrl);
+ nvme_quiesce_admin_queue(ctrl);
nvme_start_freeze(ctrl);
- nvme_stop_queues(ctrl);
+ nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
nvme_cancel_tagset(ctrl);
if (remove)
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove);
}
@@ -2083,14 +2083,14 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
destroy_io:
if (ctrl->queue_count > 1) {
- nvme_stop_queues(ctrl);
+ nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
nvme_cancel_tagset(ctrl);
nvme_tcp_destroy_io_queues(ctrl, new);
}
destroy_admin:
- nvme_stop_admin_queue(ctrl);
+ nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
nvme_cancel_admin_tagset(ctrl);
@@ -2128,14 +2128,14 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_tcp_ctrl, err_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
- nvme_auth_stop(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
nvme_tcp_teardown_io_queues(ctrl, false);
/* unquiesce to fail fast pending requests */
- nvme_start_queues(ctrl);
+ nvme_unquiesce_io_queues(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false);
- nvme_start_admin_queue(ctrl);
+ nvme_unquiesce_admin_queue(ctrl);
+ nvme_auth_stop(ctrl);
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
@@ -2150,11 +2150,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
nvme_tcp_teardown_io_queues(ctrl, shutdown);
- nvme_stop_admin_queue(ctrl);
- if (shutdown)
- nvme_shutdown_ctrl(ctrl);
- else
- nvme_disable_ctrl(ctrl);
+ nvme_quiesce_admin_queue(ctrl);
+ nvme_disable_ctrl(ctrl, shutdown);
nvme_tcp_teardown_admin_queue(ctrl, shutdown);
}
@@ -2414,7 +2411,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(ret))
return ret;
- blk_mq_start_request(rq);
+ nvme_start_request(rq);
nvme_tcp_queue_request(req, true, bd->last);
@@ -2523,7 +2520,7 @@ static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.name = "tcp",
.module = THIS_MODULE,
- .flags = NVME_F_FABRICS,
+ .flags = NVME_F_FABRICS | NVME_F_BLOCKING,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index c8a061ce3ee5..53a004ea320c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -370,7 +370,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
strlen(subsys->model_number), ' ');
memcpy_and_pad(id->fr, sizeof(id->fr),
- UTS_RELEASE, strlen(UTS_RELEASE), ' ');
+ subsys->firmware_rev, strlen(subsys->firmware_rev), ' ');
+
+ put_unaligned_le24(subsys->ieee_oui, id->ieee);
id->rab = 6;
@@ -379,11 +381,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
else
id->cntrltype = NVME_CTRL_IO;
- /*
- * XXX: figure out how we can assign a IEEE OUI, but until then
- * the safest is to leave it as zeroes.
- */
-
/* we support multiple ports, multiples hosts and ANA: */
id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL |
NVME_CTRL_CMIC_ANA;
@@ -564,7 +561,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
}
if (req->ns->readonly)
- id->nsattr |= (1 << 0);
+ id->nsattr |= NVME_NS_ATTR_RO;
done:
if (!status)
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 6a2816f3b4e8..907143870da5 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -4,6 +4,7 @@
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kstrtox.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -267,7 +268,7 @@ static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
bool val;
- if (strtobool(page, &val))
+ if (kstrtobool(page, &val))
return -EINVAL;
if (nvmet_is_port_enabled(port, __func__))
@@ -532,7 +533,7 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
bool enable;
int ret = 0;
- if (strtobool(page, &enable))
+ if (kstrtobool(page, &enable))
return -EINVAL;
if (enable)
@@ -556,7 +557,7 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
struct nvmet_ns *ns = to_nvmet_ns(item);
bool val;
- if (strtobool(page, &val))
+ if (kstrtobool(page, &val))
return -EINVAL;
mutex_lock(&ns->subsys->lock);
@@ -579,7 +580,7 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
struct nvmet_ns *ns = to_nvmet_ns(item);
bool val;
- if (strtobool(page, &val))
+ if (kstrtobool(page, &val))
return -EINVAL;
if (!val)
@@ -728,7 +729,7 @@ static ssize_t nvmet_passthru_enable_store(struct config_item *item,
bool enable;
int ret = 0;
- if (strtobool(page, &enable))
+ if (kstrtobool(page, &enable))
return -EINVAL;
if (enable)
@@ -995,7 +996,7 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item,
bool allow_any_host;
int ret = 0;
- if (strtobool(page, &allow_any_host))
+ if (kstrtobool(page, &allow_any_host))
return -EINVAL;
down_write(&nvmet_config_sem);
@@ -1262,6 +1263,116 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+static ssize_t nvmet_subsys_attr_ieee_oui_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+
+ return sysfs_emit(page, "0x%06x\n", subsys->ieee_oui);
+}
+
+static ssize_t nvmet_subsys_attr_ieee_oui_store_locked(struct nvmet_subsys *subsys,
+ const char *page, size_t count)
+{
+ uint32_t val = 0;
+ int ret;
+
+ if (subsys->subsys_discovered) {
+ pr_err("Can't set IEEE OUI. 0x%06x is already assigned\n",
+ subsys->ieee_oui);
+ return -EINVAL;
+ }
+
+ ret = kstrtou32(page, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ if (val >= 0x1000000)
+ return -EINVAL;
+
+ subsys->ieee_oui = val;
+
+ return count;
+}
+
+static ssize_t nvmet_subsys_attr_ieee_oui_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ ssize_t ret;
+
+ down_write(&nvmet_config_sem);
+ mutex_lock(&subsys->lock);
+ ret = nvmet_subsys_attr_ieee_oui_store_locked(subsys, page, count);
+ mutex_unlock(&subsys->lock);
+ up_write(&nvmet_config_sem);
+
+ return ret;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_ieee_oui);
+
+static ssize_t nvmet_subsys_attr_firmware_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+
+ return sysfs_emit(page, "%s\n", subsys->firmware_rev);
+}
+
+static ssize_t nvmet_subsys_attr_firmware_store_locked(struct nvmet_subsys *subsys,
+ const char *page, size_t count)
+{
+ int pos = 0, len;
+ char *val;
+
+ if (subsys->subsys_discovered) {
+ pr_err("Can't set firmware revision. %s is already assigned\n",
+ subsys->firmware_rev);
+ return -EINVAL;
+ }
+
+ len = strcspn(page, "\n");
+ if (!len)
+ return -EINVAL;
+
+ if (len > NVMET_FR_MAX_SIZE) {
+ pr_err("Firmware revision size can not exceed %d Bytes\n",
+ NVMET_FR_MAX_SIZE);
+ return -EINVAL;
+ }
+
+ for (pos = 0; pos < len; pos++) {
+ if (!nvmet_is_ascii(page[pos]))
+ return -EINVAL;
+ }
+
+ val = kmemdup_nul(page, len, GFP_KERNEL);
+ if (!val)
+ return -ENOMEM;
+
+ kfree(subsys->firmware_rev);
+
+ subsys->firmware_rev = val;
+
+ return count;
+}
+
+static ssize_t nvmet_subsys_attr_firmware_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ ssize_t ret;
+
+ down_write(&nvmet_config_sem);
+ mutex_lock(&subsys->lock);
+ ret = nvmet_subsys_attr_firmware_store_locked(subsys, page, count);
+ mutex_unlock(&subsys->lock);
+ up_write(&nvmet_config_sem);
+
+ return ret;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_firmware);
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
char *page)
@@ -1275,7 +1386,7 @@ static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
struct nvmet_subsys *subsys = to_subsys(item);
bool pi_enable;
- if (strtobool(page, &pi_enable))
+ if (kstrtobool(page, &pi_enable))
return -EINVAL;
subsys->pi_support = pi_enable;
@@ -1293,6 +1404,8 @@ static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item,
const char *page, size_t cnt)
{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ struct nvmet_ctrl *ctrl;
u16 qid_max;
if (sscanf(page, "%hu\n", &qid_max) != 1)
@@ -1302,8 +1415,13 @@ static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item,
return -EINVAL;
down_write(&nvmet_config_sem);
- to_subsys(item)->max_qid = qid_max;
+ subsys->max_qid = qid_max;
+
+ /* Force reconnect */
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ ctrl->ops->delete_ctrl(ctrl);
up_write(&nvmet_config_sem);
+
return cnt;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max);
@@ -1316,6 +1434,8 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
&nvmet_subsys_attr_attr_qid_max,
+ &nvmet_subsys_attr_attr_ieee_oui,
+ &nvmet_subsys_attr_attr_firmware,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_subsys_attr_attr_pi_enable,
#endif
@@ -1395,7 +1515,7 @@ static ssize_t nvmet_referral_enable_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
bool enable;
- if (strtobool(page, &enable))
+ if (kstrtobool(page, &enable))
goto inval;
if (enable)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index aecb5853f8da..f66ed13d7c11 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -10,11 +10,14 @@
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>
+#include <generated/utsrelease.h>
+
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "nvmet.h"
+struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -695,11 +698,10 @@ static void nvmet_update_sq_head(struct nvmet_req *req)
if (req->sq->size) {
u32 old_sqhd, new_sqhd;
+ old_sqhd = READ_ONCE(req->sq->sqhd);
do {
- old_sqhd = req->sq->sqhd;
new_sqhd = (old_sqhd + 1) % req->sq->size;
- } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
- old_sqhd);
+ } while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
}
req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}
@@ -1561,6 +1563,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
goto free_subsys;
}
+ subsys->ieee_oui = 0;
+
+ subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL);
+ if (!subsys->firmware_rev) {
+ ret = -ENOMEM;
+ goto free_mn;
+ }
+
switch (type) {
case NVME_NQN_NVME:
subsys->max_qid = NVMET_NR_QUEUES;
@@ -1572,14 +1582,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
default:
pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
ret = -EINVAL;
- goto free_mn;
+ goto free_fr;
}
subsys->type = type;
subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
GFP_KERNEL);
if (!subsys->subsysnqn) {
ret = -ENOMEM;
- goto free_mn;
+ goto free_fr;
}
subsys->cntlid_min = NVME_CNTLID_MIN;
subsys->cntlid_max = NVME_CNTLID_MAX;
@@ -1592,6 +1602,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
return subsys;
+free_fr:
+ kfree(subsys->firmware_rev);
free_mn:
kfree(subsys->model_number);
free_subsys:
@@ -1611,6 +1623,7 @@ static void nvmet_subsys_free(struct kref *ref)
kfree(subsys->subsysnqn);
kfree(subsys->model_number);
+ kfree(subsys->firmware_rev);
kfree(subsys);
}
@@ -1631,26 +1644,28 @@ void nvmet_subsys_put(struct nvmet_subsys *subsys)
static int __init nvmet_init(void)
{
- int error;
+ int error = -ENOMEM;
nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
+ nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
+ NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!nvmet_bvec_cache)
+ return -ENOMEM;
+
zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
if (!zbd_wq)
- return -ENOMEM;
+ goto out_destroy_bvec_cache;
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
WQ_MEM_RECLAIM, 0);
- if (!buffered_io_wq) {
- error = -ENOMEM;
+ if (!buffered_io_wq)
goto out_free_zbd_work_queue;
- }
nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0);
- if (!nvmet_wq) {
- error = -ENOMEM;
+ if (!nvmet_wq)
goto out_free_buffered_work_queue;
- }
error = nvmet_init_discovery();
if (error)
@@ -1669,6 +1684,8 @@ out_free_buffered_work_queue:
destroy_workqueue(buffered_io_wq);
out_free_zbd_work_queue:
destroy_workqueue(zbd_wq);
+out_destroy_bvec_cache:
+ kmem_cache_destroy(nvmet_bvec_cache);
return error;
}
@@ -1680,6 +1697,7 @@ static void __exit nvmet_exit(void)
destroy_workqueue(nvmet_wq);
destroy_workqueue(buffered_io_wq);
destroy_workqueue(zbd_wq);
+ kmem_cache_destroy(nvmet_bvec_cache);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 946ad0240ee5..871c4f32f443 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -11,7 +11,6 @@
#include <linux/fs.h>
#include "nvmet.h"
-#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
@@ -26,8 +25,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
flush_workqueue(buffered_io_wq);
mempool_destroy(ns->bvec_pool);
ns->bvec_pool = NULL;
- kmem_cache_destroy(ns->bvec_cache);
- ns->bvec_cache = NULL;
fput(ns->file);
ns->file = NULL;
}
@@ -59,16 +56,8 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
ns->blksize_shift = min_t(u8,
file_inode(ns->file)->i_blkbits, 12);
- ns->bvec_cache = kmem_cache_create("nvmet-bvec",
- NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!ns->bvec_cache) {
- ret = -ENOMEM;
- goto err;
- }
-
ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
- mempool_free_slab, ns->bvec_cache);
+ mempool_free_slab, nvmet_bvec_cache);
if (!ns->bvec_pool) {
ret = -ENOMEM;
@@ -77,9 +66,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return ret;
err:
+ fput(ns->file);
+ ns->file = NULL;
ns->size = 0;
ns->blksize_shift = 0;
- nvmet_file_ns_disable(ns);
return ret;
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index b45fe3adf015..f2d24b2d992f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -145,7 +145,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret)
return ret;
- blk_mq_start_request(req);
+ nvme_start_request(req);
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
iod->req.port = queue->ctrl->port;
if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
@@ -353,7 +353,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->ctrl.queue_count = 1;
error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
- &nvme_loop_admin_mq_ops, BLK_MQ_F_NO_SCHED,
+ &nvme_loop_admin_mq_ops,
sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist));
if (error)
@@ -375,9 +375,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->ctrl.max_hw_sectors =
(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
- nvme_start_admin_queue(&ctrl->ctrl);
+ nvme_unquiesce_admin_queue(&ctrl->ctrl);
- error = nvme_init_ctrl_finish(&ctrl->ctrl);
+ error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
if (error)
goto out_cleanup_tagset;
@@ -394,14 +394,14 @@ out_free_sq:
static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
{
if (ctrl->ctrl.queue_count > 1) {
- nvme_stop_queues(&ctrl->ctrl);
+ nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_cancel_tagset(&ctrl->ctrl);
nvme_loop_destroy_io_queues(ctrl);
}
- nvme_stop_admin_queue(&ctrl->ctrl);
+ nvme_quiesce_admin_queue(&ctrl->ctrl);
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
- nvme_shutdown_ctrl(&ctrl->ctrl);
+ nvme_disable_ctrl(&ctrl->ctrl, true);
nvme_cancel_admin_tagset(&ctrl->ctrl);
nvme_loop_destroy_admin_queue(ctrl);
@@ -494,7 +494,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
return ret;
ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
- &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE,
+ &nvme_loop_mq_ops, 1,
sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist));
if (ret)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index dfe3894205aa..89bedfcd974c 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -29,6 +29,7 @@
#define NVMET_DEFAULT_CTRL_MODEL "Linux"
#define NVMET_MN_MAX_SIZE 40
#define NVMET_SN_MAX_SIZE 20
+#define NVMET_FR_MAX_SIZE 8
/*
* Supported optional AENs:
@@ -77,7 +78,6 @@ struct nvmet_ns {
struct completion disable_done;
mempool_t *bvec_pool;
- struct kmem_cache *bvec_cache;
int use_p2pmem;
struct pci_dev *p2p_dev;
@@ -264,6 +264,8 @@ struct nvmet_subsys {
struct config_group allowed_hosts_group;
char *model_number;
+ u32 ieee_oui;
+ char *firmware_rev;
#ifdef CONFIG_NVME_TARGET_PASSTHRU
struct nvme_ctrl *passthru_ctrl;
@@ -393,6 +395,8 @@ struct nvmet_req {
u64 error_slba;
};
+#define NVMET_MAX_MPOOL_BVEC 16
+extern struct kmem_cache *nvmet_bvec_cache;
extern struct workqueue_struct *buffered_io_wq;
extern struct workqueue_struct *zbd_wq;
extern struct workqueue_struct *nvmet_wq;
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 5565f67d6537..86812d2073ea 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -89,6 +89,90 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(published);
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, struct vm_area_struct *vma)
+{
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+ size_t len = vma->vm_end - vma->vm_start;
+ struct pci_p2pdma *p2pdma;
+ struct percpu_ref *ref;
+ unsigned long vaddr;
+ void *kaddr;
+ int ret;
+
+ /* prevent private mappings from being established */
+ if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+ pci_info_ratelimited(pdev,
+ "%s: fail, attempted private mapping\n",
+ current->comm);
+ return -EINVAL;
+ }
+
+ if (vma->vm_pgoff) {
+ pci_info_ratelimited(pdev,
+ "%s: fail, attempted mapping with non-zero offset\n",
+ current->comm);
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ p2pdma = rcu_dereference(pdev->p2pdma);
+ if (!p2pdma) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
+ if (!kaddr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * vm_insert_page() can sleep, so a reference is taken to mapping
+ * such that rcu_read_unlock() can be done before inserting the
+ * pages
+ */
+ if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
+ ret = -ENODEV;
+ goto out_free_mem;
+ }
+ rcu_read_unlock();
+
+ for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+ ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr));
+ if (ret) {
+ gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+ return ret;
+ }
+ percpu_ref_get(ref);
+ put_page(virt_to_page(kaddr));
+ kaddr += PAGE_SIZE;
+ len -= PAGE_SIZE;
+ }
+
+ percpu_ref_put(ref);
+
+ return 0;
+out_free_mem:
+ gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+static struct bin_attribute p2pmem_alloc_attr = {
+ .attr = { .name = "allocate", .mode = 0660 },
+ .mmap = p2pmem_alloc_mmap,
+ /*
+ * Some places where we want to call mmap (ie. python) will check
+ * that the file size is greater than the mmap size before allowing
+ * the mmap to continue. To work around this, just set the size
+ * to be very large.
+ */
+ .size = SZ_1T,
+};
+
static struct attribute *p2pmem_attrs[] = {
&dev_attr_size.attr,
&dev_attr_available.attr,
@@ -96,11 +180,32 @@ static struct attribute *p2pmem_attrs[] = {
NULL,
};
+static struct bin_attribute *p2pmem_bin_attrs[] = {
+ &p2pmem_alloc_attr,
+ NULL,
+};
+
static const struct attribute_group p2pmem_group = {
.attrs = p2pmem_attrs,
+ .bin_attrs = p2pmem_bin_attrs,
.name = "p2pmem",
};
+static void p2pdma_page_free(struct page *page)
+{
+ struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
+ struct percpu_ref *ref;
+
+ gen_pool_free_owner(pgmap->provider->p2pdma->pool,
+ (uintptr_t)page_to_virt(page), PAGE_SIZE,
+ (void **)&ref);
+ percpu_ref_put(ref);
+}
+
+static const struct dev_pagemap_ops p2pdma_pgmap_ops = {
+ .page_free = p2pdma_page_free,
+};
+
static void pci_p2pdma_release(void *data)
{
struct pci_dev *pdev = data;
@@ -152,6 +257,19 @@ out:
return error;
}
+static void pci_p2pdma_unmap_mappings(void *data)
+{
+ struct pci_dev *pdev = data;
+
+ /*
+ * Removing the alloc attribute from sysfs will call
+ * unmap_mapping_range() on the inode, teardown any existing userspace
+ * mappings and prevent new ones from being created.
+ */
+ sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr,
+ p2pmem_group.name);
+}
+
/**
* pci_p2pdma_add_resource - add memory for use as p2p memory
* @pdev: the device to add the memory to
@@ -198,6 +316,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
pgmap->range.end = pgmap->range.start + size - 1;
pgmap->nr_range = 1;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+ pgmap->ops = &p2pdma_pgmap_ops;
p2p_pgmap->provider = pdev;
p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
@@ -209,6 +328,11 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
goto pgmap_free;
}
+ error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings,
+ pdev);
+ if (error)
+ goto pages_free;
+
p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 8b89fab7c420..249757ddd8fe 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2735,7 +2735,7 @@ static void scsi_stop_queue(struct scsi_device *sdev, bool nowait)
blk_mq_quiesce_queue(sdev->request_queue);
} else {
if (!nowait)
- blk_mq_wait_quiesce_done(sdev->request_queue);
+ blk_mq_wait_quiesce_done(sdev->request_queue->tag_set);
}
}
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 5d27f5196de6..0a95fa787fdf 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -344,7 +344,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
sdev->request_queue = q;
q->queuedata = sdev;
__scsi_init_queue(sdev->host, q);
- WARN_ON_ONCE(!blk_get_queue(q));
depth = sdev->host->cmd_per_lun ?: 1;
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index b1f59a5fe632..d2b11d5b91ce 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -9544,6 +9544,7 @@ void ufshcd_remove(struct ufs_hba *hba)
ufshpb_remove(hba);
ufs_sysfs_remove_nodes(hba->dev);
blk_mq_destroy_queue(hba->tmf_queue);
+ blk_put_queue(hba->tmf_queue);
blk_mq_free_tag_set(&hba->tmf_tag_set);
scsi_remove_host(hba->host);
/* disable interrupts */
@@ -9840,6 +9841,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
free_tmf_queue:
blk_mq_destroy_queue(hba->tmf_queue);
+ blk_put_queue(hba->tmf_queue);
free_tmf_tag_set:
blk_mq_free_tag_set(&hba->tmf_tag_set);
out_remove_scsi_host: