aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig47
-rw-r--r--drivers/block/Makefile5
-rw-r--r--drivers/block/amiflop.c5
-rw-r--r--drivers/block/aoe/aoe.h2
-rw-r--r--drivers/block/aoe/aoeblk.c20
-rw-r--r--drivers/block/aoe/aoecmd.c9
-rw-r--r--drivers/block/aoe/aoedev.c6
-rw-r--r--drivers/block/aoe/aoemain.c10
-rw-r--r--drivers/block/ataflop.c22
-rw-r--r--drivers/block/brd.c82
-rw-r--r--drivers/block/drbd/drbd_actlog.c14
-rw-r--r--drivers/block/drbd/drbd_bitmap.c54
-rw-r--r--drivers/block/drbd/drbd_int.h18
-rw-r--r--drivers/block/drbd/drbd_main.c113
-rw-r--r--drivers/block/drbd/drbd_nl.c242
-rw-r--r--drivers/block/drbd/drbd_protocol.h6
-rw-r--r--drivers/block/drbd/drbd_receiver.c141
-rw-r--r--drivers/block/drbd/drbd_req.c72
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_state.c21
-rw-r--r--drivers/block/drbd/drbd_state_change.h8
-rw-r--r--drivers/block/drbd/drbd_worker.c19
-rw-r--r--drivers/block/floppy.c92
-rw-r--r--drivers/block/loop.c500
-rw-r--r--drivers/block/loop.h72
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c414
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h6
-rw-r--r--drivers/block/n64cart.c6
-rw-r--r--drivers/block/nbd.c175
-rw-r--r--drivers/block/null_blk/main.c297
-rw-r--r--drivers/block/null_blk/null_blk.h29
-rw-r--r--drivers/block/null_blk/trace.h4
-rw-r--r--drivers/block/null_blk/zoned.c25
-rw-r--r--drivers/block/paride/bpck.c1
-rw-r--r--drivers/block/paride/pcd.c9
-rw-r--r--drivers/block/paride/pd.c12
-rw-r--r--drivers/block/paride/pf.c9
-rw-r--r--drivers/block/pktcdvd.c387
-rw-r--r--drivers/block/ps3disk.c4
-rw-r--r--drivers/block/ps3vram.c9
-rw-r--r--drivers/block/rbd.c39
-rw-r--r--drivers/block/rnbd/Makefile6
-rw-r--r--drivers/block/rnbd/rnbd-clt-sysfs.c5
-rw-r--r--drivers/block/rnbd/rnbd-clt.c270
-rw-r--r--drivers/block/rnbd/rnbd-clt.h22
-rw-r--r--drivers/block/rnbd/rnbd-proto.h17
-rw-r--r--drivers/block/rnbd/rnbd-srv-dev.c103
-rw-r--r--drivers/block/rnbd/rnbd-srv-dev.h83
-rw-r--r--drivers/block/rnbd/rnbd-srv-sysfs.c6
-rw-r--r--drivers/block/rnbd/rnbd-srv-trace.c17
-rw-r--r--drivers/block/rnbd/rnbd-srv-trace.h207
-rw-r--r--drivers/block/rnbd/rnbd-srv.c215
-rw-r--r--drivers/block/rnbd/rnbd-srv.h12
-rw-r--r--drivers/block/rsxx/Makefile3
-rw-r--r--drivers/block/rsxx/config.c197
-rw-r--r--drivers/block/rsxx/core.c1126
-rw-r--r--drivers/block/rsxx/cregs.c789
-rw-r--r--drivers/block/rsxx/dev.c306
-rw-r--r--drivers/block/rsxx/dma.c1085
-rw-r--r--drivers/block/rsxx/rsxx.h33
-rw-r--r--drivers/block/rsxx/rsxx_cfg.h58
-rw-r--r--drivers/block/rsxx/rsxx_priv.h418
-rw-r--r--drivers/block/sunvdc.c24
-rw-r--r--drivers/block/swim.c3
-rw-r--r--drivers/block/swim3.c4
-rw-r--r--drivers/block/sx8.c1582
-rw-r--r--drivers/block/ublk_drv.c2113
-rw-r--r--drivers/block/virtio_blk.c412
-rw-r--r--drivers/block/xen-blkback/blkback.c48
-rw-r--r--drivers/block/xen-blkback/common.h3
-rw-r--r--drivers/block/xen-blkback/xenbus.c41
-rw-r--r--drivers/block/xen-blkfront.c271
-rw-r--r--drivers/block/z2ram.c4
-rw-r--r--drivers/block/zram/Kconfig3
-rw-r--r--drivers/block/zram/zcomp.c11
-rw-r--r--drivers/block/zram/zram_drv.c165
-rw-r--r--drivers/block/zram/zram_drv.h15
77 files changed, 4533 insertions, 8152 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 2a51dfb09c8f..a41145d52de9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -33,6 +33,22 @@ config BLK_DEV_FD
To compile this driver as a module, choose M here: the
module will be called floppy.
+config BLK_DEV_FD_RAWCMD
+ bool "Support for raw floppy disk commands (DEPRECATED)"
+ depends on BLK_DEV_FD
+ help
+ If you want to use actual physical floppies and expect to do
+ special low-level hardware accesses to them (access and use
+ non-standard formats, for example), then enable this.
+
+ Note that the code enabled by this option is rarely used and
+ might be unstable or insecure, and distros should not enable it.
+
+ Note: FDRAWCMD is deprecated and will be removed from the kernel
+ in the near future.
+
+ If unsure, say N.
+
config AMIGA_FLOPPY
tristate "Amiga floppy support"
depends on AMIGA
@@ -232,15 +248,6 @@ config BLK_DEV_NBD
If unsure, say N.
-config BLK_DEV_SX8
- tristate "Promise SATA SX8 support"
- depends on PCI
- help
- Saying Y or M here will enable support for the
- Promise SATA SX8 controllers.
-
- Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
-
config BLK_DEV_RAM
tristate "RAM block device support"
help
@@ -392,16 +399,20 @@ config BLK_DEV_RBD
If unsure, say N.
-config BLK_DEV_RSXX
- tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
- depends on PCI
- select CRC32
+config BLK_DEV_UBLK
+ tristate "Userspace block driver (Experimental)"
+ select IO_URING
help
- Device driver for IBM's high speed PCIe SSD
- storage device: Flash Adapter 900GB Full Height.
-
- To compile this driver as a module, choose M here: the
- module will be called rsxx.
+ io_uring based userspace block driver. Together with ublk server, ublk
+ has been working well, but interface with userspace or command data
+ definition isn't finalized yet, and might change according to future
+ requirement, so mark is as experimental now.
+
+ Say Y if you want to get better performance because task_work_add()
+ can be used in IO path for replacing io_uring cmd, which will become
+ shared between IO tasks and ubq daemon, meantime task_work_add() can
+ can handle batch more effectively, but task_work_add() isn't exported
+ for module, so ublk has to be built to kernel.
source "drivers/block/rnbd/Kconfig"
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 11a74f17c9ad..101612cba303 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -26,18 +26,17 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
-obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
-
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
-obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_ZRAM) += zram/
obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/
+obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o
+
swim_mod-y := swim.o swim_asm.o
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index bf5c124c5452..4c8b2ba579ee 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1505,7 +1505,7 @@ static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
- struct amiga_floppy_struct *floppy = rq->rq_disk->private_data;
+ struct amiga_floppy_struct *floppy = rq->q->disk->private_data;
blk_status_t err;
if (!spin_trylock_irq(&amiflop_lock))
@@ -1790,6 +1790,7 @@ static int fd_alloc_disk(int drive, int system)
disk->first_minor = drive + system;
disk->minors = 1;
disk->fops = &floppy_fops;
+ disk->flags |= GENHD_FL_NO_PART;
disk->events = DISK_EVENT_MEDIA_CHANGE;
if (system)
sprintf(disk->disk_name, "fd%d_msdos", drive);
@@ -1801,7 +1802,7 @@ static int fd_alloc_disk(int drive, int system)
unit[drive].gendisk[system] = disk;
err = add_disk(disk);
if (err)
- blk_cleanup_disk(disk);
+ put_disk(disk);
return err;
}
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 84d0fcebd6af..749ae1246f4c 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -244,3 +244,5 @@ void aoenet_exit(void);
void aoenet_xmit(struct sk_buff_head *);
int is_aoe_netif(struct net_device *ifp);
int set_aoe_iflist(const char __user *str, size_t size);
+
+extern struct workqueue_struct *aoe_wq;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 52484bcdedb9..128722cf6c3c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -12,7 +12,6 @@
#include <linux/ioctl.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
-#include <linux/genhd.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>
@@ -109,7 +108,7 @@ static ssize_t aoedisk_show_payload(struct device *dev,
return sysfs_emit(page, "%lu\n", d->maxbcnt);
}
-static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
+static int aoe_debugfs_show(struct seq_file *s, void *ignored)
{
struct aoedev *d;
struct aoetgt **t, **te;
@@ -152,11 +151,7 @@ static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
return 0;
}
-
-static int aoe_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, aoedisk_debugfs_show, inode->i_private);
-}
+DEFINE_SHOW_ATTRIBUTE(aoe_debugfs);
static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
@@ -185,13 +180,6 @@ static const struct attribute_group *aoe_attr_groups[] = {
NULL,
};
-static const struct file_operations aoe_debugfs_fops = {
- .open = aoe_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void
aoedisk_add_debugfs(struct aoedev *d)
{
@@ -428,7 +416,7 @@ aoeblk_gdalloc(void *vp)
return;
out_disk_cleanup:
- blk_cleanup_disk(gd);
+ put_disk(gd);
err_tagset:
blk_mq_free_tag_set(set);
err_mempool:
@@ -436,7 +424,7 @@ err_mempool:
err:
spin_lock_irqsave(&d->lock, flags);
d->flags &= ~DEVFL_GD_NOW;
- schedule_work(&d->work);
+ queue_work(aoe_wq, &d->work);
spin_unlock_irqrestore(&d->lock, flags);
}
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 588889bea7c3..d7317425be51 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -10,7 +10,6 @@
#include <linux/blk-mq.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#include <linux/genhd.h>
#include <linux/moduleparam.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
@@ -122,7 +121,7 @@ newtag(struct aoedev *d)
register ulong n;
n = jiffies & 0xffff;
- return n |= (++d->lasttag & 0x7fff) << 16;
+ return n | (++d->lasttag & 0x7fff) << 16;
}
static u32
@@ -969,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
d->flags |= DEVFL_NEWSIZE;
else
d->flags |= DEVFL_GDALLOC;
- schedule_work(&d->work);
+ queue_work(aoe_wq, &d->work);
}
static void
@@ -1019,9 +1018,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
iter.bi_size = cnt;
__bio_for_each_segment(bv, bio, iter, iter) {
- char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+ char *p = bvec_kmap_local(&bv);
skb_copy_bits(skb, soff, p, bv.bv_len);
- kunmap_atomic(p);
+ kunmap_local(p);
soff += bv.bv_len;
}
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index c5753c6bfe80..3523dd82d7a0 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -277,7 +277,7 @@ freedev(struct aoedev *d)
if (d->gd) {
aoedisk_rm_debugfs(d);
del_gendisk(d->gd);
- blk_cleanup_disk(d->gd);
+ put_disk(d->gd);
blk_mq_free_tag_set(&d->tag_set);
}
t = d->targets;
@@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
specified = 1;
}
- flush_scheduled_work();
+ flush_workqueue(aoe_wq);
/* pass one: do aoedev_downdev, which might sleep */
restart1:
spin_lock_irqsave(&devlist_lock, flags);
@@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
void
aoedev_exit(void)
{
- flush_scheduled_work();
+ flush_workqueue(aoe_wq);
flush(NULL, 0, EXITING);
}
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 1e4e2971171c..6238c4c87cfc 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
MODULE_VERSION(VERSION);
static struct timer_list timer;
+struct workqueue_struct *aoe_wq;
static void discover_timer(struct timer_list *t)
{
@@ -35,6 +36,7 @@ aoe_exit(void)
aoechr_exit();
aoedev_exit();
aoeblk_exit(); /* free cache after de-allocating bufs */
+ destroy_workqueue(aoe_wq);
}
static int __init
@@ -42,9 +44,13 @@ aoe_init(void)
{
int ret;
+ aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
+ if (!aoe_wq)
+ return -ENOMEM;
+
ret = aoedev_init();
if (ret)
- return ret;
+ goto dev_fail;
ret = aoechr_init();
if (ret)
goto chr_fail;
@@ -77,6 +83,8 @@ aoe_init(void)
aoechr_exit();
chr_fail:
aoedev_exit();
+ dev_fail:
+ destroy_workqueue(aoe_wq);
printk(KERN_INFO "aoe: initialisation failure.\n");
return ret;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index bf769e6e32fe..9deb4df6bdb8 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -303,6 +303,7 @@ static struct atari_floppy_struct {
int ref;
int type;
struct blk_mq_tag_set tag_set;
+ int error_count;
} unit[FD_MAX_UNITS];
#define UD unit[drive]
@@ -705,14 +706,14 @@ static void fd_error( void )
if (!fd_request)
return;
- fd_request->error_count++;
- if (fd_request->error_count >= MAX_ERRORS) {
+ unit[SelectedDrive].error_count++;
+ if (unit[SelectedDrive].error_count >= MAX_ERRORS) {
printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
fd_end_request_cur(BLK_STS_IOERR);
finish_fdc();
return;
}
- else if (fd_request->error_count == RECALIBRATE_ERRORS) {
+ else if (unit[SelectedDrive].error_count == RECALIBRATE_ERRORS) {
printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
if (SelectedDrive != -1)
SUD.track = -1;
@@ -1491,7 +1492,7 @@ static void setup_req_params( int drive )
ReqData = ReqBuffer + 512 * ReqCnt;
if (UseTrackbuffer)
- read_track = (ReqCmd == READ && fd_request->error_count == 0);
+ read_track = (ReqCmd == READ && unit[drive].error_count == 0);
else
read_track = 0;
@@ -1502,7 +1503,7 @@ static void setup_req_params( int drive )
static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
- struct atari_floppy_struct *floppy = bd->rq->rq_disk->private_data;
+ struct atari_floppy_struct *floppy = bd->rq->q->disk->private_data;
int drive = floppy - unit;
int type = floppy->type;
@@ -1520,6 +1521,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_RESOURCE;
}
fd_request = bd->rq;
+ unit[drive].error_count = 0;
blk_mq_start_request(fd_request);
atari_disable_irq( IRQ_MFP_FDC );
@@ -1538,7 +1540,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!UDT) {
Probing = 1;
UDT = atari_disk_type + StartDiskType[DriveType];
- set_capacity(bd->rq->rq_disk, UDT->blocks);
+ set_capacity(bd->rq->q->disk, UDT->blocks);
UD.autoprobe = 1;
}
}
@@ -1558,7 +1560,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
}
type = minor2disktype[type].index;
UDT = &atari_disk_type[type];
- set_capacity(bd->rq->rq_disk, UDT->blocks);
+ set_capacity(bd->rq->q->disk, UDT->blocks);
UD.autoprobe = 0;
}
@@ -2000,6 +2002,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
disk->minors = 1;
sprintf(disk->disk_name, "fd%d", drive);
disk->fops = &floppy_fops;
+ disk->flags |= GENHD_FL_NO_PART;
disk->events = DISK_EVENT_MEDIA_CHANGE;
disk->private_data = &unit[drive];
set_capacity(disk, MAX_DISK_SIZE * 2);
@@ -2028,7 +2031,7 @@ static void ataflop_probe(dev_t dev)
return;
cleanup_disk:
- blk_cleanup_disk(unit[drive].disk[type]);
+ put_disk(unit[drive].disk[type]);
unit[drive].disk[type] = NULL;
}
@@ -2042,7 +2045,6 @@ static void atari_floppy_cleanup(void)
if (!unit[i].disk[type])
continue;
del_gendisk(unit[i].disk[type]);
- blk_cleanup_queue(unit[i].disk[type]->queue);
put_disk(unit[i].disk[type]);
}
blk_mq_free_tag_set(&unit[i].tag_set);
@@ -2061,7 +2063,7 @@ static void atari_cleanup_floppy_disk(struct atari_floppy_struct *fs)
continue;
if (fs->registered[type])
del_gendisk(fs->disk[type]);
- blk_cleanup_disk(fs->disk[type]);
+ put_disk(fs->disk[type]);
}
blk_mq_free_tag_set(&fs->tag_set);
}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a896ee175d86..20acc4a1fd6d 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -256,7 +256,7 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, unsigned int op,
+ unsigned int len, unsigned int off, enum req_op op,
sector_t sector)
{
void *mem;
@@ -310,7 +310,7 @@ static void brd_submit_bio(struct bio *bio)
}
static int brd_rw_page(struct block_device *bdev, sector_t sector,
- struct page *page, unsigned int op)
+ struct page *page, enum req_op op)
{
struct brd_device *brd = bdev->bd_disk->private_data;
int err;
@@ -362,7 +362,6 @@ __setup("ramdisk_size=", ramdisk_size);
* (should share code eventually).
*/
static LIST_HEAD(brd_devices);
-static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
static int brd_alloc(int i)
@@ -372,21 +371,14 @@ static int brd_alloc(int i)
char buf[DISK_NAME_LEN];
int err = -ENOMEM;
- mutex_lock(&brd_devices_mutex);
- list_for_each_entry(brd, &brd_devices, brd_list) {
- if (brd->brd_number == i) {
- mutex_unlock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list)
+ if (brd->brd_number == i)
return -EEXIST;
- }
- }
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
- if (!brd) {
- mutex_unlock(&brd_devices_mutex);
+ if (!brd)
return -ENOMEM;
- }
brd->brd_number = i;
list_add_tail(&brd->brd_list, &brd_devices);
- mutex_unlock(&brd_devices_mutex);
spin_lock_init(&brd->brd_lock);
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
@@ -405,8 +397,7 @@ static int brd_alloc(int i)
disk->minors = max_part;
disk->fops = &brd_fops;
disk->private_data = brd;
- disk->flags = GENHD_FL_EXT_DEVT;
- strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
+ strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
/*
@@ -428,11 +419,9 @@ static int brd_alloc(int i)
return 0;
out_cleanup_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_free_dev:
- mutex_lock(&brd_devices_mutex);
list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
kfree(brd);
return err;
}
@@ -442,15 +431,19 @@ static void brd_probe(dev_t dev)
brd_alloc(MINOR(dev) / max_part);
}
-static void brd_del_one(struct brd_device *brd)
+static void brd_cleanup(void)
{
- del_gendisk(brd->brd_disk);
- blk_cleanup_disk(brd->brd_disk);
- brd_free_pages(brd);
- mutex_lock(&brd_devices_mutex);
- list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
- kfree(brd);
+ struct brd_device *brd, *next;
+
+ debugfs_remove_recursive(brd_debugfs_dir);
+
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+ del_gendisk(brd->brd_disk);
+ put_disk(brd->brd_disk);
+ brd_free_pages(brd);
+ list_del(&brd->brd_list);
+ kfree(brd);
+ }
}
static inline void brd_check_and_reset_par(void)
@@ -474,9 +467,18 @@ static inline void brd_check_and_reset_par(void)
static int __init brd_init(void)
{
- struct brd_device *brd, *next;
int err, i;
+ brd_check_and_reset_par();
+
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
+ for (i = 0; i < rd_nr; i++) {
+ err = brd_alloc(i);
+ if (err)
+ goto out_free;
+ }
+
/*
* brd module now has a feature to instantiate underlying device
* structure on-demand, provided that there is an access dev node.
@@ -492,28 +494,16 @@ static int __init brd_init(void)
* dynamically.
*/
- if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe))
- return -EIO;
-
- brd_check_and_reset_par();
-
- brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
- for (i = 0; i < rd_nr; i++) {
- err = brd_alloc(i);
- if (err)
- goto out_free;
+ if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
+ err = -EIO;
+ goto out_free;
}
pr_info("brd: module loaded\n");
return 0;
out_free:
- unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module NOT loaded !!!\n");
return err;
@@ -521,13 +511,9 @@ out_free:
static void __exit brd_exit(void)
{
- struct brd_device *brd, *next;
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module unloaded\n");
}
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 72cf7603d51f..e27478ae579c 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -124,12 +124,13 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b
static int _drbd_md_sync_page_io(struct drbd_device *device,
struct drbd_backing_dev *bdev,
- sector_t sector, int op)
+ sector_t sector, enum req_op op)
{
struct bio *bio;
/* we do all our meta data IO in aligned 4k blocks. */
const int size = 4096;
- int err, op_flags = 0;
+ int err;
+ blk_opf_t op_flags = 0;
device->md_io.done = 0;
device->md_io.error = -ENODEV;
@@ -138,15 +139,14 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
op_flags |= REQ_FUA | REQ_PREFLUSH;
op_flags |= REQ_SYNC;
- bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
- bio_set_dev(bio, bdev->md_bdev);
+ bio = bio_alloc_bioset(bdev->md_bdev, 1, op | op_flags, GFP_NOIO,
+ &drbd_md_io_bio_set);
bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
goto out;
bio->bi_private = device;
bio->bi_end_io = drbd_md_endio;
- bio_set_op_attrs(bio, op, op_flags);
if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
@@ -175,7 +175,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
}
int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
- sector_t sector, int op)
+ sector_t sector, enum req_op op)
{
int err;
D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
@@ -386,7 +386,7 @@ static int __al_write_transaction(struct drbd_device *device, struct al_transact
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
rcu_read_unlock();
if (write_al_updates) {
- if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
+ if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) {
err = -EIO;
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
} else {
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index c1f816f896a8..7d9db33363de 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
}
}
- want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+ want = PFN_UP(words*sizeof(long));
have = b->bm_number_of_pages;
if (want == have) {
D_ASSERT(device, b->bm_pages != NULL);
@@ -974,24 +974,58 @@ static void drbd_bm_endio(struct bio *bio)
}
}
+/* For the layout, see comment above drbd_md_set_sector_offsets(). */
+static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
+{
+ switch (bdev->md.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ return bdev->md.md_offset + bdev->md.al_offset -1;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ default:
+ return bdev->md.md_offset + bdev->md.md_size_sect -1;
+ }
+}
+
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
- struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
struct drbd_device *device = ctx->device;
+ enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
struct drbd_bitmap *b = device->bitmap;
+ struct bio *bio;
struct page *page;
+ sector_t last_bm_sect;
+ sector_t first_bm_sect;
+ sector_t on_disk_sector;
unsigned int len;
- unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
- sector_t on_disk_sector =
- device->ldev->md.md_offset + device->ldev->md.bm_offset;
- on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+ first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
+ on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));
/* this might happen with very small
* flexible external meta data device,
* or with PAGE_SIZE > 4k */
- len = min_t(unsigned int, PAGE_SIZE,
- (drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
+ last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
+ if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
+ sector_t len_sect = last_bm_sect - on_disk_sector + 1;
+ if (len_sect < PAGE_SIZE/SECTOR_SIZE)
+ len = (unsigned int)len_sect*SECTOR_SIZE;
+ else
+ len = PAGE_SIZE;
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state)) {
+ drbd_err(device, "Invalid offset during on-disk bitmap access: "
+ "page idx %u, sector %llu\n", page_nr, on_disk_sector);
+ }
+ ctx->error = -EIO;
+ bm_set_page_io_err(b->bm_pages[page_nr]);
+ if (atomic_dec_and_test(&ctx->in_flight)) {
+ ctx->done = 1;
+ wake_up(&device->misc_wait);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
+ }
+ return;
+ }
/* serialize IO on this page */
bm_page_lock_io(device, page_nr);
@@ -1006,14 +1040,14 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
- bio_set_dev(bio, device->ldev->md_bdev);
+ bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
+ &drbd_md_io_bio_set);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
bio_add_page(bio, page, len, 0);
bio->bi_private = ctx;
bio->bi_end_io = drbd_bm_endio;
- bio_set_op_attrs(bio, op, 0);
if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
bio_io_error(bio);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f27d5b0f9a0b..4d661282ff41 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -27,7 +27,6 @@
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
-#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/dynamic_debug.h>
#include <net/tcp.h>
@@ -638,9 +637,6 @@ enum {
STATE_SENT, /* Do not change state/UUIDs while this is set */
CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
* pending, from drbd worker context.
- * If set, bdi_write_congested() returns true,
- * so shrink_page_list() would not recurse into,
- * and potentially deadlock on, this drbd worker.
*/
DISCONNECT_SENT,
@@ -1499,7 +1495,7 @@ extern int drbd_resync_finished(struct drbd_device *device);
extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
extern void drbd_md_put_buffer(struct drbd_device *device);
extern int drbd_md_sync_page_io(struct drbd_device *device,
- struct drbd_backing_dev *bdev, sector_t sector, int op);
+ struct drbd_backing_dev *bdev, sector_t sector, enum req_op op);
extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int);
extern void wait_until_done_or_force_detached(struct drbd_device *device,
struct drbd_backing_dev *bdev, unsigned int *done);
@@ -1533,7 +1529,6 @@ extern int w_send_read_req(struct drbd_work *, int);
extern int w_e_reissue(struct drbd_work *, int);
extern int w_restart_disk_io(struct drbd_work *, int);
extern int w_send_out_of_sync(struct drbd_work *, int);
-extern int w_start_resync(struct drbd_work *, int);
extern void resync_timer_fn(struct timer_list *t);
extern void start_resync_timer_fn(struct timer_list *t);
@@ -1551,8 +1546,7 @@ extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool throttle_if_app_is_waiting);
extern int drbd_submit_peer_request(struct drbd_device *,
- struct drbd_peer_request *, const unsigned,
- const unsigned, const int);
+ struct drbd_peer_request *, blk_opf_t, int);
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
sector_t, unsigned int,
@@ -1642,22 +1636,22 @@ struct sib_info {
};
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
-extern void notify_resource_state(struct sk_buff *,
+extern int notify_resource_state(struct sk_buff *,
unsigned int,
struct drbd_resource *,
struct resource_info *,
enum drbd_notification_type);
-extern void notify_device_state(struct sk_buff *,
+extern int notify_device_state(struct sk_buff *,
unsigned int,
struct drbd_device *,
struct device_info *,
enum drbd_notification_type);
-extern void notify_connection_state(struct sk_buff *,
+extern int notify_connection_state(struct sk_buff *,
unsigned int,
struct drbd_connection *,
struct connection_info *,
enum drbd_notification_type);
-extern void notify_peer_device_state(struct sk_buff *,
+extern int notify_peer_device_state(struct sk_buff *,
unsigned int,
struct drbd_peer_device *,
struct peer_device_info *,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 53ba2dddba6e..f3e4db16fd07 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -171,7 +171,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
unsigned int set_size)
{
struct drbd_request *r;
- struct drbd_request *req = NULL;
+ struct drbd_request *req = NULL, *tmp = NULL;
int expect_epoch = 0;
int expect_size = 0;
@@ -225,8 +225,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
* to catch requests being barrier-acked "unexpectedly".
* It usually should find the same req again, or some READ preceding it. */
list_for_each_entry(req, &connection->transfer_log, tl_requests)
- if (req->epoch == expect_epoch)
+ if (req->epoch == expect_epoch) {
+ tmp = req;
break;
+ }
+ req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests);
list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
if (req->epoch != expect_epoch)
break;
@@ -729,7 +732,8 @@ int drbd_send_sync_param(struct drbd_peer_device *peer_device)
cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
/* initialize verify_alg and csums_alg */
- memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+ BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
+ memset(&p->algs, 0, sizeof(p->algs));
if (get_ldev(peer_device->device)) {
dc = rcu_dereference(peer_device->device->ldev->disk_conf);
@@ -899,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
}
-/* communicated if (agreed_features & DRBD_FF_WSAME) */
-static void
-assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
- struct request_queue *q)
-{
- if (q) {
- p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
- p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
- p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
- p->qlim->io_min = cpu_to_be32(queue_io_min(q));
- p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
- p->qlim->discard_enabled = blk_queue_discard(q);
- p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
- } else {
- q = device->rq_queue;
- p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
- p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
- p->qlim->alignment_offset = 0;
- p->qlim->io_min = cpu_to_be32(queue_io_min(q));
- p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
- p->qlim->discard_enabled = 0;
- p->qlim->write_same_capable = 0;
- }
-}
-
int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
{
struct drbd_device *device = peer_device->device;
@@ -945,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
memset(p, 0, packet_size);
if (get_ldev_if_state(device, D_NEGOTIATING)) {
- struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+ struct block_device *bdev = device->ldev->backing_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+
d_size = drbd_get_max_capacity(device->ldev);
rcu_read_lock();
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
@@ -953,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
q_order_type = drbd_queue_order_type(device);
max_bio_size = queue_max_hw_sectors(q) << 9;
max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
- assign_p_sizes_qlim(device, p, q);
+ p->qlim->physical_block_size =
+ cpu_to_be32(bdev_physical_block_size(bdev));
+ p->qlim->logical_block_size =
+ cpu_to_be32(bdev_logical_block_size(bdev));
+ p->qlim->alignment_offset =
+ cpu_to_be32(bdev_alignment_offset(bdev));
+ p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev));
+ p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev));
+ p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev);
put_ldev(device);
} else {
+ struct request_queue *q = device->rq_queue;
+
+ p->qlim->physical_block_size =
+ cpu_to_be32(queue_physical_block_size(q));
+ p->qlim->logical_block_size =
+ cpu_to_be32(queue_logical_block_size(q));
+ p->qlim->alignment_offset = 0;
+ p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+ p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+ p->qlim->discard_enabled = 0;
+
d_size = 0;
u_size = 0;
q_order_type = QUEUE_ORDERED_NONE;
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
- assign_p_sizes_qlim(device, p, NULL);
}
if (peer_device->connection->agreed_pro_version <= 94)
@@ -1590,9 +1589,6 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
? 0 : MSG_MORE);
if (err)
return err;
- /* REQ_OP_WRITE_SAME has only one segment */
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
- break;
}
return 0;
}
@@ -1611,9 +1607,6 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b
bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
if (err)
return err;
- /* REQ_OP_WRITE_SAME has only one segment */
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
- break;
}
return 0;
}
@@ -1645,7 +1638,6 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
return (bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0) |
(bio->bi_opf & REQ_FUA ? DP_FUA : 0) |
(bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
- (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
(bio_op(bio) == REQ_OP_WRITE_ZEROES ?
((connection->agreed_features & DRBD_FF_WZEROES) ?
@@ -1664,7 +1656,6 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
struct drbd_device *device = peer_device->device;
struct drbd_socket *sock;
struct p_data *p;
- struct p_wsame *wsame = NULL;
void *digest_out;
unsigned int dp_flags = 0;
int digest_size;
@@ -1702,29 +1693,14 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
goto out;
}
- if (dp_flags & DP_WSAME) {
- /* this will only work if DRBD_FF_WSAME is set AND the
- * handshake agreed that all nodes and backend devices are
- * WRITE_SAME capable and agree on logical_block_size */
- wsame = (struct p_wsame*)p;
- digest_out = wsame + 1;
- wsame->size = cpu_to_be32(req->i.size);
- } else
- digest_out = p + 1;
+ digest_out = p + 1;
/* our digest is still only over the payload.
* TRIM does not carry any payload. */
if (digest_size)
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out);
- if (wsame) {
- err =
- __send_command(peer_device->connection, device->vnr, sock, P_WSAME,
- sizeof(*wsame) + digest_size, NULL,
- bio_iovec(req->master_bio).bv_len);
- } else
- err =
- __send_command(peer_device->connection, device->vnr, sock, P_DATA,
- sizeof(*p) + digest_size, NULL, req->i.size);
+ err = __send_command(peer_device->connection, device->vnr, sock, P_DATA,
+ sizeof(*p) + digest_size, NULL, req->i.size);
if (!err) {
/* For protocol A, we have to memcpy the payload into
* socket buffers, as we may complete right away
@@ -2231,7 +2207,7 @@ void drbd_destroy_device(struct kref *kref)
if (device->bitmap) /* should no longer be there. */
drbd_bm_cleanup(device);
__free_page(device->md_io.page);
- blk_cleanup_disk(device->vdisk);
+ put_disk(device->vdisk);
kfree(device->rs_plan_s);
/* not for_each_connection(connection, resource):
@@ -2734,9 +2710,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
disk->first_minor = minor;
disk->minors = 1;
disk->fops = &drbd_ops;
+ disk->flags |= GENHD_FL_NO_PART;
sprintf(disk->disk_name, "drbd%d", minor);
disk->private_data = device;
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
blk_queue_write_cache(disk->queue, true, true);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
@@ -2791,12 +2769,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2810,8 +2788,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
@@ -2831,7 +2807,7 @@ out_no_minor_idr:
out_no_bitmap:
__free_page(device->md_io.page);
out_no_io_page:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_no_disk:
kref_put(&resource->kref, drbd_destroy_resource);
kfree(device);
@@ -3605,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
* when we want to support more than
* one PRO_VERSION */
static const char *cmdnames[] = {
+
[P_DATA] = "Data",
- [P_WSAME] = "WriteSame",
- [P_TRIM] = "Trim",
[P_DATA_REPLY] = "DataReply",
[P_RS_DATA_REPLY] = "RSDataReply",
[P_BARRIER] = "Barrier",
@@ -3618,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
[P_DATA_REQUEST] = "DataRequest",
[P_RS_DATA_REQUEST] = "RSDataRequest",
[P_SYNC_PARAM] = "SyncParam",
- [P_SYNC_PARAM89] = "SyncParam89",
[P_PROTOCOL] = "ReportProtocol",
[P_UUIDS] = "ReportUUIDs",
[P_SIZES] = "ReportSizes",
@@ -3626,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
[P_SYNC_UUID] = "ReportSyncUUID",
[P_AUTH_CHALLENGE] = "AuthChallenge",
[P_AUTH_RESPONSE] = "AuthResponse",
+ [P_STATE_CHG_REQ] = "StateChgRequest",
[P_PING] = "Ping",
[P_PING_ACK] = "PingAck",
[P_RECV_ACK] = "RecvAck",
@@ -3636,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
[P_NEG_DREPLY] = "NegDReply",
[P_NEG_RS_DREPLY] = "NegRSDReply",
[P_BARRIER_ACK] = "BarrierAck",
- [P_STATE_CHG_REQ] = "StateChgRequest",
[P_STATE_CHG_REPLY] = "StateChgReply",
[P_OV_REQUEST] = "OVRequest",
[P_OV_REPLY] = "OVReply",
[P_OV_RESULT] = "OVResult",
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
+ [P_SYNC_PARAM89] = "SyncParam89",
[P_COMPRESSED_BITMAP] = "CBitmap",
[P_DELAY_PROBE] = "DelayProbe",
[P_OUT_OF_SYNC] = "OutOfSync",
- [P_RETRY_WRITE] = "RetryWrite",
[P_RS_CANCEL] = "RSCancel",
[P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
[P_PROTOCOL_UPDATE] = "protocol_update",
+ [P_TRIM] = "Trim",
[P_RS_THIN_REQ] = "rs_thin_req",
[P_RS_DEALLOCATED] = "rs_deallocated",
+ [P_WSAME] = "WriteSame",
+ [P_ZEROES] = "Zeroes",
/* enum drbd_packet, but not commands - obsoleted flags:
* P_MAY_IGNORE
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 44ccf8b4f4b2..864c98e74875 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
struct set_role_parms parms;
int err;
enum drbd_ret_code retcode;
+ enum drbd_state_rv rv;
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
@@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
- retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
- R_PRIMARY, parms.assume_uptodate);
+ rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
else
- retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
- R_SECONDARY, 0);
+ rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
mutex_unlock(&adm_ctx.resource->adm_mutex);
genl_lock();
+ drbd_adm_finish(&adm_ctx, info, rv);
+ return 0;
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -1204,50 +1205,40 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
}
static void decide_on_discard_support(struct drbd_device *device,
- struct request_queue *q,
- struct request_queue *b,
- bool discard_zeroes_if_aligned)
+ struct drbd_backing_dev *bdev)
{
- /* q = drbd device queue (device->rq_queue)
- * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
- * or NULL if diskless
- */
- struct drbd_connection *connection = first_peer_device(device)->connection;
- bool can_do = b ? blk_queue_discard(b) : true;
-
- if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
- can_do = false;
- drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
- }
- if (can_do) {
- /* We don't care for the granularity, really.
- * Stacking limits below should fix it for the local
- * device. Whether or not it is a suitable granularity
- * on the remote device is not our problem, really. If
- * you care, you need to use devices with similar
- * topology on all peers. */
- blk_queue_discard_granularity(q, 512);
- q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
- q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
- } else {
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
- blk_queue_discard_granularity(q, 0);
- q->limits.max_discard_sectors = 0;
- q->limits.max_write_zeroes_sectors = 0;
- }
-}
+ struct drbd_connection *connection =
+ first_peer_device(device)->connection;
+ struct request_queue *q = device->rq_queue;
-static void fixup_discard_if_not_supported(struct request_queue *q)
-{
- /* To avoid confusion, if this queue does not support discard, clear
- * max_discard_sectors, which is what lsblk -D reports to the user.
- * Older kernels got this wrong in "stack limits".
- * */
- if (!blk_queue_discard(q)) {
- blk_queue_max_discard_sectors(q, 0);
- blk_queue_discard_granularity(q, 0);
+ if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
+ goto not_supported;
+
+ if (connection->cstate >= C_CONNECTED &&
+ !(connection->agreed_features & DRBD_FF_TRIM)) {
+ drbd_info(connection,
+ "peer DRBD too old, does not support TRIM: disabling discards\n");
+ goto not_supported;
}
+
+ /*
+ * We don't care for the granularity, really.
+ *
+ * Stacking limits below should fix it for the local device. Whether or
+ * not it is a suitable granularity on the remote device is not our
+ * problem, really. If you care, you need to use devices with similar
+ * topology on all peers.
+ */
+ blk_queue_discard_granularity(q, 512);
+ q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
+ q->limits.max_write_zeroes_sectors =
+ drbd_max_discard_sectors(connection);
+ return;
+
+not_supported:
+ blk_queue_discard_granularity(q, 0);
+ q->limits.max_discard_sectors = 0;
+ q->limits.max_write_zeroes_sectors = 0;
}
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
@@ -1265,71 +1256,6 @@ static void fixup_write_zeroes(struct drbd_device *device, struct request_queue
q->limits.max_write_zeroes_sectors = 0;
}
-static void decide_on_write_same_support(struct drbd_device *device,
- struct request_queue *q,
- struct request_queue *b, struct o_qlim *o,
- bool disable_write_same)
-{
- struct drbd_peer_device *peer_device = first_peer_device(device);
- struct drbd_connection *connection = peer_device->connection;
- bool can_do = b ? b->limits.max_write_same_sectors : true;
-
- if (can_do && disable_write_same) {
- can_do = false;
- drbd_info(peer_device, "WRITE_SAME disabled by config\n");
- }
-
- if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
- can_do = false;
- drbd_info(peer_device, "peer does not support WRITE_SAME\n");
- }
-
- if (o) {
- /* logical block size; queue_logical_block_size(NULL) is 512 */
- unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
- unsigned int me_lbs_b = queue_logical_block_size(b);
- unsigned int me_lbs = queue_logical_block_size(q);
-
- if (me_lbs_b != me_lbs) {
- drbd_warn(device,
- "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
- me_lbs, me_lbs_b);
- /* rather disable write same than trigger some BUG_ON later in the scsi layer. */
- can_do = false;
- }
- if (me_lbs_b != peer_lbs) {
- drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
- me_lbs, peer_lbs);
- if (can_do) {
- drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n");
- can_do = false;
- }
- me_lbs = max(me_lbs, me_lbs_b);
- /* We cannot change the logical block size of an in-use queue.
- * We can only hope that access happens to be properly aligned.
- * If not, the peer will likely produce an IO error, and detach. */
- if (peer_lbs > me_lbs) {
- if (device->state.role != R_PRIMARY) {
- blk_queue_logical_block_size(q, peer_lbs);
- drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs);
- } else {
- drbd_warn(peer_device,
- "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",
- me_lbs, peer_lbs);
- }
- }
- }
- if (can_do && !o->write_same_capable) {
- /* If we introduce an open-coded write-same loop on the receiving side,
- * the peer would present itself as "capable". */
- drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n");
- can_do = false;
- }
- }
-
- blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
-}
-
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
unsigned int max_bio_size, struct o_qlim *o)
{
@@ -1338,8 +1264,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
unsigned int max_segments = 0;
struct request_queue *b = NULL;
struct disk_conf *dc;
- bool discard_zeroes_if_aligned = true;
- bool disable_write_same = false;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
@@ -1348,8 +1272,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
rcu_read_lock();
dc = rcu_dereference(device->ldev->disk_conf);
max_segments = dc->max_bio_bvecs;
- discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
- disable_write_same = dc->disable_write_same;
rcu_read_unlock();
blk_set_stacking_limits(&q->limits);
@@ -1359,14 +1281,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
- decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
- decide_on_write_same_support(device, q, b, o, disable_write_same);
+ decide_on_discard_support(device, bdev);
if (b) {
blk_stack_limits(&q->limits, &b->limits, 0);
disk_update_readahead(device->vdisk);
}
- fixup_discard_if_not_supported(q);
fixup_write_zeroes(device, q);
}
@@ -1505,14 +1425,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
struct drbd_backing_dev *nbc)
{
- struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;
+ struct block_device *bdev = nbc->backing_bdev;
if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
if (disk_conf->al_extents > drbd_al_extents_max(nbc))
disk_conf->al_extents = drbd_al_extents_max(nbc);
- if (!blk_queue_discard(q)) {
+ if (!bdev_max_discard_sectors(bdev)) {
if (disk_conf->rs_discard_granularity) {
disk_conf->rs_discard_granularity = 0; /* disable feature */
drbd_info(device, "rs_discard_granularity feature disabled\n");
@@ -1521,16 +1441,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
if (disk_conf->rs_discard_granularity) {
int orig_value = disk_conf->rs_discard_granularity;
+ sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
+ unsigned int discard_granularity = bdev_discard_granularity(bdev);
int remainder;
- if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
- disk_conf->rs_discard_granularity = q->limits.discard_granularity;
+ if (discard_granularity > disk_conf->rs_discard_granularity)
+ disk_conf->rs_discard_granularity = discard_granularity;
- remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
+ remainder = disk_conf->rs_discard_granularity %
+ discard_granularity;
disk_conf->rs_discard_granularity += remainder;
- if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
- disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;
+ if (disk_conf->rs_discard_granularity > discard_size)
+ disk_conf->rs_discard_granularity = discard_size;
if (disk_conf->rs_discard_granularity != orig_value)
drbd_info(device, "rs_discard_granularity changed to %d\n",
@@ -1666,8 +1589,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
if (write_ordering_changed(old_disk_conf, new_disk_conf))
drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
- if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned
- || old_disk_conf->disable_write_same != new_disk_conf->disable_write_same)
+ if (old_disk_conf->discard_zeroes_if_aligned !=
+ new_disk_conf->discard_zeroes_if_aligned)
drbd_reconsider_queue_parameters(device, device->ldev, NULL);
drbd_md_sync(device);
@@ -1679,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
drbd_send_sync_param(peer_device);
}
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu(old_disk_conf);
kfree(old_plan);
mod_timer(&device->request_timer, jiffies + HZ);
goto success;
@@ -2511,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
mutex_unlock(&connection->resource->conf_update);
mutex_unlock(&connection->data.mutex);
- synchronize_rcu();
- kfree(old_net_conf);
+ kvfree_rcu(old_net_conf);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
struct drbd_peer_device *peer_device;
@@ -2570,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
struct drbd_resource *resource;
struct drbd_connection *connection;
enum drbd_ret_code retcode;
+ enum drbd_state_rv rv;
int i;
int err;
@@ -2689,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
}
rcu_read_unlock();
- retcode = (enum drbd_ret_code)conn_request_state(connection,
- NS(conn, C_UNCONNECTED), CS_VERBOSE);
+ rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
- drbd_adm_finish(&adm_ctx, info, retcode);
+ drbd_adm_finish(&adm_ctx, info, rv);
return 0;
fail:
@@ -2802,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
rv = conn_try_disconnect(connection, parms.force_disconnect);
- if (rv < SS_SUCCESS)
- retcode = (enum drbd_ret_code)rv;
- else
- retcode = NO_ERROR;
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ if (rv < SS_SUCCESS) {
+ drbd_adm_finish(&adm_ctx, info, rv);
+ return 0;
+ }
+ retcode = NO_ERROR;
fail:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -2925,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
new_disk_conf->disk_size = (sector_t)rs.resize_size;
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&device->resource->conf_update);
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu(old_disk_conf);
new_disk_conf = NULL;
}
@@ -4617,7 +4538,7 @@ static int nla_put_notification_header(struct sk_buff *msg,
return drbd_notification_header_to_skb(msg, &nh, true);
}
-void notify_resource_state(struct sk_buff *skb,
+int notify_resource_state(struct sk_buff *skb,
unsigned int seq,
struct drbd_resource *resource,
struct resource_info *resource_info,
@@ -4659,16 +4580,17 @@ void notify_resource_state(struct sk_buff *skb,
if (err && err != -ESRCH)
goto failed;
}
- return;
+ return 0;
nla_put_failure:
nlmsg_free(skb);
failed:
drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
err, seq);
+ return err;
}
-void notify_device_state(struct sk_buff *skb,
+int notify_device_state(struct sk_buff *skb,
unsigned int seq,
struct drbd_device *device,
struct device_info *device_info,
@@ -4708,16 +4630,17 @@ void notify_device_state(struct sk_buff *skb,
if (err && err != -ESRCH)
goto failed;
}
- return;
+ return 0;
nla_put_failure:
nlmsg_free(skb);
failed:
drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
err, seq);
+ return err;
}
-void notify_connection_state(struct sk_buff *skb,
+int notify_connection_state(struct sk_buff *skb,
unsigned int seq,
struct drbd_connection *connection,
struct connection_info *connection_info,
@@ -4757,16 +4680,17 @@ void notify_connection_state(struct sk_buff *skb,
if (err && err != -ESRCH)
goto failed;
}
- return;
+ return 0;
nla_put_failure:
nlmsg_free(skb);
failed:
drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
err, seq);
+ return err;
}
-void notify_peer_device_state(struct sk_buff *skb,
+int notify_peer_device_state(struct sk_buff *skb,
unsigned int seq,
struct drbd_peer_device *peer_device,
struct peer_device_info *peer_device_info,
@@ -4807,13 +4731,14 @@ void notify_peer_device_state(struct sk_buff *skb,
if (err && err != -ESRCH)
goto failed;
}
- return;
+ return 0;
nla_put_failure:
nlmsg_free(skb);
failed:
drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
err, seq);
+ return err;
}
void notify_helper(enum drbd_notification_type type,
@@ -4827,7 +4752,7 @@ void notify_helper(enum drbd_notification_type type,
struct drbd_genlmsghdr *dh;
int err;
- strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
+ strscpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
helper_info.helper_status = status;
@@ -4864,7 +4789,7 @@ fail:
err, seq);
}
-static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
{
struct drbd_genlmsghdr *dh;
int err;
@@ -4878,11 +4803,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
if (nla_put_notification_header(skb, NOTIFY_EXISTS))
goto nla_put_failure;
genlmsg_end(skb, dh);
- return;
+ return 0;
nla_put_failure:
nlmsg_free(skb);
pr_err("Error %d sending event. Event seq:%u\n", err, seq);
+ return err;
}
static void free_state_changes(struct list_head *list)
@@ -4909,6 +4835,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int seq = cb->args[2];
unsigned int n;
enum drbd_notification_type flags = 0;
+ int err = 0;
/* There is no need for taking notification_mutex here: it doesn't
matter if the initial state events mix with later state chage
@@ -4917,32 +4844,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[5]--;
if (cb->args[5] == 1) {
- notify_initial_state_done(skb, seq);
+ err = notify_initial_state_done(skb, seq);
goto out;
}
n = cb->args[4]++;
if (cb->args[4] < cb->args[3])
flags |= NOTIFY_CONTINUES;
if (n < 1) {
- notify_resource_state_change(skb, seq, state_change->resource,
+ err = notify_resource_state_change(skb, seq, state_change->resource,
NOTIFY_EXISTS | flags);
goto next;
}
n--;
if (n < state_change->n_connections) {
- notify_connection_state_change(skb, seq, &state_change->connections[n],
+ err = notify_connection_state_change(skb, seq, &state_change->connections[n],
NOTIFY_EXISTS | flags);
goto next;
}
n -= state_change->n_connections;
if (n < state_change->n_devices) {
- notify_device_state_change(skb, seq, &state_change->devices[n],
+ err = notify_device_state_change(skb, seq, &state_change->devices[n],
NOTIFY_EXISTS | flags);
goto next;
}
n -= state_change->n_devices;
if (n < state_change->n_devices * state_change->n_connections) {
- notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+ err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
NOTIFY_EXISTS | flags);
goto next;
}
@@ -4957,7 +4884,10 @@ next:
cb->args[4] = 0;
}
out:
- return skb->len;
+ if (err)
+ return err;
+ else
+ return skb->len;
}
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index dea59c92ecc1..a882b65ab5d2 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -283,8 +283,10 @@ struct p_rs_param_89 {
struct p_rs_param_95 {
u32 resync_rate;
- char verify_alg[SHARED_SECRET_MAX];
- char csums_alg[SHARED_SECRET_MAX];
+ struct_group(algs,
+ char verify_alg[SHARED_SECRET_MAX];
+ char csums_alg[SHARED_SECRET_MAX];
+ );
u32 c_plan_ahead;
u32 c_delay_target;
u32 c_fill_target;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1f740e42e457..ee69d50ba4fd 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
struct drbd_device *device = peer_device->device;
struct drbd_peer_request *peer_req;
struct page *page = NULL;
- unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ unsigned int nr_pages = PFN_UP(payload_size);
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
return NULL;
@@ -781,7 +781,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_connection *connection,
timeo = connect_int * HZ;
/* 28.5% random jitter */
- timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
+ timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
if (err <= 0)
@@ -1004,7 +1004,7 @@ retry:
drbd_warn(connection, "Error receiving initial packet\n");
sock_release(s);
randomize:
- if (prandom_u32() & 1)
+ if (prandom_u32_max(2))
goto retry;
}
}
@@ -1279,16 +1279,16 @@ static void one_flush_endio(struct bio *bio)
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
- struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
+ REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
- if (!bio || !octx) {
- drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
+
+ if (!octx) {
+ drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
/* FIXME: what else can I do now? disconnecting or detaching
* really does not help to improve the state of the world, either.
*/
- kfree(octx);
- if (bio)
- bio_put(bio);
+ bio_put(bio);
ctx->error = -ENOMEM;
put_ldev(device);
@@ -1298,10 +1298,8 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
- bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
- bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
device->flush_jif = jiffies;
set_bit(FLUSH_PENDING, &device->flags);
@@ -1513,7 +1511,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
struct block_device *bdev = device->ldev->backing_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
sector_t tmp, nr;
unsigned int max_discard_sectors, granularity;
int alignment;
@@ -1523,10 +1520,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
goto zero_out;
/* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
+ granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
- max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+ max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
goto zero_out;
@@ -1550,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
start = tmp;
}
while (nr_sectors >= max_discard_sectors) {
- err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
+ err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
+ GFP_NOIO);
nr_sectors -= max_discard_sectors;
start += max_discard_sectors;
}
@@ -1562,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
nr = nr_sectors;
nr -= (unsigned int)nr % granularity;
if (nr) {
- err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+ err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
nr_sectors -= nr;
start += nr;
}
@@ -1577,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
static bool can_do_reliable_discards(struct drbd_device *device)
{
- struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
struct disk_conf *dc;
bool can_do;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
return false;
rcu_read_lock();
@@ -1606,19 +1603,7 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru
drbd_endio_write_sec_final(peer_req);
}
-static void drbd_issue_peer_wsame(struct drbd_device *device,
- struct drbd_peer_request *peer_req)
-{
- struct block_device *bdev = device->ldev->backing_bdev;
- sector_t s = peer_req->i.sector;
- sector_t nr = peer_req->i.size >> 9;
- if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
- peer_req->flags |= EE_WAS_ERROR;
- drbd_endio_write_sec_final(peer_req);
-}
-
-
-/*
+/**
* drbd_submit_peer_request()
* @device: DRBD device.
* @peer_req: peer request
@@ -1636,17 +1621,15 @@ static void drbd_issue_peer_wsame(struct drbd_device *device,
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
struct drbd_peer_request *peer_req,
- const unsigned op, const unsigned op_flags,
- const int fault_type)
+ const blk_opf_t opf, const int fault_type)
{
struct bio *bios = NULL;
struct bio *bio;
struct page *page = peer_req->pages;
sector_t sector = peer_req->i.sector;
- unsigned data_size = peer_req->i.size;
- unsigned n_bios = 0;
- unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
- int err = -ENOMEM;
+ unsigned int data_size = peer_req->i.size;
+ unsigned int n_bios = 0;
+ unsigned int nr_pages = PFN_UP(data_size);
/* TRIM/DISCARD: for now, always use the helper function
* blkdev_issue_zeroout(..., discard=true).
@@ -1654,7 +1637,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
* Correctness first, performance later. Next step is to code an
* asynchronous variant of the same.
*/
- if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
+ if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
/* wait for all pending IO completions, before we start
* zeroing things out. */
conn_wait_active_ee_empty(peer_req->peer_device->connection);
@@ -1671,10 +1654,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
spin_unlock_irq(&device->resource->req_lock);
}
- if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
- drbd_issue_peer_discard_or_zero_out(device, peer_req);
- else /* EE_WRITE_SAME */
- drbd_issue_peer_wsame(device, peer_req);
+ drbd_issue_peer_discard_or_zero_out(device, peer_req);
return 0;
}
@@ -1687,15 +1667,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
* generated bio, but a bio allocated on behalf of the peer.
*/
next_bio:
- bio = bio_alloc(GFP_NOIO, nr_pages);
- if (!bio) {
- drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
- goto fail;
- }
+ bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO);
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, device->ldev->backing_bdev);
- bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@@ -1726,14 +1700,6 @@ next_bio:
drbd_submit_bio_noacct(device, fault_type, bio);
} while (bios);
return 0;
-
-fail:
- while (bios) {
- bio = bios;
- bios = bios->bi_next;
- bio_put(bio);
- }
- return err;
}
static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
@@ -1870,7 +1836,6 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
unsigned long *data;
struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
- struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
digest_size = 0;
if (!trim && peer_device->connection->peer_integrity_tfm) {
@@ -1885,7 +1850,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
data_size -= digest_size;
}
- /* assume request_size == data_size, but special case trim and wsame. */
+ /* assume request_size == data_size, but special case trim. */
ds = data_size;
if (trim) {
if (!expect(data_size == 0))
@@ -1895,23 +1860,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
if (!expect(data_size == 0))
return NULL;
ds = be32_to_cpu(zeroes->size);
- } else if (wsame) {
- if (data_size != queue_logical_block_size(device->rq_queue)) {
- drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
- data_size, queue_logical_block_size(device->rq_queue));
- return NULL;
- }
- if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
- drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
- data_size, bdev_logical_block_size(device->ldev->backing_bdev));
- return NULL;
- }
- ds = be32_to_cpu(wsame->size);
}
if (!expect(IS_ALIGNED(ds, 512)))
return NULL;
- if (trim || wsame || zeroes) {
+ if (trim || zeroes) {
if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
return NULL;
} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
@@ -1943,8 +1896,6 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
peer_req->flags |= EE_ZEROOUT;
return peer_req;
}
- if (wsame)
- peer_req->flags |= EE_WRITE_SAME;
/* receive payload size bytes into page chain */
ds = data_size;
@@ -2033,10 +1984,10 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
bio_for_each_segment(bvec, bio, iter) {
- void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+ void *mapped = bvec_kmap_local(&bvec);
expect = min_t(int, data_size, bvec.bv_len);
err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
- kunmap(bvec.bv_page);
+ kunmap_local(mapped);
if (err)
return err;
data_size -= expect;
@@ -2107,7 +2058,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
+ if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE,
DRBD_FAULT_RS_WR) == 0)
return 0;
@@ -2162,9 +2113,6 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i
if (unlikely(!req))
return -EIO;
- /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
- * special casing it there for the various failure cases.
- * still no race with drbd_fail_pending_reads */
err = recv_dless_read(peer_device, req, sector, pi->size);
if (!err)
req_mod(req, DATA_RECEIVED);
@@ -2430,21 +2378,19 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
/* see also bio_flags_to_wire()
* DRBD_REQ_*, because we need to semantically map the flags to data packet
* flags and back. We may replicate to other kernel versions. */
-static unsigned long wire_flags_to_bio_flags(u32 dpf)
+static blk_opf_t wire_flags_to_bio_flags(u32 dpf)
{
return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
(dpf & DP_FUA ? REQ_FUA : 0) |
(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
}
-static unsigned long wire_flags_to_bio_op(u32 dpf)
+static enum req_op wire_flags_to_bio_op(u32 dpf)
{
if (dpf & DP_ZEROES)
return REQ_OP_WRITE_ZEROES;
if (dpf & DP_DISCARD)
return REQ_OP_DISCARD;
- if (dpf & DP_WSAME)
- return REQ_OP_WRITE_SAME;
else
return REQ_OP_WRITE;
}
@@ -2592,7 +2538,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
struct drbd_peer_request *peer_req;
struct p_data *p = pi->data;
u32 peer_seq = be32_to_cpu(p->seq_num);
- int op, op_flags;
+ enum req_op op;
+ blk_opf_t op_flags;
u32 dp_flags;
int err, tp;
@@ -2711,11 +2658,11 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, peer_seq);
spin_lock_irq(&device->resource->req_lock);
}
- /* TRIM and WRITE_SAME are processed synchronously,
+ /* TRIM and is processed synchronously,
* we wait for all pending requests, respectively wait for
* active_ee to become empty in drbd_submit_peer_request();
* better not add ourselves here. */
- if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
+ if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
list_add_tail(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
@@ -2730,7 +2677,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
}
- err = drbd_submit_peer_request(device, peer_req, op, op_flags,
+ err = drbd_submit_peer_request(device, peer_req, op | op_flags,
DRBD_FAULT_DT_WR);
if (!err)
return 0;
@@ -3028,7 +2975,7 @@ submit_for_resync:
submit:
update_receiver_timing_details(connection, drbd_submit_peer_request);
inc_unacked(device);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
+ if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ,
fault_type) == 0)
return 0;
@@ -3799,8 +3746,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
drbd_info(connection, "peer data-integrity-alg: %s\n",
integrity_alg[0] ? integrity_alg : "(none)");
- synchronize_rcu();
- kfree(old_net_conf);
+ kvfree_rcu(old_net_conf);
return 0;
disconnect_rcu_unlock:
@@ -3921,7 +3867,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
/* initialize verify_alg and csums_alg */
p = pi->data;
- memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+ BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
+ memset(&p->algs, 0, sizeof(p->algs));
err = drbd_recv_all(peer_device->connection, p, header_size);
if (err)
@@ -3950,7 +3897,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
drbd_err(device, "verify-alg of wrong size, "
"peer wants %u, accepting only up to %u byte\n",
data_size, SHARED_SECRET_MAX);
- err = -EIO;
goto reconnect;
}
@@ -4168,8 +4114,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&connection->resource->conf_update);
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu(old_disk_conf);
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
(unsigned long)p_usize, (unsigned long)my_usize);
@@ -5002,7 +4947,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
if (get_ldev(device)) {
struct drbd_peer_request *peer_req;
- const int op = REQ_OP_WRITE_ZEROES;
+ const enum req_op op = REQ_OP_WRITE_ZEROES;
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
size, 0, GFP_NOIO);
@@ -5020,7 +4965,8 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
- err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);
+ err = drbd_submit_peer_request(device, peer_req, op,
+ DRBD_FAULT_RS_WR);
if (err) {
spin_lock_irq(&device->resource->req_lock);
@@ -5083,7 +5029,6 @@ static struct data_cmd drbd_cmd_handler[] = {
[P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
[P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data },
[P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
- [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data },
};
static void drbdd(struct drbd_connection *connection)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3235532ae077..7f9bcc82fc9c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -30,12 +30,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
return NULL;
memset(req, 0, sizeof(*req));
- req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
- req->private_bio->bi_private = req;
- req->private_bio->bi_end_io = drbd_request_endio;
-
req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
- | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
| (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
| (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
req->device = device;
@@ -180,7 +175,8 @@ void start_new_tl_epoch(struct drbd_connection *connection)
void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m)
{
- m->bio->bi_status = errno_to_blk_status(m->error);
+ if (unlikely(m->error))
+ m->bio->bi_status = errno_to_blk_status(m->error);
bio_endio(m->bio);
dec_ap_bio(device);
}
@@ -332,17 +328,21 @@ static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct dr
static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ struct drbd_request *iter = req;
if (!connection)
return;
if (connection->req_next != req)
return;
- list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
- const unsigned s = req->rq_state;
- if (s & RQ_NET_QUEUED)
+
+ req = NULL;
+ list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
+ const unsigned int s = iter->rq_state;
+
+ if (s & RQ_NET_QUEUED) {
+ req = iter;
break;
+ }
}
- if (&req->tl_requests == &connection->transfer_log)
- req = NULL;
connection->req_next = req;
}
@@ -358,17 +358,21 @@ static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, st
static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ struct drbd_request *iter = req;
if (!connection)
return;
if (connection->req_ack_pending != req)
return;
- list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
- const unsigned s = req->rq_state;
- if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
+
+ req = NULL;
+ list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
+ const unsigned int s = iter->rq_state;
+
+ if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) {
+ req = iter;
break;
+ }
}
- if (&req->tl_requests == &connection->transfer_log)
- req = NULL;
connection->req_ack_pending = req;
}
@@ -384,17 +388,21 @@ static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, s
static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+ struct drbd_request *iter = req;
if (!connection)
return;
if (connection->req_not_net_done != req)
return;
- list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
- const unsigned s = req->rq_state;
- if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
+
+ req = NULL;
+ list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
+ const unsigned int s = iter->rq_state;
+
+ if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) {
+ req = iter;
break;
+ }
}
- if (&req->tl_requests == &connection->transfer_log)
- req = NULL;
connection->req_not_net_done = req;
}
@@ -510,16 +518,14 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
{
- char b[BDEVNAME_SIZE];
-
if (!__ratelimit(&drbd_ratelimit_state))
return;
- drbd_warn(device, "local %s IO error sector %llu+%u on %s\n",
+ drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n",
(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
(unsigned long long)req->i.sector,
req->i.size >> 9,
- bdevname(device->ldev->backing_bdev, b));
+ device->ldev->backing_bdev);
}
/* Helper for HANDED_OVER_TO_NETWORK.
@@ -909,8 +915,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
switch (rbm) {
case RB_CONGESTED_REMOTE:
- return bdi_read_congested(
- device->ldev->backing_bdev->bd_disk->bdi);
+ return false;
case RB_LEAST_PENDING:
return atomic_read(&device->local_cnt) >
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
@@ -1151,8 +1156,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
else
type = DRBD_FAULT_DT_RD;
- bio_set_dev(bio, device->ldev->backing_bdev);
-
/* State may have changed since we grabbed our reference on the
* ->ldev member. Double check, and short-circuit to endio.
* In case the last activity log transaction failed to get on
@@ -1211,9 +1214,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio)
/* Update disk stats */
req->start_jif = bio_start_io_acct(req->master_bio);
- if (!get_ldev(device)) {
- bio_put(req->private_bio);
- req->private_bio = NULL;
+ if (get_ldev(device)) {
+ req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
+ bio, GFP_NOIO,
+ &drbd_io_bio_set);
+ req->private_bio->bi_private = req;
+ req->private_bio->bi_end_io = drbd_request_endio;
}
/* process discards always from our submitter thread */
@@ -1600,7 +1606,7 @@ void drbd_submit_bio(struct bio *bio)
{
struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
- blk_queue_split(&bio);
+ bio = bio_split_to_limits(bio);
/*
* what we "blindly" assume:
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 511f39a08de4..6237fa1dcb0e 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -266,8 +266,6 @@ struct bio_and_error {
extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
-extern void _req_may_be_done(struct drbd_request *req,
- struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_device *device,
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index b8a27818ab3f..3f7bf9f2d874 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
return rv;
}
-void notify_resource_state_change(struct sk_buff *skb,
+int notify_resource_state_change(struct sk_buff *skb,
unsigned int seq,
struct drbd_resource_state_change *resource_state_change,
enum drbd_notification_type type)
@@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb,
.res_susp_fen = resource_state_change->susp_fen[NEW],
};
- notify_resource_state(skb, seq, resource, &resource_info, type);
+ return notify_resource_state(skb, seq, resource, &resource_info, type);
}
-void notify_connection_state_change(struct sk_buff *skb,
+int notify_connection_state_change(struct sk_buff *skb,
unsigned int seq,
struct drbd_connection_state_change *connection_state_change,
enum drbd_notification_type type)
@@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb,
.conn_role = connection_state_change->peer_role[NEW],
};
- notify_connection_state(skb, seq, connection, &connection_info, type);
+ return notify_connection_state(skb, seq, connection, &connection_info, type);
}
-void notify_device_state_change(struct sk_buff *skb,
+int notify_device_state_change(struct sk_buff *skb,
unsigned int seq,
struct drbd_device_state_change *device_state_change,
enum drbd_notification_type type)
@@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb,
.dev_disk_state = device_state_change->disk_state[NEW],
};
- notify_device_state(skb, seq, device, &device_info, type);
+ return notify_device_state(skb, seq, device, &device_info, type);
}
-void notify_peer_device_state_change(struct sk_buff *skb,
+int notify_peer_device_state_change(struct sk_buff *skb,
unsigned int seq,
struct drbd_peer_device_state_change *p,
enum drbd_notification_type type)
@@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb,
.peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
};
- notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
+ return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
}
static void broadcast_state_change(struct drbd_state_change *state_change)
@@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
bool resource_state_has_changed;
unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
- void (*last_func)(struct sk_buff *, unsigned int, void *,
+ int (*last_func)(struct sk_buff *, unsigned int, void *,
enum drbd_notification_type) = NULL;
void *last_arg = NULL;
@@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
conn_free_crypto(connection);
mutex_unlock(&connection->resource->conf_update);
- synchronize_rcu();
- kfree(old_conf);
+ kvfree_rcu(old_conf);
}
if (ns_max.susp_fen) {
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
index ba80f612d6ab..d5b0479bc9a6 100644
--- a/drivers/block/drbd/drbd_state_change.h
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_
extern void copy_old_to_new_state_change(struct drbd_state_change *);
extern void forget_state_change(struct drbd_state_change *);
-extern void notify_resource_state_change(struct sk_buff *,
+extern int notify_resource_state_change(struct sk_buff *,
unsigned int,
struct drbd_resource_state_change *,
enum drbd_notification_type type);
-extern void notify_connection_state_change(struct sk_buff *,
+extern int notify_connection_state_change(struct sk_buff *,
unsigned int,
struct drbd_connection_state_change *,
enum drbd_notification_type type);
-extern void notify_device_state_change(struct sk_buff *,
+extern int notify_device_state_change(struct sk_buff *,
unsigned int,
struct drbd_device_state_change *,
enum drbd_notification_type type);
-extern void notify_peer_device_state_change(struct sk_buff *,
+extern int notify_peer_device_state_change(struct sk_buff *,
unsigned int,
struct drbd_peer_device_state_change *,
enum drbd_notification_type type);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 64563bfdf0da..0bb1a900c2d5 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -326,14 +326,9 @@ void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
bio_for_each_segment(bvec, bio, iter) {
u8 *src;
- src = kmap_atomic(bvec.bv_page);
- crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len);
- kunmap_atomic(src);
-
- /* REQ_OP_WRITE_SAME has only one segment,
- * checksum the payload only once. */
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
- break;
+ src = bvec_kmap_local(&bvec);
+ crypto_shash_update(desc, src, bvec.bv_len);
+ kunmap_local(src);
}
crypto_shash_final(desc, digest);
shash_desc_zero(desc);
@@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
spin_unlock_irq(&device->resource->req_lock);
atomic_add(size >> 9, &device->rs_sect_ev);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
+ if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ,
DRBD_FAULT_RS_RD) == 0)
return 0;
@@ -1035,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
{
if (drbd_peer_req_has_active_page(peer_req)) {
/* This might happen if sendpage() has not finished */
- int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ int i = PFN_UP(peer_req->i.size);
atomic_add(i, &device->pp_in_use_by_net);
atomic_sub(i, &device->pp_in_use);
spin_lock_irq(&device->resource->req_lock);
@@ -1523,9 +1518,9 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
drbd_al_begin_io(device, &req->i);
- req->private_bio = bio_clone_fast(req->master_bio, GFP_NOIO,
+ req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
+ req->master_bio, GFP_NOIO,
&drbd_io_bio_set);
- bio_set_dev(req->private_bio, device->ldev->backing_bdev);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
submit_bio_noacct(req->private_bio);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index c4267da716fe..ccad3d7b3ddd 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -509,8 +509,8 @@ static unsigned long fdc_busy;
static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
static DECLARE_WAIT_QUEUE_HEAD(command_done);
-/* Errors during formatting are counted here. */
-static int format_errors;
+/* errors encountered on the current (or last) request */
+static int floppy_errors;
/* Format request descriptor. */
static struct format_descr format_req;
@@ -530,7 +530,6 @@ static struct format_descr format_req;
static char *floppy_track_buffer;
static int max_buffer_sectors;
-static int *errors;
typedef void (*done_f)(int);
static const struct cont_t {
void (*interrupt)(void);
@@ -1015,7 +1014,7 @@ static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn);
static void cancel_activity(void)
{
do_floppy = NULL;
- cancel_delayed_work_sync(&fd_timer);
+ cancel_delayed_work(&fd_timer);
cancel_work_sync(&floppy_work);
}
@@ -1455,7 +1454,7 @@ static int interpret_errors(void)
if (drive_params[current_drive].flags & FTD_MSG)
DPRINT("Over/Underrun - retrying\n");
bad = 0;
- } else if (*errors >= drive_params[current_drive].max_errors.reporting) {
+ } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) {
print_errors();
}
if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
@@ -2095,7 +2094,7 @@ static void bad_flp_intr(void)
if (!next_valid_format(current_drive))
return;
}
- err_count = ++(*errors);
+ err_count = ++floppy_errors;
INFBOUND(write_errors[current_drive].badness, err_count);
if (err_count > drive_params[current_drive].max_errors.abort)
cont->done(0);
@@ -2241,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
return -EINVAL;
}
format_req = *tmp_format_req;
- format_errors = 0;
cont = &format_cont;
- errors = &format_errors;
+ floppy_errors = 0;
ret = wait_til_done(redo_format, true);
if (ret == -EINTR)
return -EINTR;
@@ -2259,7 +2257,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
static void floppy_end_request(struct request *req, blk_status_t error)
{
unsigned int nr_sectors = current_count_sectors;
- unsigned int drive = (unsigned long)req->rq_disk->private_data;
+ unsigned int drive = (unsigned long)req->q->disk->private_data;
/* current_count_sectors can be zero if transfer failed */
if (error)
@@ -2485,11 +2483,9 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
}
if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
- memcpy_to_page(bv.bv_page, bv.bv_offset, dma_buffer,
- size);
+ memcpy_to_bvec(&bv, dma_buffer);
else
- memcpy_from_page(dma_buffer, bv.bv_page, bv.bv_offset,
- size);
+ memcpy_from_bvec(dma_buffer, &bv);
remaining -= size;
dma_buffer += size;
@@ -2550,7 +2546,7 @@ static int make_raw_rw_request(void)
if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n"))
return 0;
- set_fdc((long)current_req->rq_disk->private_data);
+ set_fdc((long)current_req->q->disk->private_data);
raw_cmd = &default_raw_cmd;
raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK;
@@ -2761,10 +2757,11 @@ static int set_next_request(void)
current_req = list_first_entry_or_null(&floppy_reqs, struct request,
queuelist);
if (current_req) {
- current_req->error_count = 0;
+ floppy_errors = 0;
list_del_init(&current_req->queuelist);
+ return 1;
}
- return current_req != NULL;
+ return 0;
}
/* Starts or continues processing request. Will automatically unlock the
@@ -2792,7 +2789,7 @@ do_request:
return;
}
}
- drive = (long)current_req->rq_disk->private_data;
+ drive = (long)current_req->q->disk->private_data;
set_fdc(drive);
reschedule_timeout(current_drive, "redo fd request");
@@ -2823,7 +2820,6 @@ do_request:
_floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
} else
probing = 0;
- errors = &(current_req->error_count);
tmp = make_raw_rw_request();
if (tmp < 2) {
request_done(tmp);
@@ -2863,7 +2859,7 @@ static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx,
if (WARN(atomic_read(&usage_count) == 0,
"warning: usage count=0, current_req=%p sect=%ld flags=%llx\n",
current_req, (long)blk_rq_pos(current_req),
- (unsigned long long) current_req->cmd_flags))
+ (__force unsigned long long) current_req->cmd_flags))
return BLK_STS_IOERR;
if (test_and_set_bit(0, &fdc_busy)) {
@@ -2984,6 +2980,8 @@ static const char *drive_name(int type, int drive)
return "(null)";
}
+#ifdef CONFIG_BLK_DEV_FD_RAWCMD
+
/* raw commands */
static void raw_cmd_done(int flag)
{
@@ -3081,6 +3079,8 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr)
}
}
+#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT)
+
static int raw_cmd_copyin(int cmd, void __user *param,
struct floppy_raw_cmd **rcmd)
{
@@ -3108,7 +3108,7 @@ loop:
ptr->resultcode = 0;
if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
- if (ptr->length <= 0)
+ if (ptr->length <= 0 || ptr->length >= MAX_LEN)
return -EINVAL;
ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length);
fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length);
@@ -3181,6 +3181,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
return ret;
}
+static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
+ void __user *param)
+{
+ int ret;
+
+ pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n");
+
+ if (type)
+ return -EINVAL;
+ if (lock_fdc(drive))
+ return -EINTR;
+ set_floppy(drive);
+ ret = raw_cmd_ioctl(cmd, param);
+ if (ret == -EINTR)
+ return -EINTR;
+ process_fd_request();
+ return ret;
+}
+
+#else /* CONFIG_BLK_DEV_FD_RAWCMD */
+
+static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
+ void __user *param)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
+
static int invalidate_drive(struct block_device *bdev)
{
/* invalidate the buffer track to force a reread */
@@ -3369,7 +3398,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
{
int drive = (long)bdev->bd_disk->private_data;
int type = ITYPE(drive_state[drive].fd_device);
- int i;
int ret;
int size;
union inparam {
@@ -3520,16 +3548,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
outparam = &write_errors[drive];
break;
case FDRAWCMD:
- if (type)
- return -EINVAL;
- if (lock_fdc(drive))
- return -EINTR;
- set_floppy(drive);
- i = raw_cmd_ioctl(cmd, (void __user *)param);
- if (i == -EINTR)
- return -EINTR;
- process_fd_request();
- return i;
+ return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param);
case FDTWADDLE:
if (lock_fdc(drive))
return -EINTR;
@@ -4127,15 +4146,13 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
- bio_init(&bio, &bio_vec, 1);
- bio_set_dev(&bio, bdev);
+ bio_init(&bio, bdev, &bio_vec, 1, REQ_OP_READ);
bio_add_page(&bio, page, block_size(bdev), 0);
bio.bi_iter.bi_sector = 0;
bio.bi_flags |= (1 << BIO_QUIET);
bio.bi_private = &cbdata;
bio.bi_end_io = floppy_rb0_cb;
- bio_set_op_attrs(&bio, REQ_OP_READ, 0);
init_completion(&cbdata.complete);
@@ -4503,6 +4520,7 @@ static int floppy_alloc_disk(unsigned int drive, unsigned int type)
disk->first_minor = TOMINOR(drive) | (type << 2);
disk->minors = 1;
disk->fops = &floppy_fops;
+ disk->flags |= GENHD_FL_NO_PART;
disk->events = DISK_EVENT_MEDIA_CHANGE;
if (type)
sprintf(disk->disk_name, "fd%d_type%d", drive, type);
@@ -4539,7 +4557,7 @@ out:
return;
cleanup_disk:
- blk_cleanup_disk(disks[drive][type]);
+ put_disk(disks[drive][type]);
disks[drive][type] = NULL;
mutex_unlock(&floppy_probe_lock);
}
@@ -4735,7 +4753,7 @@ out_put_disk:
if (!disks[drive][0])
break;
del_timer_sync(&motor_off_timer[drive]);
- blk_cleanup_disk(disks[drive][0]);
+ put_disk(disks[drive][0]);
blk_mq_free_tag_set(&tag_sets[drive]);
}
return err;
@@ -4967,7 +4985,7 @@ static void __exit floppy_module_exit(void)
}
for (i = 0; i < ARRAY_SIZE(floppy_type); i++) {
if (disks[drive][i])
- blk_cleanup_disk(disks[drive][i]);
+ put_disk(disks[drive][i]);
}
blk_mq_free_tag_set(&tag_sets[drive]);
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c3a36cfaa855..ad92192c7d61 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1,54 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/drivers/block/loop.c
- *
- * Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
- * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Make real block number available to downstream transfer functions, enables
- * CBC (and relatives) mode encryption requiring unique IVs per data block.
- * Reed H. Petty, rhp@draper.net
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations write_begin is not available on the backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
+ * Copyright 1993 by Theodore Ts'o.
*/
-
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
@@ -59,7 +12,6 @@
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
-#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
@@ -79,12 +31,66 @@
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
+#include <linux/statfs.h>
+#include <linux/uaccess.h>
+#include <linux/blk-mq.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/loop.h>
+
+/* Possible states of device */
+enum {
+ Lo_unbound,
+ Lo_bound,
+ Lo_rundown,
+ Lo_deleting,
+};
-#include "loop.h"
+struct loop_func_table;
+
+struct loop_device {
+ int lo_number;
+ loff_t lo_offset;
+ loff_t lo_sizelimit;
+ int lo_flags;
+ char lo_file_name[LO_NAME_SIZE];
+
+ struct file * lo_backing_file;
+ struct block_device *lo_device;
+
+ gfp_t old_gfp_mask;
+
+ spinlock_t lo_lock;
+ int lo_state;
+ spinlock_t lo_work_lock;
+ struct workqueue_struct *workqueue;
+ struct work_struct rootcg_work;
+ struct list_head rootcg_cmd_list;
+ struct list_head idle_worker_list;
+ struct rb_root worker_tree;
+ struct timer_list timer;
+ bool use_dio;
+ bool sysfs_inited;
+
+ struct request_queue *lo_queue;
+ struct blk_mq_tag_set tag_set;
+ struct gendisk *lo_disk;
+ struct mutex lo_mutex;
+ bool idr_visible;
+};
-#include <linux/uaccess.h>
+struct loop_cmd {
+ struct list_head list_entry;
+ bool use_aio; /* use AIO interface to handle I/O */
+ atomic_t ref; /* only for aio */
+ long ret;
+ struct kiocb iocb;
+ struct bio_vec *bvec;
+ struct cgroup_subsys_state *blkcg_css;
+ struct cgroup_subsys_state *memcg_css;
+};
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
+#define LOOP_DEFAULT_HW_Q_DEPTH (128)
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
@@ -184,8 +190,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
*/
if (dio) {
if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
- !(lo->lo_offset & dio_align) &&
- mapping->a_ops->direct_IO)
+ !(lo->lo_offset & dio_align) &&
+ (file->f_mode & FMODE_CAN_ODIRECT))
use_dio = true;
else
use_dio = false;
@@ -308,27 +314,22 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
* a.k.a. discard/zerorange.
*/
struct file *file = lo->lo_backing_file;
- struct request_queue *q = lo->lo_queue;
int ret;
mode |= FALLOC_FL_KEEP_SIZE;
- if (!blk_queue_discard(q)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (!bdev_max_discard_sectors(lo->lo_device))
+ return -EOPNOTSUPP;
ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
- ret = -EIO;
- out:
+ return -EIO;
return ret;
}
static int lo_req_flush(struct loop_device *lo, struct request *rq)
{
- struct file *file = lo->lo_backing_file;
- int ret = vfs_fsync(file, 0);
+ int ret = vfs_fsync(lo->lo_backing_file, 0);
if (unlikely(ret && ret != -EINVAL))
ret = -EIO;
@@ -572,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
if (!file)
return -EBADF;
+
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
is_loop = is_loop_device(file);
error = loop_global_lock_killable(lo, is_loop);
if (error)
@@ -626,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
fput(old_file);
if (partscan)
loop_reread_partitions(lo);
- return 0;
+
+ error = 0;
+done:
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+ return error;
out_err:
loop_global_unlock(lo, is_loop);
out_putf:
fput(file);
- return error;
+ goto done;
}
/* loop sysfs attributes */
@@ -680,33 +690,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
- return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
}
static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
{
int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
- return sprintf(buf, "%s\n", partscan ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
}
static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
{
int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
- return sprintf(buf, "%s\n", dio ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
@@ -762,7 +772,7 @@ static void loop_config_discard(struct loop_device *lo)
struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
- granularity = backingq->limits.discard_granularity ?:
+ granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
queue_physical_block_size(backingq);
/*
@@ -774,22 +784,24 @@ static void loop_config_discard(struct loop_device *lo)
granularity = 0;
} else {
+ struct kstatfs sbuf;
+
max_discard_sectors = UINT_MAX >> 9;
- granularity = inode->i_sb->s_blocksize;
+ if (!vfs_statfs(&file->f_path, &sbuf))
+ granularity = sbuf.f_bsize;
+ else
+ max_discard_sectors = 0;
}
if (max_discard_sectors) {
q->limits.discard_granularity = granularity;
blk_queue_max_discard_sectors(q, max_discard_sectors);
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
} else {
q->limits.discard_granularity = 0;
blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
}
- q->limits.discard_alignment = 0;
}
struct loop_worker {
@@ -803,8 +815,6 @@ struct loop_worker {
};
static void loop_workfn(struct work_struct *work);
-static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
#ifdef CONFIG_BLK_CGROUP
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -820,7 +830,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
- struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+ struct rb_node **node, *parent = NULL;
struct loop_worker *cur_worker, *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
@@ -888,6 +898,39 @@ queue_work:
spin_unlock_irq(&lo->lo_work_lock);
}
+static void loop_set_timer(struct loop_device *lo)
+{
+ timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
+{
+ struct loop_worker *pos, *worker;
+
+ spin_lock_irq(&lo->lo_work_lock);
+ list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+ idle_list) {
+ if (!delete_all &&
+ time_is_after_jiffies(worker->last_ran_at +
+ LOOP_IDLE_WORKER_TIMEOUT))
+ break;
+ list_del(&worker->idle_list);
+ rb_erase(&worker->rb_node, &lo->worker_tree);
+ css_put(worker->blkcg_css);
+ kfree(worker);
+ }
+ if (!list_empty(&lo->idle_worker_list))
+ loop_set_timer(lo);
+ spin_unlock_irq(&lo->lo_work_lock);
+}
+
+static void loop_free_idle_workers_timer(struct timer_list *timer)
+{
+ struct loop_device *lo = container_of(timer, struct loop_device, timer);
+
+ return loop_free_idle_workers(lo, false);
+}
+
static void loop_update_rotational(struct loop_device *lo)
{
struct file *file = lo->lo_backing_file;
@@ -898,7 +941,7 @@ static void loop_update_rotational(struct loop_device *lo)
/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
if (file_bdev)
- nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+ nonrot = bdev_nonrot(file_bdev);
if (nonrot)
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@ -936,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo,
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
+ /* loff_t vars have been assigned __u64 */
+ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
+ return -EOVERFLOW;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_flags = info->lo_flags;
@@ -962,6 +1010,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@@ -1006,24 +1057,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
- lo->workqueue = alloc_workqueue("loop%d",
- WQ_UNBOUND | WQ_FREEZABLE,
- 0,
- lo->lo_number);
if (!lo->workqueue) {
- error = -ENOMEM;
- goto out_unlock;
+ lo->workqueue = alloc_workqueue("loop%d",
+ WQ_UNBOUND | WQ_FREEZABLE,
+ 0, lo->lo_number);
+ if (!lo->workqueue) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
}
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
- INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
- INIT_LIST_HEAD(&lo->rootcg_cmd_list);
- INIT_LIST_HEAD(&lo->idle_worker_list);
- lo->worker_tree = RB_ROOT;
- timer_setup(&lo->timer, loop_free_idle_workers,
- TIMER_DEFERRABLE);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
@@ -1061,14 +1107,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
lo->lo_flags |= LO_FLAGS_PARTSCAN;
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
if (partscan)
- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+ clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
- return 0;
+
+ error = 0;
+done:
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+ return error;
out_unlock:
loop_global_unlock(lo, is_loop);
@@ -1079,61 +1130,27 @@ out_putf:
fput(file);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- return error;
+ goto done;
}
-static int __loop_clr_fd(struct loop_device *lo, bool release)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
{
- struct file *filp = NULL;
+ struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- int err = 0;
- bool partscan = false;
- int lo_number;
- struct loop_worker *pos, *worker;
-
- /*
- * Flush loop_configure() and loop_change_fd(). It is acceptable for
- * loop_validate_file() to succeed, for actual clear operation has not
- * started yet.
- */
- mutex_lock(&loop_validate_mutex);
- mutex_unlock(&loop_validate_mutex);
- /*
- * loop_validate_file() now fails because l->lo_state != Lo_bound
- * became visible.
- */
-
- mutex_lock(&lo->lo_mutex);
- if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
- err = -ENXIO;
- goto out_unlock;
- }
-
- filp = lo->lo_backing_file;
- if (filp == NULL) {
- err = -EINVAL;
- goto out_unlock;
- }
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
blk_queue_write_cache(lo->lo_queue, false, false);
- /* freeze request queue during the transition */
- blk_mq_freeze_queue(lo->lo_queue);
-
- destroy_workqueue(lo->workqueue);
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- spin_unlock_irq(&lo->lo_work_lock);
- del_timer_sync(&lo->timer);
+ /*
+ * Freeze the request queue when unbinding on a live file descriptor and
+ * thus an open device. When called from ->release we are guaranteed
+ * that there is no I/O in progress already.
+ */
+ if (!release)
+ blk_mq_freeze_queue(lo->lo_queue);
spin_lock_irq(&lo->lo_lock);
+ filp = lo->lo_backing_file;
lo->lo_backing_file = NULL;
spin_unlock_irq(&lo->lo_lock);
@@ -1151,14 +1168,14 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (!release)
+ blk_mq_unfreeze_queue(lo->lo_queue);
- partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
- lo_number = lo->lo_number;
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
-out_unlock:
- mutex_unlock(&lo->lo_mutex);
- if (partscan) {
+
+ if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
+ int err;
+
/*
* open_mutex has been held already in release path, so don't
* acquire it if this function is called in such case.
@@ -1174,24 +1191,20 @@ out_unlock:
mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
- __func__, lo_number, err);
+ __func__, lo->lo_number, err);
/* Device is gone, no point in returning error */
- err = 0;
}
/*
* lo->lo_state is set to Lo_unbound here after above partscan has
- * finished.
- *
- * There cannot be anybody else entering __loop_clr_fd() as
- * lo->lo_backing_file is already cleared and Lo_rundown state
- * protects us from all the other places trying to change the 'lo'
- * device.
+ * finished. There cannot be anybody else entering __loop_clr_fd() as
+ * Lo_rundown state protects us from all the other places trying to
+ * change the 'lo' device.
*/
- mutex_lock(&lo->lo_mutex);
lo->lo_flags = 0;
if (!part_shift)
- lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+ set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
+ mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
@@ -1200,20 +1213,27 @@ out_unlock:
* lo_mutex triggers a circular lock dependency possibility warning as
* fput can take open_mutex which is usually taken before lo_mutex.
*/
- if (filp)
- fput(filp);
- return err;
+ fput(filp);
}
static int loop_clr_fd(struct loop_device *lo)
{
int err;
- err = mutex_lock_killable(&lo->lo_mutex);
+ /*
+ * Since lo_ioctl() is called without locks held, it is possible that
+ * loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
+ *
+ * Therefore, use global lock when setting Lo_rundown state in order to
+ * make sure that loop_validate_file() will fail if the "struct file"
+ * which loop_configure()/loop_change_fd() found via fget() was this
+ * loop device.
+ */
+ err = loop_global_lock_killable(lo, true);
if (err)
return err;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
return -ENXIO;
}
/*
@@ -1226,15 +1246,16 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
- if (atomic_read(&lo->lo_refcnt) > 1) {
+ if (disk_openers(lo->lo_disk) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
return 0;
}
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
- return __loop_clr_fd(lo, false);
+ __loop_clr_fd(lo, false);
+ return 0;
}
static int
@@ -1263,15 +1284,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
- if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
- /* If any pages were dirtied after invalidate_bdev(), try again */
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
prev_lo_flags = lo->lo_flags;
err = loop_set_status_from_info(lo, info);
@@ -1301,7 +1313,7 @@ out_unfreeze:
if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
!(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+ clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
partscan = true;
}
out_unlock:
@@ -1482,21 +1494,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
-
- /* invalidate_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
return err;
@@ -1597,6 +1598,7 @@ struct compat_loop_info {
compat_ulong_t lo_inode; /* ioctl r/o */
compat_dev_t lo_rdevice; /* ioctl r/o */
compat_int_t lo_offset;
+ compat_int_t lo_encrypt_type; /* obsolete, ignored */
compat_int_t lo_encrypt_key_size; /* ioctl w/o */
compat_int_t lo_flags; /* ioctl r/o */
char lo_name[LO_NAME_SIZE];
@@ -1725,33 +1727,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
- struct loop_device *lo = bdev->bd_disk->private_data;
- int err;
-
- err = mutex_lock_killable(&lo->lo_mutex);
- if (err)
- return err;
- if (lo->lo_state == Lo_deleting)
- err = -ENXIO;
- else
- atomic_inc(&lo->lo_refcnt);
- mutex_unlock(&lo->lo_mutex);
- return err;
-}
-
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
- mutex_lock(&lo->lo_mutex);
- if (atomic_dec_return(&lo->lo_refcnt))
- goto out_unlock;
+ if (disk_openers(disk) > 0)
+ return;
- if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
- if (lo->lo_state != Lo_bound)
- goto out_unlock;
+ mutex_lock(&lo->lo_mutex);
+ if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
/*
@@ -1760,27 +1744,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
*/
__loop_clr_fd(lo, true);
return;
- } else if (lo->lo_state == Lo_bound) {
- /*
- * Otherwise keep thread (if running) and config,
- * but flush possible ongoing bios in thread.
- */
- blk_mq_freeze_queue(lo->lo_queue);
- blk_mq_unfreeze_queue(lo->lo_queue);
}
-
-out_unlock:
mutex_unlock(&lo->lo_mutex);
}
+static void lo_free_disk(struct gendisk *disk)
+{
+ struct loop_device *lo = disk->private_data;
+
+ if (lo->workqueue)
+ destroy_workqueue(lo->workqueue);
+ loop_free_idle_workers(lo, true);
+ del_timer_sync(&lo->timer);
+ mutex_destroy(&lo->lo_mutex);
+ kfree(lo);
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
- .open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = lo_compat_ioctl,
#endif
+ .free_disk = lo_free_disk,
};
/*
@@ -1791,6 +1778,24 @@ module_param(max_loop, int, 0444);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
+
+static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH;
+
+static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
+{
+ int ret = kstrtoint(s, 10, &hw_queue_depth);
+
+ return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0;
+}
+
+static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
+ .set = loop_set_hw_queue_depth,
+ .get = param_get_int,
+};
+
+device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128");
+
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
@@ -1821,12 +1826,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
cmd->blkcg_css = NULL;
cmd->memcg_css = NULL;
#ifdef CONFIG_BLK_CGROUP
- if (rq->bio && rq->bio->bi_blkg) {
- cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+ if (rq->bio) {
+ cmd->blkcg_css = bio_blkcg_css(rq->bio);
#ifdef CONFIG_MEMCG
- cmd->memcg_css =
- cgroup_get_e_css(cmd->blkcg_css->cgroup,
- &memory_cgrp_subsys);
+ if (cmd->blkcg_css) {
+ cmd->memcg_css =
+ cgroup_get_e_css(cmd->blkcg_css->cgroup,
+ &memory_cgrp_subsys);
+ }
#endif
}
#endif
@@ -1875,11 +1882,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
}
-static void loop_set_timer(struct loop_device *lo)
-{
- timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
static void loop_process_work(struct loop_worker *worker,
struct list_head *cmd_list, struct loop_device *lo)
{
@@ -1928,27 +1930,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
}
-static void loop_free_idle_workers(struct timer_list *timer)
-{
- struct loop_device *lo = container_of(timer, struct loop_device, timer);
- struct loop_worker *pos, *worker;
-
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- if (time_is_after_jiffies(worker->last_ran_at +
- LOOP_IDLE_WORKER_TIMEOUT))
- break;
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- if (!list_empty(&lo->idle_worker_list))
- loop_set_timer(lo);
- spin_unlock_irq(&lo->lo_work_lock);
-}
-
static const struct blk_mq_ops loop_mq_ops = {
.queue_rq = loop_queue_rq,
.complete = lo_complete_rq,
@@ -1964,6 +1945,9 @@ static int loop_add(int i)
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
if (!lo)
goto out;
+ lo->worker_tree = RB_ROOT;
+ INIT_LIST_HEAD(&lo->idle_worker_list);
+ timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
lo->lo_state = Lo_unbound;
err = mutex_lock_killable(&loop_ctl_mutex);
@@ -1985,7 +1969,7 @@ static int loop_add(int i)
lo->tag_set.ops = &loop_mq_ops;
lo->tag_set.nr_hw_queues = 1;
- lo->tag_set.queue_depth = 128;
+ lo->tag_set.queue_depth = hw_queue_depth;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
@@ -2032,13 +2016,13 @@ static int loop_add(int i)
* userspace tools. Parameters like this in general should be avoided.
*/
if (!part_shift)
- disk->flags |= GENHD_FL_NO_PART_SCAN;
- disk->flags |= GENHD_FL_EXT_DEVT;
- atomic_set(&lo->lo_refcnt, 0);
+ set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
mutex_init(&lo->lo_mutex);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
spin_lock_init(&lo->lo_work_lock);
+ INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
+ INIT_LIST_HEAD(&lo->rootcg_cmd_list);
disk->major = LOOP_MAJOR;
disk->first_minor = i << part_shift;
disk->minors = 1 << part_shift;
@@ -2061,7 +2045,7 @@ static int loop_add(int i)
return i;
out_cleanup_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_cleanup_tags:
blk_mq_free_tag_set(&lo->tag_set);
out_free_idr:
@@ -2078,14 +2062,13 @@ static void loop_remove(struct loop_device *lo)
{
/* Make this loop device unreachable from pathname. */
del_gendisk(lo->lo_disk);
- blk_cleanup_disk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
+
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
- /* There is no route which can find this loop device. */
- mutex_destroy(&lo->lo_mutex);
- kfree(lo);
+
+ put_disk(lo->lo_disk);
}
static void loop_probe(dev_t dev)
@@ -2124,13 +2107,12 @@ static int loop_control_remove(int idx)
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
goto mark_visible;
- if (lo->lo_state != Lo_unbound ||
- atomic_read(&lo->lo_refcnt) > 0) {
+ if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
mutex_unlock(&lo->lo_mutex);
ret = -EBUSY;
goto mark_visible;
}
- /* Mark this loop device no longer open()-able. */
+ /* Mark this loop device as no more bound, but not quite unbound yet */
lo->lo_state = Lo_deleting;
mutex_unlock(&lo->lo_mutex);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
deleted file mode 100644
index 082d4b6bfc6a..000000000000
--- a/drivers/block/loop.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
- */
-#ifndef _LINUX_LOOP_H
-#define _LINUX_LOOP_H
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <uapi/linux/loop.h>
-
-/* Possible states of device */
-enum {
- Lo_unbound,
- Lo_bound,
- Lo_rundown,
- Lo_deleting,
-};
-
-struct loop_func_table;
-
-struct loop_device {
- int lo_number;
- atomic_t lo_refcnt;
- loff_t lo_offset;
- loff_t lo_sizelimit;
- int lo_flags;
- char lo_file_name[LO_NAME_SIZE];
-
- struct file * lo_backing_file;
- struct block_device *lo_device;
-
- gfp_t old_gfp_mask;
-
- spinlock_t lo_lock;
- int lo_state;
- spinlock_t lo_work_lock;
- struct workqueue_struct *workqueue;
- struct work_struct rootcg_work;
- struct list_head rootcg_cmd_list;
- struct list_head idle_worker_list;
- struct rb_root worker_tree;
- struct timer_list timer;
- bool use_dio;
- bool sysfs_inited;
-
- struct request_queue *lo_queue;
- struct blk_mq_tag_set tag_set;
- struct gendisk *lo_disk;
- struct mutex lo_mutex;
- bool idr_visible;
-};
-
-struct loop_cmd {
- struct list_head list_entry;
- bool use_aio; /* use AIO interface to handle I/O */
- atomic_t ref; /* only for aio */
- long ret;
- struct kiocb iocb;
- struct bio_vec *bvec;
- struct cgroup_subsys_state *blkcg_css;
- struct cgroup_subsys_state *memcg_css;
-};
-
-#endif
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index c91b9010c1a6..815d77ba6381 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -19,7 +19,6 @@
#include <linux/compat.h>
#include <linux/fs.h>
#include <linux/module.h>
-#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
@@ -95,17 +94,12 @@
/* Device instance number, incremented each time a device is probed. */
static int instance;
-static LIST_HEAD(online_list);
-static LIST_HEAD(removing_list);
-static DEFINE_SPINLOCK(dev_lock);
-
/*
* Global variable used to hold the major block device number
* allocated in mtip_init().
*/
static int mtip_major;
static struct dentry *dfs_parent;
-static struct dentry *dfs_device_status;
static u32 cpu_use[NR_CPUS];
@@ -136,23 +130,19 @@ struct mtip_compat_ide_task_request_s {
* return value
* true if device removed, else false
*/
-static bool mtip_check_surprise_removal(struct pci_dev *pdev)
+static bool mtip_check_surprise_removal(struct driver_data *dd)
{
u16 vendor_id = 0;
- struct driver_data *dd = pci_get_drvdata(pdev);
if (dd->sr)
return true;
/* Read the vendorID from the configuration space */
- pci_read_config_word(pdev, 0x00, &vendor_id);
+ pci_read_config_word(dd->pdev, 0x00, &vendor_id);
if (vendor_id == 0xFFFF) {
dd->sr = true;
- if (dd->queue)
- blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue);
- else
- dev_warn(&dd->pdev->dev,
- "%s: dd->queue is NULL\n", __func__);
+ if (dd->disk)
+ blk_mark_disk_dead(dd->disk);
return true; /* device removed */
}
@@ -162,9 +152,7 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
unsigned int tag)
{
- struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
-
- return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(hctx->tags, tag));
+ return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(dd->tags.tags[0], tag));
}
/*
@@ -447,7 +435,7 @@ static int mtip_device_reset(struct driver_data *dd)
{
int rv = 0;
- if (mtip_check_surprise_removal(dd->pdev))
+ if (mtip_check_surprise_removal(dd))
return 0;
if (mtip_hba_reset(dd) < 0)
@@ -727,7 +715,7 @@ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
dev_warn(&dd->pdev->dev,
"Port stat errors %x unhandled\n",
(port_stat & ~PORT_IRQ_HANDLED));
- if (mtip_check_surprise_removal(dd->pdev))
+ if (mtip_check_surprise_removal(dd))
return;
}
if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) {
@@ -752,7 +740,7 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
/* Acknowledge the interrupt status on the port.*/
port_stat = readl(port->mmio + PORT_IRQ_STAT);
if (unlikely(port_stat == 0xFFFFFFFF)) {
- mtip_check_surprise_removal(dd->pdev);
+ mtip_check_surprise_removal(dd);
return IRQ_HANDLED;
}
writel(port_stat, port->mmio + PORT_IRQ_STAT);
@@ -796,7 +784,7 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
}
if (unlikely(port_stat & PORT_IRQ_ERR)) {
- if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
+ if (unlikely(mtip_check_surprise_removal(dd))) {
/* don't proceed further */
return IRQ_HANDLED;
}
@@ -915,7 +903,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
msleep(100);
- if (mtip_check_surprise_removal(port->dd->pdev))
+ if (mtip_check_surprise_removal(port->dd))
goto err_fault;
active = mtip_commands_active(port);
@@ -980,7 +968,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
return -EFAULT;
}
- if (mtip_check_surprise_removal(dd->pdev))
+ if (mtip_check_surprise_removal(dd))
return -EFAULT;
rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
@@ -1015,14 +1003,14 @@ static int mtip_exec_internal_command(struct mtip_port *port,
rq->timeout = timeout;
/* insert request and run queue */
- blk_execute_rq(NULL, rq, true);
+ blk_execute_rq(rq, true);
if (int_cmd->status) {
dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
fis->command, int_cmd->status);
rv = -EIO;
- if (mtip_check_surprise_removal(dd->pdev) ||
+ if (mtip_check_surprise_removal(dd) ||
test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag)) {
dev_err(&dd->pdev->dev,
@@ -1409,15 +1397,15 @@ static void mtip_dump_identify(struct mtip_port *port)
if (!port->identify_valid)
return;
- strlcpy(cbuf, (char *)(port->identify+10), 21);
+ strscpy(cbuf, (char *)(port->identify + 10), 21);
dev_info(&port->dd->pdev->dev,
"Serial No.: %s\n", cbuf);
- strlcpy(cbuf, (char *)(port->identify+23), 9);
+ strscpy(cbuf, (char *)(port->identify + 23), 9);
dev_info(&port->dd->pdev->dev,
"Firmware Ver.: %s\n", cbuf);
- strlcpy(cbuf, (char *)(port->identify+27), 41);
+ strscpy(cbuf, (char *)(port->identify + 27), 41);
dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
@@ -1433,13 +1421,13 @@ static void mtip_dump_identify(struct mtip_port *port)
pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
switch (revid & 0xFF) {
case 0x1:
- strlcpy(cbuf, "A0", 3);
+ strscpy(cbuf, "A0", 3);
break;
case 0x3:
- strlcpy(cbuf, "A2", 3);
+ strscpy(cbuf, "A2", 3);
break;
default:
- strlcpy(cbuf, "?", 2);
+ strscpy(cbuf, "?", 2);
break;
}
dev_info(&port->dd->pdev->dev,
@@ -2174,106 +2162,6 @@ static const struct attribute_group *mtip_disk_attr_groups[] = {
NULL,
};
-/* debugsfs entries */
-
-static ssize_t show_device_status(struct device_driver *drv, char *buf)
-{
- int size = 0;
- struct driver_data *dd, *tmp;
- unsigned long flags;
- char id_buf[42];
- u16 status = 0;
-
- spin_lock_irqsave(&dev_lock, flags);
- size += sprintf(&buf[size], "Devices Present:\n");
- list_for_each_entry_safe(dd, tmp, &online_list, online_list) {
- if (dd->pdev) {
- if (dd->port &&
- dd->port->identify &&
- dd->port->identify_valid) {
- strlcpy(id_buf,
- (char *) (dd->port->identify + 10), 21);
- status = *(dd->port->identify + 141);
- } else {
- memset(id_buf, 0, 42);
- status = 0;
- }
-
- if (dd->port &&
- test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
- size += sprintf(&buf[size],
- " device %s %s (ftl rebuild %d %%)\n",
- dev_name(&dd->pdev->dev),
- id_buf,
- status);
- } else {
- size += sprintf(&buf[size],
- " device %s %s\n",
- dev_name(&dd->pdev->dev),
- id_buf);
- }
- }
- }
-
- size += sprintf(&buf[size], "Devices Being Removed:\n");
- list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) {
- if (dd->pdev) {
- if (dd->port &&
- dd->port->identify &&
- dd->port->identify_valid) {
- strlcpy(id_buf,
- (char *) (dd->port->identify+10), 21);
- status = *(dd->port->identify + 141);
- } else {
- memset(id_buf, 0, 42);
- status = 0;
- }
-
- if (dd->port &&
- test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
- size += sprintf(&buf[size],
- " device %s %s (ftl rebuild %d %%)\n",
- dev_name(&dd->pdev->dev),
- id_buf,
- status);
- } else {
- size += sprintf(&buf[size],
- " device %s %s\n",
- dev_name(&dd->pdev->dev),
- id_buf);
- }
- }
- }
- spin_unlock_irqrestore(&dev_lock, flags);
-
- return size;
-}
-
-static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
- size_t len, loff_t *offset)
-{
- int size = *offset;
- char *buf;
- int rv = 0;
-
- if (!len || *offset)
- return 0;
-
- buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- size += show_device_status(NULL, buf);
-
- *offset = size <= len ? size : len;
- size = copy_to_user(ubuf, buf, *offset);
- if (size)
- rv = -EFAULT;
-
- kfree(buf);
- return rv ? rv : *offset;
-}
-
static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
size_t len, loff_t *offset)
{
@@ -2367,13 +2255,6 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
return rv ? rv : *offset;
}
-static const struct file_operations mtip_device_status_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .read = mtip_hw_read_device_status,
- .llseek = no_llseek,
-};
-
static const struct file_operations mtip_regs_fops = {
.owner = THIS_MODULE,
.open = simple_open,
@@ -2513,7 +2394,7 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag)))
return -EFAULT;
- if (mtip_check_surprise_removal(dd->pdev))
+ if (mtip_check_surprise_removal(dd))
return -EFAULT;
if (mtip_get_identify(dd->port, NULL) < 0)
@@ -2560,7 +2441,7 @@ static void mtip_softirq_done_fn(struct request *rq)
blk_mq_end_request(rq, cmd->status);
}
-static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_abort_cmd(struct request *req, void *data)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
struct driver_data *dd = data;
@@ -2573,7 +2454,7 @@ static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
return true;
}
-static bool mtip_queue_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_queue_cmd(struct request *req, void *data)
{
struct driver_data *dd = data;
@@ -2733,7 +2614,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
{
struct mtip_port *port = dd->port;
- /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
+ /* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
port->block1 =
dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
&port->block1_dma, GFP_KERNEL);
@@ -2891,7 +2772,7 @@ static int mtip_hw_init(struct driver_data *dd)
time_before(jiffies, timeout)) {
mdelay(100);
}
- if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
+ if (unlikely(mtip_check_surprise_removal(dd))) {
timetaken = jiffies - timetaken;
dev_warn(&dd->pdev->dev,
"Surprise removal detected at %u ms\n",
@@ -3301,26 +3182,12 @@ static int mtip_block_getgeo(struct block_device *dev,
return 0;
}
-static int mtip_block_open(struct block_device *dev, fmode_t mode)
+static void mtip_block_free_disk(struct gendisk *disk)
{
- struct driver_data *dd;
-
- if (dev && dev->bd_disk) {
- dd = (struct driver_data *) dev->bd_disk->private_data;
+ struct driver_data *dd = disk->private_data;
- if (dd) {
- if (test_bit(MTIP_DDF_REMOVAL_BIT,
- &dd->dd_flag)) {
- return -ENODEV;
- }
- return 0;
- }
- }
- return -ENODEV;
-}
-
-static void mtip_block_release(struct gendisk *disk, fmode_t mode)
-{
+ ida_free(&rssd_index_ida, dd->index);
+ kfree(dd);
}
/*
@@ -3330,13 +3197,12 @@ static void mtip_block_release(struct gendisk *disk, fmode_t mode)
* layer.
*/
static const struct block_device_operations mtip_block_ops = {
- .open = mtip_block_open,
- .release = mtip_block_release,
.ioctl = mtip_block_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = mtip_block_compat_ioctl,
#endif
.getgeo = mtip_block_getgeo,
+ .free_disk = mtip_block_free_disk,
.owner = THIS_MODULE
};
@@ -3491,12 +3357,11 @@ static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq,
return 0;
}
-static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
- bool reserved)
+static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req)
{
struct driver_data *dd = req->q->queuedata;
- if (reserved) {
+ if (blk_mq_is_reserved_rq(req)) {
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
cmd->status = BLK_STS_TIMEOUT;
@@ -3668,7 +3533,7 @@ init_hw_cmds_error:
disk_index_error:
ida_free(&rssd_index_ida, index);
ida_get_error:
- blk_cleanup_disk(dd->disk);
+ put_disk(dd->disk);
block_queue_alloc_init_error:
blk_mq_free_tag_set(&dd->tags);
block_queue_alloc_tag_error:
@@ -3677,72 +3542,6 @@ protocol_init_error:
return rv;
}
-static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
-{
- struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-
- cmd->status = BLK_STS_IOERR;
- blk_mq_complete_request(rq);
- return true;
-}
-
-/*
- * Block layer deinitialization function.
- *
- * Called by the PCI layer as each P320 device is removed.
- *
- * @dd Pointer to the driver data structure.
- *
- * return value
- * 0
- */
-static int mtip_block_remove(struct driver_data *dd)
-{
- mtip_hw_debugfs_exit(dd);
-
- if (dd->mtip_svc_handler) {
- set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
- wake_up_interruptible(&dd->port->svc_wait);
- kthread_stop(dd->mtip_svc_handler);
- }
-
- if (!dd->sr) {
- /*
- * Explicitly wait here for IOs to quiesce,
- * as mtip_standby_drive usually won't wait for IOs.
- */
- if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS))
- mtip_standby_drive(dd);
- }
- else
- dev_info(&dd->pdev->dev, "device %s surprise removal\n",
- dd->disk->disk_name);
-
- blk_freeze_queue_start(dd->queue);
- blk_mq_quiesce_queue(dd->queue);
- blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
- blk_mq_unquiesce_queue(dd->queue);
-
- if (dd->disk) {
- if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
- del_gendisk(dd->disk);
- if (dd->disk->queue) {
- blk_cleanup_queue(dd->queue);
- blk_mq_free_tag_set(&dd->tags);
- dd->queue = NULL;
- }
- put_disk(dd->disk);
- }
- dd->disk = NULL;
-
- ida_free(&rssd_index_ida, dd->index);
-
- /* De-initialize the protocol layer. */
- mtip_hw_exit(dd);
-
- return 0;
-}
-
/*
* Function called by the PCI layer when just before the
* machine shuts down.
@@ -3759,23 +3558,14 @@ static int mtip_block_shutdown(struct driver_data *dd)
{
mtip_hw_shutdown(dd);
- /* Delete our gendisk structure, and cleanup the blk queue. */
- if (dd->disk) {
- dev_info(&dd->pdev->dev,
- "Shutting down %s ...\n", dd->disk->disk_name);
+ dev_info(&dd->pdev->dev,
+ "Shutting down %s ...\n", dd->disk->disk_name);
- if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
- del_gendisk(dd->disk);
- if (dd->disk->queue) {
- blk_cleanup_queue(dd->queue);
- blk_mq_free_tag_set(&dd->tags);
- }
- put_disk(dd->disk);
- dd->disk = NULL;
- dd->queue = NULL;
- }
+ if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
+ del_gendisk(dd->disk);
- ida_free(&rssd_index_ida, dd->index);
+ blk_mq_free_tag_set(&dd->tags);
+ put_disk(dd->disk);
return 0;
}
@@ -3909,7 +3699,6 @@ static int mtip_pci_probe(struct pci_dev *pdev,
const struct cpumask *node_mask;
int cpu, i = 0, j = 0;
int my_node = NUMA_NO_NODE;
- unsigned long flags;
/* Allocate memory for this devices private data. */
my_node = pcibus_to_node(pdev->bus);
@@ -3956,9 +3745,6 @@ static int mtip_pci_probe(struct pci_dev *pdev,
dd->pdev = pdev;
dd->numa_node = my_node;
- INIT_LIST_HEAD(&dd->online_list);
- INIT_LIST_HEAD(&dd->remove_list);
-
memset(dd->workq_name, 0, 32);
snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
@@ -4051,11 +3837,6 @@ static int mtip_pci_probe(struct pci_dev *pdev,
else
rv = 0; /* device in rebuild state, return 0 from probe */
- /* Add to online list even if in ftl rebuild */
- spin_lock_irqsave(&dev_lock, flags);
- list_add(&dd->online_list, &online_list);
- spin_unlock_irqrestore(&dev_lock, flags);
-
goto done;
block_initialize_err:
@@ -4089,16 +3870,9 @@ done:
static void mtip_pci_remove(struct pci_dev *pdev)
{
struct driver_data *dd = pci_get_drvdata(pdev);
- unsigned long flags, to;
-
- set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag);
-
- spin_lock_irqsave(&dev_lock, flags);
- list_del_init(&dd->online_list);
- list_add(&dd->remove_list, &removing_list);
- spin_unlock_irqrestore(&dev_lock, flags);
+ unsigned long to;
- mtip_check_surprise_removal(pdev);
+ mtip_check_surprise_removal(dd);
synchronize_irq(dd->pdev->irq);
/* Spin until workers are done */
@@ -4113,11 +3887,35 @@ static void mtip_pci_remove(struct pci_dev *pdev)
"Completion workers still active!\n");
}
- blk_set_queue_dying(dd->queue);
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
- /* Clean up the block layer. */
- mtip_block_remove(dd);
+ if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
+ del_gendisk(dd->disk);
+
+ mtip_hw_debugfs_exit(dd);
+
+ if (dd->mtip_svc_handler) {
+ set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
+ wake_up_interruptible(&dd->port->svc_wait);
+ kthread_stop(dd->mtip_svc_handler);
+ }
+
+ if (!dd->sr) {
+ /*
+ * Explicitly wait here for IOs to quiesce,
+ * as mtip_standby_drive usually won't wait for IOs.
+ */
+ if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS))
+ mtip_standby_drive(dd);
+ }
+ else
+ dev_info(&dd->pdev->dev, "device %s surprise removal\n",
+ dd->disk->disk_name);
+
+ blk_mq_free_tag_set(&dd->tags);
+
+ /* De-initialize the protocol layer. */
+ mtip_hw_exit(dd);
if (dd->isr_workq) {
destroy_workqueue(dd->isr_workq);
@@ -4128,14 +3926,10 @@ static void mtip_pci_remove(struct pci_dev *pdev)
pci_disable_msi(pdev);
- spin_lock_irqsave(&dev_lock, flags);
- list_del_init(&dd->remove_list);
- spin_unlock_irqrestore(&dev_lock, flags);
-
- kfree(dd);
-
pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
pci_set_drvdata(pdev, NULL);
+
+ put_disk(dd->disk);
}
/*
@@ -4145,36 +3939,17 @@ static void mtip_pci_remove(struct pci_dev *pdev)
* 0 Success
* <0 Error
*/
-static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
+static int __maybe_unused mtip_pci_suspend(struct device *dev)
{
int rv = 0;
- struct driver_data *dd = pci_get_drvdata(pdev);
-
- if (!dd) {
- dev_err(&pdev->dev,
- "Driver private datastructure is NULL\n");
- return -EFAULT;
- }
+ struct driver_data *dd = dev_get_drvdata(dev);
set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
/* Disable ports & interrupts then send standby immediate */
rv = mtip_block_suspend(dd);
- if (rv < 0) {
- dev_err(&pdev->dev,
- "Failed to suspend controller\n");
- return rv;
- }
-
- /*
- * Save the pci config space to pdev structure &
- * disable the device
- */
- pci_save_state(pdev);
- pci_disable_device(pdev);
-
- /* Move to Low power state*/
- pci_set_power_state(pdev, PCI_D3hot);
+ if (rv < 0)
+ dev_err(dev, "Failed to suspend controller\n");
return rv;
}
@@ -4186,32 +3961,10 @@ static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
* 0 Success
* <0 Error
*/
-static int mtip_pci_resume(struct pci_dev *pdev)
+static int __maybe_unused mtip_pci_resume(struct device *dev)
{
int rv = 0;
- struct driver_data *dd;
-
- dd = pci_get_drvdata(pdev);
- if (!dd) {
- dev_err(&pdev->dev,
- "Driver private datastructure is NULL\n");
- return -EFAULT;
- }
-
- /* Move the device to active State */
- pci_set_power_state(pdev, PCI_D0);
-
- /* Restore PCI configuration space */
- pci_restore_state(pdev);
-
- /* Enable the PCI device*/
- rv = pcim_enable_device(pdev);
- if (rv < 0) {
- dev_err(&pdev->dev,
- "Failed to enable card during resume\n");
- goto err;
- }
- pci_set_master(pdev);
+ struct driver_data *dd = dev_get_drvdata(dev);
/*
* Calls hbaReset, initPort, & startPort function
@@ -4219,9 +3972,8 @@ static int mtip_pci_resume(struct pci_dev *pdev)
*/
rv = mtip_block_resume(dd);
if (rv < 0)
- dev_err(&pdev->dev, "Unable to resume\n");
+ dev_err(dev, "Unable to resume\n");
-err:
clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
return rv;
@@ -4252,14 +4004,15 @@ static const struct pci_device_id mtip_pci_tbl[] = {
{ 0 }
};
+static SIMPLE_DEV_PM_OPS(mtip_pci_pm_ops, mtip_pci_suspend, mtip_pci_resume);
+
/* Structure that describes the PCI driver functions. */
static struct pci_driver mtip_pci_driver = {
.name = MTIP_DRV_NAME,
.id_table = mtip_pci_tbl,
.probe = mtip_pci_probe,
.remove = mtip_pci_remove,
- .suspend = mtip_pci_suspend,
- .resume = mtip_pci_resume,
+ .driver.pm = &mtip_pci_pm_ops,
.shutdown = mtip_pci_shutdown,
};
@@ -4295,15 +4048,6 @@ static int __init mtip_init(void)
pr_warn("Error creating debugfs parent\n");
dfs_parent = NULL;
}
- if (dfs_parent) {
- dfs_device_status = debugfs_create_file("device_status",
- 0444, dfs_parent, NULL,
- &mtip_device_status_fops);
- if (IS_ERR_OR_NULL(dfs_device_status)) {
- pr_err("Error creating device_status node\n");
- dfs_device_status = NULL;
- }
- }
/* Register our PCI operations. */
error = pci_register_driver(&mtip_pci_driver);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 88f4206310e4..f7328f19ac5c 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -15,7 +15,6 @@
#include <linux/rwsem.h>
#include <linux/ata.h>
#include <linux/interrupt.h>
-#include <linux/genhd.h>
/* Offset of Subsystem Device ID in pci confoguration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E
@@ -150,7 +149,6 @@ enum {
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
MTIP_DDF_REBUILD_FAILED_BIT = 8,
- MTIP_DDF_REMOVAL_BIT = 9,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |
@@ -463,10 +461,6 @@ struct driver_data {
int isr_binding;
- struct list_head online_list; /* linkage for online list */
-
- struct list_head remove_list; /* linkage for removing list */
-
int unal_qdepth; /* qdepth of unaligned IO queue */
};
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 78282f01f581..d914156db2d8 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -88,7 +88,7 @@ static void n64cart_submit_bio(struct bio *bio)
{
struct bio_vec bvec;
struct bvec_iter iter;
- struct device *dev = bio->bi_disk->private_data;
+ struct device *dev = bio->bi_bdev->bd_disk->private_data;
u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT;
bio_for_each_segment(bvec, bio, iter) {
@@ -136,7 +136,7 @@ static int __init n64cart_probe(struct platform_device *pdev)
goto out;
disk->first_minor = 0;
- disk->flags = GENHD_FL_NO_PART_SCAN;
+ disk->flags = GENHD_FL_NO_PART;
disk->fops = &n64cart_fops;
disk->private_data = &pdev->dev;
strcpy(disk->disk_name, "n64cart");
@@ -157,7 +157,7 @@ static int __init n64cart_probe(struct platform_device *pdev)
return 0;
out_cleanup_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out:
return err;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5a1f98494ddd..5cffd96ef2d7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -11,6 +11,8 @@
* (part of code stolen from loop.c)
*/
+#define pr_fmt(fmt) "nbd: " fmt
+
#include <linux/major.h>
#include <linux/blkdev.h>
@@ -155,8 +157,6 @@ static struct dentry *nbd_dbg_dir;
#define nbd_name(nbd) ((nbd)->disk->disk_name)
-#define NBD_MAGIC 0x68797548
-
#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
@@ -250,7 +250,7 @@ static void nbd_dev_remove(struct nbd_device *nbd)
struct gendisk *disk = nbd->disk;
del_gendisk(disk);
- blk_cleanup_disk(disk);
+ put_disk(disk);
blk_mq_free_tag_set(&nbd->tag_set);
/*
@@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
nbd->disk->queue->limits.discard_granularity = blksize;
- nbd->disk->queue->limits.discard_alignment = blksize;
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
}
blk_queue_logical_block_size(nbd->disk->queue, blksize);
@@ -394,8 +393,7 @@ static u32 req_to_nbd_cmd_type(struct request *req)
}
}
-static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
- bool reserved)
+static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
struct nbd_device *nbd = cmd->nbd;
@@ -404,13 +402,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
- if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
mutex_unlock(&cmd->lock);
return BLK_EH_DONE;
}
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
goto done;
}
@@ -479,6 +478,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
nbd_config_put(nbd);
@@ -746,7 +746,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
- if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
tag, cmd->status, cmd->flags);
ret = -ENOENT;
@@ -855,8 +855,16 @@ static void recv_work(struct work_struct *work)
}
rq = blk_mq_rq_from_pdu(cmd);
- if (likely(!blk_should_fake_timeout(rq->q)))
- blk_mq_complete_request(rq);
+ if (likely(!blk_should_fake_timeout(rq->q))) {
+ bool complete;
+
+ mutex_lock(&cmd->lock);
+ complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+ &cmd->flags);
+ mutex_unlock(&cmd->lock);
+ if (complete)
+ blk_mq_complete_request(rq);
+ }
percpu_ref_put(&q->q_usage_counter);
}
@@ -871,7 +879,7 @@ static void recv_work(struct work_struct *work)
kfree(args);
}
-static bool nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
@@ -947,11 +955,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
- if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
+
+ if (!wait_event_timeout(config->conn_wait,
+ test_bit(NBD_RT_DISCONNECTED,
+ &config->runtime_flags) ||
+ atomic_read(&config->live_connections) > 0,
+ config->dead_conn_timeout))
return 0;
- return wait_event_timeout(config->conn_wait,
- atomic_read(&config->live_connections) > 0,
- config->dead_conn_timeout) > 0;
+
+ return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
}
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -1217,11 +1229,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
return -ENOSPC;
}
-static void nbd_bdev_reset(struct block_device *bdev)
+static void nbd_bdev_reset(struct nbd_device *nbd)
{
- if (bdev->bd_openers > 1)
+ if (disk_openers(nbd->disk) > 1)
return;
- set_capacity(bdev->bd_disk, 0);
+ set_capacity(nbd->disk, 0);
}
static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1231,8 +1243,6 @@ static void nbd_parse_flags(struct nbd_device *nbd)
set_disk_ro(nbd->disk, true);
else
set_disk_ro(nbd->disk, false);
- if (config->flags & NBD_FLAG_SEND_TRIM)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
if (config->flags & NBD_FLAG_SEND_FLUSH) {
if (config->flags & NBD_FLAG_SEND_FUA)
blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1318,9 +1328,7 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->tag_set.timeout = 0;
nbd->disk->queue->limits.discard_granularity = 0;
- nbd->disk->queue->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+ blk_queue_max_discard_sectors(nbd->disk->queue, 0);
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
@@ -1389,7 +1397,7 @@ static int nbd_start_device(struct nbd_device *nbd)
return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
-static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device_ioctl(struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
int ret;
@@ -1403,12 +1411,14 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0);
- if (ret)
+ if (ret) {
sock_shutdown(nbd);
- flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ }
+ flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
- nbd_bdev_reset(bdev);
+ nbd_bdev_reset(nbd);
/* user requested, ignore socket errors */
if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
@@ -1420,9 +1430,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
struct block_device *bdev)
{
- sock_shutdown(nbd);
+ nbd_clear_sock(nbd);
__invalidate_device(bdev, true);
- nbd_bdev_reset(bdev);
+ nbd_bdev_reset(nbd);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
@@ -1468,7 +1478,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
config->flags = arg;
return 0;
case NBD_DO_IT:
- return nbd_start_device_ioctl(nbd, bdev);
+ return nbd_start_device_ioctl(nbd);
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
@@ -1519,15 +1529,20 @@ static struct nbd_config *nbd_alloc_config(void)
{
struct nbd_config *config;
+ if (!try_module_get(THIS_MODULE))
+ return ERR_PTR(-ENODEV);
+
config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
- if (!config)
- return NULL;
+ if (!config) {
+ module_put(THIS_MODULE);
+ return ERR_PTR(-ENOMEM);
+ }
+
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
- try_module_get(THIS_MODULE);
return config;
}
@@ -1554,12 +1569,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
mutex_unlock(&nbd->config_lock);
goto out;
}
- config = nbd->config = nbd_alloc_config();
- if (!config) {
- ret = -ENOMEM;
+ config = nbd_alloc_config();
+ if (IS_ERR(config)) {
+ ret = PTR_ERR(config);
mutex_unlock(&nbd->config_lock);
goto out;
}
+ nbd->config = config;
refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
@@ -1579,7 +1595,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
struct nbd_device *nbd = disk->private_data;
if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
- disk->part0->bd_openers == 0)
+ disk_openers(disk) == 0)
nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
@@ -1784,7 +1800,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 0;
- disk->queue->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(disk->queue, 0);
blk_queue_max_segment_size(disk->queue, UINT_MAX);
blk_queue_max_segments(disk->queue, USHRT_MAX);
@@ -1800,17 +1815,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
refcount_set(&nbd->refs, 0);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;
-
- /* Too big first_minor can cause duplicate creation of
- * sysfs files/links, since index << part_shift might overflow, or
- * MKDEV() expect that the max bits of first_minor is 20.
- */
disk->first_minor = index << part_shift;
- if (disk->first_minor < index || disk->first_minor > MINORMASK) {
- err = -EINVAL;
- goto out_free_work;
- }
-
disk->minors = 1 << part_shift;
disk->fops = &nbd_fops;
disk->private_data = nbd;
@@ -1829,7 +1834,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
out_free_work:
destroy_workqueue(nbd->recv_workq);
out_err_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_free_idr:
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
@@ -1915,14 +1920,25 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- if (info->attrs[NBD_ATTR_INDEX])
+ if (info->attrs[NBD_ATTR_INDEX]) {
index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+
+ /*
+ * Too big first_minor can cause duplicate creation of
+ * sysfs files/links, since index << part_shift might overflow, or
+ * MKDEV() expect that the max bits of first_minor is 20.
+ */
+ if (index < 0 || index > MINORMASK >> part_shift) {
+ pr_err("illegal input index %d\n", index);
+ return -EINVAL;
+ }
+ }
if (!info->attrs[NBD_ATTR_SOCKETS]) {
- printk(KERN_ERR "nbd: must specify at least one socket\n");
+ pr_err("must specify at least one socket\n");
return -EINVAL;
}
if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
- printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
+ pr_err("must specify a size in bytes for the device\n");
return -EINVAL;
}
again:
@@ -1936,7 +1952,7 @@ again:
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- pr_err("nbd: device at index %d is going down\n",
+ pr_err("device at index %d is going down\n",
index);
return -EINVAL;
}
@@ -1947,7 +1963,7 @@ again:
if (!nbd) {
nbd = nbd_dev_add(index, 2);
if (IS_ERR(nbd)) {
- pr_err("nbd: failed to add new device\n");
+ pr_err("failed to add new device\n");
return PTR_ERR(nbd);
}
}
@@ -1958,7 +1974,7 @@ again:
nbd_put(nbd);
if (index == -1)
goto again;
- printk(KERN_ERR "nbd: nbd%d already in use\n", index);
+ pr_err("nbd%d already in use\n", index);
return -EBUSY;
}
if (WARN_ON(nbd->config)) {
@@ -1966,13 +1982,14 @@ again:
nbd_put(nbd);
return -EINVAL;
}
- config = nbd->config = nbd_alloc_config();
- if (!nbd->config) {
+ config = nbd_alloc_config();
+ if (IS_ERR(config)) {
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
- printk(KERN_ERR "nbd: couldn't allocate config\n");
- return -ENOMEM;
+ pr_err("couldn't allocate config\n");
+ return PTR_ERR(config);
}
+ nbd->config = config;
refcount_set(&nbd->config_refs, 1);
set_bit(NBD_RT_BOUND, &config->runtime_flags);
@@ -2025,7 +2042,7 @@ again:
struct nlattr *socks[NBD_SOCK_MAX+1];
if (nla_type(attr) != NBD_SOCK_ITEM) {
- printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+ pr_err("socks must be embedded in a SOCK_ITEM attr\n");
ret = -EINVAL;
goto out;
}
@@ -2034,7 +2051,7 @@ again:
nbd_sock_policy,
info->extack);
if (ret != 0) {
- printk(KERN_ERR "nbd: error processing sock list\n");
+ pr_err("error processing sock list\n");
ret = -EINVAL;
goto out;
}
@@ -2082,6 +2099,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
sock_shutdown(nbd);
+ wake_up(&nbd->config->conn_wait);
/*
* Make sure recv thread has finished, we can safely call nbd_clear_que()
* to cancel the inflight I/Os.
@@ -2105,7 +2123,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
return -EPERM;
if (!info->attrs[NBD_ATTR_INDEX]) {
- printk(KERN_ERR "nbd: must specify an index to disconnect\n");
+ pr_err("must specify an index to disconnect\n");
return -EINVAL;
}
index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
@@ -2113,14 +2131,12 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
nbd = idr_find(&nbd_index_idr, index);
if (!nbd) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: couldn't find device at index %d\n",
- index);
+ pr_err("couldn't find device at index %d\n", index);
return -EINVAL;
}
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: device at index %d is going down\n",
- index);
+ pr_err("device at index %d is going down\n", index);
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);
@@ -2145,7 +2161,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
return -EPERM;
if (!info->attrs[NBD_ATTR_INDEX]) {
- printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
+ pr_err("must specify a device to reconfigure\n");
return -EINVAL;
}
index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
@@ -2153,8 +2169,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
nbd = idr_find(&nbd_index_idr, index);
if (!nbd) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
- index);
+ pr_err("couldn't find a device at index %d\n", index);
return -EINVAL;
}
if (nbd->backend) {
@@ -2175,8 +2190,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
}
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: device at index %d is going down\n",
- index);
+ pr_err("device at index %d is going down\n", index);
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);
@@ -2240,7 +2254,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
struct nlattr *socks[NBD_SOCK_MAX+1];
if (nla_type(attr) != NBD_SOCK_ITEM) {
- printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+ pr_err("socks must be embedded in a SOCK_ITEM attr\n");
ret = -EINVAL;
goto out;
}
@@ -2249,7 +2263,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
nbd_sock_policy,
info->extack);
if (ret != 0) {
- printk(KERN_ERR "nbd: error processing sock list\n");
+ pr_err("error processing sock list\n");
ret = -EINVAL;
goto out;
}
@@ -2308,6 +2322,7 @@ static struct genl_family nbd_genl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = nbd_connect_genl_ops,
.n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
+ .resv_start_op = NBD_CMD_STATUS + 1,
.maxattr = NBD_ATTR_MAX,
.policy = nbd_attr_policy,
.mcgrps = nbd_mcast_grps,
@@ -2466,7 +2481,7 @@ static int __init nbd_init(void)
BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
if (max_part < 0) {
- printk(KERN_ERR "nbd: max_part must be >= 0\n");
+ pr_err("max_part must be >= 0\n");
return -EINVAL;
}
@@ -2529,6 +2544,12 @@ static void __exit nbd_cleanup(void)
struct nbd_device *nbd;
LIST_HEAD(del_list);
+ /*
+ * Unregister netlink interface prior to waiting
+ * for the completion of netlink commands.
+ */
+ genl_unregister_family(&nbd_genl_family);
+
nbd_dbg_close();
mutex_lock(&nbd_index_mutex);
@@ -2538,8 +2559,11 @@ static void __exit nbd_cleanup(void)
while (!list_empty(&del_list)) {
nbd = list_first_entry(&del_list, struct nbd_device, list);
list_del_init(&nbd->list);
+ if (refcount_read(&nbd->config_refs))
+ pr_err("possibly leaking nbd_config (ref %d)\n",
+ refcount_read(&nbd->config_refs));
if (refcount_read(&nbd->refs) != 1)
- printk(KERN_ERR "nbd: possibly leaking a device\n");
+ pr_err("possibly leaking a device\n");
nbd_put(nbd);
}
@@ -2547,7 +2571,6 @@ static void __exit nbd_cleanup(void)
destroy_workqueue(nbd_del_wq);
idr_destroy(&nbd_index_idr);
- genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
}
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 323af5c9c802..1f154f92f4c2 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -11,6 +11,9 @@
#include <linux/init.h>
#include "null_blk.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "null_blk: " fmt
+
#define FREE_BATCH 16
#define TICKS_PER_SEC 50ULL
@@ -74,12 +77,6 @@ enum {
NULL_IRQ_TIMER = 2,
};
-enum {
- NULL_Q_BIO = 0,
- NULL_Q_RQ = 1,
- NULL_Q_MQ = 2,
-};
-
static bool g_virt_boundary = false;
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
@@ -204,6 +201,22 @@ static bool g_use_per_node_hctx;
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+static bool g_memory_backed;
+module_param_named(memory_backed, g_memory_backed, bool, 0444);
+MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");
+
+static bool g_discard;
+module_param_named(discard, g_discard, bool, 0444);
+MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");
+
+static unsigned long g_cache_size;
+module_param_named(cache_size, g_cache_size, ulong, 0444);
+MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+
+static unsigned int g_mbps;
+module_param_named(mbps, g_mbps, uint, 0444);
+MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
+
static bool g_zoned;
module_param_named(zoned, g_zoned, bool, S_IRUGO);
MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
@@ -232,6 +245,7 @@ static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
static int null_add_dev(struct nullb_device *dev);
+static struct nullb *null_find_dev_by_name(const char *name);
static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
static inline struct nullb_device *to_nullb_device(struct config_item *item)
@@ -340,9 +354,9 @@ static int nullb_update_nr_hw_queues(struct nullb_device *dev,
return 0;
/*
- * Make sure at least one queue exists for each of submit and poll.
+ * Make sure at least one submit queue exists.
*/
- if (!submit_queues || !poll_queues)
+ if (!submit_queues)
return -EINVAL;
/*
@@ -411,6 +425,8 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
+NULLB_DEVICE_ATTR(no_sched, bool, NULL);
+NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -431,9 +447,10 @@ static ssize_t nullb_device_power_store(struct config_item *item,
if (!dev->power && newp) {
if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
return count;
- if (null_add_dev(dev)) {
+ ret = null_add_dev(dev);
+ if (ret) {
clear_bit(NULLB_DEV_FL_UP, &dev->flags);
- return -ENOMEM;
+ return ret;
}
set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
@@ -533,6 +550,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
&nullb_device_attr_virt_boundary,
+ &nullb_device_attr_no_sched,
+ &nullb_device_attr_shared_tag_bitmap,
NULL,
};
@@ -559,6 +578,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
{
struct nullb_device *dev;
+ if (null_find_dev_by_name(name))
+ return ERR_PTR(-EEXIST);
+
dev = null_alloc_dev();
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -586,7 +608,13 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE,
- "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
+ "badblocks,blocking,blocksize,cache_size,"
+ "completion_nsec,discard,home_node,hw_queue_depth,"
+ "irqmode,max_sectors,mbps,memory_backed,no_sched,"
+ "poll_queues,power,queue_mode,shared_tag_bitmap,size,"
+ "submit_queues,use_per_node_hctx,virt_boundary,zoned,"
+ "zone_capacity,zone_max_active,zone_max_open,"
+ "zone_nr_conv,zone_size\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -648,6 +676,10 @@ static struct nullb_device *null_alloc_dev(void)
dev->irqmode = g_irqmode;
dev->hw_queue_depth = g_hw_queue_depth;
dev->blocking = g_blocking;
+ dev->memory_backed = g_memory_backed;
+ dev->discard = g_discard;
+ dev->cache_size = g_cache_size;
+ dev->mbps = g_mbps;
dev->use_per_node_hctx = g_use_per_node_hctx;
dev->zoned = g_zoned;
dev->zone_size = g_zone_size;
@@ -656,6 +688,8 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
dev->virt_boundary = g_virt_boundary;
+ dev->no_sched = g_no_sched;
+ dev->shared_tag_bitmap = g_shared_tag_bitmap;
return dev;
}
@@ -719,26 +753,25 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
return NULL;
}
-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
+static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
{
struct nullb_cmd *cmd;
DEFINE_WAIT(wait);
- cmd = __alloc_cmd(nq);
- if (cmd || !can_wait)
- return cmd;
-
do {
- prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
+ /*
+ * This avoids multiple return statements, multiple calls to
+ * __alloc_cmd() and a fast path call to prepare_to_wait().
+ */
cmd = __alloc_cmd(nq);
- if (cmd)
- break;
-
+ if (cmd) {
+ cmd->bio = bio;
+ return cmd;
+ }
+ prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
io_schedule();
+ finish_wait(&nq->wait, &wait);
} while (1);
-
- finish_wait(&nq->wait, &wait);
- return cmd;
}
static void end_cmd(struct nullb_cmd *cmd)
@@ -777,24 +810,22 @@ static void null_complete_rq(struct request *rq)
end_cmd(blk_mq_rq_to_pdu(rq));
}
-static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
+static struct nullb_page *null_alloc_page(void)
{
struct nullb_page *t_page;
- t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+ t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
if (!t_page)
- goto out;
+ return NULL;
- t_page->page = alloc_pages(gfp_flags, 0);
- if (!t_page->page)
- goto out_freepage;
+ t_page->page = alloc_pages(GFP_NOIO, 0);
+ if (!t_page->page) {
+ kfree(t_page);
+ return NULL;
+ }
memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
return t_page;
-out_freepage:
- kfree(t_page);
-out:
- return NULL;
}
static void null_free_page(struct nullb_page *t_page)
@@ -932,7 +963,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb,
spin_unlock_irq(&nullb->lock);
- t_page = null_alloc_page(GFP_NOIO);
+ t_page = null_alloc_page();
if (!t_page)
goto out_lock;
@@ -1311,7 +1342,7 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
}
static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
- enum req_opf op,
+ enum req_op op,
sector_t sector,
sector_t nr_sectors)
{
@@ -1382,9 +1413,8 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
}
}
-blk_status_t null_process_cmd(struct nullb_cmd *cmd,
- enum req_opf op, sector_t sector,
- unsigned int nr_sectors)
+blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
blk_status_t ret;
@@ -1402,7 +1432,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd,
}
static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
- sector_t nr_sectors, enum req_opf op)
+ sector_t nr_sectors, enum req_op op)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
@@ -1476,12 +1506,8 @@ static void null_submit_bio(struct bio *bio)
sector_t nr_sectors = bio_sectors(bio);
struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
struct nullb_queue *nq = nullb_to_queue(nullb);
- struct nullb_cmd *cmd;
-
- cmd = alloc_cmd(nq, 1);
- cmd->bio = bio;
- null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
+ null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
}
static bool should_timeout_request(struct request *rq)
@@ -1502,7 +1528,7 @@ static bool should_requeue_request(struct request *rq)
return false;
}
-static int null_map_queues(struct blk_mq_tag_set *set)
+static void null_map_queues(struct blk_mq_tag_set *set)
{
struct nullb *nullb = set->driver_data;
int i, qoff;
@@ -1529,7 +1555,9 @@ static int null_map_queues(struct blk_mq_tag_set *set)
} else {
pr_warn("tag set has unexpected nr_hw_queues: %d\n",
set->nr_hw_queues);
- return -EINVAL;
+ WARN_ON_ONCE(true);
+ submit_queues = 1;
+ poll_queues = 0;
}
}
@@ -1551,8 +1579,6 @@ static int null_map_queues(struct blk_mq_tag_set *set)
qoff += map->nr_queues;
blk_mq_map_queues(map);
}
-
- return 0;
}
static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
@@ -1574,14 +1600,16 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
cmd = blk_mq_rq_to_pdu(req);
cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
blk_rq_sectors(req));
- end_cmd(cmd);
+ if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
+ blk_mq_end_request_batch))
+ end_cmd(cmd);
nr++;
}
return nr;
}
-static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
+static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
@@ -1604,7 +1632,7 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
* Only fake timeouts need to execute blk_mq_complete_request() here.
*/
cmd->error = BLK_STS_TIMEOUT;
- if (cmd->fake_timeout)
+ if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL)
blk_mq_complete_request(rq);
return BLK_EH_DONE;
}
@@ -1659,7 +1687,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
static void cleanup_queue(struct nullb_queue *nq)
{
- kfree(nq->tag_map);
+ bitmap_free(nq->tag_map);
kfree(nq->cmds);
}
@@ -1740,7 +1768,7 @@ static void null_del_dev(struct nullb *nullb)
null_restart_queue_async(nullb);
}
- blk_cleanup_disk(nullb->disk);
+ put_disk(nullb->disk);
if (dev->queue_mode == NULL_Q_MQ &&
nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
@@ -1769,9 +1797,7 @@ static void null_config_discard(struct nullb *nullb)
}
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
- nullb->q->limits.discard_alignment = nullb->dev->blocksize;
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
}
static const struct block_device_operations null_bio_ops = {
@@ -1788,14 +1814,13 @@ static const struct block_device_operations null_rq_ops = {
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
- int i, tag_size;
+ int i;
nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
if (!nq->cmds)
return -ENOMEM;
- tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
- nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
+ nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
if (!nq->tag_map) {
kfree(nq->cmds);
return -ENOMEM;
@@ -1850,7 +1875,6 @@ static int null_gendisk_register(struct nullb *nullb)
set_capacity(disk, size);
- disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->major = null_major;
disk->first_minor = nullb->index;
disk->minors = 1;
@@ -1873,31 +1897,48 @@ static int null_gendisk_register(struct nullb *nullb)
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{
+ unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
+ int hw_queues, numa_node;
+ unsigned int queue_depth;
int poll_queues;
+ if (nullb) {
+ hw_queues = nullb->dev->submit_queues;
+ poll_queues = nullb->dev->poll_queues;
+ queue_depth = nullb->dev->hw_queue_depth;
+ numa_node = nullb->dev->home_node;
+ if (nullb->dev->no_sched)
+ flags |= BLK_MQ_F_NO_SCHED;
+ if (nullb->dev->shared_tag_bitmap)
+ flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (nullb->dev->blocking)
+ flags |= BLK_MQ_F_BLOCKING;
+ } else {
+ hw_queues = g_submit_queues;
+ poll_queues = g_poll_queues;
+ queue_depth = g_hw_queue_depth;
+ numa_node = g_home_node;
+ if (g_no_sched)
+ flags |= BLK_MQ_F_NO_SCHED;
+ if (g_shared_tag_bitmap)
+ flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (g_blocking)
+ flags |= BLK_MQ_F_BLOCKING;
+ }
+
set->ops = &null_mq_ops;
- set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
- g_submit_queues;
- poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
- if (poll_queues)
- set->nr_hw_queues += poll_queues;
- set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
- g_hw_queue_depth;
- set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size = sizeof(struct nullb_cmd);
- set->flags = BLK_MQ_F_SHOULD_MERGE;
- if (g_no_sched)
- set->flags |= BLK_MQ_F_NO_SCHED;
- if (g_shared_tag_bitmap)
- set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ set->flags = flags;
set->driver_data = nullb;
- if (g_poll_queues)
+ set->nr_hw_queues = hw_queues;
+ set->queue_depth = queue_depth;
+ set->numa_node = numa_node;
+ if (poll_queues) {
+ set->nr_hw_queues += poll_queues;
set->nr_maps = 3;
- else
+ } else {
set->nr_maps = 1;
-
- if ((nullb && nullb->dev->blocking) || g_blocking)
- set->flags |= BLK_MQ_F_BLOCKING;
+ }
return blk_mq_alloc_tag_set(set);
}
@@ -1918,8 +1959,6 @@ static int null_validate_conf(struct nullb_device *dev)
if (dev->poll_queues > g_poll_queues)
dev->poll_queues = g_poll_queues;
- else if (dev->poll_queues == 0)
- dev->poll_queues = 1;
dev->prev_poll_queues = dev->poll_queues;
dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
@@ -2051,8 +2090,13 @@ static int null_add_dev(struct nullb_device *dev)
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
- nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
- dev->index = nullb->index;
+ rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ if (rv < 0) {
+ mutex_unlock(&lock);
+ goto out_cleanup_zone;
+ }
+ nullb->index = rv;
+ dev->index = rv;
mutex_unlock(&lock);
blk_queue_logical_block_size(nullb->q, dev->blocksize);
@@ -2068,21 +2112,32 @@ static int null_add_dev(struct nullb_device *dev)
null_config_discard(nullb);
- sprintf(nullb->disk_name, "nullb%d", nullb->index);
+ if (config_item_name(&dev->item)) {
+ /* Use configfs dir name as the device name */
+ snprintf(nullb->disk_name, sizeof(nullb->disk_name),
+ "%s", config_item_name(&dev->item));
+ } else {
+ sprintf(nullb->disk_name, "nullb%d", nullb->index);
+ }
rv = null_gendisk_register(nullb);
if (rv)
- goto out_cleanup_zone;
+ goto out_ida_free;
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
mutex_unlock(&lock);
+ pr_info("disk %s created\n", nullb->disk_name);
+
return 0;
+
+out_ida_free:
+ ida_free(&nullb_indexes, nullb->index);
out_cleanup_zone:
null_free_zoned_dev(dev);
out_cleanup_disk:
- blk_cleanup_disk(nullb->disk);
+ put_disk(nullb->disk);
out_cleanup_tags:
if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
@@ -2095,12 +2150,53 @@ out:
return rv;
}
+static struct nullb *null_find_dev_by_name(const char *name)
+{
+ struct nullb *nullb = NULL, *nb;
+
+ mutex_lock(&lock);
+ list_for_each_entry(nb, &nullb_list, list) {
+ if (strcmp(nb->disk_name, name) == 0) {
+ nullb = nb;
+ break;
+ }
+ }
+ mutex_unlock(&lock);
+
+ return nullb;
+}
+
+static int null_create_dev(void)
+{
+ struct nullb_device *dev;
+ int ret;
+
+ dev = null_alloc_dev();
+ if (!dev)
+ return -ENOMEM;
+
+ ret = null_add_dev(dev);
+ if (ret) {
+ null_free_dev(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void null_destroy_dev(struct nullb *nullb)
+{
+ struct nullb_device *dev = nullb->dev;
+
+ null_del_dev(nullb);
+ null_free_dev(dev);
+}
+
static int __init null_init(void)
{
int ret = 0;
unsigned int i;
struct nullb *nullb;
- struct nullb_device *dev;
if (g_bs > PAGE_SIZE) {
pr_warn("invalid block size\n");
@@ -2120,19 +2216,21 @@ static int __init null_init(void)
}
if (g_queue_mode == NULL_Q_RQ) {
- pr_err("legacy IO path no longer available\n");
+ pr_err("legacy IO path is no longer available\n");
return -EINVAL;
}
+
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
if (g_submit_queues != nr_online_nodes) {
pr_warn("submit_queues param is set to %u.\n",
- nr_online_nodes);
+ nr_online_nodes);
g_submit_queues = nr_online_nodes;
}
- } else if (g_submit_queues > nr_cpu_ids)
+ } else if (g_submit_queues > nr_cpu_ids) {
g_submit_queues = nr_cpu_ids;
- else if (g_submit_queues <= 0)
+ } else if (g_submit_queues <= 0) {
g_submit_queues = 1;
+ }
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
ret = null_init_tag_set(NULL, &tag_set);
@@ -2156,16 +2254,9 @@ static int __init null_init(void)
}
for (i = 0; i < nr_devices; i++) {
- dev = null_alloc_dev();
- if (!dev) {
- ret = -ENOMEM;
- goto err_dev;
- }
- ret = null_add_dev(dev);
- if (ret) {
- null_free_dev(dev);
+ ret = null_create_dev();
+ if (ret)
goto err_dev;
- }
}
pr_info("module loaded\n");
@@ -2174,9 +2265,7 @@ static int __init null_init(void)
err_dev:
while (!list_empty(&nullb_list)) {
nullb = list_entry(nullb_list.next, struct nullb, list);
- dev = nullb->dev;
- null_del_dev(nullb);
- null_free_dev(dev);
+ null_destroy_dev(nullb);
}
unregister_blkdev(null_major, "nullb");
err_conf:
@@ -2197,12 +2286,8 @@ static void __exit null_exit(void)
mutex_lock(&lock);
while (!list_empty(&nullb_list)) {
- struct nullb_device *dev;
-
nullb = list_entry(nullb_list.next, struct nullb, list);
- dev = nullb->dev;
- null_del_dev(nullb);
- null_free_dev(dev);
+ null_destroy_dev(nullb);
}
mutex_unlock(&lock);
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 78eb56b0ca55..94ff68052b1e 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -16,13 +16,15 @@
#include <linux/mutex.h>
struct nullb_cmd {
- struct request *rq;
- struct bio *bio;
+ union {
+ struct request *rq;
+ struct bio *bio;
+ };
unsigned int tag;
blk_status_t error;
+ bool fake_timeout;
struct nullb_queue *nq;
struct hrtimer timer;
- bool fake_timeout;
};
struct nullb_queue {
@@ -58,6 +60,13 @@ struct nullb_zone {
unsigned int capacity;
};
+/* Queue modes */
+enum {
+ NULL_Q_BIO = 0,
+ NULL_Q_RQ = 1,
+ NULL_Q_MQ = 2,
+};
+
struct nullb_device {
struct nullb *nullb;
struct config_item item;
@@ -104,6 +113,8 @@ struct nullb_device {
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
bool virt_boundary; /* virtual boundary on/off for the device */
+ bool no_sched; /* no IO scheduler for the device */
+ bool shared_tag_bitmap; /* use hostwide shared tags */
};
struct nullb {
@@ -127,9 +138,8 @@ struct nullb {
blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
sector_t nr_sectors);
-blk_status_t null_process_cmd(struct nullb_cmd *cmd,
- enum req_opf op, sector_t sector,
- unsigned int nr_sectors);
+blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, unsigned int nr_sectors);
#ifdef CONFIG_BLK_DEV_ZONED
int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
@@ -137,9 +147,8 @@ int null_register_zoned_dev(struct nullb *nullb);
void null_free_zoned_dev(struct nullb_device *dev);
int null_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
- enum req_opf op, sector_t sector,
- sector_t nr_sectors);
+blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, sector_t nr_sectors);
size_t null_zone_valid_read_len(struct nullb *nullb,
sector_t sector, unsigned int len);
#else
@@ -155,7 +164,7 @@ static inline int null_register_zoned_dev(struct nullb *nullb)
}
static inline void null_free_zoned_dev(struct nullb_device *dev) {}
static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
- enum req_opf op, sector_t sector, sector_t nr_sectors)
+ enum req_op op, sector_t sector, sector_t nr_sectors)
{
return BLK_STS_NOTSUPP;
}
diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h
index ce3b430e88c5..6b2b370e786f 100644
--- a/drivers/block/null_blk/trace.h
+++ b/drivers/block/null_blk/trace.h
@@ -36,7 +36,7 @@ TRACE_EVENT(nullb_zone_op,
TP_ARGS(cmd, zone_no, zone_cond),
TP_STRUCT__entry(
__array(char, disk, DISK_NAME_LEN)
- __field(enum req_opf, op)
+ __field(enum req_op, op)
__field(unsigned int, zone_no)
__field(unsigned int, zone_cond)
),
@@ -44,7 +44,7 @@ TRACE_EVENT(nullb_zone_op,
__entry->op = req_op(cmd->rq);
__entry->zone_no = zone_no;
__entry->zone_cond = zone_cond;
- __assign_disk_name(__entry->disk, cmd->rq->rq_disk);
+ __assign_disk_name(__entry->disk, cmd->rq->q->disk);
),
TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
__print_disk_name(__entry->disk),
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index dae54dd1aeac..55a69e48ef8b 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -6,6 +6,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "null_blk: " fmt
+
static inline sector_t mb_to_sects(unsigned long mb)
{
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
dev->zone_capacity = dev->zone_size;
if (dev->zone_capacity > dev->zone_size) {
- pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
- dev->zone_capacity, dev->zone_size);
+ pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
+ dev->zone_capacity, dev->zone_size);
return -EINVAL;
}
@@ -156,7 +159,7 @@ int null_register_zoned_dev(struct nullb *nullb)
struct nullb_device *dev = nullb->dev;
struct request_queue *q = nullb->q;
- blk_queue_set_zoned(nullb->disk, BLK_ZONED_HM);
+ disk_set_zoned(nullb->disk, BLK_ZONED_HM);
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
@@ -167,12 +170,12 @@ int null_register_zoned_dev(struct nullb *nullb)
return ret;
} else {
blk_queue_chunk_sectors(q, dev->zone_size_sects);
- q->nr_zones = blkdev_nr_zones(nullb->disk);
+ nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
}
blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
- blk_queue_max_open_zones(q, dev->zone_max_open);
- blk_queue_max_active_zones(q, dev->zone_max_active);
+ disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
+ disk_set_max_active_zones(nullb->disk, dev->zone_max_active);
return 0;
}
@@ -395,10 +398,10 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
*/
if (append) {
sector = zone->wp;
- if (cmd->bio)
- cmd->bio->bi_iter.bi_sector = sector;
- else
+ if (dev->queue_mode == NULL_Q_MQ)
cmd->rq->__sector = sector;
+ else
+ cmd->bio->bi_iter.bi_sector = sector;
} else if (sector != zone->wp) {
ret = BLK_STS_IOERR;
goto unlock;
@@ -597,7 +600,7 @@ static blk_status_t null_reset_zone(struct nullb_device *dev,
return BLK_STS_OK;
}
-static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op,
sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
@@ -650,7 +653,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
return ret;
}
-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
+blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, sector_t nr_sectors)
{
struct nullb_device *dev;
diff --git a/drivers/block/paride/bpck.c b/drivers/block/paride/bpck.c
index f5f63ca2889d..d880a9465e9b 100644
--- a/drivers/block/paride/bpck.c
+++ b/drivers/block/paride/bpck.c
@@ -28,6 +28,7 @@
#undef r2
#undef w2
+#undef PC
#define PC pi->private
#define r2() (PC=(in_p(2) & 0xff))
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index f6b1d63e96e1..a5ab40784119 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -690,7 +690,7 @@ static void pcd_request(void)
if (!pcd_req && !set_next_request())
return;
- cd = pcd_req->rq_disk->private_data;
+ cd = pcd_req->q->disk->private_data;
if (cd != pcd_current)
pcd_bufblk = -1;
pcd_current = cd;
@@ -928,8 +928,9 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port,
disk->minors = 1;
strcpy(disk->disk_name, cd->name); /* umm... */
disk->fops = &pcd_bdops;
- disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+ disk->flags |= GENHD_FL_NO_PART;
disk->events = DISK_EVENT_MEDIA_CHANGE;
+ disk->event_flags = DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE;
if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay,
pcd_buffer, PI_PCD, verbose, cd->name)) {
@@ -955,7 +956,7 @@ out_unreg_cdrom:
out_pi_release:
pi_release(cd->pi);
out_free_disk:
- blk_cleanup_disk(cd->disk);
+ put_disk(cd->disk);
out_free_tag_set:
blk_mq_free_tag_set(&cd->tag_set);
return ret;
@@ -1028,7 +1029,7 @@ static void __exit pcd_exit(void)
unregister_cdrom(&cd->info);
del_gendisk(cd->disk);
pi_release(cd->pi);
- blk_cleanup_disk(cd->disk);
+ put_disk(cd->disk);
blk_mq_free_tag_set(&cd->tag_set);
}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index fba865058a17..f8a75bc90f70 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -430,7 +430,7 @@ static void run_fsm(void)
int stop = 0;
if (!phase) {
- pd_current = pd_req->rq_disk->private_data;
+ pd_current = pd_req->q->disk->private_data;
pi_current = pd_current->pi;
phase = do_pd_io_start;
}
@@ -492,7 +492,7 @@ static enum action do_pd_io_start(void)
case REQ_OP_WRITE:
pd_block = blk_rq_pos(pd_req);
pd_count = blk_rq_cur_sectors(pd_req);
- if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
+ if (pd_block + pd_count > get_capacity(pd_req->q->disk))
return Fail;
pd_run = blk_rq_sectors(pd_req);
pd_buf = bio_data(pd_req->bio);
@@ -501,6 +501,8 @@ static enum action do_pd_io_start(void)
return do_pd_read_start();
else
return do_pd_write_start();
+ default:
+ break;
}
return Fail;
}
@@ -781,7 +783,7 @@ static int pd_special_command(struct pd_unit *disk,
req = blk_mq_rq_to_pdu(rq);
req->func = func;
- blk_execute_rq(disk->gd, rq, 0);
+ blk_execute_rq(rq, false);
blk_mq_free_request(rq);
return 0;
}
@@ -943,7 +945,7 @@ static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port,
goto cleanup_disk;
return 0;
cleanup_disk:
- blk_cleanup_disk(disk->gd);
+ put_disk(disk->gd);
put_disk:
put_disk(p);
disk->gd = NULL;
@@ -1018,7 +1020,7 @@ static void __exit pd_exit(void)
if (p) {
disk->gd = NULL;
del_gendisk(p);
- blk_cleanup_disk(p);
+ put_disk(p);
blk_mq_free_tag_set(&disk->tag_set);
pi_release(disk->pi);
}
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index bf8d0ef41a0a..eec1b9fde245 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -746,12 +746,12 @@ repeat:
if (!pf_req && !set_next_request())
return;
- pf_current = pf_req->rq_disk->private_data;
+ pf_current = pf_req->q->disk->private_data;
pf_block = blk_rq_pos(pf_req);
pf_run = blk_rq_sectors(pf_req);
pf_count = blk_rq_cur_sectors(pf_req);
- if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
+ if (pf_block + pf_count > get_capacity(pf_req->q->disk)) {
pf_end_request(BLK_STS_IOERR);
goto repeat;
}
@@ -942,6 +942,7 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port,
disk->minors = 1;
strcpy(disk->disk_name, pf->name);
disk->fops = &pf_fops;
+ disk->flags |= GENHD_FL_NO_PART;
disk->events = DISK_EVENT_MEDIA_CHANGE;
disk->private_data = pf;
@@ -974,7 +975,7 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port,
out_pi_release:
pi_release(pf->pi);
out_free_disk:
- blk_cleanup_disk(pf->disk);
+ put_disk(pf->disk);
out_free_tag_set:
blk_mq_free_tag_set(&pf->tag_set);
return ret;
@@ -1043,7 +1044,7 @@ static void __exit pf_exit(void)
if (!pf->present)
continue;
del_gendisk(pf->disk);
- blk_cleanup_disk(pf->disk);
+ put_disk(pf->disk);
blk_mq_free_tag_set(&pf->tag_set);
pi_release(pf->pi);
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index b53f648302c1..4cea3b08087e 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -12,7 +12,7 @@
* Theory of operation:
*
* At the lowest level, there is the standard driver for the CD/DVD device,
- * typically ide-cd.c or sr.c. This driver can handle read and write requests,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
* but it doesn't know anything about the special restrictions that apply to
* packet writing. One restriction is that write requests must be aligned to
* packet boundaries on the physical media, and the size of a write request
@@ -113,57 +113,10 @@ static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
}
-/*
- * create and register a pktcdvd kernel object.
- */
-static struct pktcdvd_kobj* pkt_kobj_create(struct pktcdvd_device *pd,
- const char* name,
- struct kobject* parent,
- struct kobj_type* ktype)
-{
- struct pktcdvd_kobj *p;
- int error;
-
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return NULL;
- p->pd = pd;
- error = kobject_init_and_add(&p->kobj, ktype, parent, "%s", name);
- if (error) {
- kobject_put(&p->kobj);
- return NULL;
- }
- kobject_uevent(&p->kobj, KOBJ_ADD);
- return p;
-}
-/*
- * remove a pktcdvd kernel object.
- */
-static void pkt_kobj_remove(struct pktcdvd_kobj *p)
-{
- if (p)
- kobject_put(&p->kobj);
-}
-/*
- * default release function for pktcdvd kernel objects.
- */
-static void pkt_kobj_release(struct kobject *kobj)
-{
- kfree(to_pktcdvdkobj(kobj));
-}
-
-
/**********************************************************
- *
* sysfs interface for pktcdvd
* by (C) 2006 Thomas Maier <balagi@justmail.de>
- *
- **********************************************************/
-
-#define DEF_ATTR(_obj,_name,_mode) \
- static struct attribute _obj = { .name = _name, .mode = _mode }
-
-/**********************************************************
+
/sys/class/pktcdvd/pktcdvd[0-7]/
stat/reset
stat/packets_started
@@ -176,75 +129,94 @@ static void pkt_kobj_release(struct kobject *kobj)
write_queue/congestion_on
**********************************************************/
-DEF_ATTR(kobj_pkt_attr_st1, "reset", 0200);
-DEF_ATTR(kobj_pkt_attr_st2, "packets_started", 0444);
-DEF_ATTR(kobj_pkt_attr_st3, "packets_finished", 0444);
-DEF_ATTR(kobj_pkt_attr_st4, "kb_written", 0444);
-DEF_ATTR(kobj_pkt_attr_st5, "kb_read", 0444);
-DEF_ATTR(kobj_pkt_attr_st6, "kb_read_gather", 0444);
-
-static struct attribute *kobj_pkt_attrs_stat[] = {
- &kobj_pkt_attr_st1,
- &kobj_pkt_attr_st2,
- &kobj_pkt_attr_st3,
- &kobj_pkt_attr_st4,
- &kobj_pkt_attr_st5,
- &kobj_pkt_attr_st6,
- NULL
-};
+static ssize_t packets_started_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
-DEF_ATTR(kobj_pkt_attr_wq1, "size", 0444);
-DEF_ATTR(kobj_pkt_attr_wq2, "congestion_off", 0644);
-DEF_ATTR(kobj_pkt_attr_wq3, "congestion_on", 0644);
+ return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started);
+}
+static DEVICE_ATTR_RO(packets_started);
-static struct attribute *kobj_pkt_attrs_wqueue[] = {
- &kobj_pkt_attr_wq1,
- &kobj_pkt_attr_wq2,
- &kobj_pkt_attr_wq3,
- NULL
-};
+static ssize_t packets_finished_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended);
+}
+static DEVICE_ATTR_RO(packets_finished);
-static ssize_t kobj_pkt_show(struct kobject *kobj,
- struct attribute *attr, char *data)
+static ssize_t kb_written_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct pktcdvd_device *pd = to_pktcdvdkobj(kobj)->pd;
- int n = 0;
- int v;
- if (strcmp(attr->name, "packets_started") == 0) {
- n = sprintf(data, "%lu\n", pd->stats.pkt_started);
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
- } else if (strcmp(attr->name, "packets_finished") == 0) {
- n = sprintf(data, "%lu\n", pd->stats.pkt_ended);
+ return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1);
+}
+static DEVICE_ATTR_RO(kb_written);
- } else if (strcmp(attr->name, "kb_written") == 0) {
- n = sprintf(data, "%lu\n", pd->stats.secs_w >> 1);
+static ssize_t kb_read_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
- } else if (strcmp(attr->name, "kb_read") == 0) {
- n = sprintf(data, "%lu\n", pd->stats.secs_r >> 1);
+ return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1);
+}
+static DEVICE_ATTR_RO(kb_read);
- } else if (strcmp(attr->name, "kb_read_gather") == 0) {
- n = sprintf(data, "%lu\n", pd->stats.secs_rg >> 1);
+static ssize_t kb_read_gather_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
- } else if (strcmp(attr->name, "size") == 0) {
- spin_lock(&pd->lock);
- v = pd->bio_queue_size;
- spin_unlock(&pd->lock);
- n = sprintf(data, "%d\n", v);
+ return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1);
+}
+static DEVICE_ATTR_RO(kb_read_gather);
- } else if (strcmp(attr->name, "congestion_off") == 0) {
- spin_lock(&pd->lock);
- v = pd->write_congestion_off;
- spin_unlock(&pd->lock);
- n = sprintf(data, "%d\n", v);
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
- } else if (strcmp(attr->name, "congestion_on") == 0) {
- spin_lock(&pd->lock);
- v = pd->write_congestion_on;
- spin_unlock(&pd->lock);
- n = sprintf(data, "%d\n", v);
+ if (len > 0) {
+ pd->stats.pkt_started = 0;
+ pd->stats.pkt_ended = 0;
+ pd->stats.secs_w = 0;
+ pd->stats.secs_rg = 0;
+ pd->stats.secs_r = 0;
}
+ return len;
+}
+static DEVICE_ATTR_WO(reset);
+
+static struct attribute *pkt_stat_attrs[] = {
+ &dev_attr_packets_finished.attr,
+ &dev_attr_packets_started.attr,
+ &dev_attr_kb_read.attr,
+ &dev_attr_kb_written.attr,
+ &dev_attr_kb_read_gather.attr,
+ &dev_attr_reset.attr,
+ NULL,
+};
+
+static const struct attribute_group pkt_stat_group = {
+ .name = "stat",
+ .attrs = pkt_stat_attrs,
+};
+
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int n;
+
+ spin_lock(&pd->lock);
+ n = sysfs_emit(buf, "%d\n", pd->bio_queue_size);
+ spin_unlock(&pd->lock);
return n;
}
+static DEVICE_ATTR_RO(size);
static void init_write_congestion_marks(int* lo, int* hi)
{
@@ -263,30 +235,56 @@ static void init_write_congestion_marks(int* lo, int* hi)
}
}
-static ssize_t kobj_pkt_store(struct kobject *kobj,
- struct attribute *attr,
- const char *data, size_t len)
+static ssize_t congestion_off_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct pktcdvd_device *pd = to_pktcdvdkobj(kobj)->pd;
- int val;
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int n;
- if (strcmp(attr->name, "reset") == 0 && len > 0) {
- pd->stats.pkt_started = 0;
- pd->stats.pkt_ended = 0;
- pd->stats.secs_w = 0;
- pd->stats.secs_rg = 0;
- pd->stats.secs_r = 0;
+ spin_lock(&pd->lock);
+ n = sysfs_emit(buf, "%d\n", pd->write_congestion_off);
+ spin_unlock(&pd->lock);
+ return n;
+}
- } else if (strcmp(attr->name, "congestion_off") == 0
- && sscanf(data, "%d", &val) == 1) {
+static ssize_t congestion_off_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
spin_lock(&pd->lock);
pd->write_congestion_off = val;
init_write_congestion_marks(&pd->write_congestion_off,
&pd->write_congestion_on);
spin_unlock(&pd->lock);
+ }
+ return len;
+}
+static DEVICE_ATTR_RW(congestion_off);
+
+static ssize_t congestion_on_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int n;
- } else if (strcmp(attr->name, "congestion_on") == 0
- && sscanf(data, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ n = sysfs_emit(buf, "%d\n", pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+ return n;
+}
+
+static ssize_t congestion_on_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct pktcdvd_device *pd = dev_get_drvdata(dev);
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
spin_lock(&pd->lock);
pd->write_congestion_on = val;
init_write_congestion_marks(&pd->write_congestion_off,
@@ -295,44 +293,39 @@ static ssize_t kobj_pkt_store(struct kobject *kobj,
}
return len;
}
+static DEVICE_ATTR_RW(congestion_on);
-static const struct sysfs_ops kobj_pkt_ops = {
- .show = kobj_pkt_show,
- .store = kobj_pkt_store
+static struct attribute *pkt_wq_attrs[] = {
+ &dev_attr_congestion_on.attr,
+ &dev_attr_congestion_off.attr,
+ &dev_attr_size.attr,
+ NULL,
};
-static struct kobj_type kobj_pkt_type_stat = {
- .release = pkt_kobj_release,
- .sysfs_ops = &kobj_pkt_ops,
- .default_attrs = kobj_pkt_attrs_stat
+
+static const struct attribute_group pkt_wq_group = {
+ .name = "write_queue",
+ .attrs = pkt_wq_attrs,
};
-static struct kobj_type kobj_pkt_type_wqueue = {
- .release = pkt_kobj_release,
- .sysfs_ops = &kobj_pkt_ops,
- .default_attrs = kobj_pkt_attrs_wqueue
+
+static const struct attribute_group *pkt_groups[] = {
+ &pkt_stat_group,
+ &pkt_wq_group,
+ NULL,
};
static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
{
if (class_pktcdvd) {
- pd->dev = device_create(class_pktcdvd, NULL, MKDEV(0, 0), NULL,
- "%s", pd->name);
+ pd->dev = device_create_with_groups(class_pktcdvd, NULL,
+ MKDEV(0, 0), pd, pkt_groups,
+ "%s", pd->name);
if (IS_ERR(pd->dev))
pd->dev = NULL;
}
- if (pd->dev) {
- pd->kobj_stat = pkt_kobj_create(pd, "stat",
- &pd->dev->kobj,
- &kobj_pkt_type_stat);
- pd->kobj_wqueue = pkt_kobj_create(pd, "write_queue",
- &pd->dev->kobj,
- &kobj_pkt_type_wqueue);
- }
}
static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
{
- pkt_kobj_remove(pd->kobj_stat);
- pkt_kobj_remove(pd->kobj_wqueue);
if (class_pktcdvd)
device_unregister(pd->dev);
}
@@ -529,7 +522,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
goto no_pkt;
pkt->frames = frames;
- pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
+ pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
if (!pkt->w_bio)
goto no_bio;
@@ -543,27 +536,21 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
bio_list_init(&pkt->orig_bios);
for (i = 0; i < frames; i++) {
- struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
- if (!bio)
+ pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+ if (!pkt->r_bios[i])
goto no_rd_bio;
-
- pkt->r_bios[i] = bio;
}
return pkt;
no_rd_bio:
- for (i = 0; i < frames; i++) {
- struct bio *bio = pkt->r_bios[i];
- if (bio)
- bio_put(bio);
- }
-
+ for (i = 0; i < frames; i++)
+ kfree(pkt->r_bios[i]);
no_page:
for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
if (pkt->pages[i])
__free_page(pkt->pages[i]);
- bio_put(pkt->w_bio);
+ kfree(pkt->w_bio);
no_bio:
kfree(pkt);
no_pkt:
@@ -577,14 +564,11 @@ static void pkt_free_packet_data(struct packet_data *pkt)
{
int i;
- for (i = 0; i < pkt->frames; i++) {
- struct bio *bio = pkt->r_bios[i];
- if (bio)
- bio_put(bio);
- }
+ for (i = 0; i < pkt->frames; i++)
+ kfree(pkt->r_bios[i]);
for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
__free_page(pkt->pages[i]);
- bio_put(pkt->w_bio);
+ kfree(pkt->w_bio);
kfree(pkt);
}
@@ -700,6 +684,7 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
{
struct request_queue *q = bdev_get_queue(pd->bdev);
+ struct scsi_cmnd *scmd;
struct request *rq;
int ret = 0;
@@ -707,6 +692,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
+ scmd = blk_mq_rq_to_pdu(rq);
if (cgc->buflen) {
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -715,15 +701,15 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
goto out;
}
- scsi_req(rq)->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
- memcpy(scsi_req(rq)->cmd, cgc->cmd, CDROM_PACKET_SIZE);
+ scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
+ memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE);
rq->timeout = 60*HZ;
if (cgc->quiet)
rq->rq_flags |= RQF_QUIET;
- blk_execute_rq(pd->bdev->bd_disk, rq, 0);
- if (scsi_req(rq)->result)
+ blk_execute_rq(rq, false);
+ if (scmd->result)
ret = -EIO;
out:
blk_mq_free_request(rq);
@@ -956,6 +942,7 @@ static void pkt_end_io_read(struct bio *bio)
if (bio->bi_status)
atomic_inc(&pkt->io_errors);
+ bio_uninit(bio);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
wake_up(&pd->wqueue);
@@ -973,6 +960,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
pd->stats.pkt_ended++;
+ bio_uninit(bio);
pkt_bio_finished(pd);
atomic_dec(&pkt->io_wait);
atomic_inc(&pkt->run_sm);
@@ -1027,9 +1015,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
continue;
bio = pkt->r_bios[f];
- bio_reset(bio);
+ bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
- bio_set_dev(bio, pd->bdev);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1041,7 +1028,6 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
BUG();
atomic_inc(&pkt->io_wait);
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
pkt_queue_bio(pd, bio);
frames_read++;
}
@@ -1107,7 +1093,6 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
sector_t zone = 0; /* Suppress gcc warning */
struct pkt_rb_node *node, *first_node;
struct rb_node *n;
- int wakeup;
atomic_set(&pd->scan_queue, 0);
@@ -1179,12 +1164,14 @@ try_next_bio:
spin_unlock(&pkt->lock);
}
/* check write congestion marks, and if bio_queue_size is
- below, wake up any waiters */
- wakeup = (pd->write_congestion_on > 0
- && pd->bio_queue_size <= pd->write_congestion_off);
+ * below, wake up any waiters
+ */
+ if (pd->congested &&
+ pd->bio_queue_size <= pd->write_congestion_off) {
+ pd->congested = false;
+ wake_up_var(&pd->congested);
+ }
spin_unlock(&pd->lock);
- if (wakeup)
- clear_bdi_congested(pd->disk->bdi, BLK_RW_ASYNC);
pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
pkt_set_state(pkt, PACKET_WAITING_STATE);
@@ -1241,9 +1228,9 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
{
int f;
- bio_reset(pkt->w_bio);
+ bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+ REQ_OP_WRITE);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
- bio_set_dev(pkt->w_bio, pd->bdev);
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -1276,7 +1263,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
/* Start the write request */
atomic_set(&pkt->io_wait, 1);
- bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0);
pkt_queue_bio(pd, pkt->w_bio);
}
@@ -2304,12 +2290,12 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
+ struct bio *cloned_bio =
+ bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
psd->pd = pd;
psd->bio = bio;
- bio_set_dev(cloned_bio, pd->bdev);
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
@@ -2356,7 +2342,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
}
spin_unlock(&pd->cdrw.active_list_lock);
- /*
+ /*
* Test if there is enough room left in the bio work queue
* (queue size >= congestion on mark).
* If not, wait till the work queue size is below the congestion off mark.
@@ -2364,12 +2350,20 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
spin_lock(&pd->lock);
if (pd->write_congestion_on > 0
&& pd->bio_queue_size >= pd->write_congestion_on) {
- set_bdi_congested(bio->bi_bdev->bd_disk->bdi, BLK_RW_ASYNC);
- do {
+ struct wait_bit_queue_entry wqe;
+
+ init_wait_var_entry(&wqe, &pd->congested, 0);
+ for (;;) {
+ prepare_to_wait_event(__var_waitqueue(&pd->congested),
+ &wqe.wq_entry,
+ TASK_UNINTERRUPTIBLE);
+ if (pd->bio_queue_size <= pd->write_congestion_off)
+ break;
+ pd->congested = true;
spin_unlock(&pd->lock);
- congestion_wait(BLK_RW_ASYNC, HZ);
+ schedule();
spin_lock(&pd->lock);
- } while(pd->bio_queue_size > pd->write_congestion_off);
+ }
}
spin_unlock(&pd->lock);
@@ -2402,17 +2396,10 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
static void pkt_submit_bio(struct bio *bio)
{
- struct pktcdvd_device *pd;
- char b[BDEVNAME_SIZE];
+ struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
struct bio *split;
- blk_queue_split(&bio);
-
- pd = bio->bi_bdev->bd_disk->queue->queuedata;
- if (!pd) {
- pr_err("%s incorrect request queue\n", bio_devname(bio, b));
- goto end_io;
- }
+ bio = bio_split_to_limits(bio);
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_iter.bi_sector,
@@ -2473,11 +2460,9 @@ static int pkt_seq_show(struct seq_file *m, void *p)
{
struct pktcdvd_device *pd = m->private;
char *msg;
- char bdev_buf[BDEVNAME_SIZE];
int states[PACKET_NUM_STATES];
- seq_printf(m, "Writer %s mapped to %s:\n", pd->name,
- bdevname(pd->bdev, bdev_buf));
+ seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev);
seq_printf(m, "\nSettings:\n");
seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
@@ -2534,7 +2519,6 @@ static int pkt_seq_show(struct seq_file *m, void *p)
static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
{
int i;
- char b[BDEVNAME_SIZE];
struct block_device *bdev;
struct scsi_device *sdev;
@@ -2547,8 +2531,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
if (!pd2)
continue;
if (pd2->bdev->bd_dev == dev) {
- pkt_err(pd, "%s already setup\n",
- bdevname(pd2->bdev, b));
+ pkt_err(pd, "%pg already setup\n", pd2->bdev);
return -EBUSY;
}
if (pd2->pkt_dev == dev) {
@@ -2583,7 +2566,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
}
proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
- pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b));
+ pkt_dbg(1, pd, "writer mapped to %pg\n", bdev);
return 0;
out_mem:
@@ -2719,7 +2702,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
disk->first_minor = idx;
disk->minors = 1;
disk->fops = &pktcdvd_ops;
- disk->flags = GENHD_FL_REMOVABLE;
+ disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
strcpy(disk->disk_name, pd->name);
disk->private_data = pd;
@@ -2746,7 +2729,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
return 0;
out_mem2:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_mem:
mempool_exit(&pd->rb_pool);
kfree(pd);
@@ -2796,7 +2779,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
pkt_dbg(1, pd, "writer unmapped\n");
del_gendisk(pd->disk);
- blk_cleanup_disk(pd->disk);
+ put_disk(pd->disk);
mempool_exit(&pd->rb_pool);
kfree(pd);
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 3054adf77460..36d7b36c60c7 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -473,7 +473,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
return 0;
fail_cleanup_disk:
- blk_cleanup_disk(gendisk);
+ put_disk(gendisk);
fail_free_tag_set:
blk_mq_free_tag_set(&priv->tag_set);
fail_teardown:
@@ -500,7 +500,7 @@ static void ps3disk_remove(struct ps3_system_bus_device *_dev)
&ps3disk_mask);
mutex_unlock(&ps3disk_mask_mutex);
del_gendisk(priv->gendisk);
- blk_cleanup_disk(priv->gendisk);
+ put_disk(priv->gendisk);
blk_mq_free_tag_set(&priv->tag_set);
dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
ps3disk_sync_cache(dev);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index c1876646a4cb..c76e0148eada 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -586,7 +586,7 @@ static void ps3vram_submit_bio(struct bio *bio)
dev_dbg(&dev->core, "%s\n", __func__);
- blk_queue_split(&bio);
+ bio = bio_split_to_limits(bio);
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
@@ -742,9 +742,10 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
priv->gendisk = gendisk;
gendisk->major = ps3vram_major;
gendisk->minors = 1;
+ gendisk->flags |= GENHD_FL_NO_PART;
gendisk->fops = &ps3vram_fops;
gendisk->private_data = dev;
- strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
+ strscpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
set_capacity(gendisk, priv->size >> 9);
blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS);
blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE);
@@ -760,7 +761,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
return 0;
out_cleanup_disk:
- blk_cleanup_disk(gendisk);
+ put_disk(gendisk);
out_cache_cleanup:
remove_proc_entry(DEVICE_NAME, NULL);
ps3vram_cache_cleanup(dev);
@@ -791,7 +792,7 @@ static void ps3vram_remove(struct ps3_system_bus_device *dev)
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
del_gendisk(priv->gendisk);
- blk_cleanup_disk(priv->gendisk);
+ put_disk(priv->gendisk);
remove_proc_entry(DEVICE_NAME, NULL);
ps3vram_cache_cleanup(dev);
iounmap(priv->reports);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 953fa134cd3d..04453f4a319c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -756,24 +756,23 @@ static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
*/
static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
{
- struct rbd_client *client_node;
- bool found = false;
+ struct rbd_client *rbdc = NULL, *iter;
if (ceph_opts->flags & CEPH_OPT_NOSHARE)
return NULL;
spin_lock(&rbd_client_list_lock);
- list_for_each_entry(client_node, &rbd_client_list, node) {
- if (!ceph_compare_options(ceph_opts, client_node->client)) {
- __rbd_get_client(client_node);
+ list_for_each_entry(iter, &rbd_client_list, node) {
+ if (!ceph_compare_options(ceph_opts, iter->client)) {
+ __rbd_get_client(iter);
- found = true;
+ rbdc = iter;
break;
}
}
spin_unlock(&rbd_client_list_lock);
- return found ? client_node : NULL;
+ return rbdc;
}
/*
@@ -1298,7 +1297,7 @@ static void rbd_osd_submit(struct ceph_osd_request *osd_req)
dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n",
__func__, osd_req, obj_req, obj_req->ex.oe_objno,
obj_req->ex.oe_off, obj_req->ex.oe_len);
- ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
+ ceph_osdc_start_request(osd_req->r_osdc, osd_req);
}
/*
@@ -2082,7 +2081,7 @@ static int rbd_object_map_update(struct rbd_obj_request *obj_req, u64 snap_id,
if (ret)
return ret;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
return 0;
}
@@ -4730,7 +4729,7 @@ static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
static void rbd_free_disk(struct rbd_device *rbd_dev)
{
- blk_cleanup_disk(rbd_dev->disk);
+ put_disk(rbd_dev->disk);
blk_mq_free_tag_set(&rbd_dev->tag_set);
rbd_dev->disk = NULL;
}
@@ -4769,7 +4768,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
if (ret)
goto out_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0)
ceph_copy_from_page_vector(pages, buf, 0, ret);
@@ -4924,12 +4923,10 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
rbd_dev->dev_id);
disk->major = rbd_dev->major;
disk->first_minor = rbd_dev->minor;
- if (single_major) {
+ if (single_major)
disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT);
- disk->flags |= GENHD_FL_EXT_DEVT;
- } else {
+ else
disk->minors = RBD_MINORS_PER_MAJOR;
- }
disk->fops = &rbd_bd_ops;
disk->private_data = rbd_dev;
@@ -4944,7 +4941,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
if (rbd_dev->opts->trim) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = rbd_dev->opts->alloc_size;
blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
@@ -6191,7 +6187,7 @@ static inline size_t next_token(const char **buf)
* These are the characters that produce nonzero for
* isspace() in the "C" and "POSIX" locales.
*/
- const char *spaces = " \f\n\r\t\v";
+ static const char spaces[] = " \f\n\r\t\v";
*buf += strspn(*buf, spaces); /* Find start of token */
@@ -6497,7 +6493,8 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;
- ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+ ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL,
+ ',');
if (ret)
goto out_err;
@@ -7186,7 +7183,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
* IO to complete/fail.
*/
blk_mq_freeze_queue(rbd_dev->disk->queue);
- blk_set_queue_dying(rbd_dev->disk->queue);
+ blk_mark_disk_dead(rbd_dev->disk);
}
del_gendisk(rbd_dev->disk);
@@ -7225,8 +7222,10 @@ static int __init rbd_sysfs_init(void)
int ret;
ret = device_register(&rbd_root_dev);
- if (ret < 0)
+ if (ret < 0) {
+ put_device(&rbd_root_dev);
return ret;
+ }
ret = bus_register(&rbd_bus_type);
if (ret < 0)
diff --git a/drivers/block/rnbd/Makefile b/drivers/block/rnbd/Makefile
index 5bb1a7ad1ada..40b31630822c 100644
--- a/drivers/block/rnbd/Makefile
+++ b/drivers/block/rnbd/Makefile
@@ -6,10 +6,12 @@ rnbd-client-y := rnbd-clt.o \
rnbd-clt-sysfs.o \
rnbd-common.o
+CFLAGS_rnbd-srv-trace.o = -I$(src)
+
rnbd-server-y := rnbd-common.o \
rnbd-srv.o \
- rnbd-srv-dev.o \
- rnbd-srv-sysfs.o
+ rnbd-srv-sysfs.o \
+ rnbd-srv-trace.o
obj-$(CONFIG_BLK_DEV_RNBD_CLIENT) += rnbd-client.o
obj-$(CONFIG_BLK_DEV_RNBD_SERVER) += rnbd-server.o
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 44e45af00e83..e7c7d9a68168 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -376,7 +376,7 @@ static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
if (ret)
return ret;
- ret = rnbd_clt_resize_disk(dev, (size_t)sectors);
+ ret = rnbd_clt_resize_disk(dev, sectors);
if (ret)
return ret;
@@ -452,6 +452,7 @@ static struct attribute *rnbd_dev_attrs[] = {
&rnbd_clt_nr_poll_queues.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rnbd_dev);
void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev)
{
@@ -474,7 +475,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev)
static struct kobj_type rnbd_dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
- .default_attrs = rnbd_dev_attrs,
+ .default_groups = rnbd_dev_groups,
};
static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev)
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 2df0657cdf00..78334da74d8b 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -23,9 +23,9 @@ MODULE_LICENSE("GPL");
static int rnbd_client_major;
static DEFINE_IDA(index_ida);
-static DEFINE_MUTEX(ida_lock);
static DEFINE_MUTEX(sess_lock);
static LIST_HEAD(sess_list);
+static struct workqueue_struct *rnbd_clt_wq;
/*
* Maximum number of partitions an instance can have.
@@ -55,9 +55,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev)
if (!refcount_dec_and_test(&dev->refcount))
return;
- mutex_lock(&ida_lock);
- ida_simple_remove(&index_ida, dev->clt_device_id);
- mutex_unlock(&ida_lock);
+ ida_free(&index_ida, dev->clt_device_id);
kfree(dev->hw_queues);
kfree(dev->pathname);
rnbd_clt_put_sess(dev->sess);
@@ -70,41 +68,18 @@ static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev)
return refcount_inc_not_zero(&dev->refcount);
}
-static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
- const struct rnbd_msg_open_rsp *rsp)
+static void rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
+ sector_t new_nsectors)
{
- struct rnbd_clt_session *sess = dev->sess;
-
- if (!rsp->logical_block_size)
- return -EINVAL;
-
- dev->device_id = le32_to_cpu(rsp->device_id);
- dev->nsectors = le64_to_cpu(rsp->nsectors);
- dev->logical_block_size = le16_to_cpu(rsp->logical_block_size);
- dev->physical_block_size = le16_to_cpu(rsp->physical_block_size);
- dev->max_write_same_sectors = le32_to_cpu(rsp->max_write_same_sectors);
- dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors);
- dev->discard_granularity = le32_to_cpu(rsp->discard_granularity);
- dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
- dev->secure_discard = le16_to_cpu(rsp->secure_discard);
- dev->rotational = rsp->rotational;
- dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
- dev->fua = !!(rsp->cache_policy & RNBD_FUA);
-
- dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
- dev->max_segments = sess->max_segments;
-
- return 0;
-}
+ if (get_capacity(dev->gd) == new_nsectors)
+ return;
-static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
- size_t new_nsectors)
-{
- rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
- dev->nsectors, new_nsectors);
- dev->nsectors = new_nsectors;
- set_capacity_and_notify(dev->gd, dev->nsectors);
- return 0;
+ /*
+ * If the size changed, we need to revalidate it
+ */
+ rnbd_clt_info(dev, "Device size changed from %llu to %llu sectors\n",
+ get_capacity(dev->gd), new_nsectors);
+ set_capacity_and_notify(dev->gd, new_nsectors);
}
static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
@@ -123,19 +98,16 @@ static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) {
u64 nsectors = le64_to_cpu(rsp->nsectors);
- /*
- * If the device was remapped and the size changed in the
- * meantime we need to revalidate it
- */
- if (dev->nsectors != nsectors)
- rnbd_clt_change_capacity(dev, nsectors);
+ rnbd_clt_change_capacity(dev, nsectors);
gd_kobj = &disk_to_dev(dev->gd)->kobj;
kobject_uevent(gd_kobj, KOBJ_ONLINE);
rnbd_clt_info(dev, "Device online, device remapped successfully\n");
}
- err = rnbd_clt_set_dev_attr(dev, rsp);
- if (err)
+ if (!rsp->logical_block_size) {
+ err = -EINVAL;
goto out;
+ }
+ dev->device_id = le32_to_cpu(rsp->device_id);
dev->dev_state = DEV_STATE_MAPPED;
out:
@@ -144,7 +116,7 @@ out:
return err;
}
-int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
+int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize)
{
int ret = 0;
@@ -154,7 +126,7 @@ int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
ret = -ENOENT;
goto out;
}
- ret = rnbd_clt_change_capacity(dev, newsize);
+ rnbd_clt_change_capacity(dev, newsize);
out:
mutex_unlock(&dev->lock);
@@ -196,7 +168,7 @@ rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu)
return per_cpu_ptr(sess->cpu_queues, bit);
} else if (cpu != 0) {
/* Search from 0 to cpu */
- bit = find_next_bit(sess->cpu_queues_bm, cpu, 0);
+ bit = find_first_bit(sess->cpu_queues_bm, cpu);
if (bit < cpu)
return per_cpu_ptr(sess->cpu_queues, bit);
}
@@ -393,7 +365,7 @@ static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu)
static void rnbd_softirq_done_fn(struct request *rq)
{
- struct rnbd_clt_dev *dev = rq->rq_disk->private_data;
+ struct rnbd_clt_dev *dev = rq->q->disk->private_data;
struct rnbd_clt_session *sess = dev->sess;
struct rnbd_iu *iu;
@@ -433,7 +405,7 @@ static void msg_conf(void *priv, int errno)
schedule_work(&iu->work);
}
-static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
+static int send_usr_msg(struct rtrs_clt_sess *rtrs, int dir,
struct rnbd_iu *iu, struct kvec *vec,
size_t len, struct scatterlist *sg, unsigned int sg_len,
void (*conf)(struct work_struct *work),
@@ -511,6 +483,11 @@ static void msg_open_conf(struct work_struct *work)
struct rnbd_msg_open_rsp *rsp = iu->buf;
struct rnbd_clt_dev *dev = iu->dev;
int errno = iu->errno;
+ bool from_map = false;
+
+ /* INIT state is only triggered from rnbd_clt_map_device */
+ if (dev->dev_state == DEV_STATE_INIT)
+ from_map = true;
if (errno) {
rnbd_clt_err(dev,
@@ -527,7 +504,9 @@ static void msg_open_conf(struct work_struct *work)
send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
}
}
- kfree(rsp);
+ /* We free rsp in rnbd_clt_map_device for map scenario */
+ if (!from_map)
+ kfree(rsp);
wake_up_iu_comp(iu, errno);
rnbd_put_iu(dev->sess, iu);
rnbd_clt_put_dev(dev);
@@ -946,7 +925,7 @@ static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
{
struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
- if (dev->read_only && (mode & FMODE_WRITE))
+ if (get_disk_ro(dev->gd) && (mode & FMODE_WRITE))
return -EPERM;
if (dev->dev_state == DEV_STATE_UNMAPPED ||
@@ -967,10 +946,10 @@ static int rnbd_client_getgeo(struct block_device *block_device,
struct hd_geometry *geo)
{
u64 size;
- struct rnbd_clt_dev *dev;
+ struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
+ struct queue_limits *limit = &dev->queue->limits;
- dev = block_device->bd_disk->private_data;
- size = dev->size * (dev->logical_block_size / SECTOR_SIZE);
+ size = dev->size * (limit->logical_block_size / SECTOR_SIZE);
geo->cylinders = size >> 6; /* size/64 */
geo->heads = 4;
geo->sectors = 16;
@@ -1010,7 +989,7 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev,
struct request *rq,
struct rnbd_iu *iu)
{
- struct rtrs_clt *rtrs = dev->sess->rtrs;
+ struct rtrs_clt_sess *rtrs = dev->sess->rtrs;
struct rtrs_permit *permit = iu->permit;
struct rnbd_msg_io msg;
struct rtrs_clt_req_ops req_ops;
@@ -1133,7 +1112,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
- struct rnbd_clt_dev *dev = rq->rq_disk->private_data;
+ struct rnbd_clt_dev *dev = rq->q->disk->private_data;
struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq);
int err;
blk_status_t ret = BLK_STS_IOERR;
@@ -1180,13 +1159,11 @@ static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct rnbd_queue *q = hctx->driver_data;
struct rnbd_clt_dev *dev = q->dev;
- int cnt;
- cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
- return cnt;
+ return rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
}
-static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
+static void rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
{
struct rnbd_clt_session *sess = set->driver_data;
@@ -1215,8 +1192,6 @@ static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
set->map[HCTX_TYPE_DEFAULT].nr_queues,
set->map[HCTX_TYPE_READ].nr_queues);
}
-
- return 0;
}
static struct blk_mq_ops rnbd_mq_ops = {
@@ -1262,9 +1237,9 @@ find_and_get_or_create_sess(const char *sessname,
struct rtrs_clt_ops rtrs_ops;
sess = find_or_create_sess(sessname, &first);
- if (sess == ERR_PTR(-ENOMEM))
+ if (sess == ERR_PTR(-ENOMEM)) {
return ERR_PTR(-ENOMEM);
- else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
+ } else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
/*
* A device MUST have its own session to use the polling-mode.
* It must fail to map new device with the same session.
@@ -1343,7 +1318,7 @@ static inline void rnbd_init_hw_queue(struct rnbd_clt_dev *dev,
static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
{
- int i;
+ unsigned long i;
struct blk_mq_hw_ctx *hctx;
struct rnbd_queue *q;
@@ -1354,13 +1329,15 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
}
}
-static void setup_request_queue(struct rnbd_clt_dev *dev)
+static void setup_request_queue(struct rnbd_clt_dev *dev,
+ struct rnbd_msg_open_rsp *rsp)
{
- blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
- blk_queue_physical_block_size(dev->queue, dev->physical_block_size);
- blk_queue_max_hw_sectors(dev->queue, dev->max_hw_sectors);
- blk_queue_max_write_same_sectors(dev->queue,
- dev->max_write_same_sectors);
+ blk_queue_logical_block_size(dev->queue,
+ le16_to_cpu(rsp->logical_block_size));
+ blk_queue_physical_block_size(dev->queue,
+ le16_to_cpu(rsp->physical_block_size));
+ blk_queue_max_hw_sectors(dev->queue,
+ dev->sess->max_io_size / SECTOR_SIZE);
/*
* we don't support discards to "discontiguous" segments
@@ -1368,23 +1345,27 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
*/
blk_queue_max_discard_segments(dev->queue, 1);
- blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
- dev->queue->limits.discard_granularity = dev->discard_granularity;
- dev->queue->limits.discard_alignment = dev->discard_alignment;
- if (dev->max_discard_sectors)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue);
- if (dev->secure_discard)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue);
-
+ blk_queue_max_discard_sectors(dev->queue,
+ le32_to_cpu(rsp->max_discard_sectors));
+ dev->queue->limits.discard_granularity =
+ le32_to_cpu(rsp->discard_granularity);
+ dev->queue->limits.discard_alignment =
+ le32_to_cpu(rsp->discard_alignment);
+ if (le16_to_cpu(rsp->secure_discard))
+ blk_queue_max_secure_erase_sectors(dev->queue,
+ le32_to_cpu(rsp->max_discard_sectors));
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
- blk_queue_max_segments(dev->queue, dev->max_segments);
+ blk_queue_max_segments(dev->queue, dev->sess->max_segments);
blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
- blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
+ blk_queue_write_cache(dev->queue,
+ !!(rsp->cache_policy & RNBD_WRITEBACK),
+ !!(rsp->cache_policy & RNBD_FUA));
}
-static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
+static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
+ struct rnbd_msg_open_rsp *rsp, int idx)
{
int err;
@@ -1396,34 +1377,34 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d",
idx);
- pr_debug("disk_name=%s, capacity=%zu\n",
+ pr_debug("disk_name=%s, capacity=%llu\n",
dev->gd->disk_name,
- dev->nsectors * (dev->logical_block_size / SECTOR_SIZE)
- );
+ le64_to_cpu(rsp->nsectors) *
+ (le16_to_cpu(rsp->logical_block_size) / SECTOR_SIZE));
- set_capacity(dev->gd, dev->nsectors);
+ set_capacity(dev->gd, le64_to_cpu(rsp->nsectors));
- if (dev->access_mode == RNBD_ACCESS_RO) {
- dev->read_only = true;
+ if (dev->access_mode == RNBD_ACCESS_RO)
set_disk_ro(dev->gd, true);
- } else {
- dev->read_only = false;
- }
- if (!dev->rotational)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
+ /*
+ * Network device does not need rotational
+ */
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
err = add_disk(dev->gd);
if (err)
- blk_cleanup_disk(dev->gd);
+ put_disk(dev->gd);
return err;
}
-static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
+static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
+ struct rnbd_msg_open_rsp *rsp)
{
int idx = dev->clt_device_id;
- dev->size = dev->nsectors * dev->logical_block_size;
+ dev->size = le64_to_cpu(rsp->nsectors) *
+ le16_to_cpu(rsp->logical_block_size);
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
if (IS_ERR(dev->gd))
@@ -1431,8 +1412,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
dev->queue = dev->gd->queue;
rnbd_init_mq_hw_queues(dev);
- setup_request_queue(dev);
- return rnbd_clt_setup_gen_disk(dev, idx);
+ setup_request_queue(dev, rsp);
+ return rnbd_clt_setup_gen_disk(dev, rsp, idx);
}
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
@@ -1459,10 +1440,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
goto out_alloc;
}
- mutex_lock(&ida_lock);
- ret = ida_simple_get(&index_ida, 0, 1 << (MINORBITS - RNBD_PART_BITS),
- GFP_KERNEL);
- mutex_unlock(&ida_lock);
+ ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS),
+ GFP_KERNEL);
if (ret < 0) {
pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",
pathname, sess->sessname, ret);
@@ -1570,7 +1549,14 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
- int ret;
+ int ret, errno;
+ struct rnbd_msg_open_rsp *rsp;
+ struct rnbd_msg_open msg;
+ struct rnbd_iu *iu;
+ struct kvec vec = {
+ .iov_base = &msg,
+ .iov_len = sizeof(msg)
+ };
if (exists_devpath(pathname, sessname))
return ERR_PTR(-EEXIST);
@@ -1590,17 +1576,47 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
ret = -EEXIST;
goto put_dev;
}
- ret = send_msg_open(dev, RTRS_PERMIT_WAIT);
+
+ rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+ if (!rsp) {
+ ret = -ENOMEM;
+ goto del_dev;
+ }
+
+ iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
+ if (!iu) {
+ ret = -ENOMEM;
+ kfree(rsp);
+ goto del_dev;
+ }
+ iu->buf = rsp;
+ iu->dev = dev;
+ sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
+
+ msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
+ msg.access_mode = dev->access_mode;
+ strscpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));
+
+ WARN_ON(!rnbd_clt_get_dev(dev));
+ ret = send_usr_msg(sess->rtrs, READ, iu,
+ &vec, sizeof(*rsp), iu->sgt.sgl, 1,
+ msg_open_conf, &errno, RTRS_PERMIT_WAIT);
+ if (ret) {
+ rnbd_clt_put_dev(dev);
+ rnbd_put_iu(sess, iu);
+ } else {
+ ret = errno;
+ }
if (ret) {
rnbd_clt_err(dev,
"map_device: failed, can't open remote device, err: %d\n",
ret);
- goto del_dev;
+ goto put_iu;
}
mutex_lock(&dev->lock);
pr_debug("Opened remote device: session=%s, path='%s'\n",
sess->sessname, pathname);
- ret = rnbd_client_setup_device(dev);
+ ret = rnbd_client_setup_device(dev, rsp);
if (ret) {
rnbd_clt_err(dev,
"map_device: Failed to configure device, err: %d\n",
@@ -1610,21 +1626,30 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
}
rnbd_clt_info(dev,
- "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n",
- dev->gd->disk_name, dev->nsectors,
- dev->logical_block_size, dev->physical_block_size,
- dev->max_write_same_sectors, dev->max_discard_sectors,
- dev->discard_granularity, dev->discard_alignment,
- dev->secure_discard, dev->max_segments,
- dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua);
+ "map_device: Device mapped as %s (nsectors: %llu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
+ dev->gd->disk_name, le64_to_cpu(rsp->nsectors),
+ le16_to_cpu(rsp->logical_block_size),
+ le16_to_cpu(rsp->physical_block_size),
+ le32_to_cpu(rsp->max_discard_sectors),
+ le32_to_cpu(rsp->discard_granularity),
+ le32_to_cpu(rsp->discard_alignment),
+ le16_to_cpu(rsp->secure_discard),
+ sess->max_segments, sess->max_io_size / SECTOR_SIZE,
+ !!(rsp->cache_policy & RNBD_WRITEBACK),
+ !!(rsp->cache_policy & RNBD_FUA));
mutex_unlock(&dev->lock);
+ kfree(rsp);
+ rnbd_put_iu(sess, iu);
rnbd_clt_put_sess(sess);
return dev;
send_close:
send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
+put_iu:
+ kfree(rsp);
+ rnbd_put_iu(sess, iu);
del_dev:
delete_dev(dev);
put_dev:
@@ -1638,7 +1663,7 @@ put_sess:
static void destroy_gen_disk(struct rnbd_clt_dev *dev)
{
del_gendisk(dev->gd);
- blk_cleanup_disk(dev->gd);
+ put_disk(dev->gd);
}
static void destroy_sysfs(struct rnbd_clt_dev *dev,
@@ -1763,17 +1788,17 @@ static void rnbd_destroy_sessions(void)
list_for_each_entry_safe(dev, tn, &sess->devs_list, list) {
/*
* Here unmap happens in parallel for only one reason:
- * blk_cleanup_queue() takes around half a second, so
+ * del_gendisk() takes around half a second, so
* on huge amount of devices the whole module unload
* procedure takes minutes.
*/
INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
- queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
+ queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
}
rnbd_clt_put_sess(sess);
}
/* Wait for all scheduled unmap works */
- flush_workqueue(system_long_wq);
+ flush_workqueue(rnbd_clt_wq);
WARN_ON(!list_empty(&sess_list));
}
@@ -1798,6 +1823,14 @@ static int __init rnbd_client_init(void)
pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
err);
unregister_blkdev(rnbd_client_major, "rnbd");
+ return err;
+ }
+ rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
+ if (!rnbd_clt_wq) {
+ pr_err("Failed to load module, alloc_workqueue failed.\n");
+ rnbd_clt_destroy_sysfs_files();
+ unregister_blkdev(rnbd_client_major, "rnbd");
+ err = -ENOMEM;
}
return err;
@@ -1808,6 +1841,7 @@ static void __exit rnbd_client_exit(void)
rnbd_destroy_sessions();
unregister_blkdev(rnbd_client_major, "rnbd");
ida_destroy(&index_ida);
+ destroy_workqueue(rnbd_clt_wq);
}
module_init(rnbd_client_init);
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 9ef8c4f306f2..a48e040abe63 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -75,7 +75,7 @@ struct rnbd_cpu_qlist {
struct rnbd_clt_session {
struct list_head list;
- struct rtrs_clt *rtrs;
+ struct rtrs_clt_sess *rtrs;
wait_queue_head_t rtrs_waitq;
bool rtrs_ready;
struct rnbd_cpu_qlist __percpu
@@ -106,6 +106,7 @@ struct rnbd_queue {
};
struct rnbd_clt_dev {
+ struct kobject kobj;
struct rnbd_clt_session *sess;
struct request_queue *queue;
struct rnbd_queue *hw_queues;
@@ -114,29 +115,14 @@ struct rnbd_clt_dev {
u32 clt_device_id;
struct mutex lock;
enum rnbd_clt_dev_state dev_state;
+ refcount_t refcount;
char *pathname;
enum rnbd_access_mode access_mode;
u32 nr_poll_queues;
- bool read_only;
- bool rotational;
- bool wc;
- bool fua;
- u32 max_hw_sectors;
- u32 max_write_same_sectors;
- u32 max_discard_sectors;
- u32 discard_granularity;
- u32 discard_alignment;
- u16 secure_discard;
- u16 physical_block_size;
- u16 logical_block_size;
- u16 max_segments;
- size_t nsectors;
u64 size; /* device size in bytes */
struct list_head list;
struct gendisk *gd;
- struct kobject kobj;
char *blk_symlink_name;
- refcount_t refcount;
struct work_struct unmap_on_rmmod_work;
};
@@ -152,7 +138,7 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
const struct attribute *sysfs_self);
int rnbd_clt_remap_device(struct rnbd_clt_dev *dev);
-int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize);
+int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize);
/* rnbd-clt-sysfs.c */
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index de5d5a8df81d..ea7ac8bca63c 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -128,7 +128,7 @@ enum rnbd_cache_policy {
* @logical_block_size: logical block size device supports in bytes
* @max_segments: max segments hardware support in one transfer
* @secure_discard: supports secure discard
- * @rotation: is a rotational disc?
+ * @obsolete_rotational: obsolete, not in used.
* @cache_policy: support write-back caching or FUA?
*/
struct rnbd_msg_open_rsp {
@@ -144,7 +144,7 @@ struct rnbd_msg_open_rsp {
__le16 logical_block_size;
__le16 max_segments;
__le16 secure_discard;
- u8 rotational;
+ u8 obsolete_rotational;
u8 cache_policy;
u8 reserved[10];
};
@@ -229,9 +229,9 @@ static inline bool rnbd_flags_supported(u32 flags)
return true;
}
-static inline u32 rnbd_to_bio_flags(u32 rnbd_opf)
+static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf)
{
- u32 bio_opf;
+ blk_opf_t bio_opf;
switch (rnbd_op(rnbd_opf)) {
case RNBD_OP_READ:
@@ -249,9 +249,6 @@ static inline u32 rnbd_to_bio_flags(u32 rnbd_opf)
case RNBD_OP_SECURE_ERASE:
bio_opf = REQ_OP_SECURE_ERASE;
break;
- case RNBD_OP_WRITE_SAME:
- bio_opf = REQ_OP_WRITE_SAME;
- break;
default:
WARN(1, "Unknown RNBD type: %d (flags %d)\n",
rnbd_op(rnbd_opf), rnbd_opf);
@@ -284,15 +281,13 @@ static inline u32 rq_to_rnbd_flags(struct request *rq)
case REQ_OP_SECURE_ERASE:
rnbd_opf = RNBD_OP_SECURE_ERASE;
break;
- case REQ_OP_WRITE_SAME:
- rnbd_opf = RNBD_OP_WRITE_SAME;
- break;
case REQ_OP_FLUSH:
rnbd_opf = RNBD_OP_FLUSH;
break;
default:
WARN(1, "Unknown request type %d (flags %llu)\n",
- req_op(rq), (unsigned long long)rq->cmd_flags);
+ (__force u32)req_op(rq),
+ (__force unsigned long long)rq->cmd_flags);
rnbd_opf = 0;
}
diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c
deleted file mode 100644
index b241a099aeae..000000000000
--- a/drivers/block/rnbd/rnbd-srv-dev.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RDMA Network Block Driver
- *
- * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
- * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
- * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
- */
-#undef pr_fmt
-#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
-
-#include "rnbd-srv-dev.h"
-#include "rnbd-log.h"
-
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
- struct bio_set *bs)
-{
- struct rnbd_dev *dev;
- int ret;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return ERR_PTR(-ENOMEM);
-
- dev->blk_open_flags = flags;
- dev->bdev = blkdev_get_by_path(path, flags, THIS_MODULE);
- ret = PTR_ERR_OR_ZERO(dev->bdev);
- if (ret)
- goto err;
-
- dev->blk_open_flags = flags;
- bdevname(dev->bdev, dev->name);
- dev->ibd_bio_set = bs;
-
- return dev;
-
-err:
- kfree(dev);
- return ERR_PTR(ret);
-}
-
-void rnbd_dev_close(struct rnbd_dev *dev)
-{
- blkdev_put(dev->bdev, dev->blk_open_flags);
- kfree(dev);
-}
-
-void rnbd_dev_bi_end_io(struct bio *bio)
-{
- struct rnbd_dev_blk_io *io = bio->bi_private;
-
- rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status));
- bio_put(bio);
-}
-
-/**
- * rnbd_bio_map_kern - map kernel address into bio
- * @data: pointer to buffer to map
- * @bs: bio_set to use.
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
- unsigned int len, gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- int offset, i;
- struct bio *bio;
-
- bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (bio_add_page(bio, virt_to_page(data), bytes,
- offset) < bytes) {
- /* we don't support partial mappings */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
- }
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- return bio;
-}
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
deleted file mode 100644
index 0eb23850afb9..000000000000
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * RDMA Network Block Driver
- *
- * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
- * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
- * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
- */
-#ifndef RNBD_SRV_DEV_H
-#define RNBD_SRV_DEV_H
-
-#include <linux/fs.h>
-#include "rnbd-proto.h"
-
-struct rnbd_dev {
- struct block_device *bdev;
- struct bio_set *ibd_bio_set;
- fmode_t blk_open_flags;
- char name[BDEVNAME_SIZE];
-};
-
-struct rnbd_dev_blk_io {
- struct rnbd_dev *dev;
- void *priv;
- /* have to be last member for front_pad usage of bioset_init */
- struct bio bio;
-};
-
-/**
- * rnbd_dev_open() - Open a device
- * @flags: open flags
- * @bs: bio_set to use during block io,
- */
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
- struct bio_set *bs);
-
-/**
- * rnbd_dev_close() - Close a device
- */
-void rnbd_dev_close(struct rnbd_dev *dev);
-
-void rnbd_endio(void *priv, int error);
-
-void rnbd_dev_bi_end_io(struct bio *bio);
-
-struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
- unsigned int len, gfp_t gfp_mask);
-
-static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev)
-{
- return queue_max_segments(bdev_get_queue(dev->bdev));
-}
-
-static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
-{
- return queue_max_hw_sectors(bdev_get_queue(dev->bdev));
-}
-
-static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
-{
- return blk_queue_secure_erase(bdev_get_queue(dev->bdev));
-}
-
-static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
-{
- if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
- return 0;
-
- return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev),
- REQ_OP_DISCARD);
-}
-
-static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
-{
- return bdev_get_queue(dev->bdev)->limits.discard_granularity;
-}
-
-static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
-{
- return bdev_get_queue(dev->bdev)->limits.discard_alignment;
-}
-
-#endif /* RNBD_SRV_DEV_H */
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index 4db98e0e76f0..297a6924ff4e 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -13,7 +13,6 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/stat.h>
-#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
@@ -39,14 +38,13 @@ static struct kobj_type dev_ktype = {
};
int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev,
- struct block_device *bdev,
- const char *dev_name)
+ struct block_device *bdev)
{
struct kobject *bdev_kobj;
int ret;
ret = kobject_init_and_add(&dev->dev_kobj, &dev_ktype,
- rnbd_devs_kobj, dev_name);
+ rnbd_devs_kobj, "%pg", bdev);
if (ret) {
kobject_put(&dev->dev_kobj);
return ret;
diff --git a/drivers/block/rnbd/rnbd-srv-trace.c b/drivers/block/rnbd/rnbd-srv-trace.c
new file mode 100644
index 000000000000..30f0895c18f5
--- /dev/null
+++ b/drivers/block/rnbd/rnbd-srv-trace.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#include "rtrs.h"
+#include "rtrs-srv.h"
+#include "rnbd-srv.h"
+#include "rnbd-proto.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "rnbd-srv-trace.h"
diff --git a/drivers/block/rnbd/rnbd-srv-trace.h b/drivers/block/rnbd/rnbd-srv-trace.h
new file mode 100644
index 000000000000..8dedf73bdd28
--- /dev/null
+++ b/drivers/block/rnbd/rnbd-srv-trace.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rnbd_srv
+
+#if !defined(_TRACE_RNBD_SRV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RNBD_SRV_H
+
+#include <linux/tracepoint.h>
+
+struct rnbd_srv_session;
+struct rtrs_srv_op;
+
+DECLARE_EVENT_CLASS(rnbd_srv_link_class,
+ TP_PROTO(struct rnbd_srv_session *srv),
+
+ TP_ARGS(srv),
+
+ TP_STRUCT__entry(
+ __field(int, qdepth)
+ __string(sessname, srv->sessname)
+ ),
+
+ TP_fast_assign(
+ __entry->qdepth = srv->queue_depth;
+ __assign_str(sessname, srv->sessname);
+ ),
+
+ TP_printk("sessname: %s qdepth: %d",
+ __get_str(sessname),
+ __entry->qdepth
+ )
+);
+
+#define DEFINE_LINK_EVENT(name) \
+DEFINE_EVENT(rnbd_srv_link_class, name, \
+ TP_PROTO(struct rnbd_srv_session *srv), \
+ TP_ARGS(srv))
+
+DEFINE_LINK_EVENT(create_sess);
+DEFINE_LINK_EVENT(destroy_sess);
+
+TRACE_DEFINE_ENUM(RNBD_OP_READ);
+TRACE_DEFINE_ENUM(RNBD_OP_WRITE);
+TRACE_DEFINE_ENUM(RNBD_OP_FLUSH);
+TRACE_DEFINE_ENUM(RNBD_OP_DISCARD);
+TRACE_DEFINE_ENUM(RNBD_OP_SECURE_ERASE);
+TRACE_DEFINE_ENUM(RNBD_F_SYNC);
+TRACE_DEFINE_ENUM(RNBD_F_FUA);
+
+#define show_rnbd_rw_flags(x) \
+ __print_flags(x, "|", \
+ { RNBD_OP_READ, "READ" }, \
+ { RNBD_OP_WRITE, "WRITE" }, \
+ { RNBD_OP_FLUSH, "FLUSH" }, \
+ { RNBD_OP_DISCARD, "DISCARD" }, \
+ { RNBD_OP_SECURE_ERASE, "SECURE_ERASE" }, \
+ { RNBD_F_SYNC, "SYNC" }, \
+ { RNBD_F_FUA, "FUA" })
+
+TRACE_EVENT(process_rdma,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_io *msg,
+ struct rtrs_srv_op *id,
+ u32 datalen,
+ size_t usrlen),
+
+ TP_ARGS(srv, msg, id, datalen, usrlen),
+
+ TP_STRUCT__entry(
+ __string(sessname, srv->sessname)
+ __field(u8, dir)
+ __field(u8, ver)
+ __field(u32, device_id)
+ __field(u64, sector)
+ __field(u32, flags)
+ __field(u32, bi_size)
+ __field(u16, ioprio)
+ __field(u32, datalen)
+ __field(size_t, usrlen)
+ ),
+
+ TP_fast_assign(
+ __assign_str(sessname, srv->sessname);
+ __entry->dir = id->dir;
+ __entry->ver = srv->ver;
+ __entry->device_id = le32_to_cpu(msg->device_id);
+ __entry->sector = le64_to_cpu(msg->sector);
+ __entry->bi_size = le32_to_cpu(msg->bi_size);
+ __entry->flags = le32_to_cpu(msg->rw);
+ __entry->ioprio = le16_to_cpu(msg->prio);
+ __entry->datalen = datalen;
+ __entry->usrlen = usrlen;
+ ),
+
+ TP_printk("I/O req: sess: %s, type: %s, ver: %d, devid: %u, sector: %llu, bsize: %u, flags: %s, ioprio: %d, datalen: %u, usrlen: %zu",
+ __get_str(sessname),
+ __print_symbolic(__entry->dir,
+ { READ, "READ" },
+ { WRITE, "WRITE" }),
+ __entry->ver,
+ __entry->device_id,
+ __entry->sector,
+ __entry->bi_size,
+ show_rnbd_rw_flags(__entry->flags),
+ __entry->ioprio,
+ __entry->datalen,
+ __entry->usrlen
+ )
+);
+
+TRACE_EVENT(process_msg_sess_info,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_sess_info *msg),
+
+ TP_ARGS(srv, msg),
+
+ TP_STRUCT__entry(
+ __field(u8, proto_ver)
+ __field(u8, clt_ver)
+ __field(u8, srv_ver)
+ __string(sessname, srv->sessname)
+ ),
+
+ TP_fast_assign(
+ __entry->proto_ver = srv->ver;
+ __entry->clt_ver = msg->ver;
+ __entry->srv_ver = RNBD_PROTO_VER_MAJOR;
+ __assign_str(sessname, srv->sessname);
+ ),
+
+ TP_printk("Session %s using proto-ver %d (clt-ver: %d, srv-ver: %d)",
+ __get_str(sessname),
+ __entry->proto_ver,
+ __entry->clt_ver,
+ __entry->srv_ver
+ )
+);
+
+TRACE_DEFINE_ENUM(RNBD_ACCESS_RO);
+TRACE_DEFINE_ENUM(RNBD_ACCESS_RW);
+TRACE_DEFINE_ENUM(RNBD_ACCESS_MIGRATION);
+
+#define show_rnbd_access_mode(x) \
+ __print_symbolic(x, \
+ { RNBD_ACCESS_RO, "RO" }, \
+ { RNBD_ACCESS_RW, "RW" }, \
+ { RNBD_ACCESS_MIGRATION, "MIGRATION" })
+
+TRACE_EVENT(process_msg_open,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_open *msg),
+
+ TP_ARGS(srv, msg),
+
+ TP_STRUCT__entry(
+ __field(u8, access_mode)
+ __string(sessname, srv->sessname)
+ __string(dev_name, msg->dev_name)
+ ),
+
+ TP_fast_assign(
+ __entry->access_mode = msg->access_mode;
+ __assign_str(sessname, srv->sessname);
+ __assign_str(dev_name, msg->dev_name);
+ ),
+
+ TP_printk("Open message received: session='%s' path='%s' access_mode=%s",
+ __get_str(sessname),
+ __get_str(dev_name),
+ show_rnbd_access_mode(__entry->access_mode)
+ )
+);
+
+TRACE_EVENT(process_msg_close,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_close *msg),
+
+ TP_ARGS(srv, msg),
+
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __string(sessname, srv->sessname)
+ ),
+
+ TP_fast_assign(
+ __entry->device_id = le32_to_cpu(msg->device_id);
+ __assign_str(sessname, srv->sessname);
+ ),
+
+ TP_printk("Close message received: session='%s' device id='%d'",
+ __get_str(sessname),
+ __entry->device_id
+ )
+);
+
+#endif /* _TRACE_RNBD_SRV_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE rnbd-srv-trace
+#include <trace/define_trace.h>
+
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index aafecfe97055..2cfed2e58d64 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -13,7 +13,7 @@
#include <linux/blkdev.h>
#include "rnbd-srv.h"
-#include "rnbd-srv-dev.h"
+#include "rnbd-srv-trace.h"
MODULE_DESCRIPTION("RDMA Network Block Device Server");
MODULE_LICENSE("GPL");
@@ -84,18 +84,6 @@ static inline void rnbd_put_sess_dev(struct rnbd_srv_sess_dev *sess_dev)
kref_put(&sess_dev->kref, rnbd_sess_dev_release);
}
-void rnbd_endio(void *priv, int error)
-{
- struct rnbd_io_private *rnbd_priv = priv;
- struct rnbd_srv_sess_dev *sess_dev = rnbd_priv->sess_dev;
-
- rnbd_put_sess_dev(sess_dev);
-
- rtrs_srv_resp_rdma(rnbd_priv->id, error);
-
- kfree(priv);
-}
-
static struct rnbd_srv_sess_dev *
rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
{
@@ -114,6 +102,18 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
return sess_dev;
}
+static void rnbd_dev_bi_end_io(struct bio *bio)
+{
+ struct rnbd_io_private *rnbd_priv = bio->bi_private;
+ struct rnbd_srv_sess_dev *sess_dev = rnbd_priv->sess_dev;
+
+ rnbd_put_sess_dev(sess_dev);
+ rtrs_srv_resp_rdma(rnbd_priv->id, blk_status_to_errno(bio->bi_status));
+
+ kfree(rnbd_priv);
+ bio_put(bio);
+}
+
static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rtrs_srv_op *id, void *data, u32 datalen,
const void *usr, size_t usrlen)
@@ -123,10 +123,11 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_sess_dev *sess_dev;
u32 dev_id;
int err;
- struct rnbd_dev_blk_io *io;
struct bio *bio;
short prio;
+ trace_process_rdma(srv_sess, msg, id, datalen, usrlen);
+
priv = kmalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
@@ -144,33 +145,29 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;
- /* Generate bio with pages pointing to the rdma buffer */
- bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL);
- if (IS_ERR(bio)) {
- err = PTR_ERR(bio);
- rnbd_srv_err(sess_dev, "Failed to generate bio, err: %d\n", err);
- goto sess_dev_put;
+ bio = bio_alloc(sess_dev->bdev, 1,
+ rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
+ if (bio_add_page(bio, virt_to_page(data), datalen,
+ offset_in_page(data)) != datalen) {
+ rnbd_srv_err(sess_dev, "Failed to map data to bio\n");
+ err = -EINVAL;
+ goto bio_put;
}
- io = container_of(bio, struct rnbd_dev_blk_io, bio);
- io->dev = sess_dev->rnbd_dev;
- io->priv = priv;
-
bio->bi_end_io = rnbd_dev_bi_end_io;
- bio->bi_private = io;
- bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
+ bio->bi_private = priv;
bio->bi_iter.bi_sector = le64_to_cpu(msg->sector);
bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size);
prio = srv_sess->ver < RNBD_PROTO_VER_MAJOR ||
usrlen < sizeof(*msg) ? 0 : le16_to_cpu(msg->prio);
bio_set_prio(bio, prio);
- bio_set_dev(bio, sess_dev->rnbd_dev->bdev);
submit_bio(bio);
return 0;
-sess_dev_put:
+bio_put:
+ bio_put(bio);
rnbd_put_sess_dev(sess_dev);
err:
kfree(priv);
@@ -222,8 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
rnbd_put_sess_dev(sess_dev);
wait_for_completion(&dc); /* wait for inflights to drop to zero */
- rnbd_dev_close(sess_dev->rnbd_dev);
- list_del(&sess_dev->sess_list);
+ blkdev_put(sess_dev->bdev, sess_dev->open_flags);
mutex_lock(&sess_dev->dev->lock);
list_del(&sess_dev->dev_list);
if (sess_dev->open_flags & FMODE_WRITE)
@@ -238,20 +234,21 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
static void destroy_sess(struct rnbd_srv_session *srv_sess)
{
- struct rnbd_srv_sess_dev *sess_dev, *tmp;
+ struct rnbd_srv_sess_dev *sess_dev;
+ unsigned long index;
- if (list_empty(&srv_sess->sess_dev_list))
+ if (xa_empty(&srv_sess->index_idr))
goto out;
+ trace_destroy_sess(srv_sess);
+
mutex_lock(&srv_sess->lock);
- list_for_each_entry_safe(sess_dev, tmp, &srv_sess->sess_dev_list,
- sess_list)
+ xa_for_each(&srv_sess->index_idr, index, sess_dev)
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
mutex_unlock(&srv_sess->lock);
out:
xa_destroy(&srv_sess->index_idr);
- bioset_exit(&srv_sess->sess_bio_set);
pr_info("RTRS Session %s disconnected\n", srv_sess->sessname);
@@ -263,15 +260,15 @@ out:
kfree(srv_sess);
}
-static int create_sess(struct rtrs_srv *rtrs)
+static int create_sess(struct rtrs_srv_sess *rtrs)
{
struct rnbd_srv_session *srv_sess;
- char sessname[NAME_MAX];
+ char pathname[NAME_MAX];
int err;
- err = rtrs_srv_get_sess_name(rtrs, sessname, sizeof(sessname));
+ err = rtrs_srv_get_path_name(rtrs, pathname, sizeof(pathname));
if (err) {
- pr_err("rtrs_srv_get_sess_name(%s): %d\n", sessname, err);
+ pr_err("rtrs_srv_get_path_name(%s): %d\n", pathname, err);
return err;
}
@@ -280,32 +277,23 @@ static int create_sess(struct rtrs_srv *rtrs)
return -ENOMEM;
srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
- err = bioset_init(&srv_sess->sess_bio_set, srv_sess->queue_depth,
- offsetof(struct rnbd_dev_blk_io, bio),
- BIOSET_NEED_BVECS);
- if (err) {
- pr_err("Allocating srv_session for session %s failed\n",
- sessname);
- kfree(srv_sess);
- return err;
- }
-
xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
- INIT_LIST_HEAD(&srv_sess->sess_dev_list);
mutex_init(&srv_sess->lock);
mutex_lock(&sess_lock);
list_add(&srv_sess->list, &sess_list);
mutex_unlock(&sess_lock);
srv_sess->rtrs = rtrs;
- strscpy(srv_sess->sessname, sessname, sizeof(srv_sess->sessname));
+ strscpy(srv_sess->sessname, pathname, sizeof(srv_sess->sessname));
rtrs_srv_set_sess_priv(rtrs, srv_sess);
+ trace_create_sess(srv_sess);
+
return 0;
}
-static int rnbd_srv_link_ev(struct rtrs_srv *rtrs,
+static int rnbd_srv_link_ev(struct rtrs_srv_sess *rtrs,
enum rtrs_srv_link_ev ev, void *priv)
{
struct rnbd_srv_session *srv_sess = priv;
@@ -333,33 +321,35 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
{
struct rnbd_srv_session *sess = sess_dev->sess;
- sess_dev->keep_id = true;
/* It is already started to close by client's close message. */
if (!mutex_trylock(&sess->lock))
return;
+
+ sess_dev->keep_id = true;
/* first remove sysfs itself to avoid deadlock */
sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
mutex_unlock(&sess->lock);
}
-static int process_msg_close(struct rnbd_srv_session *srv_sess,
+static void process_msg_close(struct rnbd_srv_session *srv_sess,
void *data, size_t datalen, const void *usr,
size_t usrlen)
{
const struct rnbd_msg_close *close_msg = usr;
struct rnbd_srv_sess_dev *sess_dev;
+ trace_process_msg_close(srv_sess, close_msg);
+
sess_dev = rnbd_get_sess_dev(le32_to_cpu(close_msg->device_id),
srv_sess);
if (IS_ERR(sess_dev))
- return 0;
+ return;
rnbd_put_sess_dev(sess_dev);
mutex_lock(&srv_sess->lock);
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
mutex_unlock(&srv_sess->lock);
- return 0;
}
static int process_msg_open(struct rnbd_srv_session *srv_sess,
@@ -370,10 +360,9 @@ static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
-static int rnbd_srv_rdma_ev(void *priv,
- struct rtrs_srv_op *id, int dir,
- void *data, size_t datalen, const void *usr,
- size_t usrlen)
+static int rnbd_srv_rdma_ev(void *priv, struct rtrs_srv_op *id,
+ void *data, size_t datalen,
+ const void *usr, size_t usrlen)
{
struct rnbd_srv_session *srv_sess = priv;
const struct rnbd_msg_hdr *hdr = usr;
@@ -389,7 +378,7 @@ static int rnbd_srv_rdma_ev(void *priv,
case RNBD_MSG_IO:
return process_rdma(srv_sess, id, data, datalen, usr, usrlen);
case RNBD_MSG_CLOSE:
- ret = process_msg_close(srv_sess, data, datalen, usr, usrlen);
+ process_msg_close(srv_sess, data, datalen, usr, usrlen);
break;
case RNBD_MSG_OPEN:
ret = process_msg_open(srv_sess, usr, usrlen, data, datalen);
@@ -399,11 +388,16 @@ static int rnbd_srv_rdma_ev(void *priv,
datalen);
break;
default:
- pr_warn("Received unexpected message type %d with dir %d from session %s\n",
- type, dir, srv_sess->sessname);
+ pr_warn("Received unexpected message type %d from session %s\n",
+ type, srv_sess->sessname);
return -EINVAL;
}
+ /*
+ * Since ret is passed to rtrs to handle the failure case, we
+ * just return 0 at the end otherwise callers in rtrs would call
+ * send_io_resp_imm again to print redundant err message.
+ */
rtrs_srv_resp_rdma(id, ret);
return 0;
}
@@ -429,7 +423,7 @@ static struct rnbd_srv_sess_dev
return sess_dev;
}
-static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(const char *id)
+static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(struct block_device *bdev)
{
struct rnbd_srv_dev *dev;
@@ -437,7 +431,7 @@ static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(const char *id)
if (!dev)
return ERR_PTR(-ENOMEM);
- strscpy(dev->id, id, sizeof(dev->id));
+ snprintf(dev->id, sizeof(dev->id), "%pg", bdev);
kref_init(&dev->kref);
INIT_LIST_HEAD(&dev->sess_dev_list);
mutex_init(&dev->lock);
@@ -515,14 +509,14 @@ static int rnbd_srv_check_update_open_perm(struct rnbd_srv_dev *srv_dev,
}
static struct rnbd_srv_dev *
-rnbd_srv_get_or_create_srv_dev(struct rnbd_dev *rnbd_dev,
+rnbd_srv_get_or_create_srv_dev(struct block_device *bdev,
struct rnbd_srv_session *srv_sess,
enum rnbd_access_mode access_mode)
{
int ret;
struct rnbd_srv_dev *new_dev, *dev;
- new_dev = rnbd_srv_init_srv_dev(rnbd_dev->name);
+ new_dev = rnbd_srv_init_srv_dev(bdev);
if (IS_ERR(new_dev))
return new_dev;
@@ -542,44 +536,32 @@ rnbd_srv_get_or_create_srv_dev(struct rnbd_dev *rnbd_dev,
static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
struct rnbd_srv_sess_dev *sess_dev)
{
- struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
- struct request_queue *q = bdev_get_queue(rnbd_dev->bdev);
+ struct block_device *bdev = sess_dev->bdev;
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
- rsp->device_id =
- cpu_to_le32(sess_dev->device_id);
- rsp->nsectors =
- cpu_to_le64(get_capacity(rnbd_dev->bdev->bd_disk));
- rsp->logical_block_size =
- cpu_to_le16(bdev_logical_block_size(rnbd_dev->bdev));
- rsp->physical_block_size =
- cpu_to_le16(bdev_physical_block_size(rnbd_dev->bdev));
- rsp->max_segments =
- cpu_to_le16(rnbd_dev_get_max_segs(rnbd_dev));
+ rsp->device_id = cpu_to_le32(sess_dev->device_id);
+ rsp->nsectors = cpu_to_le64(bdev_nr_sectors(bdev));
+ rsp->logical_block_size = cpu_to_le16(bdev_logical_block_size(bdev));
+ rsp->physical_block_size = cpu_to_le16(bdev_physical_block_size(bdev));
+ rsp->max_segments = cpu_to_le16(bdev_max_segments(bdev));
rsp->max_hw_sectors =
- cpu_to_le32(rnbd_dev_get_max_hw_sects(rnbd_dev));
- rsp->max_write_same_sectors =
- cpu_to_le32(bdev_write_same(rnbd_dev->bdev));
- rsp->max_discard_sectors =
- cpu_to_le32(rnbd_dev_get_max_discard_sects(rnbd_dev));
- rsp->discard_granularity =
- cpu_to_le32(rnbd_dev_get_discard_granularity(rnbd_dev));
- rsp->discard_alignment =
- cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev));
- rsp->secure_discard =
- cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
- rsp->rotational = !blk_queue_nonrot(q);
+ cpu_to_le32(queue_max_hw_sectors(bdev_get_queue(bdev)));
+ rsp->max_write_same_sectors = 0;
+ rsp->max_discard_sectors = cpu_to_le32(bdev_max_discard_sectors(bdev));
+ rsp->discard_granularity = cpu_to_le32(bdev_discard_granularity(bdev));
+ rsp->discard_alignment = cpu_to_le32(bdev_discard_alignment(bdev));
+ rsp->secure_discard = cpu_to_le16(bdev_max_secure_erase_sectors(bdev));
rsp->cache_policy = 0;
- if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (bdev_write_cache(bdev))
rsp->cache_policy |= RNBD_WRITEBACK;
- if (blk_queue_fua(q))
+ if (bdev_fua(bdev))
rsp->cache_policy |= RNBD_FUA;
}
static struct rnbd_srv_sess_dev *
rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
const struct rnbd_msg_open *open_msg,
- struct rnbd_dev *rnbd_dev, fmode_t open_flags,
+ struct block_device *bdev, fmode_t open_flags,
struct rnbd_srv_dev *srv_dev)
{
struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess);
@@ -591,7 +573,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));
- sdev->rnbd_dev = rnbd_dev;
+ sdev->bdev = bdev;
sdev->sess = srv_sess;
sdev->dev = srv_dev;
sdev->open_flags = open_flags;
@@ -657,9 +639,8 @@ static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
struct rnbd_msg_sess_info_rsp *rsp = data;
srv_sess->ver = min_t(u8, sess_info_msg->ver, RNBD_PROTO_VER_MAJOR);
- pr_debug("Session %s using protocol version %d (client version: %d, server version: %d)\n",
- srv_sess->sessname, srv_sess->ver,
- sess_info_msg->ver, RNBD_PROTO_VER_MAJOR);
+
+ trace_process_msg_sess_info(srv_sess, sess_info_msg);
rsp->hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO_RSP);
rsp->ver = srv_sess->ver;
@@ -679,11 +660,12 @@ static struct rnbd_srv_sess_dev *
find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name)
{
struct rnbd_srv_sess_dev *sess_dev;
+ unsigned long index;
- if (list_empty(&srv_sess->sess_dev_list))
+ if (xa_empty(&srv_sess->index_idr))
return NULL;
- list_for_each_entry(sess_dev, &srv_sess->sess_dev_list, sess_list)
+ xa_for_each(&srv_sess->index_idr, index, sess_dev)
if (!strcmp(sess_dev->pathname, dev_name))
return sess_dev;
@@ -698,14 +680,13 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_dev *srv_dev;
struct rnbd_srv_sess_dev *srv_sess_dev;
const struct rnbd_msg_open *open_msg = msg;
+ struct block_device *bdev;
fmode_t open_flags;
char *full_path;
- struct rnbd_dev *rnbd_dev;
struct rnbd_msg_open_rsp *rsp = data;
- pr_debug("Open message received: session='%s' path='%s' access_mode=%d\n",
- srv_sess->sessname, open_msg->dev_name,
- open_msg->access_mode);
+ trace_process_msg_open(srv_sess, open_msg);
+
open_flags = FMODE_READ;
if (open_msg->access_mode != RNBD_ACCESS_RO)
open_flags |= FMODE_WRITE;
@@ -738,26 +719,25 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
- rnbd_dev = rnbd_dev_open(full_path, open_flags,
- &srv_sess->sess_bio_set);
- if (IS_ERR(rnbd_dev)) {
- pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n",
- full_path, srv_sess->sessname, PTR_ERR(rnbd_dev));
- ret = PTR_ERR(rnbd_dev);
+ bdev = blkdev_get_by_path(full_path, open_flags, THIS_MODULE);
+ if (IS_ERR(bdev)) {
+ ret = PTR_ERR(bdev);
+ pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %d\n",
+ full_path, srv_sess->sessname, ret);
goto free_path;
}
- srv_dev = rnbd_srv_get_or_create_srv_dev(rnbd_dev, srv_sess,
+ srv_dev = rnbd_srv_get_or_create_srv_dev(bdev, srv_sess,
open_msg->access_mode);
if (IS_ERR(srv_dev)) {
pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %ld\n",
full_path, srv_sess->sessname, PTR_ERR(srv_dev));
ret = PTR_ERR(srv_dev);
- goto rnbd_dev_close;
+ goto blkdev_put;
}
srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg,
- rnbd_dev, open_flags,
+ bdev, open_flags,
srv_dev);
if (IS_ERR(srv_sess_dev)) {
pr_err("Opening device '%s' on session %s failed, creating sess_dev failed, err: %ld\n",
@@ -772,8 +752,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
*/
mutex_lock(&srv_dev->lock);
if (!srv_dev->dev_kobj.state_in_sysfs) {
- ret = rnbd_srv_create_dev_sysfs(srv_dev, rnbd_dev->bdev,
- rnbd_dev->name);
+ ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev);
if (ret) {
mutex_unlock(&srv_dev->lock);
rnbd_srv_err(srv_sess_dev,
@@ -795,8 +774,6 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
list_add(&srv_sess_dev->dev_list, &srv_dev->sess_dev_list);
mutex_unlock(&srv_dev->lock);
- list_add(&srv_sess_dev->sess_list, &srv_sess->sess_dev_list);
-
rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->id);
kfree(full_path);
@@ -817,8 +794,8 @@ srv_dev_put:
mutex_unlock(&srv_dev->lock);
}
rnbd_put_srv_dev(srv_dev);
-rnbd_dev_close:
- rnbd_dev_close(rnbd_dev);
+blkdev_put:
+ blkdev_put(bdev, open_flags);
free_path:
kfree(full_path);
reject:
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index 98ddc31eb408..f5962fd31d62 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -20,14 +20,11 @@
struct rnbd_srv_session {
/* Entry inside global sess_list */
struct list_head list;
- struct rtrs_srv *rtrs;
+ struct rtrs_srv_sess *rtrs;
char sessname[NAME_MAX];
int queue_depth;
- struct bio_set sess_bio_set;
struct xarray index_idr;
- /* List of struct rnbd_srv_sess_dev */
- struct list_head sess_dev_list;
struct mutex lock;
u8 ver;
};
@@ -49,9 +46,7 @@ struct rnbd_srv_dev {
struct rnbd_srv_sess_dev {
/* Entry inside rnbd_srv_dev struct */
struct list_head dev_list;
- /* Entry inside rnbd_srv_session struct */
- struct list_head sess_list;
- struct rnbd_dev *rnbd_dev;
+ struct block_device *bdev;
struct rnbd_srv_session *sess;
struct rnbd_srv_dev *dev;
struct kobject kobj;
@@ -69,8 +64,7 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
/* rnbd-srv-sysfs.c */
int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev,
- struct block_device *bdev,
- const char *dir_name);
+ struct block_device *bdev);
void rnbd_srv_destroy_dev_sysfs(struct rnbd_srv_dev *dev);
int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev);
void rnbd_srv_destroy_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev);
diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile
deleted file mode 100644
index 7ef158099d33..000000000000
--- a/drivers/block/rsxx/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o
-rsxx-objs := config.o core.o cregs.o dev.o dma.o
diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c
deleted file mode 100644
index 11ed1d9646b9..000000000000
--- a/drivers/block/rsxx/config.c
+++ /dev/null
@@ -1,197 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-* Filename: config.c
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#include <linux/types.h>
-#include <linux/crc32.h>
-#include <linux/swab.h>
-
-#include "rsxx_priv.h"
-#include "rsxx_cfg.h"
-
-static void initialize_config(struct rsxx_card_cfg *cfg)
-{
- cfg->hdr.version = RSXX_CFG_VERSION;
-
- cfg->data.block_size = RSXX_HW_BLK_SIZE;
- cfg->data.stripe_size = RSXX_HW_BLK_SIZE;
- cfg->data.vendor_id = RSXX_VENDOR_ID_IBM;
- cfg->data.cache_order = (-1);
- cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED;
- cfg->data.intr_coal.count = 0;
- cfg->data.intr_coal.latency = 0;
-}
-
-static u32 config_data_crc32(struct rsxx_card_cfg *cfg)
-{
- /*
- * Return the compliment of the CRC to ensure compatibility
- * (i.e. this is how early rsxx drivers did it.)
- */
-
- return ~crc32(~0, &cfg->data, sizeof(cfg->data));
-}
-
-
-/*----------------- Config Byte Swap Functions -------------------*/
-static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr)
-{
- hdr->version = be32_to_cpu((__force __be32) hdr->version);
- hdr->crc = be32_to_cpu((__force __be32) hdr->crc);
-}
-
-static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr)
-{
- hdr->version = (__force u32) cpu_to_be32(hdr->version);
- hdr->crc = (__force u32) cpu_to_be32(hdr->crc);
-}
-
-static void config_data_swab(struct rsxx_card_cfg *cfg)
-{
- u32 *data = (u32 *) &cfg->data;
- int i;
-
- for (i = 0; i < (sizeof(cfg->data) / 4); i++)
- data[i] = swab32(data[i]);
-}
-
-static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg)
-{
- u32 *data = (u32 *) &cfg->data;
- int i;
-
- for (i = 0; i < (sizeof(cfg->data) / 4); i++)
- data[i] = le32_to_cpu((__force __le32) data[i]);
-}
-
-static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg)
-{
- u32 *data = (u32 *) &cfg->data;
- int i;
-
- for (i = 0; i < (sizeof(cfg->data) / 4); i++)
- data[i] = (__force u32) cpu_to_le32(data[i]);
-}
-
-
-/*----------------- Config Operations ------------------*/
-static int rsxx_save_config(struct rsxx_cardinfo *card)
-{
- struct rsxx_card_cfg cfg;
- int st;
-
- memcpy(&cfg, &card->config, sizeof(cfg));
-
- if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) {
- dev_err(CARD_TO_DEV(card),
- "Cannot save config with invalid version %d\n",
- cfg.hdr.version);
- return -EINVAL;
- }
-
- /* Convert data to little endian for the CRC calculation. */
- config_data_cpu_to_le(&cfg);
-
- cfg.hdr.crc = config_data_crc32(&cfg);
-
- /*
- * Swap the data from little endian to big endian so it can be
- * stored.
- */
- config_data_swab(&cfg);
- config_hdr_cpu_to_be(&cfg.hdr);
-
- st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1);
- if (st)
- return st;
-
- return 0;
-}
-
-int rsxx_load_config(struct rsxx_cardinfo *card)
-{
- int st;
- u32 crc;
-
- st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config),
- &card->config, 1);
- if (st) {
- dev_err(CARD_TO_DEV(card),
- "Failed reading card config.\n");
- return st;
- }
-
- config_hdr_be_to_cpu(&card->config.hdr);
-
- if (card->config.hdr.version == RSXX_CFG_VERSION) {
- /*
- * We calculate the CRC with the data in little endian, because
- * early drivers did not take big endian CPUs into account.
- * The data is always stored in big endian, so we need to byte
- * swap it before calculating the CRC.
- */
-
- config_data_swab(&card->config);
-
- /* Check the CRC */
- crc = config_data_crc32(&card->config);
- if (crc != card->config.hdr.crc) {
- dev_err(CARD_TO_DEV(card),
- "Config corruption detected!\n");
- dev_info(CARD_TO_DEV(card),
- "CRC (sb x%08x is x%08x)\n",
- card->config.hdr.crc, crc);
- return -EIO;
- }
-
- /* Convert the data to CPU byteorder */
- config_data_le_to_cpu(&card->config);
-
- } else if (card->config.hdr.version != 0) {
- dev_err(CARD_TO_DEV(card),
- "Invalid config version %d.\n",
- card->config.hdr.version);
- /*
- * Config version changes require special handling from the
- * user
- */
- return -EINVAL;
- } else {
- dev_info(CARD_TO_DEV(card),
- "Initializing card configuration.\n");
- initialize_config(&card->config);
- st = rsxx_save_config(card);
- if (st)
- return st;
- }
-
- card->config_valid = 1;
-
- dev_dbg(CARD_TO_DEV(card), "version: x%08x\n",
- card->config.hdr.version);
- dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n",
- card->config.hdr.crc);
- dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n",
- card->config.data.block_size);
- dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n",
- card->config.data.stripe_size);
- dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n",
- card->config.data.vendor_id);
- dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n",
- card->config.data.cache_order);
- dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n",
- card->config.data.intr_coal.mode);
- dev_dbg(CARD_TO_DEV(card), "count: x%08x\n",
- card->config.data.intr_coal.count);
- dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n",
- card->config.data.intr_coal.latency);
-
- return 0;
-}
-
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
deleted file mode 100644
index 8d9d69f5dfbc..000000000000
--- a/drivers/block/rsxx/core.c
+++ /dev/null
@@ -1,1126 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-* Filename: core.c
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/reboot.h>
-#include <linux/slab.h>
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include <linux/genhd.h>
-#include <linux/idr.h>
-
-#include "rsxx_priv.h"
-#include "rsxx_cfg.h"
-
-#define NO_LEGACY 0
-#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
-
-MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
-MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRIVER_VERSION);
-
-static unsigned int force_legacy = NO_LEGACY;
-module_param(force_legacy, uint, 0444);
-MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
-
-static unsigned int sync_start = 1;
-module_param(sync_start, uint, 0444);
-MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
- "until the card startup has completed.");
-
-static DEFINE_IDA(rsxx_disk_ida);
-
-/* --------------------Debugfs Setup ------------------- */
-
-static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
-{
- struct rsxx_cardinfo *card = m->private;
-
- seq_printf(m, "HWID 0x%08x\n",
- ioread32(card->regmap + HWID));
- seq_printf(m, "SCRATCH 0x%08x\n",
- ioread32(card->regmap + SCRATCH));
- seq_printf(m, "IER 0x%08x\n",
- ioread32(card->regmap + IER));
- seq_printf(m, "IPR 0x%08x\n",
- ioread32(card->regmap + IPR));
- seq_printf(m, "CREG_CMD 0x%08x\n",
- ioread32(card->regmap + CREG_CMD));
- seq_printf(m, "CREG_ADD 0x%08x\n",
- ioread32(card->regmap + CREG_ADD));
- seq_printf(m, "CREG_CNT 0x%08x\n",
- ioread32(card->regmap + CREG_CNT));
- seq_printf(m, "CREG_STAT 0x%08x\n",
- ioread32(card->regmap + CREG_STAT));
- seq_printf(m, "CREG_DATA0 0x%08x\n",
- ioread32(card->regmap + CREG_DATA0));
- seq_printf(m, "CREG_DATA1 0x%08x\n",
- ioread32(card->regmap + CREG_DATA1));
- seq_printf(m, "CREG_DATA2 0x%08x\n",
- ioread32(card->regmap + CREG_DATA2));
- seq_printf(m, "CREG_DATA3 0x%08x\n",
- ioread32(card->regmap + CREG_DATA3));
- seq_printf(m, "CREG_DATA4 0x%08x\n",
- ioread32(card->regmap + CREG_DATA4));
- seq_printf(m, "CREG_DATA5 0x%08x\n",
- ioread32(card->regmap + CREG_DATA5));
- seq_printf(m, "CREG_DATA6 0x%08x\n",
- ioread32(card->regmap + CREG_DATA6));
- seq_printf(m, "CREG_DATA7 0x%08x\n",
- ioread32(card->regmap + CREG_DATA7));
- seq_printf(m, "INTR_COAL 0x%08x\n",
- ioread32(card->regmap + INTR_COAL));
- seq_printf(m, "HW_ERROR 0x%08x\n",
- ioread32(card->regmap + HW_ERROR));
- seq_printf(m, "DEBUG0 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG0));
- seq_printf(m, "DEBUG1 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG1));
- seq_printf(m, "DEBUG2 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG2));
- seq_printf(m, "DEBUG3 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG3));
- seq_printf(m, "DEBUG4 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG4));
- seq_printf(m, "DEBUG5 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG5));
- seq_printf(m, "DEBUG6 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG6));
- seq_printf(m, "DEBUG7 0x%08x\n",
- ioread32(card->regmap + PCI_DEBUG7));
- seq_printf(m, "RECONFIG 0x%08x\n",
- ioread32(card->regmap + PCI_RECONFIG));
-
- return 0;
-}
-
-static int rsxx_attr_stats_show(struct seq_file *m, void *p)
-{
- struct rsxx_cardinfo *card = m->private;
- int i;
-
- for (i = 0; i < card->n_targets; i++) {
- seq_printf(m, "Ctrl %d CRC Errors = %d\n",
- i, card->ctrl[i].stats.crc_errors);
- seq_printf(m, "Ctrl %d Hard Errors = %d\n",
- i, card->ctrl[i].stats.hard_errors);
- seq_printf(m, "Ctrl %d Soft Errors = %d\n",
- i, card->ctrl[i].stats.soft_errors);
- seq_printf(m, "Ctrl %d Writes Issued = %d\n",
- i, card->ctrl[i].stats.writes_issued);
- seq_printf(m, "Ctrl %d Writes Failed = %d\n",
- i, card->ctrl[i].stats.writes_failed);
- seq_printf(m, "Ctrl %d Reads Issued = %d\n",
- i, card->ctrl[i].stats.reads_issued);
- seq_printf(m, "Ctrl %d Reads Failed = %d\n",
- i, card->ctrl[i].stats.reads_failed);
- seq_printf(m, "Ctrl %d Reads Retried = %d\n",
- i, card->ctrl[i].stats.reads_retried);
- seq_printf(m, "Ctrl %d Discards Issued = %d\n",
- i, card->ctrl[i].stats.discards_issued);
- seq_printf(m, "Ctrl %d Discards Failed = %d\n",
- i, card->ctrl[i].stats.discards_failed);
- seq_printf(m, "Ctrl %d DMA SW Errors = %d\n",
- i, card->ctrl[i].stats.dma_sw_err);
- seq_printf(m, "Ctrl %d DMA HW Faults = %d\n",
- i, card->ctrl[i].stats.dma_hw_fault);
- seq_printf(m, "Ctrl %d DMAs Cancelled = %d\n",
- i, card->ctrl[i].stats.dma_cancelled);
- seq_printf(m, "Ctrl %d SW Queue Depth = %d\n",
- i, card->ctrl[i].stats.sw_q_depth);
- seq_printf(m, "Ctrl %d HW Queue Depth = %d\n",
- i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
- }
-
- return 0;
-}
-
-static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rsxx_attr_stats_show, inode->i_private);
-}
-
-static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
-}
-
-static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- struct rsxx_cardinfo *card = file_inode(fp)->i_private;
- char *buf;
- int st;
-
- buf = kzalloc(cnt, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
- if (!st) {
- if (copy_to_user(ubuf, buf, cnt))
- st = -EFAULT;
- }
- kfree(buf);
- if (st)
- return st;
- *ppos += cnt;
- return cnt;
-}
-
-static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- struct rsxx_cardinfo *card = file_inode(fp)->i_private;
- char *buf;
- ssize_t st;
-
- buf = memdup_user(ubuf, cnt);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- st = rsxx_creg_write(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
- kfree(buf);
- if (st)
- return st;
- *ppos += cnt;
- return cnt;
-}
-
-static const struct file_operations debugfs_cram_fops = {
- .owner = THIS_MODULE,
- .read = rsxx_cram_read,
- .write = rsxx_cram_write,
-};
-
-static const struct file_operations debugfs_stats_fops = {
- .owner = THIS_MODULE,
- .open = rsxx_attr_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static const struct file_operations debugfs_pci_regs_fops = {
- .owner = THIS_MODULE,
- .open = rsxx_attr_pci_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
-{
- struct dentry *debugfs_stats;
- struct dentry *debugfs_pci_regs;
- struct dentry *debugfs_cram;
-
- card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
- if (IS_ERR_OR_NULL(card->debugfs_dir))
- goto failed_debugfs_dir;
-
- debugfs_stats = debugfs_create_file("stats", 0444,
- card->debugfs_dir, card,
- &debugfs_stats_fops);
- if (IS_ERR_OR_NULL(debugfs_stats))
- goto failed_debugfs_stats;
-
- debugfs_pci_regs = debugfs_create_file("pci_regs", 0444,
- card->debugfs_dir, card,
- &debugfs_pci_regs_fops);
- if (IS_ERR_OR_NULL(debugfs_pci_regs))
- goto failed_debugfs_pci_regs;
-
- debugfs_cram = debugfs_create_file("cram", 0644,
- card->debugfs_dir, card,
- &debugfs_cram_fops);
- if (IS_ERR_OR_NULL(debugfs_cram))
- goto failed_debugfs_cram;
-
- return;
-failed_debugfs_cram:
- debugfs_remove(debugfs_pci_regs);
-failed_debugfs_pci_regs:
- debugfs_remove(debugfs_stats);
-failed_debugfs_stats:
- debugfs_remove(card->debugfs_dir);
-failed_debugfs_dir:
- card->debugfs_dir = NULL;
-}
-
-/*----------------- Interrupt Control & Handling -------------------*/
-
-static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
-{
- card->isr_mask = 0;
- card->ier_mask = 0;
-}
-
-static void __enable_intr(unsigned int *mask, unsigned int intr)
-{
- *mask |= intr;
-}
-
-static void __disable_intr(unsigned int *mask, unsigned int intr)
-{
- *mask &= ~intr;
-}
-
-/*
- * NOTE: Disabling the IER will disable the hardware interrupt.
- * Disabling the ISR will disable the software handling of the ISR bit.
- *
- * Enable/Disable interrupt functions assume the card->irq_lock
- * is held by the caller.
- */
-void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
-{
- if (unlikely(card->halt) ||
- unlikely(card->eeh_state))
- return;
-
- __enable_intr(&card->ier_mask, intr);
- iowrite32(card->ier_mask, card->regmap + IER);
-}
-
-void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
-{
- if (unlikely(card->eeh_state))
- return;
-
- __disable_intr(&card->ier_mask, intr);
- iowrite32(card->ier_mask, card->regmap + IER);
-}
-
-void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
- unsigned int intr)
-{
- if (unlikely(card->halt) ||
- unlikely(card->eeh_state))
- return;
-
- __enable_intr(&card->isr_mask, intr);
- __enable_intr(&card->ier_mask, intr);
- iowrite32(card->ier_mask, card->regmap + IER);
-}
-void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
- unsigned int intr)
-{
- if (unlikely(card->eeh_state))
- return;
-
- __disable_intr(&card->isr_mask, intr);
- __disable_intr(&card->ier_mask, intr);
- iowrite32(card->ier_mask, card->regmap + IER);
-}
-
-static irqreturn_t rsxx_isr(int irq, void *pdata)
-{
- struct rsxx_cardinfo *card = pdata;
- unsigned int isr;
- int handled = 0;
- int reread_isr;
- int i;
-
- spin_lock(&card->irq_lock);
-
- do {
- reread_isr = 0;
-
- if (unlikely(card->eeh_state))
- break;
-
- isr = ioread32(card->regmap + ISR);
- if (isr == 0xffffffff) {
- /*
- * A few systems seem to have an intermittent issue
- * where PCI reads return all Fs, but retrying the read
- * a little later will return as expected.
- */
- dev_info(CARD_TO_DEV(card),
- "ISR = 0xFFFFFFFF, retrying later\n");
- break;
- }
-
- isr &= card->isr_mask;
- if (!isr)
- break;
-
- for (i = 0; i < card->n_targets; i++) {
- if (isr & CR_INTR_DMA(i)) {
- if (card->ier_mask & CR_INTR_DMA(i)) {
- rsxx_disable_ier(card, CR_INTR_DMA(i));
- reread_isr = 1;
- }
- queue_work(card->ctrl[i].done_wq,
- &card->ctrl[i].dma_done_work);
- handled++;
- }
- }
-
- if (isr & CR_INTR_CREG) {
- queue_work(card->creg_ctrl.creg_wq,
- &card->creg_ctrl.done_work);
- handled++;
- }
-
- if (isr & CR_INTR_EVENT) {
- queue_work(card->event_wq, &card->event_work);
- rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
- handled++;
- }
- } while (reread_isr);
-
- spin_unlock(&card->irq_lock);
-
- return handled ? IRQ_HANDLED : IRQ_NONE;
-}
-
-/*----------------- Card Event Handler -------------------*/
-static const char *rsxx_card_state_to_str(unsigned int state)
-{
- static const char * const state_strings[] = {
- "Unknown", "Shutdown", "Starting", "Formatting",
- "Uninitialized", "Good", "Shutting Down",
- "Fault", "Read Only Fault", "dStroying"
- };
-
- return state_strings[ffs(state)];
-}
-
-static void card_state_change(struct rsxx_cardinfo *card,
- unsigned int new_state)
-{
- int st;
-
- dev_info(CARD_TO_DEV(card),
- "card state change detected.(%s -> %s)\n",
- rsxx_card_state_to_str(card->state),
- rsxx_card_state_to_str(new_state));
-
- card->state = new_state;
-
- /* Don't attach DMA interfaces if the card has an invalid config */
- if (!card->config_valid)
- return;
-
- switch (new_state) {
- case CARD_STATE_RD_ONLY_FAULT:
- dev_crit(CARD_TO_DEV(card),
- "Hardware has entered read-only mode!\n");
- /*
- * Fall through so the DMA devices can be attached and
- * the user can attempt to pull off their data.
- */
- fallthrough;
- case CARD_STATE_GOOD:
- st = rsxx_get_card_size8(card, &card->size8);
- if (st)
- dev_err(CARD_TO_DEV(card),
- "Failed attaching DMA devices\n");
-
- if (card->config_valid)
- set_capacity(card->gendisk, card->size8 >> 9);
- break;
-
- case CARD_STATE_FAULT:
- dev_crit(CARD_TO_DEV(card),
- "Hardware Fault reported!\n");
- fallthrough;
-
- /* Everything else, detach DMA interface if it's attached. */
- case CARD_STATE_SHUTDOWN:
- case CARD_STATE_STARTING:
- case CARD_STATE_FORMATTING:
- case CARD_STATE_UNINITIALIZED:
- case CARD_STATE_SHUTTING_DOWN:
- /*
- * dStroy is a term coined by marketing to represent the low level
- * secure erase.
- */
- case CARD_STATE_DSTROYING:
- set_capacity(card->gendisk, 0);
- break;
- }
-}
-
-static void card_event_handler(struct work_struct *work)
-{
- struct rsxx_cardinfo *card;
- unsigned int state;
- unsigned long flags;
- int st;
-
- card = container_of(work, struct rsxx_cardinfo, event_work);
-
- if (unlikely(card->halt))
- return;
-
- /*
- * Enable the interrupt now to avoid any weird race conditions where a
- * state change might occur while rsxx_get_card_state() is
- * processing a returned creg cmd.
- */
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
- spin_unlock_irqrestore(&card->irq_lock, flags);
-
- st = rsxx_get_card_state(card, &state);
- if (st) {
- dev_info(CARD_TO_DEV(card),
- "Failed reading state after event.\n");
- return;
- }
-
- if (card->state != state)
- card_state_change(card, state);
-
- if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING)
- rsxx_read_hw_log(card);
-}
-
-/*----------------- Card Operations -------------------*/
-static int card_shutdown(struct rsxx_cardinfo *card)
-{
- unsigned int state;
- signed long start;
- const int timeout = msecs_to_jiffies(120000);
- int st;
-
- /* We can't issue a shutdown if the card is in a transition state */
- start = jiffies;
- do {
- st = rsxx_get_card_state(card, &state);
- if (st)
- return st;
- } while (state == CARD_STATE_STARTING &&
- (jiffies - start < timeout));
-
- if (state == CARD_STATE_STARTING)
- return -ETIMEDOUT;
-
- /* Only issue a shutdown if we need to */
- if ((state != CARD_STATE_SHUTTING_DOWN) &&
- (state != CARD_STATE_SHUTDOWN)) {
- st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN);
- if (st)
- return st;
- }
-
- start = jiffies;
- do {
- st = rsxx_get_card_state(card, &state);
- if (st)
- return st;
- } while (state != CARD_STATE_SHUTDOWN &&
- (jiffies - start < timeout));
-
- if (state != CARD_STATE_SHUTDOWN)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int rsxx_eeh_frozen(struct pci_dev *dev)
-{
- struct rsxx_cardinfo *card = pci_get_drvdata(dev);
- int i;
- int st;
-
- dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");
-
- card->eeh_state = 1;
- rsxx_mask_interrupts(card);
-
- /*
- * We need to guarantee that the write for eeh_state and masking
- * interrupts does not become reordered. This will prevent a possible
- * race condition with the EEH code.
- */
- wmb();
-
- pci_disable_device(dev);
-
- st = rsxx_eeh_save_issued_dmas(card);
- if (st)
- return st;
-
- rsxx_eeh_save_issued_creg(card);
-
- for (i = 0; i < card->n_targets; i++) {
- if (card->ctrl[i].status.buf)
- dma_free_coherent(&card->dev->dev,
- STATUS_BUFFER_SIZE8,
- card->ctrl[i].status.buf,
- card->ctrl[i].status.dma_addr);
- if (card->ctrl[i].cmd.buf)
- dma_free_coherent(&card->dev->dev,
- COMMAND_BUFFER_SIZE8,
- card->ctrl[i].cmd.buf,
- card->ctrl[i].cmd.dma_addr);
- }
-
- return 0;
-}
-
-static void rsxx_eeh_failure(struct pci_dev *dev)
-{
- struct rsxx_cardinfo *card = pci_get_drvdata(dev);
- int i;
- int cnt = 0;
-
- dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");
-
- card->eeh_state = 1;
- card->halt = 1;
-
- for (i = 0; i < card->n_targets; i++) {
- spin_lock_bh(&card->ctrl[i].queue_lock);
- cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
- &card->ctrl[i].queue,
- COMPLETE_DMA);
- spin_unlock_bh(&card->ctrl[i].queue_lock);
-
- cnt += rsxx_dma_cancel(&card->ctrl[i]);
-
- if (cnt)
- dev_info(CARD_TO_DEV(card),
- "Freed %d queued DMAs on channel %d\n",
- cnt, card->ctrl[i].id);
- }
-}
-
-static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
-{
- unsigned int status;
- int iter = 0;
-
- /* We need to wait for the hardware to reset */
- while (iter++ < 10) {
- status = ioread32(card->regmap + PCI_RECONFIG);
-
- if (status & RSXX_FLUSH_BUSY) {
- ssleep(1);
- continue;
- }
-
- if (status & RSXX_FLUSH_TIMEOUT)
- dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n");
- return 0;
- }
-
- /* Hardware failed resetting itself. */
- return -1;
-}
-
-static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev,
- pci_channel_state_t error)
-{
- int st;
-
- if (dev->revision < RSXX_EEH_SUPPORT)
- return PCI_ERS_RESULT_NONE;
-
- if (error == pci_channel_io_perm_failure) {
- rsxx_eeh_failure(dev);
- return PCI_ERS_RESULT_DISCONNECT;
- }
-
- st = rsxx_eeh_frozen(dev);
- if (st) {
- dev_err(&dev->dev, "Slot reset setup failed\n");
- rsxx_eeh_failure(dev);
- return PCI_ERS_RESULT_DISCONNECT;
- }
-
- return PCI_ERS_RESULT_NEED_RESET;
-}
-
-static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
-{
- struct rsxx_cardinfo *card = pci_get_drvdata(dev);
- unsigned long flags;
- int i;
- int st;
-
- dev_warn(&dev->dev,
- "IBM Flash Adapter PCI: recovering from slot reset.\n");
-
- st = pci_enable_device(dev);
- if (st)
- goto failed_hw_setup;
-
- pci_set_master(dev);
-
- st = rsxx_eeh_fifo_flush_poll(card);
- if (st)
- goto failed_hw_setup;
-
- rsxx_dma_queue_reset(card);
-
- for (i = 0; i < card->n_targets; i++) {
- st = rsxx_hw_buffers_init(dev, &card->ctrl[i]);
- if (st)
- goto failed_hw_buffers_init;
- }
-
- if (card->config_valid)
- rsxx_dma_configure(card);
-
- /* Clears the ISR register from spurious interrupts */
- st = ioread32(card->regmap + ISR);
-
- card->eeh_state = 0;
-
- spin_lock_irqsave(&card->irq_lock, flags);
- if (card->n_targets & RSXX_MAX_TARGETS)
- rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
- else
- rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C);
- spin_unlock_irqrestore(&card->irq_lock, flags);
-
- rsxx_kick_creg_queue(card);
-
- for (i = 0; i < card->n_targets; i++) {
- spin_lock(&card->ctrl[i].queue_lock);
- if (list_empty(&card->ctrl[i].queue)) {
- spin_unlock(&card->ctrl[i].queue_lock);
- continue;
- }
- spin_unlock(&card->ctrl[i].queue_lock);
-
- queue_work(card->ctrl[i].issue_wq,
- &card->ctrl[i].issue_dma_work);
- }
-
- dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");
-
- return PCI_ERS_RESULT_RECOVERED;
-
-failed_hw_buffers_init:
- for (i = 0; i < card->n_targets; i++) {
- if (card->ctrl[i].status.buf)
- dma_free_coherent(&card->dev->dev,
- STATUS_BUFFER_SIZE8,
- card->ctrl[i].status.buf,
- card->ctrl[i].status.dma_addr);
- if (card->ctrl[i].cmd.buf)
- dma_free_coherent(&card->dev->dev,
- COMMAND_BUFFER_SIZE8,
- card->ctrl[i].cmd.buf,
- card->ctrl[i].cmd.dma_addr);
- }
-failed_hw_setup:
- rsxx_eeh_failure(dev);
- return PCI_ERS_RESULT_DISCONNECT;
-
-}
-
-/*----------------- Driver Initialization & Setup -------------------*/
-/* Returns: 0 if the driver is compatible with the device
- -1 if the driver is NOT compatible with the device */
-static int rsxx_compatibility_check(struct rsxx_cardinfo *card)
-{
- unsigned char pci_rev;
-
- pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
-
- if (pci_rev > RS70_PCI_REV_SUPPORTED)
- return -1;
- return 0;
-}
-
-static int rsxx_pci_probe(struct pci_dev *dev,
- const struct pci_device_id *id)
-{
- struct rsxx_cardinfo *card;
- int st;
- unsigned int sync_timeout;
-
- dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
-
- card = kzalloc(sizeof(*card), GFP_KERNEL);
- if (!card)
- return -ENOMEM;
-
- card->dev = dev;
- pci_set_drvdata(dev, card);
-
- st = ida_alloc(&rsxx_disk_ida, GFP_KERNEL);
- if (st < 0)
- goto failed_ida_get;
- card->disk_id = st;
-
- st = pci_enable_device(dev);
- if (st)
- goto failed_enable;
-
- pci_set_master(dev);
-
- st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
- if (st) {
- dev_err(CARD_TO_DEV(card),
- "No usable DMA configuration,aborting\n");
- goto failed_dma_mask;
- }
-
- st = pci_request_regions(dev, DRIVER_NAME);
- if (st) {
- dev_err(CARD_TO_DEV(card),
- "Failed to request memory region\n");
- goto failed_request_regions;
- }
-
- if (pci_resource_len(dev, 0) == 0) {
- dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n");
- st = -ENOMEM;
- goto failed_iomap;
- }
-
- card->regmap = pci_iomap(dev, 0, 0);
- if (!card->regmap) {
- dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n");
- st = -ENOMEM;
- goto failed_iomap;
- }
-
- spin_lock_init(&card->irq_lock);
- card->halt = 0;
- card->eeh_state = 0;
-
- spin_lock_irq(&card->irq_lock);
- rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
- spin_unlock_irq(&card->irq_lock);
-
- if (!force_legacy) {
- st = pci_enable_msi(dev);
- if (st)
- dev_warn(CARD_TO_DEV(card),
- "Failed to enable MSI\n");
- }
-
- st = request_irq(dev->irq, rsxx_isr, IRQF_SHARED,
- DRIVER_NAME, card);
- if (st) {
- dev_err(CARD_TO_DEV(card),
- "Failed requesting IRQ%d\n", dev->irq);
- goto failed_irq;
- }
-
- /************* Setup Processor Command Interface *************/
- st = rsxx_creg_setup(card);
- if (st) {
- dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
- goto failed_creg_setup;
- }
-
- spin_lock_irq(&card->irq_lock);
- rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
- spin_unlock_irq(&card->irq_lock);
-
- st = rsxx_compatibility_check(card);
- if (st) {
- dev_warn(CARD_TO_DEV(card),
- "Incompatible driver detected. Please update the driver.\n");
- st = -EINVAL;
- goto failed_compatiblity_check;
- }
-
- /************* Load Card Config *************/
- st = rsxx_load_config(card);
- if (st)
- dev_err(CARD_TO_DEV(card),
- "Failed loading card config\n");
-
- /************* Setup DMA Engine *************/
- st = rsxx_get_num_targets(card, &card->n_targets);
- if (st)
- dev_info(CARD_TO_DEV(card),
- "Failed reading the number of DMA targets\n");
-
- card->ctrl = kcalloc(card->n_targets, sizeof(*card->ctrl),
- GFP_KERNEL);
- if (!card->ctrl) {
- st = -ENOMEM;
- goto failed_dma_setup;
- }
-
- st = rsxx_dma_setup(card);
- if (st) {
- dev_info(CARD_TO_DEV(card),
- "Failed to setup DMA engine\n");
- goto failed_dma_setup;
- }
-
- /************* Setup Card Event Handler *************/
- card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
- if (!card->event_wq) {
- dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
- st = -ENOMEM;
- goto failed_event_handler;
- }
-
- INIT_WORK(&card->event_work, card_event_handler);
-
- st = rsxx_setup_dev(card);
- if (st)
- goto failed_create_dev;
-
- rsxx_get_card_state(card, &card->state);
-
- dev_info(CARD_TO_DEV(card),
- "card state: %s\n",
- rsxx_card_state_to_str(card->state));
-
- /*
- * Now that the DMA Engine and devices have been setup,
- * we can enable the event interrupt(it kicks off actions in
- * those layers so we couldn't enable it right away.)
- */
- spin_lock_irq(&card->irq_lock);
- rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
- spin_unlock_irq(&card->irq_lock);
-
- if (card->state == CARD_STATE_SHUTDOWN) {
- st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP);
- if (st)
- dev_crit(CARD_TO_DEV(card),
- "Failed issuing card startup\n");
- if (sync_start) {
- sync_timeout = SYNC_START_TIMEOUT;
-
- dev_info(CARD_TO_DEV(card),
- "Waiting for card to startup\n");
-
- do {
- ssleep(1);
- sync_timeout--;
-
- rsxx_get_card_state(card, &card->state);
- } while (sync_timeout &&
- (card->state == CARD_STATE_STARTING));
-
- if (card->state == CARD_STATE_STARTING) {
- dev_warn(CARD_TO_DEV(card),
- "Card startup timed out\n");
- card->size8 = 0;
- } else {
- dev_info(CARD_TO_DEV(card),
- "card state: %s\n",
- rsxx_card_state_to_str(card->state));
- st = rsxx_get_card_size8(card, &card->size8);
- if (st)
- card->size8 = 0;
- }
- }
- } else if (card->state == CARD_STATE_GOOD ||
- card->state == CARD_STATE_RD_ONLY_FAULT) {
- st = rsxx_get_card_size8(card, &card->size8);
- if (st)
- card->size8 = 0;
- }
-
- st = rsxx_attach_dev(card);
- if (st)
- goto failed_create_dev;
-
- /************* Setup Debugfs *************/
- rsxx_debugfs_dev_new(card);
-
- return 0;
-
-failed_create_dev:
- destroy_workqueue(card->event_wq);
- card->event_wq = NULL;
-failed_event_handler:
- rsxx_dma_destroy(card);
-failed_dma_setup:
-failed_compatiblity_check:
- destroy_workqueue(card->creg_ctrl.creg_wq);
- card->creg_ctrl.creg_wq = NULL;
-failed_creg_setup:
- spin_lock_irq(&card->irq_lock);
- rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
- spin_unlock_irq(&card->irq_lock);
- free_irq(dev->irq, card);
- if (!force_legacy)
- pci_disable_msi(dev);
-failed_irq:
- pci_iounmap(dev, card->regmap);
-failed_iomap:
- pci_release_regions(dev);
-failed_request_regions:
-failed_dma_mask:
- pci_disable_device(dev);
-failed_enable:
- ida_free(&rsxx_disk_ida, card->disk_id);
-failed_ida_get:
- kfree(card);
-
- return st;
-}
-
-static void rsxx_pci_remove(struct pci_dev *dev)
-{
- struct rsxx_cardinfo *card = pci_get_drvdata(dev);
- unsigned long flags;
- int st;
- int i;
-
- if (!card)
- return;
-
- dev_info(CARD_TO_DEV(card),
- "Removing PCI-Flash SSD.\n");
-
- rsxx_detach_dev(card);
-
- for (i = 0; i < card->n_targets; i++) {
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
- spin_unlock_irqrestore(&card->irq_lock, flags);
- }
-
- st = card_shutdown(card);
- if (st)
- dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n");
-
- /* Sync outstanding event handlers. */
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
- spin_unlock_irqrestore(&card->irq_lock, flags);
-
- cancel_work_sync(&card->event_work);
-
- destroy_workqueue(card->event_wq);
- rsxx_destroy_dev(card);
- rsxx_dma_destroy(card);
- destroy_workqueue(card->creg_ctrl.creg_wq);
-
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
- spin_unlock_irqrestore(&card->irq_lock, flags);
-
- /* Prevent work_structs from re-queuing themselves. */
- card->halt = 1;
-
- debugfs_remove_recursive(card->debugfs_dir);
-
- free_irq(dev->irq, card);
-
- if (!force_legacy)
- pci_disable_msi(dev);
-
- rsxx_creg_destroy(card);
-
- pci_iounmap(dev, card->regmap);
-
- pci_disable_device(dev);
- pci_release_regions(dev);
-
- ida_free(&rsxx_disk_ida, card->disk_id);
- kfree(card);
-}
-
-static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state)
-{
- /* We don't support suspend at this time. */
- return -ENOSYS;
-}
-
-static void rsxx_pci_shutdown(struct pci_dev *dev)
-{
- struct rsxx_cardinfo *card = pci_get_drvdata(dev);
- unsigned long flags;
- int i;
-
- if (!card)
- return;
-
- dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n");
-
- rsxx_detach_dev(card);
-
- for (i = 0; i < card->n_targets; i++) {
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
- spin_unlock_irqrestore(&card->irq_lock, flags);
- }
-
- card_shutdown(card);
-}
-
-static const struct pci_error_handlers rsxx_err_handler = {
- .error_detected = rsxx_error_detected,
- .slot_reset = rsxx_slot_reset,
-};
-
-static const struct pci_device_id rsxx_pci_ids[] = {
- {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)},
- {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)},
- {0,},
-};
-
-MODULE_DEVICE_TABLE(pci, rsxx_pci_ids);
-
-static struct pci_driver rsxx_pci_driver = {
- .name = DRIVER_NAME,
- .id_table = rsxx_pci_ids,
- .probe = rsxx_pci_probe,
- .remove = rsxx_pci_remove,
- .suspend = rsxx_pci_suspend,
- .shutdown = rsxx_pci_shutdown,
- .err_handler = &rsxx_err_handler,
-};
-
-static int __init rsxx_core_init(void)
-{
- int st;
-
- st = rsxx_dev_init();
- if (st)
- return st;
-
- st = rsxx_dma_init();
- if (st)
- goto dma_init_failed;
-
- st = rsxx_creg_init();
- if (st)
- goto creg_init_failed;
-
- return pci_register_driver(&rsxx_pci_driver);
-
-creg_init_failed:
- rsxx_dma_cleanup();
-dma_init_failed:
- rsxx_dev_cleanup();
-
- return st;
-}
-
-static void __exit rsxx_core_cleanup(void)
-{
- pci_unregister_driver(&rsxx_pci_driver);
- rsxx_creg_cleanup();
- rsxx_dma_cleanup();
- rsxx_dev_cleanup();
-}
-
-module_init(rsxx_core_init);
-module_exit(rsxx_core_cleanup);
diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
deleted file mode 100644
index 60ecd3f7cbd2..000000000000
--- a/drivers/block/rsxx/cregs.c
+++ /dev/null
@@ -1,789 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-* Filename: cregs.c
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#include <linux/completion.h>
-#include <linux/slab.h>
-
-#include "rsxx_priv.h"
-
-#define CREG_TIMEOUT_MSEC 10000
-
-typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card,
- struct creg_cmd *cmd,
- int st);
-
-struct creg_cmd {
- struct list_head list;
- creg_cmd_cb cb;
- void *cb_private;
- unsigned int op;
- unsigned int addr;
- int cnt8;
- void *buf;
- unsigned int stream;
- unsigned int status;
-};
-
-static struct kmem_cache *creg_cmd_pool;
-
-
-/*------------ Private Functions --------------*/
-
-#if defined(__LITTLE_ENDIAN)
-#define LITTLE_ENDIAN 1
-#elif defined(__BIG_ENDIAN)
-#define LITTLE_ENDIAN 0
-#else
-#error Unknown endianess!!! Aborting...
-#endif
-
-static int copy_to_creg_data(struct rsxx_cardinfo *card,
- int cnt8,
- void *buf,
- unsigned int stream)
-{
- int i = 0;
- u32 *data = buf;
-
- if (unlikely(card->eeh_state))
- return -EIO;
-
- for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
- /*
- * Firmware implementation makes it necessary to byte swap on
- * little endian processors.
- */
- if (LITTLE_ENDIAN && stream)
- iowrite32be(data[i], card->regmap + CREG_DATA(i));
- else
- iowrite32(data[i], card->regmap + CREG_DATA(i));
- }
-
- return 0;
-}
-
-
-static int copy_from_creg_data(struct rsxx_cardinfo *card,
- int cnt8,
- void *buf,
- unsigned int stream)
-{
- int i = 0;
- u32 *data = buf;
-
- if (unlikely(card->eeh_state))
- return -EIO;
-
- for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
- /*
- * Firmware implementation makes it necessary to byte swap on
- * little endian processors.
- */
- if (LITTLE_ENDIAN && stream)
- data[i] = ioread32be(card->regmap + CREG_DATA(i));
- else
- data[i] = ioread32(card->regmap + CREG_DATA(i));
- }
-
- return 0;
-}
-
-static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)
-{
- int st;
-
- if (unlikely(card->eeh_state))
- return;
-
- iowrite32(cmd->addr, card->regmap + CREG_ADD);
- iowrite32(cmd->cnt8, card->regmap + CREG_CNT);
-
- if (cmd->op == CREG_OP_WRITE) {
- if (cmd->buf) {
- st = copy_to_creg_data(card, cmd->cnt8,
- cmd->buf, cmd->stream);
- if (st)
- return;
- }
- }
-
- if (unlikely(card->eeh_state))
- return;
-
- /* Setting the valid bit will kick off the command. */
- iowrite32(cmd->op, card->regmap + CREG_CMD);
-}
-
-static void creg_kick_queue(struct rsxx_cardinfo *card)
-{
- if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue))
- return;
-
- card->creg_ctrl.active = 1;
- card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue,
- struct creg_cmd, list);
- list_del(&card->creg_ctrl.active_cmd->list);
- card->creg_ctrl.q_depth--;
-
- /*
- * We have to set the timer before we push the new command. Otherwise,
- * we could create a race condition that would occur if the timer
- * was not canceled, and expired after the new command was pushed,
- * but before the command was issued to hardware.
- */
- mod_timer(&card->creg_ctrl.cmd_timer,
- jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC));
-
- creg_issue_cmd(card, card->creg_ctrl.active_cmd);
-}
-
-static int creg_queue_cmd(struct rsxx_cardinfo *card,
- unsigned int op,
- unsigned int addr,
- unsigned int cnt8,
- void *buf,
- int stream,
- creg_cmd_cb callback,
- void *cb_private)
-{
- struct creg_cmd *cmd;
-
- /* Don't queue stuff up if we're halted. */
- if (unlikely(card->halt))
- return -EINVAL;
-
- if (card->creg_ctrl.reset)
- return -EAGAIN;
-
- if (cnt8 > MAX_CREG_DATA8)
- return -EINVAL;
-
- cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&cmd->list);
-
- cmd->op = op;
- cmd->addr = addr;
- cmd->cnt8 = cnt8;
- cmd->buf = buf;
- cmd->stream = stream;
- cmd->cb = callback;
- cmd->cb_private = cb_private;
- cmd->status = 0;
-
- spin_lock_bh(&card->creg_ctrl.lock);
- list_add_tail(&cmd->list, &card->creg_ctrl.queue);
- card->creg_ctrl.q_depth++;
- creg_kick_queue(card);
- spin_unlock_bh(&card->creg_ctrl.lock);
-
- return 0;
-}
-
-static void creg_cmd_timed_out(struct timer_list *t)
-{
- struct rsxx_cardinfo *card = from_timer(card, t, creg_ctrl.cmd_timer);
- struct creg_cmd *cmd;
-
- spin_lock(&card->creg_ctrl.lock);
- cmd = card->creg_ctrl.active_cmd;
- card->creg_ctrl.active_cmd = NULL;
- spin_unlock(&card->creg_ctrl.lock);
-
- if (cmd == NULL) {
- card->creg_ctrl.creg_stats.creg_timeout++;
- dev_warn(CARD_TO_DEV(card),
- "No active command associated with timeout!\n");
- return;
- }
-
- if (cmd->cb)
- cmd->cb(card, cmd, -ETIMEDOUT);
-
- kmem_cache_free(creg_cmd_pool, cmd);
-
-
- spin_lock(&card->creg_ctrl.lock);
- card->creg_ctrl.active = 0;
- creg_kick_queue(card);
- spin_unlock(&card->creg_ctrl.lock);
-}
-
-
-static void creg_cmd_done(struct work_struct *work)
-{
- struct rsxx_cardinfo *card;
- struct creg_cmd *cmd;
- int st = 0;
-
- card = container_of(work, struct rsxx_cardinfo,
- creg_ctrl.done_work);
-
- /*
- * The timer could not be cancelled for some reason,
- * race to pop the active command.
- */
- if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0)
- card->creg_ctrl.creg_stats.failed_cancel_timer++;
-
- spin_lock_bh(&card->creg_ctrl.lock);
- cmd = card->creg_ctrl.active_cmd;
- card->creg_ctrl.active_cmd = NULL;
- spin_unlock_bh(&card->creg_ctrl.lock);
-
- if (cmd == NULL) {
- dev_err(CARD_TO_DEV(card),
- "Spurious creg interrupt!\n");
- return;
- }
-
- card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT);
- cmd->status = card->creg_ctrl.creg_stats.stat;
- if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) {
- dev_err(CARD_TO_DEV(card),
- "Invalid status on creg command\n");
- /*
- * At this point we're probably reading garbage from HW. Don't
- * do anything else that could mess up the system and let
- * the sync function return an error.
- */
- st = -EIO;
- goto creg_done;
- } else if (cmd->status & CREG_STAT_ERROR) {
- st = -EIO;
- }
-
- if (cmd->op == CREG_OP_READ) {
- unsigned int cnt8 = ioread32(card->regmap + CREG_CNT);
-
- /* Paranoid Sanity Checks */
- if (!cmd->buf) {
- dev_err(CARD_TO_DEV(card),
- "Buffer not given for read.\n");
- st = -EIO;
- goto creg_done;
- }
- if (cnt8 != cmd->cnt8) {
- dev_err(CARD_TO_DEV(card),
- "count mismatch\n");
- st = -EIO;
- goto creg_done;
- }
-
- st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
- }
-
-creg_done:
- if (cmd->cb)
- cmd->cb(card, cmd, st);
-
- kmem_cache_free(creg_cmd_pool, cmd);
-
- spin_lock_bh(&card->creg_ctrl.lock);
- card->creg_ctrl.active = 0;
- creg_kick_queue(card);
- spin_unlock_bh(&card->creg_ctrl.lock);
-}
-
-static void creg_reset(struct rsxx_cardinfo *card)
-{
- struct creg_cmd *cmd = NULL;
- struct creg_cmd *tmp;
- unsigned long flags;
-
- /*
- * mutex_trylock is used here because if reset_lock is taken then a
- * reset is already happening. So, we can just go ahead and return.
- */
- if (!mutex_trylock(&card->creg_ctrl.reset_lock))
- return;
-
- card->creg_ctrl.reset = 1;
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
- spin_unlock_irqrestore(&card->irq_lock, flags);
-
- dev_warn(CARD_TO_DEV(card),
- "Resetting creg interface for recovery\n");
-
- /* Cancel outstanding commands */
- spin_lock_bh(&card->creg_ctrl.lock);
- list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
- list_del(&cmd->list);
- card->creg_ctrl.q_depth--;
- if (cmd->cb)
- cmd->cb(card, cmd, -ECANCELED);
- kmem_cache_free(creg_cmd_pool, cmd);
- }
-
- cmd = card->creg_ctrl.active_cmd;
- card->creg_ctrl.active_cmd = NULL;
- if (cmd) {
- if (timer_pending(&card->creg_ctrl.cmd_timer))
- del_timer_sync(&card->creg_ctrl.cmd_timer);
-
- if (cmd->cb)
- cmd->cb(card, cmd, -ECANCELED);
- kmem_cache_free(creg_cmd_pool, cmd);
-
- card->creg_ctrl.active = 0;
- }
- spin_unlock_bh(&card->creg_ctrl.lock);
-
- card->creg_ctrl.reset = 0;
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
- spin_unlock_irqrestore(&card->irq_lock, flags);
-
- mutex_unlock(&card->creg_ctrl.reset_lock);
-}
-
-/* Used for synchronous accesses */
-struct creg_completion {
- struct completion *cmd_done;
- int st;
- u32 creg_status;
-};
-
-static void creg_cmd_done_cb(struct rsxx_cardinfo *card,
- struct creg_cmd *cmd,
- int st)
-{
- struct creg_completion *cmd_completion;
-
- cmd_completion = cmd->cb_private;
- BUG_ON(!cmd_completion);
-
- cmd_completion->st = st;
- cmd_completion->creg_status = cmd->status;
- complete(cmd_completion->cmd_done);
-}
-
-static int __issue_creg_rw(struct rsxx_cardinfo *card,
- unsigned int op,
- unsigned int addr,
- unsigned int cnt8,
- void *buf,
- int stream,
- unsigned int *hw_stat)
-{
- DECLARE_COMPLETION_ONSTACK(cmd_done);
- struct creg_completion completion;
- unsigned long timeout;
- int st;
-
- completion.cmd_done = &cmd_done;
- completion.st = 0;
- completion.creg_status = 0;
-
- st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb,
- &completion);
- if (st)
- return st;
-
- /*
- * This timeout is necessary for unresponsive hardware. The additional
- * 20 seconds to used to guarantee that each cregs requests has time to
- * complete.
- */
- timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC *
- card->creg_ctrl.q_depth + 20000);
-
- /*
- * The creg interface is guaranteed to complete. It has a timeout
- * mechanism that will kick in if hardware does not respond.
- */
- st = wait_for_completion_timeout(completion.cmd_done, timeout);
- if (st == 0) {
- /*
- * This is really bad, because the kernel timer did not
- * expire and notify us of a timeout!
- */
- dev_crit(CARD_TO_DEV(card),
- "cregs timer failed\n");
- creg_reset(card);
- return -EIO;
- }
-
- *hw_stat = completion.creg_status;
-
- if (completion.st) {
- /*
- * This read is needed to verify that there has not been any
- * extreme errors that might have occurred, i.e. EEH. The
- * function iowrite32 will not detect EEH errors, so it is
- * necessary that we recover if such an error is the reason
- * for the timeout. This is a dummy read.
- */
- ioread32(card->regmap + SCRATCH);
-
- dev_warn(CARD_TO_DEV(card),
- "creg command failed(%d x%08x)\n",
- completion.st, addr);
- return completion.st;
- }
-
- return 0;
-}
-
-static int issue_creg_rw(struct rsxx_cardinfo *card,
- u32 addr,
- unsigned int size8,
- void *data,
- int stream,
- int read)
-{
- unsigned int hw_stat;
- unsigned int xfer;
- unsigned int op;
- int st;
-
- op = read ? CREG_OP_READ : CREG_OP_WRITE;
-
- do {
- xfer = min_t(unsigned int, size8, MAX_CREG_DATA8);
-
- st = __issue_creg_rw(card, op, addr, xfer,
- data, stream, &hw_stat);
- if (st)
- return st;
-
- data = (char *)data + xfer;
- addr += xfer;
- size8 -= xfer;
- } while (size8);
-
- return 0;
-}
-
-/* ---------------------------- Public API ---------------------------------- */
-int rsxx_creg_write(struct rsxx_cardinfo *card,
- u32 addr,
- unsigned int size8,
- void *data,
- int byte_stream)
-{
- return issue_creg_rw(card, addr, size8, data, byte_stream, 0);
-}
-
-int rsxx_creg_read(struct rsxx_cardinfo *card,
- u32 addr,
- unsigned int size8,
- void *data,
- int byte_stream)
-{
- return issue_creg_rw(card, addr, size8, data, byte_stream, 1);
-}
-
-int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state)
-{
- return rsxx_creg_read(card, CREG_ADD_CARD_STATE,
- sizeof(*state), state, 0);
-}
-
-int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8)
-{
- unsigned int size;
- int st;
-
- st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE,
- sizeof(size), &size, 0);
- if (st)
- return st;
-
- *size8 = (u64)size * RSXX_HW_BLK_SIZE;
- return 0;
-}
-
-int rsxx_get_num_targets(struct rsxx_cardinfo *card,
- unsigned int *n_targets)
-{
- return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS,
- sizeof(*n_targets), n_targets, 0);
-}
-
-int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
- u32 *capabilities)
-{
- return rsxx_creg_read(card, CREG_ADD_CAPABILITIES,
- sizeof(*capabilities), capabilities, 0);
-}
-
-int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd)
-{
- return rsxx_creg_write(card, CREG_ADD_CARD_CMD,
- sizeof(cmd), &cmd, 0);
-}
-
-
-/*----------------- HW Log Functions -------------------*/
-static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len)
-{
- static char level;
-
- /*
- * New messages start with "<#>", where # is the log level. Messages
- * that extend past the log buffer will use the previous level
- */
- if ((len > 3) && (str[0] == '<') && (str[2] == '>')) {
- level = str[1];
- str += 3; /* Skip past the log level. */
- len -= 3;
- }
-
- switch (level) {
- case '0':
- dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '1':
- dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '2':
- dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '3':
- dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '4':
- dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '5':
- dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '6':
- dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- case '7':
- dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- default:
- dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
- break;
- }
-}
-
-/*
- * The substrncpy function copies the src string (which includes the
- * terminating '\0' character), up to the count into the dest pointer.
- * Returns the number of bytes copied to dest.
- */
-static int substrncpy(char *dest, const char *src, int count)
-{
- int max_cnt = count;
-
- while (count) {
- count--;
- *dest = *src;
- if (*dest == '\0')
- break;
- src++;
- dest++;
- }
- return max_cnt - count;
-}
-
-
-static void read_hw_log_done(struct rsxx_cardinfo *card,
- struct creg_cmd *cmd,
- int st)
-{
- char *buf;
- char *log_str;
- int cnt;
- int len;
- int off;
-
- buf = cmd->buf;
- off = 0;
-
- /* Failed getting the log message */
- if (st)
- return;
-
- while (off < cmd->cnt8) {
- log_str = &card->log.buf[card->log.buf_len];
- cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len);
- len = substrncpy(log_str, &buf[off], cnt);
-
- off += len;
- card->log.buf_len += len;
-
- /*
- * Flush the log if we've hit the end of a message or if we've
- * run out of buffer space.
- */
- if ((log_str[len - 1] == '\0') ||
- (card->log.buf_len == LOG_BUF_SIZE8)) {
- if (card->log.buf_len != 1) /* Don't log blank lines. */
- hw_log_msg(card, card->log.buf,
- card->log.buf_len);
- card->log.buf_len = 0;
- }
-
- }
-
- if (cmd->status & CREG_STAT_LOG_PENDING)
- rsxx_read_hw_log(card);
-}
-
-int rsxx_read_hw_log(struct rsxx_cardinfo *card)
-{
- int st;
-
- st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG,
- sizeof(card->log.tmp), card->log.tmp,
- 1, read_hw_log_done, NULL);
- if (st)
- dev_err(CARD_TO_DEV(card),
- "Failed getting log text\n");
-
- return st;
-}
-
-/*-------------- IOCTL REG Access ------------------*/
-static int issue_reg_cmd(struct rsxx_cardinfo *card,
- struct rsxx_reg_access *cmd,
- int read)
-{
- unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE;
-
- return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data,
- cmd->stream, &cmd->stat);
-}
-
-int rsxx_reg_access(struct rsxx_cardinfo *card,
- struct rsxx_reg_access __user *ucmd,
- int read)
-{
- struct rsxx_reg_access cmd;
- int st;
-
- st = copy_from_user(&cmd, ucmd, sizeof(cmd));
- if (st)
- return -EFAULT;
-
- if (cmd.cnt > RSXX_MAX_REG_CNT)
- return -EFAULT;
-
- st = issue_reg_cmd(card, &cmd, read);
- if (st)
- return st;
-
- st = put_user(cmd.stat, &ucmd->stat);
- if (st)
- return -EFAULT;
-
- if (read) {
- st = copy_to_user(ucmd->data, cmd.data, cmd.cnt);
- if (st)
- return -EFAULT;
- }
-
- return 0;
-}
-
-void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card)
-{
- struct creg_cmd *cmd = NULL;
-
- cmd = card->creg_ctrl.active_cmd;
- card->creg_ctrl.active_cmd = NULL;
-
- if (cmd) {
- del_timer_sync(&card->creg_ctrl.cmd_timer);
-
- spin_lock_bh(&card->creg_ctrl.lock);
- list_add(&cmd->list, &card->creg_ctrl.queue);
- card->creg_ctrl.q_depth++;
- card->creg_ctrl.active = 0;
- spin_unlock_bh(&card->creg_ctrl.lock);
- }
-}
-
-void rsxx_kick_creg_queue(struct rsxx_cardinfo *card)
-{
- spin_lock_bh(&card->creg_ctrl.lock);
- if (!list_empty(&card->creg_ctrl.queue))
- creg_kick_queue(card);
- spin_unlock_bh(&card->creg_ctrl.lock);
-}
-
-/*------------ Initialization & Setup --------------*/
-int rsxx_creg_setup(struct rsxx_cardinfo *card)
-{
- card->creg_ctrl.active_cmd = NULL;
-
- card->creg_ctrl.creg_wq =
- create_singlethread_workqueue(DRIVER_NAME"_creg");
- if (!card->creg_ctrl.creg_wq)
- return -ENOMEM;
-
- INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
- mutex_init(&card->creg_ctrl.reset_lock);
- INIT_LIST_HEAD(&card->creg_ctrl.queue);
- spin_lock_init(&card->creg_ctrl.lock);
- timer_setup(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, 0);
-
- return 0;
-}
-
-void rsxx_creg_destroy(struct rsxx_cardinfo *card)
-{
- struct creg_cmd *cmd;
- struct creg_cmd *tmp;
- int cnt = 0;
-
- /* Cancel outstanding commands */
- spin_lock_bh(&card->creg_ctrl.lock);
- list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
- list_del(&cmd->list);
- if (cmd->cb)
- cmd->cb(card, cmd, -ECANCELED);
- kmem_cache_free(creg_cmd_pool, cmd);
- cnt++;
- }
-
- if (cnt)
- dev_info(CARD_TO_DEV(card),
- "Canceled %d queue creg commands\n", cnt);
-
- cmd = card->creg_ctrl.active_cmd;
- card->creg_ctrl.active_cmd = NULL;
- if (cmd) {
- if (timer_pending(&card->creg_ctrl.cmd_timer))
- del_timer_sync(&card->creg_ctrl.cmd_timer);
-
- if (cmd->cb)
- cmd->cb(card, cmd, -ECANCELED);
- dev_info(CARD_TO_DEV(card),
- "Canceled active creg command\n");
- kmem_cache_free(creg_cmd_pool, cmd);
- }
- spin_unlock_bh(&card->creg_ctrl.lock);
-
- cancel_work_sync(&card->creg_ctrl.done_work);
-}
-
-
-int rsxx_creg_init(void)
-{
- creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN);
- if (!creg_cmd_pool)
- return -ENOMEM;
-
- return 0;
-}
-
-void rsxx_creg_cleanup(void)
-{
- kmem_cache_destroy(creg_cmd_pool);
-}
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
deleted file mode 100644
index dd33f1bdf3b8..000000000000
--- a/drivers/block/rsxx/dev.c
+++ /dev/null
@@ -1,306 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-* Filename: dev.c
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-
-#include <linux/hdreg.h>
-#include <linux/genhd.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-
-#include <linux/fs.h>
-
-#include "rsxx_priv.h"
-
-static unsigned int blkdev_minors = 64;
-module_param(blkdev_minors, uint, 0444);
-MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)");
-
-/*
- * For now I'm making this tweakable in case any applications hit this limit.
- * If you see a "bio too big" error in the log you will need to raise this
- * value.
- */
-static unsigned int blkdev_max_hw_sectors = 1024;
-module_param(blkdev_max_hw_sectors, uint, 0444);
-MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO");
-
-static unsigned int enable_blkdev = 1;
-module_param(enable_blkdev , uint, 0444);
-MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces");
-
-
-struct rsxx_bio_meta {
- struct bio *bio;
- atomic_t pending_dmas;
- atomic_t error;
- unsigned long start_time;
-};
-
-static struct kmem_cache *bio_meta_pool;
-
-static void rsxx_submit_bio(struct bio *bio);
-
-/*----------------- Block Device Operations -----------------*/
-static int rsxx_blkdev_ioctl(struct block_device *bdev,
- fmode_t mode,
- unsigned int cmd,
- unsigned long arg)
-{
- struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
-
- switch (cmd) {
- case RSXX_GETREG:
- return rsxx_reg_access(card, (void __user *)arg, 1);
- case RSXX_SETREG:
- return rsxx_reg_access(card, (void __user *)arg, 0);
- }
-
- return -ENOTTY;
-}
-
-static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
- u64 blocks = card->size8 >> 9;
-
- /*
- * get geometry: Fake it. I haven't found any drivers that set
- * geo->start, so we won't either.
- */
- if (card->size8) {
- geo->heads = 64;
- geo->sectors = 16;
- do_div(blocks, (geo->heads * geo->sectors));
- geo->cylinders = blocks;
- } else {
- geo->heads = 0;
- geo->sectors = 0;
- geo->cylinders = 0;
- }
- return 0;
-}
-
-static const struct block_device_operations rsxx_fops = {
- .owner = THIS_MODULE,
- .submit_bio = rsxx_submit_bio,
- .getgeo = rsxx_getgeo,
- .ioctl = rsxx_blkdev_ioctl,
-};
-
-static void bio_dma_done_cb(struct rsxx_cardinfo *card,
- void *cb_data,
- unsigned int error)
-{
- struct rsxx_bio_meta *meta = cb_data;
-
- if (error)
- atomic_set(&meta->error, 1);
-
- if (atomic_dec_and_test(&meta->pending_dmas)) {
- if (!card->eeh_state && card->gendisk)
- bio_end_io_acct(meta->bio, meta->start_time);
-
- if (atomic_read(&meta->error))
- bio_io_error(meta->bio);
- else
- bio_endio(meta->bio);
- kmem_cache_free(bio_meta_pool, meta);
- }
-}
-
-static void rsxx_submit_bio(struct bio *bio)
-{
- struct rsxx_cardinfo *card = bio->bi_bdev->bd_disk->private_data;
- struct rsxx_bio_meta *bio_meta;
- blk_status_t st = BLK_STS_IOERR;
-
- blk_queue_split(&bio);
-
- might_sleep();
-
- if (!card)
- goto req_err;
-
- if (bio_end_sector(bio) > get_capacity(card->gendisk))
- goto req_err;
-
- if (unlikely(card->halt))
- goto req_err;
-
- if (unlikely(card->dma_fault))
- goto req_err;
-
- if (bio->bi_iter.bi_size == 0) {
- dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
- goto req_err;
- }
-
- bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
- if (!bio_meta) {
- st = BLK_STS_RESOURCE;
- goto req_err;
- }
-
- bio_meta->bio = bio;
- atomic_set(&bio_meta->error, 0);
- atomic_set(&bio_meta->pending_dmas, 0);
-
- if (!unlikely(card->halt))
- bio_meta->start_time = bio_start_io_acct(bio);
-
- dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
- bio_data_dir(bio) ? 'W' : 'R', bio_meta,
- (u64)bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
-
- st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
- bio_dma_done_cb, bio_meta);
- if (st)
- goto queue_err;
-
- return;
-
-queue_err:
- kmem_cache_free(bio_meta_pool, bio_meta);
-req_err:
- if (st)
- bio->bi_status = st;
- bio_endio(bio);
-}
-
-/*----------------- Device Setup -------------------*/
-static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
-{
- unsigned char pci_rev;
-
- pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
-
- return (pci_rev >= RSXX_DISCARD_SUPPORT);
-}
-
-int rsxx_attach_dev(struct rsxx_cardinfo *card)
-{
- int err = 0;
-
- mutex_lock(&card->dev_lock);
-
- /* The block device requires the stripe size from the config. */
- if (enable_blkdev) {
- if (card->config_valid)
- set_capacity(card->gendisk, card->size8 >> 9);
- else
- set_capacity(card->gendisk, 0);
- err = device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL);
- if (err == 0)
- card->bdev_attached = 1;
- }
-
- mutex_unlock(&card->dev_lock);
-
- if (err)
- blk_cleanup_disk(card->gendisk);
-
- return err;
-}
-
-void rsxx_detach_dev(struct rsxx_cardinfo *card)
-{
- mutex_lock(&card->dev_lock);
-
- if (card->bdev_attached) {
- del_gendisk(card->gendisk);
- card->bdev_attached = 0;
- }
-
- mutex_unlock(&card->dev_lock);
-}
-
-int rsxx_setup_dev(struct rsxx_cardinfo *card)
-{
- unsigned short blk_size;
-
- mutex_init(&card->dev_lock);
-
- if (!enable_blkdev)
- return 0;
-
- card->major = register_blkdev(0, DRIVER_NAME);
- if (card->major < 0) {
- dev_err(CARD_TO_DEV(card), "Failed to get major number\n");
- return -ENOMEM;
- }
-
- card->gendisk = blk_alloc_disk(blkdev_minors);
- if (!card->gendisk) {
- dev_err(CARD_TO_DEV(card), "Failed disk alloc\n");
- unregister_blkdev(card->major, DRIVER_NAME);
- return -ENOMEM;
- }
-
- if (card->config_valid) {
- blk_size = card->config.data.block_size;
- blk_queue_dma_alignment(card->gendisk->queue, blk_size - 1);
- blk_queue_logical_block_size(card->gendisk->queue, blk_size);
- }
-
- blk_queue_max_hw_sectors(card->gendisk->queue, blkdev_max_hw_sectors);
- blk_queue_physical_block_size(card->gendisk->queue, RSXX_HW_BLK_SIZE);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, card->gendisk->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->gendisk->queue);
- if (rsxx_discard_supported(card)) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->gendisk->queue);
- blk_queue_max_discard_sectors(card->gendisk->queue,
- RSXX_HW_BLK_SIZE >> 9);
- card->gendisk->queue->limits.discard_granularity =
- RSXX_HW_BLK_SIZE;
- card->gendisk->queue->limits.discard_alignment =
- RSXX_HW_BLK_SIZE;
- }
-
- snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
- "rsxx%d", card->disk_id);
- card->gendisk->major = card->major;
- card->gendisk->minors = blkdev_minors;
- card->gendisk->fops = &rsxx_fops;
- card->gendisk->private_data = card;
-
- return 0;
-}
-
-void rsxx_destroy_dev(struct rsxx_cardinfo *card)
-{
- if (!enable_blkdev)
- return;
-
- blk_cleanup_disk(card->gendisk);
- card->gendisk = NULL;
- unregister_blkdev(card->major, DRIVER_NAME);
-}
-
-int rsxx_dev_init(void)
-{
- bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN);
- if (!bio_meta_pool)
- return -ENOMEM;
-
- return 0;
-}
-
-void rsxx_dev_cleanup(void)
-{
- kmem_cache_destroy(bio_meta_pool);
-}
-
-
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
deleted file mode 100644
index ed182f3dd054..000000000000
--- a/drivers/block/rsxx/dma.c
+++ /dev/null
@@ -1,1085 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-* Filename: dma.c
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#include <linux/slab.h>
-#include "rsxx_priv.h"
-
-struct rsxx_dma {
- struct list_head list;
- u8 cmd;
- unsigned int laddr; /* Logical address */
- struct {
- u32 off;
- u32 cnt;
- } sub_page;
- dma_addr_t dma_addr;
- struct page *page;
- unsigned int pg_off; /* Page Offset */
- rsxx_dma_cb cb;
- void *cb_data;
-};
-
-/* This timeout is used to detect a stalled DMA channel */
-#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000)
-
-struct hw_status {
- u8 status;
- u8 tag;
- __le16 count;
- __le32 _rsvd2;
- __le64 _rsvd3;
-} __packed;
-
-enum rsxx_dma_status {
- DMA_SW_ERR = 0x1,
- DMA_HW_FAULT = 0x2,
- DMA_CANCELLED = 0x4,
-};
-
-struct hw_cmd {
- u8 command;
- u8 tag;
- u8 _rsvd;
- u8 sub_page; /* Bit[0:2]: 512byte offset */
- /* Bit[4:6]: 512byte count */
- __le32 device_addr;
- __le64 host_addr;
-} __packed;
-
-enum rsxx_hw_cmd {
- HW_CMD_BLK_DISCARD = 0x70,
- HW_CMD_BLK_WRITE = 0x80,
- HW_CMD_BLK_READ = 0xC0,
- HW_CMD_BLK_RECON_READ = 0xE0,
-};
-
-enum rsxx_hw_status {
- HW_STATUS_CRC = 0x01,
- HW_STATUS_HARD_ERR = 0x02,
- HW_STATUS_SOFT_ERR = 0x04,
- HW_STATUS_FAULT = 0x08,
-};
-
-static struct kmem_cache *rsxx_dma_pool;
-
-struct dma_tracker {
- int next_tag;
- struct rsxx_dma *dma;
-};
-
-struct dma_tracker_list {
- spinlock_t lock;
- int head;
- struct dma_tracker list[];
-};
-
-
-/*----------------- Misc Utility Functions -------------------*/
-static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card)
-{
- unsigned long long tgt_addr8;
-
- tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) &
- card->_stripe.upper_mask) |
- ((addr8) & card->_stripe.lower_mask);
- do_div(tgt_addr8, RSXX_HW_BLK_SIZE);
- return tgt_addr8;
-}
-
-static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8)
-{
- unsigned int tgt;
-
- tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask;
-
- return tgt;
-}
-
-void rsxx_dma_queue_reset(struct rsxx_cardinfo *card)
-{
- /* Reset all DMA Command/Status Queues */
- iowrite32(DMA_QUEUE_RESET, card->regmap + RESET);
-}
-
-static unsigned int get_dma_size(struct rsxx_dma *dma)
-{
- if (dma->sub_page.cnt)
- return dma->sub_page.cnt << 9;
- else
- return RSXX_HW_BLK_SIZE;
-}
-
-
-/*----------------- DMA Tracker -------------------*/
-static void set_tracker_dma(struct dma_tracker_list *trackers,
- int tag,
- struct rsxx_dma *dma)
-{
- trackers->list[tag].dma = dma;
-}
-
-static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers,
- int tag)
-{
- return trackers->list[tag].dma;
-}
-
-static int pop_tracker(struct dma_tracker_list *trackers)
-{
- int tag;
-
- spin_lock(&trackers->lock);
- tag = trackers->head;
- if (tag != -1) {
- trackers->head = trackers->list[tag].next_tag;
- trackers->list[tag].next_tag = -1;
- }
- spin_unlock(&trackers->lock);
-
- return tag;
-}
-
-static void push_tracker(struct dma_tracker_list *trackers, int tag)
-{
- spin_lock(&trackers->lock);
- trackers->list[tag].next_tag = trackers->head;
- trackers->head = tag;
- trackers->list[tag].dma = NULL;
- spin_unlock(&trackers->lock);
-}
-
-
-/*----------------- Interrupt Coalescing -------------*/
-/*
- * Interrupt Coalescing Register Format:
- * Interrupt Timer (64ns units) [15:0]
- * Interrupt Count [24:16]
- * Reserved [31:25]
-*/
-#define INTR_COAL_LATENCY_MASK (0x0000ffff)
-
-#define INTR_COAL_COUNT_SHIFT 16
-#define INTR_COAL_COUNT_BITS 9
-#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \
- INTR_COAL_COUNT_SHIFT)
-#define INTR_COAL_LATENCY_UNITS_NS 64
-
-
-static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency)
-{
- u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS;
-
- if (mode == RSXX_INTR_COAL_DISABLED)
- return 0;
-
- return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) |
- (latency_units & INTR_COAL_LATENCY_MASK);
-
-}
-
-static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
-{
- int i;
- u32 q_depth = 0;
- u32 intr_coal;
-
- if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE ||
- unlikely(card->eeh_state))
- return;
-
- for (i = 0; i < card->n_targets; i++)
- q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth);
-
- intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode,
- q_depth / 2,
- card->config.data.intr_coal.latency);
- iowrite32(intr_coal, card->regmap + INTR_COAL);
-}
-
-/*----------------- RSXX DMA Handling -------------------*/
-static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma)
-{
- if (dma->cmd != HW_CMD_BLK_DISCARD) {
- if (!dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) {
- dma_unmap_page(&ctrl->card->dev->dev, dma->dma_addr,
- get_dma_size(dma),
- dma->cmd == HW_CMD_BLK_WRITE ?
- DMA_TO_DEVICE :
- DMA_FROM_DEVICE);
- }
- }
-
- kmem_cache_free(rsxx_dma_pool, dma);
-}
-
-static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
- struct rsxx_dma *dma,
- unsigned int status)
-{
- if (status & DMA_SW_ERR)
- ctrl->stats.dma_sw_err++;
- if (status & DMA_HW_FAULT)
- ctrl->stats.dma_hw_fault++;
- if (status & DMA_CANCELLED)
- ctrl->stats.dma_cancelled++;
-
- if (dma->cb)
- dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0);
-
- rsxx_free_dma(ctrl, dma);
-}
-
-int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
- struct list_head *q, unsigned int done)
-{
- struct rsxx_dma *dma;
- struct rsxx_dma *tmp;
- int cnt = 0;
-
- list_for_each_entry_safe(dma, tmp, q, list) {
- list_del(&dma->list);
- if (done & COMPLETE_DMA)
- rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
- else
- rsxx_free_dma(ctrl, dma);
- cnt++;
- }
-
- return cnt;
-}
-
-static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
- struct rsxx_dma *dma)
-{
- /*
- * Requeued DMAs go to the front of the queue so they are issued
- * first.
- */
- spin_lock_bh(&ctrl->queue_lock);
- ctrl->stats.sw_q_depth++;
- list_add(&dma->list, &ctrl->queue);
- spin_unlock_bh(&ctrl->queue_lock);
-}
-
-static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
- struct rsxx_dma *dma,
- u8 hw_st)
-{
- unsigned int status = 0;
- int requeue_cmd = 0;
-
- dev_dbg(CARD_TO_DEV(ctrl->card),
- "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n",
- dma->cmd, dma->laddr, hw_st);
-
- if (hw_st & HW_STATUS_CRC)
- ctrl->stats.crc_errors++;
- if (hw_st & HW_STATUS_HARD_ERR)
- ctrl->stats.hard_errors++;
- if (hw_st & HW_STATUS_SOFT_ERR)
- ctrl->stats.soft_errors++;
-
- switch (dma->cmd) {
- case HW_CMD_BLK_READ:
- if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) {
- if (ctrl->card->scrub_hard) {
- dma->cmd = HW_CMD_BLK_RECON_READ;
- requeue_cmd = 1;
- ctrl->stats.reads_retried++;
- } else {
- status |= DMA_HW_FAULT;
- ctrl->stats.reads_failed++;
- }
- } else if (hw_st & HW_STATUS_FAULT) {
- status |= DMA_HW_FAULT;
- ctrl->stats.reads_failed++;
- }
-
- break;
- case HW_CMD_BLK_RECON_READ:
- if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) {
- /* Data could not be reconstructed. */
- status |= DMA_HW_FAULT;
- ctrl->stats.reads_failed++;
- }
-
- break;
- case HW_CMD_BLK_WRITE:
- status |= DMA_HW_FAULT;
- ctrl->stats.writes_failed++;
-
- break;
- case HW_CMD_BLK_DISCARD:
- status |= DMA_HW_FAULT;
- ctrl->stats.discards_failed++;
-
- break;
- default:
- dev_err(CARD_TO_DEV(ctrl->card),
- "Unknown command in DMA!(cmd: x%02x "
- "laddr x%08x st: x%02x\n",
- dma->cmd, dma->laddr, hw_st);
- status |= DMA_SW_ERR;
-
- break;
- }
-
- if (requeue_cmd)
- rsxx_requeue_dma(ctrl, dma);
- else
- rsxx_complete_dma(ctrl, dma, status);
-}
-
-static void dma_engine_stalled(struct timer_list *t)
-{
- struct rsxx_dma_ctrl *ctrl = from_timer(ctrl, t, activity_timer);
- int cnt;
-
- if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
- unlikely(ctrl->card->eeh_state))
- return;
-
- if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) {
- /*
- * The dma engine was stalled because the SW_CMD_IDX write
- * was lost. Issue it again to recover.
- */
- dev_warn(CARD_TO_DEV(ctrl->card),
- "SW_CMD_IDX write was lost, re-writing...\n");
- iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
- mod_timer(&ctrl->activity_timer,
- jiffies + DMA_ACTIVITY_TIMEOUT);
- } else {
- dev_warn(CARD_TO_DEV(ctrl->card),
- "DMA channel %d has stalled, faulting interface.\n",
- ctrl->id);
- ctrl->card->dma_fault = 1;
-
- /* Clean up the DMA queue */
- spin_lock(&ctrl->queue_lock);
- cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
- spin_unlock(&ctrl->queue_lock);
-
- cnt += rsxx_dma_cancel(ctrl);
-
- if (cnt)
- dev_info(CARD_TO_DEV(ctrl->card),
- "Freed %d queued DMAs on channel %d\n",
- cnt, ctrl->id);
- }
-}
-
-static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
-{
- struct rsxx_dma *dma;
- int tag;
- int cmds_pending = 0;
- struct hw_cmd *hw_cmd_buf;
- int dir;
-
- hw_cmd_buf = ctrl->cmd.buf;
-
- if (unlikely(ctrl->card->halt) ||
- unlikely(ctrl->card->eeh_state))
- return;
-
- while (1) {
- spin_lock_bh(&ctrl->queue_lock);
- if (list_empty(&ctrl->queue)) {
- spin_unlock_bh(&ctrl->queue_lock);
- break;
- }
- spin_unlock_bh(&ctrl->queue_lock);
-
- tag = pop_tracker(ctrl->trackers);
- if (tag == -1)
- break;
-
- spin_lock_bh(&ctrl->queue_lock);
- dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
- list_del(&dma->list);
- ctrl->stats.sw_q_depth--;
- spin_unlock_bh(&ctrl->queue_lock);
-
- /*
- * This will catch any DMAs that slipped in right before the
- * fault, but was queued after all the other DMAs were
- * cancelled.
- */
- if (unlikely(ctrl->card->dma_fault)) {
- push_tracker(ctrl->trackers, tag);
- rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
- continue;
- }
-
- if (dma->cmd != HW_CMD_BLK_DISCARD) {
- if (dma->cmd == HW_CMD_BLK_WRITE)
- dir = DMA_TO_DEVICE;
- else
- dir = DMA_FROM_DEVICE;
-
- /*
- * The function dma_map_page is placed here because we
- * can only, by design, issue up to 255 commands to the
- * hardware at one time per DMA channel. So the maximum
- * amount of mapped memory would be 255 * 4 channels *
- * 4096 Bytes which is less than 2GB, the limit of a x8
- * Non-HWWD PCIe slot. This way the dma_map_page
- * function should never fail because of a lack of
- * mappable memory.
- */
- dma->dma_addr = dma_map_page(&ctrl->card->dev->dev, dma->page,
- dma->pg_off, dma->sub_page.cnt << 9, dir);
- if (dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) {
- push_tracker(ctrl->trackers, tag);
- rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
- continue;
- }
- }
-
- set_tracker_dma(ctrl->trackers, tag, dma);
- hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd;
- hw_cmd_buf[ctrl->cmd.idx].tag = tag;
- hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0;
- hw_cmd_buf[ctrl->cmd.idx].sub_page =
- ((dma->sub_page.cnt & 0x7) << 4) |
- (dma->sub_page.off & 0x7);
-
- hw_cmd_buf[ctrl->cmd.idx].device_addr =
- cpu_to_le32(dma->laddr);
-
- hw_cmd_buf[ctrl->cmd.idx].host_addr =
- cpu_to_le64(dma->dma_addr);
-
- dev_dbg(CARD_TO_DEV(ctrl->card),
- "Issue DMA%d(laddr %d tag %d) to idx %d\n",
- ctrl->id, dma->laddr, tag, ctrl->cmd.idx);
-
- ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK;
- cmds_pending++;
-
- if (dma->cmd == HW_CMD_BLK_WRITE)
- ctrl->stats.writes_issued++;
- else if (dma->cmd == HW_CMD_BLK_DISCARD)
- ctrl->stats.discards_issued++;
- else
- ctrl->stats.reads_issued++;
- }
-
- /* Let HW know we've queued commands. */
- if (cmds_pending) {
- atomic_add(cmds_pending, &ctrl->stats.hw_q_depth);
- mod_timer(&ctrl->activity_timer,
- jiffies + DMA_ACTIVITY_TIMEOUT);
-
- if (unlikely(ctrl->card->eeh_state)) {
- del_timer_sync(&ctrl->activity_timer);
- return;
- }
-
- iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
- }
-}
-
-static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
-{
- struct rsxx_dma *dma;
- unsigned long flags;
- u16 count;
- u8 status;
- u8 tag;
- struct hw_status *hw_st_buf;
-
- hw_st_buf = ctrl->status.buf;
-
- if (unlikely(ctrl->card->halt) ||
- unlikely(ctrl->card->dma_fault) ||
- unlikely(ctrl->card->eeh_state))
- return;
-
- count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
-
- while (count == ctrl->e_cnt) {
- /*
- * The read memory-barrier is necessary to keep aggressive
- * processors/optimizers (such as the PPC Apple G5) from
- * reordering the following status-buffer tag & status read
- * *before* the count read on subsequent iterations of the
- * loop!
- */
- rmb();
-
- status = hw_st_buf[ctrl->status.idx].status;
- tag = hw_st_buf[ctrl->status.idx].tag;
-
- dma = get_tracker_dma(ctrl->trackers, tag);
- if (dma == NULL) {
- spin_lock_irqsave(&ctrl->card->irq_lock, flags);
- rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL);
- spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
-
- dev_err(CARD_TO_DEV(ctrl->card),
- "No tracker for tag %d "
- "(idx %d id %d)\n",
- tag, ctrl->status.idx, ctrl->id);
- return;
- }
-
- dev_dbg(CARD_TO_DEV(ctrl->card),
- "Completing DMA%d"
- "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n",
- ctrl->id, dma->laddr, tag, status, count,
- ctrl->status.idx);
-
- atomic_dec(&ctrl->stats.hw_q_depth);
-
- mod_timer(&ctrl->activity_timer,
- jiffies + DMA_ACTIVITY_TIMEOUT);
-
- if (status)
- rsxx_handle_dma_error(ctrl, dma, status);
- else
- rsxx_complete_dma(ctrl, dma, 0);
-
- push_tracker(ctrl->trackers, tag);
-
- ctrl->status.idx = (ctrl->status.idx + 1) &
- RSXX_CS_IDX_MASK;
- ctrl->e_cnt++;
-
- count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
- }
-
- dma_intr_coal_auto_tune(ctrl->card);
-
- if (atomic_read(&ctrl->stats.hw_q_depth) == 0)
- del_timer_sync(&ctrl->activity_timer);
-
- spin_lock_irqsave(&ctrl->card->irq_lock, flags);
- rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
- spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
-
- spin_lock_bh(&ctrl->queue_lock);
- if (ctrl->stats.sw_q_depth)
- queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
- spin_unlock_bh(&ctrl->queue_lock);
-}
-
-static void rsxx_schedule_issue(struct work_struct *work)
-{
- struct rsxx_dma_ctrl *ctrl;
-
- ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
-
- mutex_lock(&ctrl->work_lock);
- rsxx_issue_dmas(ctrl);
- mutex_unlock(&ctrl->work_lock);
-}
-
-static void rsxx_schedule_done(struct work_struct *work)
-{
- struct rsxx_dma_ctrl *ctrl;
-
- ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
-
- mutex_lock(&ctrl->work_lock);
- rsxx_dma_done(ctrl);
- mutex_unlock(&ctrl->work_lock);
-}
-
-static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
- struct list_head *q,
- unsigned int laddr,
- rsxx_dma_cb cb,
- void *cb_data)
-{
- struct rsxx_dma *dma;
-
- dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
- if (!dma)
- return BLK_STS_RESOURCE;
-
- dma->cmd = HW_CMD_BLK_DISCARD;
- dma->laddr = laddr;
- dma->dma_addr = 0;
- dma->sub_page.off = 0;
- dma->sub_page.cnt = 0;
- dma->page = NULL;
- dma->pg_off = 0;
- dma->cb = cb;
- dma->cb_data = cb_data;
-
- dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr);
-
- list_add_tail(&dma->list, q);
-
- return 0;
-}
-
-static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
- struct list_head *q,
- int dir,
- unsigned int dma_off,
- unsigned int dma_len,
- unsigned int laddr,
- struct page *page,
- unsigned int pg_off,
- rsxx_dma_cb cb,
- void *cb_data)
-{
- struct rsxx_dma *dma;
-
- dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
- if (!dma)
- return BLK_STS_RESOURCE;
-
- dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
- dma->laddr = laddr;
- dma->sub_page.off = (dma_off >> 9);
- dma->sub_page.cnt = (dma_len >> 9);
- dma->page = page;
- dma->pg_off = pg_off;
- dma->cb = cb;
- dma->cb_data = cb_data;
-
- dev_dbg(CARD_TO_DEV(card),
- "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n",
- dir ? 'W' : 'R', dma->laddr, dma->sub_page.off,
- dma->sub_page.cnt, dma->page, dma->pg_off);
-
- /* Queue the DMA */
- list_add_tail(&dma->list, q);
-
- return 0;
-}
-
-blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
- struct bio *bio,
- atomic_t *n_dmas,
- rsxx_dma_cb cb,
- void *cb_data)
-{
- struct list_head dma_list[RSXX_MAX_TARGETS];
- struct bio_vec bvec;
- struct bvec_iter iter;
- unsigned long long addr8;
- unsigned int laddr;
- unsigned int bv_len;
- unsigned int bv_off;
- unsigned int dma_off;
- unsigned int dma_len;
- int dma_cnt[RSXX_MAX_TARGETS];
- int tgt;
- blk_status_t st;
- int i;
-
- addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
- atomic_set(n_dmas, 0);
-
- for (i = 0; i < card->n_targets; i++) {
- INIT_LIST_HEAD(&dma_list[i]);
- dma_cnt[i] = 0;
- }
-
- if (bio_op(bio) == REQ_OP_DISCARD) {
- bv_len = bio->bi_iter.bi_size;
-
- while (bv_len > 0) {
- tgt = rsxx_get_dma_tgt(card, addr8);
- laddr = rsxx_addr8_to_laddr(addr8, card);
-
- st = rsxx_queue_discard(card, &dma_list[tgt], laddr,
- cb, cb_data);
- if (st)
- goto bvec_err;
-
- dma_cnt[tgt]++;
- atomic_inc(n_dmas);
- addr8 += RSXX_HW_BLK_SIZE;
- bv_len -= RSXX_HW_BLK_SIZE;
- }
- } else {
- bio_for_each_segment(bvec, bio, iter) {
- bv_len = bvec.bv_len;
- bv_off = bvec.bv_offset;
-
- while (bv_len > 0) {
- tgt = rsxx_get_dma_tgt(card, addr8);
- laddr = rsxx_addr8_to_laddr(addr8, card);
- dma_off = addr8 & RSXX_HW_BLK_MASK;
- dma_len = min(bv_len,
- RSXX_HW_BLK_SIZE - dma_off);
-
- st = rsxx_queue_dma(card, &dma_list[tgt],
- bio_data_dir(bio),
- dma_off, dma_len,
- laddr, bvec.bv_page,
- bv_off, cb, cb_data);
- if (st)
- goto bvec_err;
-
- dma_cnt[tgt]++;
- atomic_inc(n_dmas);
- addr8 += dma_len;
- bv_off += dma_len;
- bv_len -= dma_len;
- }
- }
- }
-
- for (i = 0; i < card->n_targets; i++) {
- if (!list_empty(&dma_list[i])) {
- spin_lock_bh(&card->ctrl[i].queue_lock);
- card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
- list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
- spin_unlock_bh(&card->ctrl[i].queue_lock);
-
- queue_work(card->ctrl[i].issue_wq,
- &card->ctrl[i].issue_dma_work);
- }
- }
-
- return 0;
-
-bvec_err:
- for (i = 0; i < card->n_targets; i++)
- rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
- FREE_DMA);
- return st;
-}
-
-
-/*----------------- DMA Engine Initialization & Setup -------------------*/
-int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl)
-{
- ctrl->status.buf = dma_alloc_coherent(&dev->dev, STATUS_BUFFER_SIZE8,
- &ctrl->status.dma_addr, GFP_KERNEL);
- ctrl->cmd.buf = dma_alloc_coherent(&dev->dev, COMMAND_BUFFER_SIZE8,
- &ctrl->cmd.dma_addr, GFP_KERNEL);
- if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL)
- return -ENOMEM;
-
- memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8);
- iowrite32(lower_32_bits(ctrl->status.dma_addr),
- ctrl->regmap + SB_ADD_LO);
- iowrite32(upper_32_bits(ctrl->status.dma_addr),
- ctrl->regmap + SB_ADD_HI);
-
- memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8);
- iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO);
- iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI);
-
- ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT);
- if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) {
- dev_crit(&dev->dev, "Failed reading status cnt x%x\n",
- ctrl->status.idx);
- return -EINVAL;
- }
- iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT);
- iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT);
-
- ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX);
- if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) {
- dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n",
- ctrl->status.idx);
- return -EINVAL;
- }
- iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX);
- iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
-
- return 0;
-}
-
-static int rsxx_dma_ctrl_init(struct pci_dev *dev,
- struct rsxx_dma_ctrl *ctrl)
-{
- int i;
- int st;
-
- memset(&ctrl->stats, 0, sizeof(ctrl->stats));
-
- ctrl->trackers = vmalloc(struct_size(ctrl->trackers, list,
- RSXX_MAX_OUTSTANDING_CMDS));
- if (!ctrl->trackers)
- return -ENOMEM;
-
- ctrl->trackers->head = 0;
- for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
- ctrl->trackers->list[i].next_tag = i + 1;
- ctrl->trackers->list[i].dma = NULL;
- }
- ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1;
- spin_lock_init(&ctrl->trackers->lock);
-
- spin_lock_init(&ctrl->queue_lock);
- mutex_init(&ctrl->work_lock);
- INIT_LIST_HEAD(&ctrl->queue);
-
- timer_setup(&ctrl->activity_timer, dma_engine_stalled, 0);
-
- ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0);
- if (!ctrl->issue_wq)
- return -ENOMEM;
-
- ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0);
- if (!ctrl->done_wq)
- return -ENOMEM;
-
- INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
- INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);
-
- st = rsxx_hw_buffers_init(dev, ctrl);
- if (st)
- return st;
-
- return 0;
-}
-
-static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card,
- unsigned int stripe_size8)
-{
- if (!is_power_of_2(stripe_size8)) {
- dev_err(CARD_TO_DEV(card),
- "stripe_size is NOT a power of 2!\n");
- return -EINVAL;
- }
-
- card->_stripe.lower_mask = stripe_size8 - 1;
-
- card->_stripe.upper_mask = ~(card->_stripe.lower_mask);
- card->_stripe.upper_shift = ffs(card->n_targets) - 1;
-
- card->_stripe.target_mask = card->n_targets - 1;
- card->_stripe.target_shift = ffs(stripe_size8) - 1;
-
- dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n",
- card->_stripe.lower_mask);
- dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n",
- card->_stripe.upper_shift);
- dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n",
- card->_stripe.upper_mask);
- dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n",
- card->_stripe.target_mask);
- dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n",
- card->_stripe.target_shift);
-
- return 0;
-}
-
-int rsxx_dma_configure(struct rsxx_cardinfo *card)
-{
- u32 intr_coal;
-
- intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode,
- card->config.data.intr_coal.count,
- card->config.data.intr_coal.latency);
- iowrite32(intr_coal, card->regmap + INTR_COAL);
-
- return rsxx_dma_stripe_setup(card, card->config.data.stripe_size);
-}
-
-int rsxx_dma_setup(struct rsxx_cardinfo *card)
-{
- unsigned long flags;
- int st;
- int i;
-
- dev_info(CARD_TO_DEV(card),
- "Initializing %d DMA targets\n",
- card->n_targets);
-
- /* Regmap is divided up into 4K chunks. One for each DMA channel */
- for (i = 0; i < card->n_targets; i++)
- card->ctrl[i].regmap = card->regmap + (i * 4096);
-
- card->dma_fault = 0;
-
- /* Reset the DMA queues */
- rsxx_dma_queue_reset(card);
-
- /************* Setup DMA Control *************/
- for (i = 0; i < card->n_targets; i++) {
- st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]);
- if (st)
- goto failed_dma_setup;
-
- card->ctrl[i].card = card;
- card->ctrl[i].id = i;
- }
-
- card->scrub_hard = 1;
-
- if (card->config_valid)
- rsxx_dma_configure(card);
-
- /* Enable the interrupts after all setup has completed. */
- for (i = 0; i < card->n_targets; i++) {
- spin_lock_irqsave(&card->irq_lock, flags);
- rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i));
- spin_unlock_irqrestore(&card->irq_lock, flags);
- }
-
- return 0;
-
-failed_dma_setup:
- for (i = 0; i < card->n_targets; i++) {
- struct rsxx_dma_ctrl *ctrl = &card->ctrl[i];
-
- if (ctrl->issue_wq) {
- destroy_workqueue(ctrl->issue_wq);
- ctrl->issue_wq = NULL;
- }
-
- if (ctrl->done_wq) {
- destroy_workqueue(ctrl->done_wq);
- ctrl->done_wq = NULL;
- }
-
- vfree(ctrl->trackers);
-
- if (ctrl->status.buf)
- dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8,
- ctrl->status.buf,
- ctrl->status.dma_addr);
- if (ctrl->cmd.buf)
- dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8,
- ctrl->cmd.buf, ctrl->cmd.dma_addr);
- }
-
- return st;
-}
-
-int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
-{
- struct rsxx_dma *dma;
- int i;
- int cnt = 0;
-
- /* Clean up issued DMAs */
- for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
- dma = get_tracker_dma(ctrl->trackers, i);
- if (dma) {
- atomic_dec(&ctrl->stats.hw_q_depth);
- rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
- push_tracker(ctrl->trackers, i);
- cnt++;
- }
- }
-
- return cnt;
-}
-
-void rsxx_dma_destroy(struct rsxx_cardinfo *card)
-{
- struct rsxx_dma_ctrl *ctrl;
- int i;
-
- for (i = 0; i < card->n_targets; i++) {
- ctrl = &card->ctrl[i];
-
- if (ctrl->issue_wq) {
- destroy_workqueue(ctrl->issue_wq);
- ctrl->issue_wq = NULL;
- }
-
- if (ctrl->done_wq) {
- destroy_workqueue(ctrl->done_wq);
- ctrl->done_wq = NULL;
- }
-
- if (timer_pending(&ctrl->activity_timer))
- del_timer_sync(&ctrl->activity_timer);
-
- /* Clean up the DMA queue */
- spin_lock_bh(&ctrl->queue_lock);
- rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
- spin_unlock_bh(&ctrl->queue_lock);
-
- rsxx_dma_cancel(ctrl);
-
- vfree(ctrl->trackers);
-
- dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8,
- ctrl->status.buf, ctrl->status.dma_addr);
- dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8,
- ctrl->cmd.buf, ctrl->cmd.dma_addr);
- }
-}
-
-int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
-{
- int i;
- int j;
- int cnt;
- struct rsxx_dma *dma;
- struct list_head *issued_dmas;
-
- issued_dmas = kcalloc(card->n_targets, sizeof(*issued_dmas),
- GFP_KERNEL);
- if (!issued_dmas)
- return -ENOMEM;
-
- for (i = 0; i < card->n_targets; i++) {
- INIT_LIST_HEAD(&issued_dmas[i]);
- cnt = 0;
- for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
- dma = get_tracker_dma(card->ctrl[i].trackers, j);
- if (dma == NULL)
- continue;
-
- if (dma->cmd == HW_CMD_BLK_WRITE)
- card->ctrl[i].stats.writes_issued--;
- else if (dma->cmd == HW_CMD_BLK_DISCARD)
- card->ctrl[i].stats.discards_issued--;
- else
- card->ctrl[i].stats.reads_issued--;
-
- if (dma->cmd != HW_CMD_BLK_DISCARD) {
- dma_unmap_page(&card->dev->dev, dma->dma_addr,
- get_dma_size(dma),
- dma->cmd == HW_CMD_BLK_WRITE ?
- DMA_TO_DEVICE :
- DMA_FROM_DEVICE);
- }
-
- list_add_tail(&dma->list, &issued_dmas[i]);
- push_tracker(card->ctrl[i].trackers, j);
- cnt++;
- }
-
- spin_lock_bh(&card->ctrl[i].queue_lock);
- list_splice(&issued_dmas[i], &card->ctrl[i].queue);
-
- atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
- card->ctrl[i].stats.sw_q_depth += cnt;
- card->ctrl[i].e_cnt = 0;
- spin_unlock_bh(&card->ctrl[i].queue_lock);
- }
-
- kfree(issued_dmas);
-
- return 0;
-}
-
-int rsxx_dma_init(void)
-{
- rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN);
- if (!rsxx_dma_pool)
- return -ENOMEM;
-
- return 0;
-}
-
-
-void rsxx_dma_cleanup(void)
-{
- kmem_cache_destroy(rsxx_dma_pool);
-}
-
diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h
deleted file mode 100644
index 4f84905a6fd2..000000000000
--- a/drivers/block/rsxx/rsxx.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
-* Filename: rsxx.h
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#ifndef __RSXX_H__
-#define __RSXX_H__
-
-/*----------------- IOCTL Definitions -------------------*/
-
-#define RSXX_MAX_DATA 8
-
-struct rsxx_reg_access {
- __u32 addr;
- __u32 cnt;
- __u32 stat;
- __u32 stream;
- __u32 data[RSXX_MAX_DATA];
-};
-
-#define RSXX_MAX_REG_CNT (RSXX_MAX_DATA * (sizeof(__u32)))
-
-#define RSXX_IOC_MAGIC 'r'
-
-#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access)
-#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access)
-
-#endif /* __RSXX_H_ */
diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h
deleted file mode 100644
index 2b79015f5849..000000000000
--- a/drivers/block/rsxx/rsxx_cfg.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
-* Filename: rsXX_cfg.h
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#ifndef __RSXX_CFG_H__
-#define __RSXX_CFG_H__
-
-/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */
-#include <linux/types.h>
-
-/*
- * The card config version must match the driver's expected version. If it does
- * not, the DMA interfaces will not be attached and the user will need to
- * initialize/upgrade the card configuration using the card config utility.
- */
-#define RSXX_CFG_VERSION 4
-
-struct card_cfg_hdr {
- __u32 version;
- __u32 crc;
-};
-
-struct card_cfg_data {
- __u32 block_size;
- __u32 stripe_size;
- __u32 vendor_id;
- __u32 cache_order;
- struct {
- __u32 mode; /* Disabled, manual, auto-tune... */
- __u32 count; /* Number of intr to coalesce */
- __u32 latency;/* Max wait time (in ns) */
- } intr_coal;
-};
-
-struct rsxx_card_cfg {
- struct card_cfg_hdr hdr;
- struct card_cfg_data data;
-};
-
-/* Vendor ID Values */
-#define RSXX_VENDOR_ID_IBM 0
-#define RSXX_VENDOR_ID_DSI 1
-#define RSXX_VENDOR_COUNT 2
-
-/* Interrupt Coalescing Values */
-#define RSXX_INTR_COAL_DISABLED 0
-#define RSXX_INTR_COAL_EXPLICIT 1
-#define RSXX_INTR_COAL_AUTO_TUNE 2
-
-
-#endif /* __RSXX_CFG_H__ */
-
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
deleted file mode 100644
index 26c320c0d924..000000000000
--- a/drivers/block/rsxx/rsxx_priv.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
-* Filename: rsxx_priv.h
-*
-* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
-* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
-*
-* (C) Copyright 2013 IBM Corporation
-*/
-
-#ifndef __RSXX_PRIV_H__
-#define __RSXX_PRIV_H__
-
-#include <linux/semaphore.h>
-
-#include <linux/fs.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/sysfs.h>
-#include <linux/workqueue.h>
-#include <linux/bio.h>
-#include <linux/vmalloc.h>
-#include <linux/timer.h>
-#include <linux/ioctl.h>
-#include <linux/delay.h>
-
-#include "rsxx.h"
-#include "rsxx_cfg.h"
-
-struct proc_cmd;
-
-#define PCI_DEVICE_ID_FS70_FLASH 0x04A9
-#define PCI_DEVICE_ID_FS80_FLASH 0x04AA
-
-#define RS70_PCI_REV_SUPPORTED 4
-
-#define DRIVER_NAME "rsxx"
-#define DRIVER_VERSION "4.0.3.2516"
-
-/* Block size is 4096 */
-#define RSXX_HW_BLK_SHIFT 12
-#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT)
-#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1)
-
-#define MAX_CREG_DATA8 32
-#define LOG_BUF_SIZE8 128
-
-#define RSXX_MAX_OUTSTANDING_CMDS 255
-#define RSXX_CS_IDX_MASK 0xff
-
-#define STATUS_BUFFER_SIZE8 4096
-#define COMMAND_BUFFER_SIZE8 4096
-
-#define RSXX_MAX_TARGETS 8
-
-struct dma_tracker_list;
-
-/* DMA Command/Status Buffer structure */
-struct rsxx_cs_buffer {
- dma_addr_t dma_addr;
- void *buf;
- u32 idx;
-};
-
-struct rsxx_dma_stats {
- u32 crc_errors;
- u32 hard_errors;
- u32 soft_errors;
- u32 writes_issued;
- u32 writes_failed;
- u32 reads_issued;
- u32 reads_failed;
- u32 reads_retried;
- u32 discards_issued;
- u32 discards_failed;
- u32 done_rescheduled;
- u32 issue_rescheduled;
- u32 dma_sw_err;
- u32 dma_hw_fault;
- u32 dma_cancelled;
- u32 sw_q_depth; /* Number of DMAs on the SW queue. */
- atomic_t hw_q_depth; /* Number of DMAs queued to HW. */
-};
-
-struct rsxx_dma_ctrl {
- struct rsxx_cardinfo *card;
- int id;
- void __iomem *regmap;
- struct rsxx_cs_buffer status;
- struct rsxx_cs_buffer cmd;
- u16 e_cnt;
- spinlock_t queue_lock;
- struct list_head queue;
- struct workqueue_struct *issue_wq;
- struct work_struct issue_dma_work;
- struct workqueue_struct *done_wq;
- struct work_struct dma_done_work;
- struct timer_list activity_timer;
- struct dma_tracker_list *trackers;
- struct rsxx_dma_stats stats;
- struct mutex work_lock;
-};
-
-struct rsxx_cardinfo {
- struct pci_dev *dev;
- unsigned int halt;
- unsigned int eeh_state;
-
- void __iomem *regmap;
- spinlock_t irq_lock;
- unsigned int isr_mask;
- unsigned int ier_mask;
-
- struct rsxx_card_cfg config;
- int config_valid;
-
- /* Embedded CPU Communication */
- struct {
- spinlock_t lock;
- bool active;
- struct creg_cmd *active_cmd;
- struct workqueue_struct *creg_wq;
- struct work_struct done_work;
- struct list_head queue;
- unsigned int q_depth;
- /* Cache the creg status to prevent ioreads */
- struct {
- u32 stat;
- u32 failed_cancel_timer;
- u32 creg_timeout;
- } creg_stats;
- struct timer_list cmd_timer;
- struct mutex reset_lock;
- int reset;
- } creg_ctrl;
-
- struct {
- char tmp[MAX_CREG_DATA8];
- char buf[LOG_BUF_SIZE8]; /* terminated */
- int buf_len;
- } log;
-
- struct workqueue_struct *event_wq;
- struct work_struct event_work;
- unsigned int state;
- u64 size8;
-
- /* Lock the device attach/detach function */
- struct mutex dev_lock;
-
- /* Block Device Variables */
- bool bdev_attached;
- int disk_id;
- int major;
- struct gendisk *gendisk;
- struct {
- /* Used to convert a byte address to a device address. */
- u64 lower_mask;
- u64 upper_shift;
- u64 upper_mask;
- u64 target_mask;
- u64 target_shift;
- } _stripe;
- unsigned int dma_fault;
-
- int scrub_hard;
-
- int n_targets;
- struct rsxx_dma_ctrl *ctrl;
-
- struct dentry *debugfs_dir;
-};
-
-enum rsxx_pci_regmap {
- HWID = 0x00, /* Hardware Identification Register */
- SCRATCH = 0x04, /* Scratch/Debug Register */
- RESET = 0x08, /* Reset Register */
- ISR = 0x10, /* Interrupt Status Register */
- IER = 0x14, /* Interrupt Enable Register */
- IPR = 0x18, /* Interrupt Poll Register */
- CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */
- CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/
- HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */
- SW_CMD_IDX = 0x2C, /* Software Processed Command Index */
- SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */
- SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */
- HW_STATUS_CNT = 0x38, /* Hardware Status Counter */
- SW_STATUS_CNT = 0x3C, /* Deprecated */
- CREG_CMD = 0x40, /* CPU Command Register */
- CREG_ADD = 0x44, /* CPU Address Register */
- CREG_CNT = 0x48, /* CPU Count Register */
- CREG_STAT = 0x4C, /* CPU Status Register */
- CREG_DATA0 = 0x50, /* CPU Data Registers */
- CREG_DATA1 = 0x54,
- CREG_DATA2 = 0x58,
- CREG_DATA3 = 0x5C,
- CREG_DATA4 = 0x60,
- CREG_DATA5 = 0x64,
- CREG_DATA6 = 0x68,
- CREG_DATA7 = 0x6c,
- INTR_COAL = 0x70, /* Interrupt Coalescing Register */
- HW_ERROR = 0x74, /* Card Error Register */
- PCI_DEBUG0 = 0x78, /* PCI Debug Registers */
- PCI_DEBUG1 = 0x7C,
- PCI_DEBUG2 = 0x80,
- PCI_DEBUG3 = 0x84,
- PCI_DEBUG4 = 0x88,
- PCI_DEBUG5 = 0x8C,
- PCI_DEBUG6 = 0x90,
- PCI_DEBUG7 = 0x94,
- PCI_POWER_THROTTLE = 0x98,
- PERF_CTRL = 0x9c,
- PERF_TIMER_LO = 0xa0,
- PERF_TIMER_HI = 0xa4,
- PERF_RD512_LO = 0xa8,
- PERF_RD512_HI = 0xac,
- PERF_WR512_LO = 0xb0,
- PERF_WR512_HI = 0xb4,
- PCI_RECONFIG = 0xb8,
-};
-
-enum rsxx_intr {
- CR_INTR_DMA0 = 0x00000001,
- CR_INTR_CREG = 0x00000002,
- CR_INTR_DMA1 = 0x00000004,
- CR_INTR_EVENT = 0x00000008,
- CR_INTR_DMA2 = 0x00000010,
- CR_INTR_DMA3 = 0x00000020,
- CR_INTR_DMA4 = 0x00000040,
- CR_INTR_DMA5 = 0x00000080,
- CR_INTR_DMA6 = 0x00000100,
- CR_INTR_DMA7 = 0x00000200,
- CR_INTR_ALL_C = 0x0000003f,
- CR_INTR_ALL_G = 0x000003ff,
- CR_INTR_DMA_ALL = 0x000003f5,
- CR_INTR_ALL = 0xffffffff,
-};
-
-static inline int CR_INTR_DMA(int N)
-{
- static const unsigned int _CR_INTR_DMA[] = {
- CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3,
- CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7
- };
- return _CR_INTR_DMA[N];
-}
-enum rsxx_pci_reset {
- DMA_QUEUE_RESET = 0x00000001,
-};
-
-enum rsxx_hw_fifo_flush {
- RSXX_FLUSH_BUSY = 0x00000002,
- RSXX_FLUSH_TIMEOUT = 0x00000004,
-};
-
-enum rsxx_pci_revision {
- RSXX_DISCARD_SUPPORT = 2,
- RSXX_EEH_SUPPORT = 3,
-};
-
-enum rsxx_creg_cmd {
- CREG_CMD_TAG_MASK = 0x0000FF00,
- CREG_OP_WRITE = 0x000000C0,
- CREG_OP_READ = 0x000000E0,
-};
-
-enum rsxx_creg_addr {
- CREG_ADD_CARD_CMD = 0x80001000,
- CREG_ADD_CARD_STATE = 0x80001004,
- CREG_ADD_CARD_SIZE = 0x8000100c,
- CREG_ADD_CAPABILITIES = 0x80001050,
- CREG_ADD_LOG = 0x80002000,
- CREG_ADD_NUM_TARGETS = 0x80003000,
- CREG_ADD_CRAM = 0xA0000000,
- CREG_ADD_CONFIG = 0xB0000000,
-};
-
-enum rsxx_creg_card_cmd {
- CARD_CMD_STARTUP = 1,
- CARD_CMD_SHUTDOWN = 2,
- CARD_CMD_LOW_LEVEL_FORMAT = 3,
- CARD_CMD_FPGA_RECONFIG_BR = 4,
- CARD_CMD_FPGA_RECONFIG_MAIN = 5,
- CARD_CMD_BACKUP = 6,
- CARD_CMD_RESET = 7,
- CARD_CMD_deprecated = 8,
- CARD_CMD_UNINITIALIZE = 9,
- CARD_CMD_DSTROY_EMERGENCY = 10,
- CARD_CMD_DSTROY_NORMAL = 11,
- CARD_CMD_DSTROY_EXTENDED = 12,
- CARD_CMD_DSTROY_ABORT = 13,
-};
-
-enum rsxx_card_state {
- CARD_STATE_SHUTDOWN = 0x00000001,
- CARD_STATE_STARTING = 0x00000002,
- CARD_STATE_FORMATTING = 0x00000004,
- CARD_STATE_UNINITIALIZED = 0x00000008,
- CARD_STATE_GOOD = 0x00000010,
- CARD_STATE_SHUTTING_DOWN = 0x00000020,
- CARD_STATE_FAULT = 0x00000040,
- CARD_STATE_RD_ONLY_FAULT = 0x00000080,
- CARD_STATE_DSTROYING = 0x00000100,
-};
-
-enum rsxx_led {
- LED_DEFAULT = 0x0,
- LED_IDENTIFY = 0x1,
- LED_SOAK = 0x2,
-};
-
-enum rsxx_creg_flash_lock {
- CREG_FLASH_LOCK = 1,
- CREG_FLASH_UNLOCK = 2,
-};
-
-enum rsxx_card_capabilities {
- CARD_CAP_SUBPAGE_WRITES = 0x00000080,
-};
-
-enum rsxx_creg_stat {
- CREG_STAT_STATUS_MASK = 0x00000003,
- CREG_STAT_SUCCESS = 0x1,
- CREG_STAT_ERROR = 0x2,
- CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */
- CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */
- CREG_STAT_TAG_MASK = 0x0000ff00,
-};
-
-enum rsxx_dma_finish {
- FREE_DMA = 0x0,
- COMPLETE_DMA = 0x1,
-};
-
-static inline unsigned int CREG_DATA(int N)
-{
- return CREG_DATA0 + (N << 2);
-}
-
-/*----------------- Convenient Log Wrappers -------------------*/
-#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev)
-
-/***** config.c *****/
-int rsxx_load_config(struct rsxx_cardinfo *card);
-
-/***** core.c *****/
-void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr);
-void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr);
-void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
- unsigned int intr);
-void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
- unsigned int intr);
-
-/***** dev.c *****/
-int rsxx_attach_dev(struct rsxx_cardinfo *card);
-void rsxx_detach_dev(struct rsxx_cardinfo *card);
-int rsxx_setup_dev(struct rsxx_cardinfo *card);
-void rsxx_destroy_dev(struct rsxx_cardinfo *card);
-int rsxx_dev_init(void);
-void rsxx_dev_cleanup(void);
-
-/***** dma.c ****/
-typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
- void *cb_data,
- unsigned int status);
-int rsxx_dma_setup(struct rsxx_cardinfo *card);
-void rsxx_dma_destroy(struct rsxx_cardinfo *card);
-int rsxx_dma_init(void);
-int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
- struct list_head *q,
- unsigned int done);
-int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
-void rsxx_dma_cleanup(void);
-void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
-int rsxx_dma_configure(struct rsxx_cardinfo *card);
-blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
- struct bio *bio,
- atomic_t *n_dmas,
- rsxx_dma_cb cb,
- void *cb_data);
-int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
-int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
-int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
-
-/***** cregs.c *****/
-int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr,
- unsigned int size8,
- void *data,
- int byte_stream);
-int rsxx_creg_read(struct rsxx_cardinfo *card,
- u32 addr,
- unsigned int size8,
- void *data,
- int byte_stream);
-int rsxx_read_hw_log(struct rsxx_cardinfo *card);
-int rsxx_get_card_state(struct rsxx_cardinfo *card,
- unsigned int *state);
-int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8);
-int rsxx_get_num_targets(struct rsxx_cardinfo *card,
- unsigned int *n_targets);
-int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
- u32 *capabilities);
-int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd);
-int rsxx_creg_setup(struct rsxx_cardinfo *card);
-void rsxx_creg_destroy(struct rsxx_cardinfo *card);
-int rsxx_creg_init(void);
-void rsxx_creg_cleanup(void);
-int rsxx_reg_access(struct rsxx_cardinfo *card,
- struct rsxx_reg_access __user *ucmd,
- int read);
-void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card);
-void rsxx_kick_creg_queue(struct rsxx_cardinfo *card);
-
-
-
-#endif /* __DRIVERS_BLOCK_RSXX_H__ */
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6f45a53f7cbf..fb855da971ee 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/blk-mq.h>
#include <linux/hdreg.h>
-#include <linux/genhd.h>
#include <linux/cdrom.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -143,8 +142,8 @@ static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
unsigned command, unsigned long argument)
{
+ struct vdc_port *port = bdev->bd_disk->private_data;
int i;
- struct gendisk *disk;
switch (command) {
case CDROMMULTISESSION:
@@ -155,12 +154,15 @@ static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
return 0;
case CDROM_GET_CAPABILITY:
- disk = bdev->bd_disk;
-
- if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
+ if (!vdc_version_supported(port, 1, 1))
+ return -EINVAL;
+ switch (port->vdisk_mtype) {
+ case VD_MEDIA_TYPE_CD:
+ case VD_MEDIA_TYPE_DVD:
return 0;
- return -EINVAL;
-
+ default:
+ return -EINVAL;
+ }
default:
pr_debug(PFX "ioctl %08x not supported\n", command);
return -EINVAL;
@@ -459,7 +461,7 @@ static int __vdc_tx_trigger(struct vdc_port *port)
static int __send_request(struct request *req)
{
- struct vdc_port *port = req->rq_disk->private_data;
+ struct vdc_port *port = req->q->disk->private_data;
struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
struct scatterlist sg[MAX_RING_COOKIES];
struct vdc_req_entry *rqe;
@@ -854,14 +856,12 @@ static int probe_disk(struct vdc_port *port)
switch (port->vdisk_mtype) {
case VD_MEDIA_TYPE_CD:
pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
- g->flags |= GENHD_FL_CD;
g->flags |= GENHD_FL_REMOVABLE;
set_disk_ro(g, 1);
break;
case VD_MEDIA_TYPE_DVD:
pr_info(PFX "Virtual DVD %s\n", port->disk_name);
- g->flags |= GENHD_FL_CD;
g->flags |= GENHD_FL_REMOVABLE;
set_disk_ro(g, 1);
break;
@@ -886,7 +886,7 @@ static int probe_disk(struct vdc_port *port)
return 0;
out_cleanup_disk:
- blk_cleanup_disk(g);
+ put_disk(g);
out_free_tag:
blk_mq_free_tag_set(&port->tag_set);
return err;
@@ -1070,7 +1070,7 @@ static void vdc_port_remove(struct vio_dev *vdev)
del_timer_sync(&port->vio.timer);
del_gendisk(port->disk);
- blk_cleanup_disk(port->disk);
+ put_disk(port->disk);
blk_mq_free_tag_set(&port->tag_set);
vdc_free_tx_ring(port);
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 821594cd1315..42b4b6828690 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -783,7 +783,7 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs)
if (fs->registered)
del_gendisk(fs->disk);
- blk_cleanup_disk(disk);
+ put_disk(disk);
blk_mq_free_tag_set(&fs->tag_set);
}
@@ -840,6 +840,7 @@ static int swim_floppy_init(struct swim_priv *swd)
swd->unit[drive].disk->minors = 1;
sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
swd->unit[drive].disk->fops = &floppy_fops;
+ swd->unit[drive].disk->flags |= GENHD_FL_NO_PART;
swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
swd->unit[drive].disk->private_data = &swd->unit[drive];
set_capacity(swd->unit[drive].disk, 2880);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 4b91c9aa5892..da811a7da03f 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1227,7 +1227,7 @@ static int swim3_attach(struct macio_dev *mdev,
disk->fops = &floppy_fops;
disk->private_data = fs;
disk->events = DISK_EVENT_MEDIA_CHANGE;
- disk->flags |= GENHD_FL_REMOVABLE;
+ disk->flags |= GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
sprintf(disk->disk_name, "fd%d", floppy_count);
set_capacity(disk, 2880);
rc = add_disk(disk);
@@ -1238,7 +1238,7 @@ static int swim3_attach(struct macio_dev *mdev,
return 0;
out_cleanup_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_free_tag_set:
blk_mq_free_tag_set(&fs->tag_set);
out_unregister:
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
deleted file mode 100644
index d1676fe0da1a..000000000000
--- a/drivers/block/sx8.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
- *
- * Copyright 2004-2005 Red Hat, Inc.
- *
- * Author/maintainer: Jeff Garzik <jgarzik@pobox.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/blk-mq.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/compiler.h>
-#include <linux/workqueue.h>
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/ktime.h>
-#include <linux/hdreg.h>
-#include <linux/dma-mapping.h>
-#include <linux/completion.h>
-#include <linux/scatterlist.h>
-#include <asm/io.h>
-#include <linux/uaccess.h>
-
-#if 0
-#define CARM_DEBUG
-#define CARM_VERBOSE_DEBUG
-#else
-#undef CARM_DEBUG
-#undef CARM_VERBOSE_DEBUG
-#endif
-#undef CARM_NDEBUG
-
-#define DRV_NAME "sx8"
-#define DRV_VERSION "1.0"
-#define PFX DRV_NAME ": "
-
-MODULE_AUTHOR("Jeff Garzik");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Promise SATA SX8 block driver");
-MODULE_VERSION(DRV_VERSION);
-
-/*
- * SX8 hardware has a single message queue for all ATA ports.
- * When this driver was written, the hardware (firmware?) would
- * corrupt data eventually, if more than one request was outstanding.
- * As one can imagine, having 8 ports bottlenecking on a single
- * command hurts performance.
- *
- * Based on user reports, later versions of the hardware (firmware?)
- * seem to be able to survive with more than one command queued.
- *
- * Therefore, we default to the safe option -- 1 command -- but
- * allow the user to increase this.
- *
- * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ),
- * but problems seem to occur when you exceed ~30, even on newer hardware.
- */
-static int max_queue = 1;
-module_param(max_queue, int, 0444);
-MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)");
-
-
-#define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN)
-
-/* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
-#define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
-#define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
-#define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))
-
-/* note: prints function name for you */
-#ifdef CARM_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#ifdef CARM_VERBOSE_DEBUG
-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define VPRINTK(fmt, args...)
-#endif /* CARM_VERBOSE_DEBUG */
-#else
-#define DPRINTK(fmt, args...)
-#define VPRINTK(fmt, args...)
-#endif /* CARM_DEBUG */
-
-#ifdef CARM_NDEBUG
-#define assert(expr)
-#else
-#define assert(expr) \
- if(unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- }
-#endif
-
-/* defines only for the constants which don't work well as enums */
-struct carm_host;
-
-enum {
- /* adapter-wide limits */
- CARM_MAX_PORTS = 8,
- CARM_SHM_SIZE = (4096 << 7),
- CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
- CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,
-
- /* command message queue limits */
- CARM_MAX_REQ = 64, /* max command msgs per host */
- CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */
-
- /* S/G limits, host-wide and per-request */
- CARM_MAX_REQ_SG = 32, /* max s/g entries per request */
- CARM_MAX_HOST_SG = 600, /* max s/g entries per host */
- CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */
-
- /* hardware registers */
- CARM_IHQP = 0x1c,
- CARM_INT_STAT = 0x10, /* interrupt status */
- CARM_INT_MASK = 0x14, /* interrupt mask */
- CARM_HMUC = 0x18, /* host message unit control */
- RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */
- RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */
- RBUF_BYTE_SZ = 0x28,
- CARM_RESP_IDX = 0x2c,
- CARM_CMS0 = 0x30, /* command message size reg 0 */
- CARM_LMUC = 0x48,
- CARM_HMPHA = 0x6c,
- CARM_INITC = 0xb5,
-
- /* bits in CARM_INT_{STAT,MASK} */
- INT_RESERVED = 0xfffffff0,
- INT_WATCHDOG = (1 << 3), /* watchdog timer */
- INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */
- INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */
- INT_RESPONSE = (1 << 0), /* response msg available */
- INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
- INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
- INT_RESPONSE,
-
- /* command messages, and related register bits */
- CARM_HAVE_RESP = 0x01,
- CARM_MSG_READ = 1,
- CARM_MSG_WRITE = 2,
- CARM_MSG_VERIFY = 3,
- CARM_MSG_GET_CAPACITY = 4,
- CARM_MSG_FLUSH = 5,
- CARM_MSG_IOCTL = 6,
- CARM_MSG_ARRAY = 8,
- CARM_MSG_MISC = 9,
- CARM_CME = (1 << 2),
- CARM_RME = (1 << 1),
- CARM_WZBC = (1 << 0),
- CARM_RMI = (1 << 0),
- CARM_Q_FULL = (1 << 3),
- CARM_MSG_SIZE = 288,
- CARM_Q_LEN = 48,
-
- /* CARM_MSG_IOCTL messages */
- CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */
- CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */
- CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */
-
- IOC_SCAN_CHAN_NODEV = 0x1f,
- IOC_SCAN_CHAN_OFFSET = 0x40,
-
- /* CARM_MSG_ARRAY messages */
- CARM_ARRAY_INFO = 0,
-
- ARRAY_NO_EXIST = (1 << 31),
-
- /* response messages */
- RMSG_SZ = 8, /* sizeof(struct carm_response) */
- RMSG_Q_LEN = 48, /* resp. msg list length */
- RMSG_OK = 1, /* bit indicating msg was successful */
- /* length of entire resp. msg buffer */
- RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,
-
- PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */
-
- /* CARM_MSG_MISC messages */
- MISC_GET_FW_VER = 2,
- MISC_ALLOC_MEM = 3,
- MISC_SET_TIME = 5,
-
- /* MISC_GET_FW_VER feature bits */
- FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
- FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
- FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */
-
- /* carm_host flags */
- FL_NON_RAID = FW_VER_NON_RAID,
- FL_4PORT = FW_VER_4PORT,
- FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
- FL_DYN_MAJOR = (1 << 17),
-};
-
-enum {
- CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */
-};
-
-enum scatter_gather_types {
- SGT_32BIT = 0,
- SGT_64BIT = 1,
-};
-
-enum host_states {
- HST_INVALID, /* invalid state; never used */
- HST_ALLOC_BUF, /* setting up master SHM area */
- HST_ERROR, /* we never leave here */
- HST_PORT_SCAN, /* start dev scan */
- HST_DEV_SCAN_START, /* start per-device probe */
- HST_DEV_SCAN, /* continue per-device probe */
- HST_DEV_ACTIVATE, /* activate devices we found */
- HST_PROBE_FINISHED, /* probe is complete */
- HST_PROBE_START, /* initiate probe */
- HST_SYNC_TIME, /* tell firmware what time it is */
- HST_GET_FW_VER, /* get firmware version, adapter port cnt */
-};
-
-#ifdef CARM_DEBUG
-static const char *state_name[] = {
- "HST_INVALID",
- "HST_ALLOC_BUF",
- "HST_ERROR",
- "HST_PORT_SCAN",
- "HST_DEV_SCAN_START",
- "HST_DEV_SCAN",
- "HST_DEV_ACTIVATE",
- "HST_PROBE_FINISHED",
- "HST_PROBE_START",
- "HST_SYNC_TIME",
- "HST_GET_FW_VER",
-};
-#endif
-
-struct carm_port {
- unsigned int port_no;
- struct gendisk *disk;
- struct carm_host *host;
-
- /* attached device characteristics */
- u64 capacity;
- char name[41];
- u16 dev_geom_head;
- u16 dev_geom_sect;
- u16 dev_geom_cyl;
-};
-
-struct carm_request {
- int n_elem;
- unsigned int msg_type;
- unsigned int msg_subtype;
- unsigned int msg_bucket;
- struct scatterlist sg[CARM_MAX_REQ_SG];
-};
-
-struct carm_host {
- unsigned long flags;
- void __iomem *mmio;
- void *shm;
- dma_addr_t shm_dma;
-
- int major;
- int id;
- char name[32];
-
- spinlock_t lock;
- struct pci_dev *pdev;
- unsigned int state;
- u32 fw_ver;
-
- struct blk_mq_tag_set tag_set;
- struct request_queue *oob_q;
- unsigned int n_oob;
-
- unsigned int hw_sg_used;
-
- unsigned int resp_idx;
-
- unsigned int wait_q_prod;
- unsigned int wait_q_cons;
- struct request_queue *wait_q[CARM_MAX_WAIT_Q];
-
- void *msg_base;
- dma_addr_t msg_dma;
-
- int cur_scan_dev;
- unsigned long dev_active;
- unsigned long dev_present;
- struct carm_port port[CARM_MAX_PORTS];
-
- struct work_struct fsm_task;
-
- int probe_err;
- struct completion probe_comp;
-};
-
-struct carm_response {
- __le32 ret_handle;
- __le32 status;
-} __attribute__((packed));
-
-struct carm_msg_sg {
- __le32 start;
- __le32 len;
-} __attribute__((packed));
-
-struct carm_msg_rw {
- u8 type;
- u8 id;
- u8 sg_count;
- u8 sg_type;
- __le32 handle;
- __le32 lba;
- __le16 lba_count;
- __le16 lba_high;
- struct carm_msg_sg sg[32];
-} __attribute__((packed));
-
-struct carm_msg_allocbuf {
- u8 type;
- u8 subtype;
- u8 n_sg;
- u8 sg_type;
- __le32 handle;
- __le32 addr;
- __le32 len;
- __le32 evt_pool;
- __le32 n_evt;
- __le32 rbuf_pool;
- __le32 n_rbuf;
- __le32 msg_pool;
- __le32 n_msg;
- struct carm_msg_sg sg[8];
-} __attribute__((packed));
-
-struct carm_msg_ioctl {
- u8 type;
- u8 subtype;
- u8 array_id;
- u8 reserved1;
- __le32 handle;
- __le32 data_addr;
- u32 reserved2;
-} __attribute__((packed));
-
-struct carm_msg_sync_time {
- u8 type;
- u8 subtype;
- u16 reserved1;
- __le32 handle;
- u32 reserved2;
- __le32 timestamp;
-} __attribute__((packed));
-
-struct carm_msg_get_fw_ver {
- u8 type;
- u8 subtype;
- u16 reserved1;
- __le32 handle;
- __le32 data_addr;
- u32 reserved2;
-} __attribute__((packed));
-
-struct carm_fw_ver {
- __le32 version;
- u8 features;
- u8 reserved1;
- u16 reserved2;
-} __attribute__((packed));
-
-struct carm_array_info {
- __le32 size;
-
- __le16 size_hi;
- __le16 stripe_size;
-
- __le32 mode;
-
- __le16 stripe_blk_sz;
- __le16 reserved1;
-
- __le16 cyl;
- __le16 head;
-
- __le16 sect;
- u8 array_id;
- u8 reserved2;
-
- char name[40];
-
- __le32 array_status;
-
- /* device list continues beyond this point? */
-} __attribute__((packed));
-
-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
-static void carm_remove_one (struct pci_dev *pdev);
-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-
-static const struct pci_device_id carm_pci_tbl[] = {
- { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
- { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
- { } /* terminate list */
-};
-MODULE_DEVICE_TABLE(pci, carm_pci_tbl);
-
-static struct pci_driver carm_driver = {
- .name = DRV_NAME,
- .id_table = carm_pci_tbl,
- .probe = carm_init_one,
- .remove = carm_remove_one,
-};
-
-static const struct block_device_operations carm_bd_ops = {
- .owner = THIS_MODULE,
- .getgeo = carm_bdev_getgeo,
-};
-
-static unsigned int carm_host_id;
-static unsigned long carm_major_alloc;
-
-
-
-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct carm_port *port = bdev->bd_disk->private_data;
-
- geo->heads = (u8) port->dev_geom_head;
- geo->sectors = (u8) port->dev_geom_sect;
- geo->cylinders = port->dev_geom_cyl;
- return 0;
-}
-
-static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
-
-static inline int carm_lookup_bucket(u32 msg_size)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
- if (msg_size <= msg_sizes[i])
- return i;
-
- return -ENOENT;
-}
-
-static void carm_init_buckets(void __iomem *mmio)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
- writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i));
-}
-
-static inline void *carm_ref_msg(struct carm_host *host,
- unsigned int msg_idx)
-{
- return host->msg_base + (msg_idx * CARM_MSG_SIZE);
-}
-
-static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
- unsigned int msg_idx)
-{
- return host->msg_dma + (msg_idx * CARM_MSG_SIZE);
-}
-
-static int carm_send_msg(struct carm_host *host,
- struct carm_request *crq, unsigned tag)
-{
- void __iomem *mmio = host->mmio;
- u32 msg = (u32) carm_ref_msg_dma(host, tag);
- u32 cm_bucket = crq->msg_bucket;
- u32 tmp;
- int rc = 0;
-
- VPRINTK("ENTER\n");
-
- tmp = readl(mmio + CARM_HMUC);
- if (tmp & CARM_Q_FULL) {
-#if 0
- tmp = readl(mmio + CARM_INT_MASK);
- tmp |= INT_Q_AVAILABLE;
- writel(tmp, mmio + CARM_INT_MASK);
- readl(mmio + CARM_INT_MASK); /* flush */
-#endif
- DPRINTK("host msg queue full\n");
- rc = -EBUSY;
- } else {
- writel(msg | (cm_bucket << 1), mmio + CARM_IHQP);
- readl(mmio + CARM_IHQP); /* flush */
- }
-
- return rc;
-}
-
-static int carm_array_info (struct carm_host *host, unsigned int array_idx)
-{
- struct carm_msg_ioctl *ioc;
- u32 msg_data;
- dma_addr_t msg_dma;
- struct carm_request *crq;
- struct request *rq;
- int rc;
-
- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq)) {
- rc = -ENOMEM;
- goto err_out;
- }
- crq = blk_mq_rq_to_pdu(rq);
-
- ioc = carm_ref_msg(host, rq->tag);
- msg_dma = carm_ref_msg_dma(host, rq->tag);
- msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
-
- crq->msg_type = CARM_MSG_ARRAY;
- crq->msg_subtype = CARM_ARRAY_INFO;
- rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) +
- sizeof(struct carm_array_info));
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_ARRAY;
- ioc->subtype = CARM_ARRAY_INFO;
- ioc->array_id = (u8) array_idx;
- ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- spin_lock_irq(&host->lock);
- assert(host->state == HST_DEV_SCAN_START ||
- host->state == HST_DEV_SCAN);
- spin_unlock_irq(&host->lock);
-
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
- blk_execute_rq_nowait(NULL, rq, true, NULL);
-
- return 0;
-
-err_out:
- spin_lock_irq(&host->lock);
- host->state = HST_ERROR;
- spin_unlock_irq(&host->lock);
- return rc;
-}
-
-typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
-
-static int carm_send_special (struct carm_host *host, carm_sspc_t func)
-{
- struct request *rq;
- struct carm_request *crq;
- struct carm_msg_ioctl *ioc;
- void *mem;
- unsigned int msg_size;
- int rc;
-
- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq))
- return -ENOMEM;
- crq = blk_mq_rq_to_pdu(rq);
-
- mem = carm_ref_msg(host, rq->tag);
-
- msg_size = func(host, rq->tag, mem);
-
- ioc = mem;
- crq->msg_type = ioc->type;
- crq->msg_subtype = ioc->subtype;
- rc = carm_lookup_bucket(msg_size);
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
- blk_execute_rq_nowait(NULL, rq, true, NULL);
-
- return 0;
-}
-
-static unsigned int carm_fill_sync_time(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_sync_time *st = mem;
-
- time64_t tv = ktime_get_real_seconds();
-
- memset(st, 0, sizeof(*st));
- st->type = CARM_MSG_MISC;
- st->subtype = MISC_SET_TIME;
- st->handle = cpu_to_le32(TAG_ENCODE(idx));
- st->timestamp = cpu_to_le32(tv);
-
- return sizeof(struct carm_msg_sync_time);
-}
-
-static unsigned int carm_fill_alloc_buf(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_allocbuf *ab = mem;
-
- memset(ab, 0, sizeof(*ab));
- ab->type = CARM_MSG_MISC;
- ab->subtype = MISC_ALLOC_MEM;
- ab->handle = cpu_to_le32(TAG_ENCODE(idx));
- ab->n_sg = 1;
- ab->sg_type = SGT_32BIT;
- ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
- ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1);
- ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
- ab->n_evt = cpu_to_le32(1024);
- ab->rbuf_pool = cpu_to_le32(host->shm_dma);
- ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
- ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
- ab->n_msg = cpu_to_le32(CARM_Q_LEN);
- ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
- ab->sg[0].len = cpu_to_le32(65536);
-
- return sizeof(struct carm_msg_allocbuf);
-}
-
-static unsigned int carm_fill_scan_channels(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_ioctl *ioc = mem;
- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
- IOC_SCAN_CHAN_OFFSET);
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_IOCTL;
- ioc->subtype = CARM_IOC_SCAN_CHAN;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- /* fill output data area with "no device" default values */
- mem += IOC_SCAN_CHAN_OFFSET;
- memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
-
- return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS;
-}
-
-static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_get_fw_ver *ioc = mem;
- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_MISC;
- ioc->subtype = MISC_GET_FW_VER;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- return sizeof(struct carm_msg_get_fw_ver) +
- sizeof(struct carm_fw_ver);
-}
-
-static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
-{
- unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
-
- blk_mq_stop_hw_queues(q);
- VPRINTK("STOPPED QUEUE %p\n", q);
-
- host->wait_q[idx] = q;
- host->wait_q_prod++;
- BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
-}
-
-static inline struct request_queue *carm_pop_q(struct carm_host *host)
-{
- unsigned int idx;
-
- if (host->wait_q_prod == host->wait_q_cons)
- return NULL;
-
- idx = host->wait_q_cons % CARM_MAX_WAIT_Q;
- host->wait_q_cons++;
-
- return host->wait_q[idx];
-}
-
-static inline void carm_round_robin(struct carm_host *host)
-{
- struct request_queue *q = carm_pop_q(host);
- if (q) {
- blk_mq_start_hw_queues(q);
- VPRINTK("STARTED QUEUE %p\n", q);
- }
-}
-
-static inline enum dma_data_direction carm_rq_dir(struct request *rq)
-{
- return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-}
-
-static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- struct request_queue *q = hctx->queue;
- struct request *rq = bd->rq;
- struct carm_port *port = q->queuedata;
- struct carm_host *host = port->host;
- struct carm_request *crq = blk_mq_rq_to_pdu(rq);
- struct carm_msg_rw *msg;
- struct scatterlist *sg;
- int i, n_elem = 0, rc;
- unsigned int msg_size;
- u32 tmp;
-
- crq->n_elem = 0;
- sg_init_table(crq->sg, CARM_MAX_REQ_SG);
-
- blk_mq_start_request(rq);
-
- spin_lock_irq(&host->lock);
- if (req_op(rq) == REQ_OP_DRV_OUT)
- goto send_msg;
-
- /* get scatterlist from block layer */
- sg = &crq->sg[0];
- n_elem = blk_rq_map_sg(q, rq, sg);
- if (n_elem <= 0)
- goto out_ioerr;
-
- /* map scatterlist to PCI bus addresses */
- n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq));
- if (n_elem <= 0)
- goto out_ioerr;
-
- /* obey global hardware limit on S/G entries */
- if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem)
- goto out_resource;
-
- crq->n_elem = n_elem;
- host->hw_sg_used += n_elem;
-
- /*
- * build read/write message
- */
-
- VPRINTK("build msg\n");
- msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag);
-
- if (rq_data_dir(rq) == WRITE) {
- msg->type = CARM_MSG_WRITE;
- crq->msg_type = CARM_MSG_WRITE;
- } else {
- msg->type = CARM_MSG_READ;
- crq->msg_type = CARM_MSG_READ;
- }
-
- msg->id = port->port_no;
- msg->sg_count = n_elem;
- msg->sg_type = SGT_32BIT;
- msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
- msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
- tmp = (blk_rq_pos(rq) >> 16) >> 16;
- msg->lba_high = cpu_to_le16( (u16) tmp );
- msg->lba_count = cpu_to_le16(blk_rq_sectors(rq));
-
- msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
- for (i = 0; i < n_elem; i++) {
- struct carm_msg_sg *carm_sg = &msg->sg[i];
- carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
- carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
- msg_size += sizeof(struct carm_msg_sg);
- }
-
- rc = carm_lookup_bucket(msg_size);
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-send_msg:
- /*
- * queue read/write message to hardware
- */
- VPRINTK("send msg, tag == %u\n", rq->tag);
- rc = carm_send_msg(host, crq, rq->tag);
- if (rc) {
- host->hw_sg_used -= n_elem;
- goto out_resource;
- }
-
- spin_unlock_irq(&host->lock);
- return BLK_STS_OK;
-out_resource:
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq));
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
-out_ioerr:
- carm_round_robin(host);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
-}
-
-static void carm_handle_array_info(struct carm_host *host,
- struct carm_request *crq, u8 *mem,
- blk_status_t error)
-{
- struct carm_port *port;
- u8 *msg_data = mem + sizeof(struct carm_array_info);
- struct carm_array_info *desc = (struct carm_array_info *) msg_data;
- u64 lo, hi;
- int cur_port;
- size_t slen;
-
- DPRINTK("ENTER\n");
-
- if (error)
- goto out;
- if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
- goto out;
-
- cur_port = host->cur_scan_dev;
-
- /* should never occur */
- if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
- printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
- cur_port, (int) desc->array_id);
- goto out;
- }
-
- port = &host->port[cur_port];
-
- lo = (u64) le32_to_cpu(desc->size);
- hi = (u64) le16_to_cpu(desc->size_hi);
-
- port->capacity = lo | (hi << 32);
- port->dev_geom_head = le16_to_cpu(desc->head);
- port->dev_geom_sect = le16_to_cpu(desc->sect);
- port->dev_geom_cyl = le16_to_cpu(desc->cyl);
-
- host->dev_active |= (1 << cur_port);
-
- strncpy(port->name, desc->name, sizeof(port->name));
- port->name[sizeof(port->name) - 1] = 0;
- slen = strlen(port->name);
- while (slen && (port->name[slen - 1] == ' ')) {
- port->name[slen - 1] = 0;
- slen--;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
- pci_name(host->pdev), port->port_no,
- (unsigned long long) port->capacity);
- printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
- pci_name(host->pdev), port->port_no, port->name);
-
-out:
- assert(host->state == HST_DEV_SCAN);
- schedule_work(&host->fsm_task);
-}
-
-static void carm_handle_scan_chan(struct carm_host *host,
- struct carm_request *crq, u8 *mem,
- blk_status_t error)
-{
- u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
- unsigned int i, dev_count = 0;
- int new_state = HST_DEV_SCAN_START;
-
- DPRINTK("ENTER\n");
-
- if (error) {
- new_state = HST_ERROR;
- goto out;
- }
-
- /* TODO: scan and support non-disk devices */
- for (i = 0; i < 8; i++)
- if (msg_data[i] == 0) { /* direct-access device (disk) */
- host->dev_present |= (1 << i);
- dev_count++;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
- pci_name(host->pdev), dev_count);
-
-out:
- assert(host->state == HST_PORT_SCAN);
- host->state = new_state;
- schedule_work(&host->fsm_task);
-}
-
-static void carm_handle_generic(struct carm_host *host,
- struct carm_request *crq, blk_status_t error,
- int cur_state, int next_state)
-{
- DPRINTK("ENTER\n");
-
- assert(host->state == cur_state);
- if (error)
- host->state = HST_ERROR;
- else
- host->state = next_state;
- schedule_work(&host->fsm_task);
-}
-
-static inline void carm_handle_resp(struct carm_host *host,
- __le32 ret_handle_le, u32 status)
-{
- u32 handle = le32_to_cpu(ret_handle_le);
- unsigned int msg_idx;
- struct request *rq;
- struct carm_request *crq;
- blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
- u8 *mem;
-
- VPRINTK("ENTER, handle == 0x%x\n", handle);
-
- if (unlikely(!TAG_VALID(handle))) {
- printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
- pci_name(host->pdev), handle);
- return;
- }
-
- msg_idx = TAG_DECODE(handle);
- VPRINTK("tag == %u\n", msg_idx);
-
- rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx);
- crq = blk_mq_rq_to_pdu(rq);
-
- /* fast path */
- if (likely(crq->msg_type == CARM_MSG_READ ||
- crq->msg_type == CARM_MSG_WRITE)) {
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem,
- carm_rq_dir(rq));
- goto done;
- }
-
- mem = carm_ref_msg(host, msg_idx);
-
- switch (crq->msg_type) {
- case CARM_MSG_IOCTL: {
- switch (crq->msg_subtype) {
- case CARM_IOC_SCAN_CHAN:
- carm_handle_scan_chan(host, crq, mem, error);
- goto done;
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- case CARM_MSG_MISC: {
- switch (crq->msg_subtype) {
- case MISC_ALLOC_MEM:
- carm_handle_generic(host, crq, error,
- HST_ALLOC_BUF, HST_SYNC_TIME);
- goto done;
- case MISC_SET_TIME:
- carm_handle_generic(host, crq, error,
- HST_SYNC_TIME, HST_GET_FW_VER);
- goto done;
- case MISC_GET_FW_VER: {
- struct carm_fw_ver *ver = (struct carm_fw_ver *)
- (mem + sizeof(struct carm_msg_get_fw_ver));
- if (!error) {
- host->fw_ver = le32_to_cpu(ver->version);
- host->flags |= (ver->features & FL_FW_VER_MASK);
- }
- carm_handle_generic(host, crq, error,
- HST_GET_FW_VER, HST_PORT_SCAN);
- goto done;
- }
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- case CARM_MSG_ARRAY: {
- switch (crq->msg_subtype) {
- case CARM_ARRAY_INFO:
- carm_handle_array_info(host, crq, mem, error);
- break;
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- default:
- /* unknown / invalid response */
- goto err_out;
- }
-
- return;
-
-err_out:
- printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
- pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- error = BLK_STS_IOERR;
-done:
- host->hw_sg_used -= crq->n_elem;
- blk_mq_end_request(blk_mq_rq_from_pdu(crq), error);
-
- if (host->hw_sg_used <= CARM_SG_LOW_WATER)
- carm_round_robin(host);
-}
-
-static inline void carm_handle_responses(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- struct carm_response *resp = (struct carm_response *) host->shm;
- unsigned int work = 0;
- unsigned int idx = host->resp_idx % RMSG_Q_LEN;
-
- while (1) {
- u32 status = le32_to_cpu(resp[idx].status);
-
- if (status == 0xffffffff) {
- VPRINTK("ending response on index %u\n", idx);
- writel(idx << 3, mmio + CARM_RESP_IDX);
- break;
- }
-
- /* response to a message we sent */
- else if ((status & (1 << 31)) == 0) {
- VPRINTK("handling msg response on index %u\n", idx);
- carm_handle_resp(host, resp[idx].ret_handle, status);
- resp[idx].status = cpu_to_le32(0xffffffff);
- }
-
- /* asynchronous events the hardware throws our way */
- else if ((status & 0xff000000) == (1 << 31)) {
- u8 *evt_type_ptr = (u8 *) &resp[idx];
- u8 evt_type = *evt_type_ptr;
- printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
- pci_name(host->pdev), (int) evt_type);
- resp[idx].status = cpu_to_le32(0xffffffff);
- }
-
- idx = NEXT_RESP(idx);
- work++;
- }
-
- VPRINTK("EXIT, work==%u\n", work);
- host->resp_idx += work;
-}
-
-static irqreturn_t carm_interrupt(int irq, void *__host)
-{
- struct carm_host *host = __host;
- void __iomem *mmio;
- u32 mask;
- int handled = 0;
- unsigned long flags;
-
- if (!host) {
- VPRINTK("no host\n");
- return IRQ_NONE;
- }
-
- spin_lock_irqsave(&host->lock, flags);
-
- mmio = host->mmio;
-
- /* reading should also clear interrupts */
- mask = readl(mmio + CARM_INT_STAT);
-
- if (mask == 0 || mask == 0xffffffff) {
- VPRINTK("no work, mask == 0x%x\n", mask);
- goto out;
- }
-
- if (mask & INT_ACK_MASK)
- writel(mask, mmio + CARM_INT_STAT);
-
- if (unlikely(host->state == HST_INVALID)) {
- VPRINTK("not initialized yet, mask = 0x%x\n", mask);
- goto out;
- }
-
- if (mask & CARM_HAVE_RESP) {
- handled = 1;
- carm_handle_responses(host);
- }
-
-out:
- spin_unlock_irqrestore(&host->lock, flags);
- VPRINTK("EXIT\n");
- return IRQ_RETVAL(handled);
-}
-
-static void carm_fsm_task (struct work_struct *work)
-{
- struct carm_host *host =
- container_of(work, struct carm_host, fsm_task);
- unsigned long flags;
- unsigned int state;
- int rc, i, next_dev;
- int reschedule = 0;
- int new_state = HST_INVALID;
-
- spin_lock_irqsave(&host->lock, flags);
- state = host->state;
- spin_unlock_irqrestore(&host->lock, flags);
-
- DPRINTK("ENTER, state == %s\n", state_name[state]);
-
- switch (state) {
- case HST_PROBE_START:
- new_state = HST_ALLOC_BUF;
- reschedule = 1;
- break;
-
- case HST_ALLOC_BUF:
- rc = carm_send_special(host, carm_fill_alloc_buf);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_SYNC_TIME:
- rc = carm_send_special(host, carm_fill_sync_time);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_GET_FW_VER:
- rc = carm_send_special(host, carm_fill_get_fw_ver);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_PORT_SCAN:
- rc = carm_send_special(host, carm_fill_scan_channels);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_DEV_SCAN_START:
- host->cur_scan_dev = -1;
- new_state = HST_DEV_SCAN;
- reschedule = 1;
- break;
-
- case HST_DEV_SCAN:
- next_dev = -1;
- for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
- if (host->dev_present & (1 << i)) {
- next_dev = i;
- break;
- }
-
- if (next_dev >= 0) {
- host->cur_scan_dev = next_dev;
- rc = carm_array_info(host, next_dev);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- } else {
- new_state = HST_DEV_ACTIVATE;
- reschedule = 1;
- }
- break;
-
- case HST_DEV_ACTIVATE: {
- int activated = 0;
- for (i = 0; i < CARM_MAX_PORTS; i++)
- if (host->dev_active & (1 << i)) {
- struct carm_port *port = &host->port[i];
- struct gendisk *disk = port->disk;
-
- set_capacity(disk, port->capacity);
- host->probe_err = add_disk(disk);
- if (!host->probe_err)
- activated++;
- else
- break;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
- pci_name(host->pdev), activated);
-
- new_state = HST_PROBE_FINISHED;
- reschedule = 1;
- break;
- }
- case HST_PROBE_FINISHED:
- complete(&host->probe_comp);
- break;
- case HST_ERROR:
- /* FIXME: TODO */
- break;
-
- default:
- /* should never occur */
- printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
- assert(0);
- break;
- }
-
- if (new_state != HST_INVALID) {
- spin_lock_irqsave(&host->lock, flags);
- host->state = new_state;
- spin_unlock_irqrestore(&host->lock, flags);
- }
- if (reschedule)
- schedule_work(&host->fsm_task);
-}
-
-static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit)
-{
- unsigned int i;
-
- for (i = 0; i < 50000; i++) {
- u32 tmp = readl(mmio + CARM_LMUC);
- udelay(100);
-
- if (test_bit) {
- if ((tmp & bits) == bits)
- return 0;
- } else {
- if ((tmp & bits) == 0)
- return 0;
- }
-
- cond_resched();
- }
-
- printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
- bits, test_bit ? "yes" : "no");
- return -EBUSY;
-}
-
-static void carm_init_responses(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- unsigned int i;
- struct carm_response *resp = (struct carm_response *) host->shm;
-
- for (i = 0; i < RMSG_Q_LEN; i++)
- resp[i].status = cpu_to_le32(0xffffffff);
-
- writel(0, mmio + CARM_RESP_IDX);
-}
-
-static int carm_init_host(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- u32 tmp;
- u8 tmp8;
- int rc;
-
- DPRINTK("ENTER\n");
-
- writel(0, mmio + CARM_INT_MASK);
-
- tmp8 = readb(mmio + CARM_INITC);
- if (tmp8 & 0x01) {
- tmp8 &= ~0x01;
- writeb(tmp8, mmio + CARM_INITC);
- readb(mmio + CARM_INITC); /* flush */
-
- DPRINTK("snooze...\n");
- msleep(5000);
- }
-
- tmp = readl(mmio + CARM_HMUC);
- if (tmp & CARM_CME) {
- DPRINTK("CME bit present, waiting\n");
- rc = carm_init_wait(mmio, CARM_CME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 1 failed\n");
- return rc;
- }
- }
- if (tmp & CARM_RME) {
- DPRINTK("RME bit present, waiting\n");
- rc = carm_init_wait(mmio, CARM_RME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 2 failed\n");
- return rc;
- }
- }
-
- tmp &= ~(CARM_RME | CARM_CME);
- writel(tmp, mmio + CARM_HMUC);
- readl(mmio + CARM_HMUC); /* flush */
-
- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 3 failed\n");
- return rc;
- }
-
- carm_init_buckets(mmio);
-
- writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
- writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
- writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);
-
- tmp = readl(mmio + CARM_HMUC);
- tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
- writel(tmp, mmio + CARM_HMUC);
- readl(mmio + CARM_HMUC); /* flush */
-
- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 4 failed\n");
- return rc;
- }
-
- writel(0, mmio + CARM_HMPHA);
- writel(INT_DEF_MASK, mmio + CARM_INT_MASK);
-
- carm_init_responses(host);
-
- /* start initialization, probing state machine */
- spin_lock_irq(&host->lock);
- assert(host->state == HST_INVALID);
- host->state = HST_PROBE_START;
- spin_unlock_irq(&host->lock);
- schedule_work(&host->fsm_task);
-
- DPRINTK("EXIT\n");
- return 0;
-}
-
-static const struct blk_mq_ops carm_mq_ops = {
- .queue_rq = carm_queue_rq,
-};
-
-static int carm_init_disk(struct carm_host *host, unsigned int port_no)
-{
- struct carm_port *port = &host->port[port_no];
- struct gendisk *disk;
-
- port->host = host;
- port->port_no = port_no;
-
- disk = blk_mq_alloc_disk(&host->tag_set, port);
- if (IS_ERR(disk))
- return PTR_ERR(disk);
-
- port->disk = disk;
- sprintf(disk->disk_name, DRV_NAME "/%u",
- (unsigned int)host->id * CARM_MAX_PORTS + port_no);
- disk->major = host->major;
- disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
- disk->minors = CARM_MINORS_PER_MAJOR;
- disk->fops = &carm_bd_ops;
- disk->private_data = port;
-
- blk_queue_max_segments(disk->queue, CARM_MAX_REQ_SG);
- blk_queue_segment_boundary(disk->queue, CARM_SG_BOUNDARY);
- return 0;
-}
-
-static void carm_free_disk(struct carm_host *host, unsigned int port_no)
-{
- struct carm_port *port = &host->port[port_no];
- struct gendisk *disk = port->disk;
-
- if (!disk)
- return;
-
- if (host->state > HST_DEV_ACTIVATE)
- del_gendisk(disk);
- blk_cleanup_disk(disk);
-}
-
-static int carm_init_shm(struct carm_host *host)
-{
- host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE,
- &host->shm_dma, GFP_KERNEL);
- if (!host->shm)
- return -ENOMEM;
-
- host->msg_base = host->shm + RBUF_LEN;
- host->msg_dma = host->shm_dma + RBUF_LEN;
-
- memset(host->shm, 0xff, RBUF_LEN);
- memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);
-
- return 0;
-}
-
-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- struct carm_host *host;
- int rc;
- struct request_queue *q;
- unsigned int i;
-
- printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
-
- rc = pci_enable_device(pdev);
- if (rc)
- return rc;
-
- rc = pci_request_regions(pdev, DRV_NAME);
- if (rc)
- goto err_out;
-
- rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
- pci_name(pdev));
- goto err_out_regions;
- }
-
- host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host) {
- rc = -ENOMEM;
- goto err_out_regions;
- }
-
- host->pdev = pdev;
- spin_lock_init(&host->lock);
- INIT_WORK(&host->fsm_task, carm_fsm_task);
- init_completion(&host->probe_comp);
-
- host->mmio = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
- if (!host->mmio) {
- printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
- pci_name(pdev));
- rc = -ENOMEM;
- goto err_out_kfree;
- }
-
- rc = carm_init_shm(host);
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
- pci_name(pdev));
- goto err_out_iounmap;
- }
-
- memset(&host->tag_set, 0, sizeof(host->tag_set));
- host->tag_set.ops = &carm_mq_ops;
- host->tag_set.cmd_size = sizeof(struct carm_request);
- host->tag_set.nr_hw_queues = 1;
- host->tag_set.nr_maps = 1;
- host->tag_set.queue_depth = max_queue;
- host->tag_set.numa_node = NUMA_NO_NODE;
- host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-
- rc = blk_mq_alloc_tag_set(&host->tag_set);
- if (rc)
- goto err_out_dma_free;
-
- q = blk_mq_init_queue(&host->tag_set);
- if (IS_ERR(q)) {
- rc = PTR_ERR(q);
- blk_mq_free_tag_set(&host->tag_set);
- goto err_out_dma_free;
- }
-
- host->oob_q = q;
- q->queuedata = host;
-
- /*
- * Figure out which major to use: 160, 161, or dynamic
- */
- if (!test_and_set_bit(0, &carm_major_alloc))
- host->major = 160;
- else if (!test_and_set_bit(1, &carm_major_alloc))
- host->major = 161;
- else
- host->flags |= FL_DYN_MAJOR;
-
- host->id = carm_host_id;
- sprintf(host->name, DRV_NAME "%d", carm_host_id);
-
- rc = register_blkdev(host->major, host->name);
- if (rc < 0)
- goto err_out_free_majors;
- if (host->flags & FL_DYN_MAJOR)
- host->major = rc;
-
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- rc = carm_init_disk(host, i);
- if (rc)
- goto err_out_blkdev_disks;
- }
-
- pci_set_master(pdev);
-
- rc = request_irq(pdev->irq, carm_interrupt, IRQF_SHARED, DRV_NAME, host);
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
- pci_name(pdev));
- goto err_out_blkdev_disks;
- }
-
- rc = carm_init_host(host);
- if (rc)
- goto err_out_free_irq;
-
- DPRINTK("waiting for probe_comp\n");
- host->probe_err = -ENODEV;
- wait_for_completion(&host->probe_comp);
- if (host->probe_err) {
- rc = host->probe_err;
- goto err_out_free_irq;
- }
-
- printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n",
- host->name, pci_name(pdev), (int) CARM_MAX_PORTS,
- (unsigned long long)pci_resource_start(pdev, 0),
- pdev->irq, host->major);
-
- carm_host_id++;
- pci_set_drvdata(pdev, host);
- return 0;
-
-err_out_free_irq:
- free_irq(pdev->irq, host);
-err_out_blkdev_disks:
- for (i = 0; i < CARM_MAX_PORTS; i++)
- carm_free_disk(host, i);
- unregister_blkdev(host->major, host->name);
-err_out_free_majors:
- if (host->major == 160)
- clear_bit(0, &carm_major_alloc);
- else if (host->major == 161)
- clear_bit(1, &carm_major_alloc);
- blk_cleanup_queue(host->oob_q);
- blk_mq_free_tag_set(&host->tag_set);
-err_out_dma_free:
- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
-err_out_iounmap:
- iounmap(host->mmio);
-err_out_kfree:
- kfree(host);
-err_out_regions:
- pci_release_regions(pdev);
-err_out:
- pci_disable_device(pdev);
- return rc;
-}
-
-static void carm_remove_one (struct pci_dev *pdev)
-{
- struct carm_host *host = pci_get_drvdata(pdev);
- unsigned int i;
-
- if (!host) {
- printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
- pci_name(pdev));
- return;
- }
-
- free_irq(pdev->irq, host);
- for (i = 0; i < CARM_MAX_PORTS; i++)
- carm_free_disk(host, i);
- unregister_blkdev(host->major, host->name);
- if (host->major == 160)
- clear_bit(0, &carm_major_alloc);
- else if (host->major == 161)
- clear_bit(1, &carm_major_alloc);
- blk_cleanup_queue(host->oob_q);
- blk_mq_free_tag_set(&host->tag_set);
- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
- iounmap(host->mmio);
- kfree(host);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-}
-
-module_pci_driver(carm_driver);
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
new file mode 100644
index 000000000000..f96cb01e9604
--- /dev/null
+++ b/drivers/block/ublk_drv.c
@@ -0,0 +1,2113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Userspace block device - block device which IO is handled from userspace
+ *
+ * Take full use of io_uring passthrough command for communicating with
+ * ublk userspace daemon(ublksrvd) for handling basic IO request.
+ *
+ * Copyright 2022 Ming Lei <ming.lei@redhat.com>
+ *
+ * (part of code stolen from loop.c)
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/mutex.h>
+#include <linux/writeback.h>
+#include <linux/completion.h>
+#include <linux/highmem.h>
+#include <linux/sysfs.h>
+#include <linux/miscdevice.h>
+#include <linux/falloc.h>
+#include <linux/uio.h>
+#include <linux/ioprio.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <linux/cdev.h>
+#include <linux/io_uring.h>
+#include <linux/blk-mq.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <linux/task_work.h>
+#include <uapi/linux/ublk_cmd.h>
+
+#define UBLK_MINORS (1U << MINORBITS)
+
+/* All UBLK_F_* have to be included into UBLK_F_ALL */
+#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
+ | UBLK_F_URING_CMD_COMP_IN_TASK \
+ | UBLK_F_NEED_GET_DATA \
+ | UBLK_F_USER_RECOVERY \
+ | UBLK_F_USER_RECOVERY_REISSUE)
+
+/* All UBLK_PARAM_TYPE_* should be included here */
+#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
+
+struct ublk_rq_data {
+ union {
+ struct callback_head work;
+ struct llist_node node;
+ };
+};
+
+struct ublk_uring_cmd_pdu {
+ struct ublk_queue *ubq;
+};
+
+/*
+ * io command is active: sqe cmd is received, and its cqe isn't done
+ *
+ * If the flag is set, the io command is owned by ublk driver, and waited
+ * for incoming blk-mq request from the ublk block device.
+ *
+ * If the flag is cleared, the io command will be completed, and owned by
+ * ublk server.
+ */
+#define UBLK_IO_FLAG_ACTIVE 0x01
+
+/*
+ * IO command is completed via cqe, and it is being handled by ublksrv, and
+ * not committed yet
+ *
+ * Basically exclusively with UBLK_IO_FLAG_ACTIVE, so can be served for
+ * cross verification
+ */
+#define UBLK_IO_FLAG_OWNED_BY_SRV 0x02
+
+/*
+ * IO command is aborted, so this flag is set in case of
+ * !UBLK_IO_FLAG_ACTIVE.
+ *
+ * After this flag is observed, any pending or new incoming request
+ * associated with this io command will be failed immediately
+ */
+#define UBLK_IO_FLAG_ABORTED 0x04
+
+/*
+ * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
+ * get data buffer address from ublksrv.
+ *
+ * Then, bio data could be copied into this data buffer for a WRITE request
+ * after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset.
+ */
+#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+
+struct ublk_io {
+ /* userspace buffer address from io cmd */
+ __u64 addr;
+ unsigned int flags;
+ int res;
+
+ struct io_uring_cmd *cmd;
+};
+
+struct ublk_queue {
+ int q_id;
+ int q_depth;
+
+ unsigned long flags;
+ struct task_struct *ubq_daemon;
+ char *io_cmd_buf;
+
+ struct llist_head io_cmds;
+
+ unsigned long io_addr; /* mapped vm address */
+ unsigned int max_io_sz;
+ bool force_abort;
+ unsigned short nr_io_ready; /* how many ios setup */
+ struct ublk_device *dev;
+ struct ublk_io ios[];
+};
+
+#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)
+
+struct ublk_device {
+ struct gendisk *ub_disk;
+
+ char *__queues;
+
+ unsigned short queue_size;
+ struct ublksrv_ctrl_dev_info dev_info;
+
+ struct blk_mq_tag_set tag_set;
+
+ struct cdev cdev;
+ struct device cdev_dev;
+
+#define UB_STATE_OPEN 0
+#define UB_STATE_USED 1
+ unsigned long state;
+ int ub_number;
+
+ struct mutex mutex;
+
+ spinlock_t mm_lock;
+ struct mm_struct *mm;
+
+ struct ublk_params params;
+
+ struct completion completion;
+ unsigned int nr_queues_ready;
+ atomic_t nr_aborted_queues;
+
+ /*
+ * Our ubq->daemon may be killed without any notification, so
+ * monitor each queue's daemon periodically
+ */
+ struct delayed_work monitor_work;
+ struct work_struct quiesce_work;
+ struct work_struct stop_work;
+};
+
+/* header of ublk_params */
+struct ublk_params_header {
+ __u32 len;
+ __u32 types;
+};
+
+static dev_t ublk_chr_devt;
+static struct class *ublk_chr_class;
+
+static DEFINE_IDR(ublk_index_idr);
+static DEFINE_SPINLOCK(ublk_idr_lock);
+static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */
+
+static DEFINE_MUTEX(ublk_ctl_mutex);
+
+static struct miscdevice ublk_misc;
+
+static void ublk_dev_param_basic_apply(struct ublk_device *ub)
+{
+ struct request_queue *q = ub->ub_disk->queue;
+ const struct ublk_param_basic *p = &ub->params.basic;
+
+ blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
+ blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
+ blk_queue_io_min(q, 1 << p->io_min_shift);
+ blk_queue_io_opt(q, 1 << p->io_opt_shift);
+
+ blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
+ p->attrs & UBLK_ATTR_FUA);
+ if (p->attrs & UBLK_ATTR_ROTATIONAL)
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+ else
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+
+ blk_queue_max_hw_sectors(q, p->max_sectors);
+ blk_queue_chunk_sectors(q, p->chunk_sectors);
+ blk_queue_virt_boundary(q, p->virt_boundary_mask);
+
+ if (p->attrs & UBLK_ATTR_READ_ONLY)
+ set_disk_ro(ub->ub_disk, true);
+
+ set_capacity(ub->ub_disk, p->dev_sectors);
+}
+
+static void ublk_dev_param_discard_apply(struct ublk_device *ub)
+{
+ struct request_queue *q = ub->ub_disk->queue;
+ const struct ublk_param_discard *p = &ub->params.discard;
+
+ q->limits.discard_alignment = p->discard_alignment;
+ q->limits.discard_granularity = p->discard_granularity;
+ blk_queue_max_discard_sectors(q, p->max_discard_sectors);
+ blk_queue_max_write_zeroes_sectors(q,
+ p->max_write_zeroes_sectors);
+ blk_queue_max_discard_segments(q, p->max_discard_segments);
+}
+
+static int ublk_validate_params(const struct ublk_device *ub)
+{
+ /* basic param is the only one which must be set */
+ if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
+ const struct ublk_param_basic *p = &ub->params.basic;
+
+ if (p->logical_bs_shift > PAGE_SHIFT)
+ return -EINVAL;
+
+ if (p->logical_bs_shift > p->physical_bs_shift)
+ return -EINVAL;
+
+ if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
+ const struct ublk_param_discard *p = &ub->params.discard;
+
+ /* So far, only support single segment discard */
+ if (p->max_discard_sectors && p->max_discard_segments != 1)
+ return -EINVAL;
+
+ if (!p->discard_granularity)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ublk_apply_params(struct ublk_device *ub)
+{
+ if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
+ return -EINVAL;
+
+ ublk_dev_param_basic_apply(ub);
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
+ ublk_dev_param_discard_apply(ub);
+
+ return 0;
+}
+
+static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
+{
+ if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
+ !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
+ return true;
+ return false;
+}
+
+static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
+{
+ if (ubq->flags & UBLK_F_NEED_GET_DATA)
+ return true;
+ return false;
+}
+
+static struct ublk_device *ublk_get_device(struct ublk_device *ub)
+{
+ if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
+ return ub;
+ return NULL;
+}
+
+static void ublk_put_device(struct ublk_device *ub)
+{
+ put_device(&ub->cdev_dev);
+}
+
+static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
+ int qid)
+{
+ return (struct ublk_queue *)&(dev->__queues[qid * dev->queue_size]);
+}
+
+static inline bool ublk_rq_has_data(const struct request *rq)
+{
+ return rq->bio && bio_has_data(rq->bio);
+}
+
+static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
+ int tag)
+{
+ return (struct ublksrv_io_desc *)
+ &(ubq->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+}
+
+static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)
+{
+ return ublk_get_queue(ub, q_id)->io_cmd_buf;
+}
+
+static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
+{
+ struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+
+ return round_up(ubq->q_depth * sizeof(struct ublksrv_io_desc),
+ PAGE_SIZE);
+}
+
+static inline bool ublk_queue_can_use_recovery_reissue(
+ struct ublk_queue *ubq)
+{
+ if ((ubq->flags & UBLK_F_USER_RECOVERY) &&
+ (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE))
+ return true;
+ return false;
+}
+
+static inline bool ublk_queue_can_use_recovery(
+ struct ublk_queue *ubq)
+{
+ if (ubq->flags & UBLK_F_USER_RECOVERY)
+ return true;
+ return false;
+}
+
+static inline bool ublk_can_use_recovery(struct ublk_device *ub)
+{
+ if (ub->dev_info.flags & UBLK_F_USER_RECOVERY)
+ return true;
+ return false;
+}
+
+static void ublk_free_disk(struct gendisk *disk)
+{
+ struct ublk_device *ub = disk->private_data;
+
+ clear_bit(UB_STATE_USED, &ub->state);
+ put_device(&ub->cdev_dev);
+}
+
+static const struct block_device_operations ub_fops = {
+ .owner = THIS_MODULE,
+ .free_disk = ublk_free_disk,
+};
+
+#define UBLK_MAX_PIN_PAGES 32
+
+struct ublk_map_data {
+ const struct ublk_queue *ubq;
+ const struct request *rq;
+ const struct ublk_io *io;
+ unsigned max_bytes;
+};
+
+struct ublk_io_iter {
+ struct page *pages[UBLK_MAX_PIN_PAGES];
+ unsigned pg_off; /* offset in the 1st page in pages */
+ int nr_pages; /* how many page pointers in pages */
+ struct bio *bio;
+ struct bvec_iter iter;
+};
+
+static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
+ unsigned max_bytes, bool to_vm)
+{
+ const unsigned total = min_t(unsigned, max_bytes,
+ PAGE_SIZE - data->pg_off +
+ ((data->nr_pages - 1) << PAGE_SHIFT));
+ unsigned done = 0;
+ unsigned pg_idx = 0;
+
+ while (done < total) {
+ struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
+ const unsigned int bytes = min3(bv.bv_len, total - done,
+ (unsigned)(PAGE_SIZE - data->pg_off));
+ void *bv_buf = bvec_kmap_local(&bv);
+ void *pg_buf = kmap_local_page(data->pages[pg_idx]);
+
+ if (to_vm)
+ memcpy(pg_buf + data->pg_off, bv_buf, bytes);
+ else
+ memcpy(bv_buf, pg_buf + data->pg_off, bytes);
+
+ kunmap_local(pg_buf);
+ kunmap_local(bv_buf);
+
+ /* advance page array */
+ data->pg_off += bytes;
+ if (data->pg_off == PAGE_SIZE) {
+ pg_idx += 1;
+ data->pg_off = 0;
+ }
+
+ done += bytes;
+
+ /* advance bio */
+ bio_advance_iter_single(data->bio, &data->iter, bytes);
+ if (!data->iter.bi_size) {
+ data->bio = data->bio->bi_next;
+ if (data->bio == NULL)
+ break;
+ data->iter = data->bio->bi_iter;
+ }
+ }
+
+ return done;
+}
+
+static inline int ublk_copy_user_pages(struct ublk_map_data *data,
+ bool to_vm)
+{
+ const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
+ const unsigned long start_vm = data->io->addr;
+ unsigned int done = 0;
+ struct ublk_io_iter iter = {
+ .pg_off = start_vm & (PAGE_SIZE - 1),
+ .bio = data->rq->bio,
+ .iter = data->rq->bio->bi_iter,
+ };
+ const unsigned int nr_pages = round_up(data->max_bytes +
+ (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;
+
+ while (done < nr_pages) {
+ const unsigned to_pin = min_t(unsigned, UBLK_MAX_PIN_PAGES,
+ nr_pages - done);
+ unsigned i, len;
+
+ iter.nr_pages = get_user_pages_fast(start_vm +
+ (done << PAGE_SHIFT), to_pin, gup_flags,
+ iter.pages);
+ if (iter.nr_pages <= 0)
+ return done == 0 ? iter.nr_pages : done;
+ len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm);
+ for (i = 0; i < iter.nr_pages; i++) {
+ if (to_vm)
+ set_page_dirty(iter.pages[i]);
+ put_page(iter.pages[i]);
+ }
+ data->max_bytes -= len;
+ done += iter.nr_pages;
+ }
+
+ return done;
+}
+
+static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
+ struct ublk_io *io)
+{
+ const unsigned int rq_bytes = blk_rq_bytes(req);
+ /*
+ * no zero copy, we delay copy WRITE request data into ublksrv
+ * context and the big benefit is that pinning pages in current
+ * context is pretty fast, see ublk_pin_user_pages
+ */
+ if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH)
+ return rq_bytes;
+
+ if (ublk_rq_has_data(req)) {
+ struct ublk_map_data data = {
+ .ubq = ubq,
+ .rq = req,
+ .io = io,
+ .max_bytes = rq_bytes,
+ };
+
+ ublk_copy_user_pages(&data, true);
+
+ return rq_bytes - data.max_bytes;
+ }
+ return rq_bytes;
+}
+
+static int ublk_unmap_io(const struct ublk_queue *ubq,
+ const struct request *req,
+ struct ublk_io *io)
+{
+ const unsigned int rq_bytes = blk_rq_bytes(req);
+
+ if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
+ struct ublk_map_data data = {
+ .ubq = ubq,
+ .rq = req,
+ .io = io,
+ .max_bytes = io->res,
+ };
+
+ WARN_ON_ONCE(io->res > rq_bytes);
+
+ ublk_copy_user_pages(&data, false);
+
+ return io->res - data.max_bytes;
+ }
+ return rq_bytes;
+}
+
+static inline unsigned int ublk_req_build_flags(struct request *req)
+{
+ unsigned flags = 0;
+
+ if (req->cmd_flags & REQ_FAILFAST_DEV)
+ flags |= UBLK_IO_F_FAILFAST_DEV;
+
+ if (req->cmd_flags & REQ_FAILFAST_TRANSPORT)
+ flags |= UBLK_IO_F_FAILFAST_TRANSPORT;
+
+ if (req->cmd_flags & REQ_FAILFAST_DRIVER)
+ flags |= UBLK_IO_F_FAILFAST_DRIVER;
+
+ if (req->cmd_flags & REQ_META)
+ flags |= UBLK_IO_F_META;
+
+ if (req->cmd_flags & REQ_FUA)
+ flags |= UBLK_IO_F_FUA;
+
+ if (req->cmd_flags & REQ_NOUNMAP)
+ flags |= UBLK_IO_F_NOUNMAP;
+
+ if (req->cmd_flags & REQ_SWAP)
+ flags |= UBLK_IO_F_SWAP;
+
+ return flags;
+}
+
+static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
+{
+ struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
+ struct ublk_io *io = &ubq->ios[req->tag];
+ u32 ublk_op;
+
+ switch (req_op(req)) {
+ case REQ_OP_READ:
+ ublk_op = UBLK_IO_OP_READ;
+ break;
+ case REQ_OP_WRITE:
+ ublk_op = UBLK_IO_OP_WRITE;
+ break;
+ case REQ_OP_FLUSH:
+ ublk_op = UBLK_IO_OP_FLUSH;
+ break;
+ case REQ_OP_DISCARD:
+ ublk_op = UBLK_IO_OP_DISCARD;
+ break;
+ case REQ_OP_WRITE_ZEROES:
+ ublk_op = UBLK_IO_OP_WRITE_ZEROES;
+ break;
+ default:
+ return BLK_STS_IOERR;
+ }
+
+ /* need to translate since kernel may change */
+ iod->op_flags = ublk_op | ublk_req_build_flags(req);
+ iod->nr_sectors = blk_rq_sectors(req);
+ iod->start_sector = blk_rq_pos(req);
+ iod->addr = io->addr;
+
+ return BLK_STS_OK;
+}
+
+static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
+ struct io_uring_cmd *ioucmd)
+{
+ return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu;
+}
+
+static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
+{
+ return ubq->ubq_daemon->flags & PF_EXITING;
+}
+
+/* todo: handle partial completion */
+static void ublk_complete_rq(struct request *req)
+{
+ struct ublk_queue *ubq = req->mq_hctx->driver_data;
+ struct ublk_io *io = &ubq->ios[req->tag];
+ unsigned int unmapped_bytes;
+
+ /* failed read IO if nothing is read */
+ if (!io->res && req_op(req) == REQ_OP_READ)
+ io->res = -EIO;
+
+ if (io->res < 0) {
+ blk_mq_end_request(req, errno_to_blk_status(io->res));
+ return;
+ }
+
+ /*
+ * FLUSH or DISCARD usually won't return bytes returned, so end them
+ * directly.
+ *
+ * Both the two needn't unmap.
+ */
+ if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
+ blk_mq_end_request(req, BLK_STS_OK);
+ return;
+ }
+
+ /* for READ request, writing data in iod->addr to rq buffers */
+ unmapped_bytes = ublk_unmap_io(ubq, req, io);
+
+ /*
+ * Extremely impossible since we got data filled in just before
+ *
+ * Re-read simply for this unlikely case.
+ */
+ if (unlikely(unmapped_bytes < io->res))
+ io->res = unmapped_bytes;
+
+ if (blk_update_request(req, BLK_STS_OK, io->res))
+ blk_mq_requeue_request(req, true);
+ else
+ __blk_mq_end_request(req, BLK_STS_OK);
+}
+
+/*
+ * Since __ublk_rq_task_work always fails requests immediately during
+ * exiting, __ublk_fail_req() is only called from abort context during
+ * exiting. So lock is unnecessary.
+ *
+ * Also aborting may not be started yet, keep in mind that one failed
+ * request may be issued by block layer again.
+ */
+static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
+ struct request *req)
+{
+ WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
+
+ if (!(io->flags & UBLK_IO_FLAG_ABORTED)) {
+ io->flags |= UBLK_IO_FLAG_ABORTED;
+ if (ublk_queue_can_use_recovery_reissue(ubq))
+ blk_mq_requeue_request(req, false);
+ else
+ blk_mq_end_request(req, BLK_STS_IOERR);
+ }
+}
+
+static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+{
+ /* mark this cmd owned by ublksrv */
+ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
+
+ /*
+ * clear ACTIVE since we are done with this sqe/cmd slot
+ * We can only accept io cmd in case of being not active.
+ */
+ io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+
+ /* tell ublksrv one io request is coming */
+ io_uring_cmd_done(io->cmd, res, 0);
+}
+
+#define UBLK_REQUEUE_DELAY_MS 3
+
+static inline void __ublk_abort_rq(struct ublk_queue *ubq,
+ struct request *rq)
+{
+ /* We cannot process this rq so just requeue it. */
+ if (ublk_queue_can_use_recovery(ubq))
+ blk_mq_requeue_request(rq, false);
+ else
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+
+ mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
+}
+
+static inline void __ublk_rq_task_work(struct request *req)
+{
+ struct ublk_queue *ubq = req->mq_hctx->driver_data;
+ int tag = req->tag;
+ struct ublk_io *io = &ubq->ios[tag];
+ unsigned int mapped_bytes;
+
+ pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n",
+ __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
+ ublk_get_iod(ubq, req->tag)->addr);
+
+ /*
+ * Task is exiting if either:
+ *
+ * (1) current != ubq_daemon.
+ * io_uring_cmd_complete_in_task() tries to run task_work
+ * in a workqueue if ubq_daemon(cmd's task) is PF_EXITING.
+ *
+ * (2) current->flags & PF_EXITING.
+ */
+ if (unlikely(current != ubq->ubq_daemon || current->flags & PF_EXITING)) {
+ __ublk_abort_rq(ubq, req);
+ return;
+ }
+
+ if (ublk_need_get_data(ubq) &&
+ (req_op(req) == REQ_OP_WRITE ||
+ req_op(req) == REQ_OP_FLUSH)) {
+ /*
+ * We have not handled UBLK_IO_NEED_GET_DATA command yet,
+ * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
+ * and notify it.
+ */
+ if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
+ io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+ pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
+ __func__, io->cmd->cmd_op, ubq->q_id,
+ req->tag, io->flags);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+ return;
+ }
+ /*
+ * We have handled UBLK_IO_NEED_GET_DATA command,
+ * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
+ * do the copy work.
+ */
+ io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
+ /* update iod->addr because ublksrv may have passed a new io buffer */
+ ublk_get_iod(ubq, req->tag)->addr = io->addr;
+ pr_devel("%s: update iod->addr: op %d, qid %d tag %d io_flags %x addr %llx\n",
+ __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
+ ublk_get_iod(ubq, req->tag)->addr);
+ }
+
+ mapped_bytes = ublk_map_io(ubq, req, io);
+
+ /* partially mapped, update io descriptor */
+ if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
+ /*
+ * Nothing mapped, retry until we succeed.
+ *
+ * We may never succeed in mapping any bytes here because
+ * of OOM. TODO: reserve one buffer with single page pinned
+ * for providing forward progress guarantee.
+ */
+ if (unlikely(!mapped_bytes)) {
+ blk_mq_requeue_request(req, false);
+ blk_mq_delay_kick_requeue_list(req->q,
+ UBLK_REQUEUE_DELAY_MS);
+ return;
+ }
+
+ ublk_get_iod(ubq, req->tag)->nr_sectors =
+ mapped_bytes >> 9;
+ }
+
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
+}
+
+static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+ struct ublk_queue *ubq = pdu->ubq;
+ struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+ struct ublk_rq_data *data;
+
+ llist_for_each_entry(data, io_cmds, node)
+ __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
+}
+
+static void ublk_rq_task_work_fn(struct callback_head *work)
+{
+ struct ublk_rq_data *data = container_of(work,
+ struct ublk_rq_data, work);
+ struct request *req = blk_mq_rq_from_pdu(data);
+
+ __ublk_rq_task_work(req);
+}
+
+static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
+{
+ struct ublk_io *io = &ubq->ios[rq->tag];
+
+ /*
+ * If the check pass, we know that this is a re-issued request aborted
+ * previously in monitor_work because the ubq_daemon(cmd's task) is
+ * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
+ * because this ioucmd's io_uring context may be freed now if no inflight
+ * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
+ *
+ * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+ * the tag). Then the request is re-started(allocating the tag) and we are here.
+ * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
+ * guarantees that here is a re-issued request aborted previously.
+ */
+ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
+ struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+ struct ublk_rq_data *data;
+
+ llist_for_each_entry(data, io_cmds, node)
+ __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
+ } else {
+ struct io_uring_cmd *cmd = io->cmd;
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->ubq = ubq;
+ io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+ }
+}
+
+static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq,
+ bool last)
+{
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
+
+ if (ublk_can_use_task_work(ubq)) {
+ enum task_work_notify_mode notify_mode = last ?
+ TWA_SIGNAL_NO_IPI : TWA_NONE;
+
+ if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
+ __ublk_abort_rq(ubq, rq);
+ } else {
+ if (llist_add(&data->node, &ubq->io_cmds))
+ ublk_submit_cmd(ubq, rq);
+ }
+}
+
+static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct ublk_queue *ubq = hctx->driver_data;
+ struct request *rq = bd->rq;
+ blk_status_t res;
+
+ /* fill iod to slot in io cmd buffer */
+ res = ublk_setup_iod(ubq, rq);
+ if (unlikely(res != BLK_STS_OK))
+ return BLK_STS_IOERR;
+
+ /* With recovery feature enabled, force_abort is set in
+ * ublk_stop_dev() before calling del_gendisk(). We have to
+ * abort all requeued and new rqs here to let del_gendisk()
+ * move on. Besides, we cannot not call io_uring_cmd_complete_in_task()
+ * to avoid UAF on io_uring ctx.
+ *
+ * Note: force_abort is guaranteed to be seen because it is set
+ * before request queue is unqiuesced.
+ */
+ if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
+ return BLK_STS_IOERR;
+
+ blk_mq_start_request(bd->rq);
+
+ if (unlikely(ubq_daemon_is_dying(ubq))) {
+ __ublk_abort_rq(ubq, rq);
+ return BLK_STS_OK;
+ }
+
+ ublk_queue_cmd(ubq, rq, bd->last);
+
+ return BLK_STS_OK;
+}
+
+static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ struct ublk_queue *ubq = hctx->driver_data;
+
+ if (ublk_can_use_task_work(ubq))
+ __set_notify_signal(ubq->ubq_daemon);
+}
+
+static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
+ unsigned int hctx_idx)
+{
+ struct ublk_device *ub = driver_data;
+ struct ublk_queue *ubq = ublk_get_queue(ub, hctx->queue_num);
+
+ hctx->driver_data = ubq;
+ return 0;
+}
+
+static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx, unsigned int numa_node)
+{
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ init_task_work(&data->work, ublk_rq_task_work_fn);
+ return 0;
+}
+
+static const struct blk_mq_ops ublk_mq_ops = {
+ .queue_rq = ublk_queue_rq,
+ .commit_rqs = ublk_commit_rqs,
+ .init_hctx = ublk_init_hctx,
+ .init_request = ublk_init_rq,
+};
+
+static int ublk_ch_open(struct inode *inode, struct file *filp)
+{
+ struct ublk_device *ub = container_of(inode->i_cdev,
+ struct ublk_device, cdev);
+
+ if (test_and_set_bit(UB_STATE_OPEN, &ub->state))
+ return -EBUSY;
+ filp->private_data = ub;
+ return 0;
+}
+
+static int ublk_ch_release(struct inode *inode, struct file *filp)
+{
+ struct ublk_device *ub = filp->private_data;
+
+ clear_bit(UB_STATE_OPEN, &ub->state);
+ return 0;
+}
+
+/* map pre-allocated per-queue cmd buffer to ublksrv daemon */
+static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct ublk_device *ub = filp->private_data;
+ size_t sz = vma->vm_end - vma->vm_start;
+ unsigned max_sz = UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc);
+ unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT;
+ int q_id, ret = 0;
+
+ spin_lock(&ub->mm_lock);
+ if (!ub->mm)
+ ub->mm = current->mm;
+ if (current->mm != ub->mm)
+ ret = -EINVAL;
+ spin_unlock(&ub->mm_lock);
+
+ if (ret)
+ return ret;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
+
+ end = UBLKSRV_CMD_BUF_OFFSET + ub->dev_info.nr_hw_queues * max_sz;
+ if (phys_off < UBLKSRV_CMD_BUF_OFFSET || phys_off >= end)
+ return -EINVAL;
+
+ q_id = (phys_off - UBLKSRV_CMD_BUF_OFFSET) / max_sz;
+ pr_devel("%s: qid %d, pid %d, addr %lx pg_off %lx sz %lu\n",
+ __func__, q_id, current->pid, vma->vm_start,
+ phys_off, (unsigned long)sz);
+
+ if (sz != ublk_queue_cmd_buf_size(ub, q_id))
+ return -EINVAL;
+
+ pfn = virt_to_phys(ublk_queue_cmd_buf(ub, q_id)) >> PAGE_SHIFT;
+ return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+}
+
+static void ublk_commit_completion(struct ublk_device *ub,
+ struct ublksrv_io_cmd *ub_cmd)
+{
+ u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
+ struct ublk_queue *ubq = ublk_get_queue(ub, qid);
+ struct ublk_io *io = &ubq->ios[tag];
+ struct request *req;
+
+ /* now this cmd slot is owned by nbd driver */
+ io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
+ io->res = ub_cmd->result;
+
+ /* find the io request and complete */
+ req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
+
+ if (req && likely(!blk_should_fake_timeout(req->q)))
+ ublk_complete_rq(req);
+}
+
+/*
+ * When ->ubq_daemon is exiting, either new request is ended immediately,
+ * or any queued io command is drained, so it is safe to abort queue
+ * lockless
+ */
+static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
+{
+ int i;
+
+ if (!ublk_get_device(ub))
+ return;
+
+ for (i = 0; i < ubq->q_depth; i++) {
+ struct ublk_io *io = &ubq->ios[i];
+
+ if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) {
+ struct request *rq;
+
+ /*
+ * Either we fail the request or ublk_rq_task_work_fn
+ * will do it
+ */
+ rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
+ if (rq)
+ __ublk_fail_req(ubq, io, rq);
+ }
+ }
+ ublk_put_device(ub);
+}
+
+static void ublk_daemon_monitor_work(struct work_struct *work)
+{
+ struct ublk_device *ub =
+ container_of(work, struct ublk_device, monitor_work.work);
+ int i;
+
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+ if (ubq_daemon_is_dying(ubq)) {
+ if (ublk_queue_can_use_recovery(ubq))
+ schedule_work(&ub->quiesce_work);
+ else
+ schedule_work(&ub->stop_work);
+
+ /* abort queue is for making forward progress */
+ ublk_abort_queue(ub, ubq);
+ }
+ }
+
+ /*
+ * We can't schedule monitor work after ub's state is not UBLK_S_DEV_LIVE.
+ * after ublk_remove() or __ublk_quiesce_dev() is started.
+ *
+ * No need ub->mutex, monitor work are canceled after state is marked
+ * as not LIVE, so new state is observed reliably.
+ */
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE)
+ schedule_delayed_work(&ub->monitor_work,
+ UBLK_DAEMON_MONITOR_PERIOD);
+}
+
+static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+{
+ return ubq->nr_io_ready == ubq->q_depth;
+}
+
+static void ublk_cancel_queue(struct ublk_queue *ubq)
+{
+ int i;
+
+ if (!ublk_queue_ready(ubq))
+ return;
+
+ for (i = 0; i < ubq->q_depth; i++) {
+ struct ublk_io *io = &ubq->ios[i];
+
+ if (io->flags & UBLK_IO_FLAG_ACTIVE)
+ io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
+ }
+
+ /* all io commands are canceled */
+ ubq->nr_io_ready = 0;
+}
+
+/* Cancel all pending commands, must be called after del_gendisk() returns */
+static void ublk_cancel_dev(struct ublk_device *ub)
+{
+ int i;
+
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+ ublk_cancel_queue(ublk_get_queue(ub, i));
+}
+
+static bool ublk_check_inflight_rq(struct request *rq, void *data)
+{
+ bool *idle = data;
+
+ if (blk_mq_request_started(rq)) {
+ *idle = false;
+ return false;
+ }
+ return true;
+}
+
+static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
+{
+ bool idle;
+
+ WARN_ON_ONCE(!blk_queue_quiesced(ub->ub_disk->queue));
+ while (true) {
+ idle = true;
+ blk_mq_tagset_busy_iter(&ub->tag_set,
+ ublk_check_inflight_rq, &idle);
+ if (idle)
+ break;
+ msleep(UBLK_REQUEUE_DELAY_MS);
+ }
+}
+
+static void __ublk_quiesce_dev(struct ublk_device *ub)
+{
+ pr_devel("%s: quiesce ub: dev_id %d state %s\n",
+ __func__, ub->dev_info.dev_id,
+ ub->dev_info.state == UBLK_S_DEV_LIVE ?
+ "LIVE" : "QUIESCED");
+ blk_mq_quiesce_queue(ub->ub_disk->queue);
+ ublk_wait_tagset_rqs_idle(ub);
+ ub->dev_info.state = UBLK_S_DEV_QUIESCED;
+ ublk_cancel_dev(ub);
+ /* we are going to release task_struct of ubq_daemon and resets
+ * ->ubq_daemon to NULL. So in monitor_work, check on ubq_daemon causes UAF.
+ * Besides, monitor_work is not necessary in QUIESCED state since we have
+ * already scheduled quiesce_work and quiesced all ubqs.
+ *
+ * Do not let monitor_work schedule itself if state it QUIESCED. And we cancel
+ * it here and re-schedule it in END_USER_RECOVERY to avoid UAF.
+ */
+ cancel_delayed_work_sync(&ub->monitor_work);
+}
+
+static void ublk_quiesce_work_fn(struct work_struct *work)
+{
+ struct ublk_device *ub =
+ container_of(work, struct ublk_device, quiesce_work);
+
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state != UBLK_S_DEV_LIVE)
+ goto unlock;
+ __ublk_quiesce_dev(ub);
+ unlock:
+ mutex_unlock(&ub->mutex);
+}
+
+static void ublk_unquiesce_dev(struct ublk_device *ub)
+{
+ int i;
+
+ pr_devel("%s: unquiesce ub: dev_id %d state %s\n",
+ __func__, ub->dev_info.dev_id,
+ ub->dev_info.state == UBLK_S_DEV_LIVE ?
+ "LIVE" : "QUIESCED");
+ /* quiesce_work has run. We let requeued rqs be aborted
+ * before running fallback_wq. "force_abort" must be seen
+ * after request queue is unqiuesced. Then del_gendisk()
+ * can move on.
+ */
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+ ublk_get_queue(ub, i)->force_abort = true;
+
+ blk_mq_unquiesce_queue(ub->ub_disk->queue);
+ /* We may have requeued some rqs in ublk_quiesce_queue() */
+ blk_mq_kick_requeue_list(ub->ub_disk->queue);
+}
+
+static void ublk_stop_dev(struct ublk_device *ub)
+{
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+ goto unlock;
+ if (ublk_can_use_recovery(ub)) {
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE)
+ __ublk_quiesce_dev(ub);
+ ublk_unquiesce_dev(ub);
+ }
+ del_gendisk(ub->ub_disk);
+ ub->dev_info.state = UBLK_S_DEV_DEAD;
+ ub->dev_info.ublksrv_pid = -1;
+ put_disk(ub->ub_disk);
+ ub->ub_disk = NULL;
+ unlock:
+ ublk_cancel_dev(ub);
+ mutex_unlock(&ub->mutex);
+ cancel_delayed_work_sync(&ub->monitor_work);
+}
+
+/* device can only be started after all IOs are ready */
+static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
+{
+ mutex_lock(&ub->mutex);
+ ubq->nr_io_ready++;
+ if (ublk_queue_ready(ubq)) {
+ ubq->ubq_daemon = current;
+ get_task_struct(ubq->ubq_daemon);
+ ub->nr_queues_ready++;
+ }
+ if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
+ complete_all(&ub->completion);
+ mutex_unlock(&ub->mutex);
+}
+
+static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
+ int tag)
+{
+ struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+
+ ublk_queue_cmd(ubq, req, true);
+}
+
+static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
+ struct ublk_device *ub = cmd->file->private_data;
+ struct ublk_queue *ubq;
+ struct ublk_io *io;
+ u32 cmd_op = cmd->cmd_op;
+ unsigned tag = ub_cmd->tag;
+ int ret = -EINVAL;
+
+ pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
+ __func__, cmd->cmd_op, ub_cmd->q_id, tag,
+ ub_cmd->result);
+
+ if (!(issue_flags & IO_URING_F_SQE128))
+ goto out;
+
+ if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues)
+ goto out;
+
+ ubq = ublk_get_queue(ub, ub_cmd->q_id);
+ if (!ubq || ub_cmd->q_id != ubq->q_id)
+ goto out;
+
+ if (ubq->ubq_daemon && ubq->ubq_daemon != current)
+ goto out;
+
+ if (tag >= ubq->q_depth)
+ goto out;
+
+ io = &ubq->ios[tag];
+
+ /* there is pending io cmd, something must be wrong */
+ if (io->flags & UBLK_IO_FLAG_ACTIVE) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * ensure that the user issues UBLK_IO_NEED_GET_DATA
+ * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
+ */
+ if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
+ ^ (cmd_op == UBLK_IO_NEED_GET_DATA))
+ goto out;
+
+ switch (cmd_op) {
+ case UBLK_IO_FETCH_REQ:
+ /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
+ if (ublk_queue_ready(ubq)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ /*
+ * The io is being handled by server, so COMMIT_RQ is expected
+ * instead of FETCH_REQ
+ */
+ if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
+ goto out;
+ /* FETCH_RQ has to provide IO buffer */
+ if (!ub_cmd->addr)
+ goto out;
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ io->addr = ub_cmd->addr;
+
+ ublk_mark_io_ready(ub, ubq);
+ break;
+ case UBLK_IO_COMMIT_AND_FETCH_REQ:
+ /* FETCH_RQ has to provide IO buffer */
+ if (!ub_cmd->addr)
+ goto out;
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+ goto out;
+ io->addr = ub_cmd->addr;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ io->cmd = cmd;
+ ublk_commit_completion(ub, ub_cmd);
+ break;
+ case UBLK_IO_NEED_GET_DATA:
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+ goto out;
+ io->addr = ub_cmd->addr;
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
+ break;
+ default:
+ goto out;
+ }
+ return -EIOCBQUEUED;
+
+ out:
+ io_uring_cmd_done(cmd, ret, 0);
+ pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
+ __func__, cmd_op, tag, ret, io->flags);
+ return -EIOCBQUEUED;
+}
+
+static const struct file_operations ublk_ch_fops = {
+ .owner = THIS_MODULE,
+ .open = ublk_ch_open,
+ .release = ublk_ch_release,
+ .llseek = no_llseek,
+ .uring_cmd = ublk_ch_uring_cmd,
+ .mmap = ublk_ch_mmap,
+};
+
+static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
+{
+ int size = ublk_queue_cmd_buf_size(ub, q_id);
+ struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+
+ if (ubq->ubq_daemon)
+ put_task_struct(ubq->ubq_daemon);
+ if (ubq->io_cmd_buf)
+ free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
+}
+
+static int ublk_init_queue(struct ublk_device *ub, int q_id)
+{
+ struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+ void *ptr;
+ int size;
+
+ ubq->flags = ub->dev_info.flags;
+ ubq->q_id = q_id;
+ ubq->q_depth = ub->dev_info.queue_depth;
+ size = ublk_queue_cmd_buf_size(ub, q_id);
+
+ ptr = (void *) __get_free_pages(gfp_flags, get_order(size));
+ if (!ptr)
+ return -ENOMEM;
+
+ ubq->io_cmd_buf = ptr;
+ ubq->dev = ub;
+ return 0;
+}
+
+static void ublk_deinit_queues(struct ublk_device *ub)
+{
+ int nr_queues = ub->dev_info.nr_hw_queues;
+ int i;
+
+ if (!ub->__queues)
+ return;
+
+ for (i = 0; i < nr_queues; i++)
+ ublk_deinit_queue(ub, i);
+ kfree(ub->__queues);
+}
+
+static int ublk_init_queues(struct ublk_device *ub)
+{
+ int nr_queues = ub->dev_info.nr_hw_queues;
+ int depth = ub->dev_info.queue_depth;
+ int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
+ int i, ret = -ENOMEM;
+
+ ub->queue_size = ubq_size;
+ ub->__queues = kcalloc(nr_queues, ubq_size, GFP_KERNEL);
+ if (!ub->__queues)
+ return ret;
+
+ for (i = 0; i < nr_queues; i++) {
+ if (ublk_init_queue(ub, i))
+ goto fail;
+ }
+
+ init_completion(&ub->completion);
+ return 0;
+
+ fail:
+ ublk_deinit_queues(ub);
+ return ret;
+}
+
+static int ublk_alloc_dev_number(struct ublk_device *ub, int idx)
+{
+ int i = idx;
+ int err;
+
+ spin_lock(&ublk_idr_lock);
+ /* allocate id, if @id >= 0, we're requesting that specific id */
+ if (i >= 0) {
+ err = idr_alloc(&ublk_index_idr, ub, i, i + 1, GFP_NOWAIT);
+ if (err == -ENOSPC)
+ err = -EEXIST;
+ } else {
+ err = idr_alloc(&ublk_index_idr, ub, 0, 0, GFP_NOWAIT);
+ }
+ spin_unlock(&ublk_idr_lock);
+
+ if (err >= 0)
+ ub->ub_number = err;
+
+ return err;
+}
+
+static void ublk_free_dev_number(struct ublk_device *ub)
+{
+ spin_lock(&ublk_idr_lock);
+ idr_remove(&ublk_index_idr, ub->ub_number);
+ wake_up_all(&ublk_idr_wq);
+ spin_unlock(&ublk_idr_lock);
+}
+
+static void ublk_cdev_rel(struct device *dev)
+{
+ struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev);
+
+ blk_mq_free_tag_set(&ub->tag_set);
+ ublk_deinit_queues(ub);
+ ublk_free_dev_number(ub);
+ mutex_destroy(&ub->mutex);
+ kfree(ub);
+}
+
+static int ublk_add_chdev(struct ublk_device *ub)
+{
+ struct device *dev = &ub->cdev_dev;
+ int minor = ub->ub_number;
+ int ret;
+
+ dev->parent = ublk_misc.this_device;
+ dev->devt = MKDEV(MAJOR(ublk_chr_devt), minor);
+ dev->class = ublk_chr_class;
+ dev->release = ublk_cdev_rel;
+ device_initialize(dev);
+
+ ret = dev_set_name(dev, "ublkc%d", minor);
+ if (ret)
+ goto fail;
+
+ cdev_init(&ub->cdev, &ublk_ch_fops);
+ ret = cdev_device_add(&ub->cdev, dev);
+ if (ret)
+ goto fail;
+ return 0;
+ fail:
+ put_device(dev);
+ return ret;
+}
+
+static void ublk_stop_work_fn(struct work_struct *work)
+{
+ struct ublk_device *ub =
+ container_of(work, struct ublk_device, stop_work);
+
+ ublk_stop_dev(ub);
+}
+
+/* align max io buffer size with PAGE_SIZE */
+static void ublk_align_max_io_size(struct ublk_device *ub)
+{
+ unsigned int max_io_bytes = ub->dev_info.max_io_buf_bytes;
+
+ ub->dev_info.max_io_buf_bytes =
+ round_down(max_io_bytes, PAGE_SIZE);
+}
+
+static int ublk_add_tag_set(struct ublk_device *ub)
+{
+ ub->tag_set.ops = &ublk_mq_ops;
+ ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues;
+ ub->tag_set.queue_depth = ub->dev_info.queue_depth;
+ ub->tag_set.numa_node = NUMA_NO_NODE;
+ ub->tag_set.cmd_size = sizeof(struct ublk_rq_data);
+ ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ ub->tag_set.driver_data = ub;
+ return blk_mq_alloc_tag_set(&ub->tag_set);
+}
+
+static void ublk_remove(struct ublk_device *ub)
+{
+ ublk_stop_dev(ub);
+ cancel_work_sync(&ub->stop_work);
+ cancel_work_sync(&ub->quiesce_work);
+ cdev_device_del(&ub->cdev, &ub->cdev_dev);
+ put_device(&ub->cdev_dev);
+}
+
+static struct ublk_device *ublk_get_device_from_id(int idx)
+{
+ struct ublk_device *ub = NULL;
+
+ if (idx < 0)
+ return NULL;
+
+ spin_lock(&ublk_idr_lock);
+ ub = idr_find(&ublk_index_idr, idx);
+ if (ub)
+ ub = ublk_get_device(ub);
+ spin_unlock(&ublk_idr_lock);
+
+ return ub;
+}
+
+static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ int ublksrv_pid = (int)header->data[0];
+ struct ublk_device *ub;
+ struct gendisk *disk;
+ int ret = -EINVAL;
+
+ if (ublksrv_pid <= 0)
+ return -EINVAL;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ wait_for_completion_interruptible(&ub->completion);
+
+ schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
+
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
+ test_bit(UB_STATE_USED, &ub->state)) {
+ ret = -EEXIST;
+ goto out_unlock;
+ }
+
+ disk = blk_mq_alloc_disk(&ub->tag_set, ub);
+ if (IS_ERR(disk)) {
+ ret = PTR_ERR(disk);
+ goto out_unlock;
+ }
+ sprintf(disk->disk_name, "ublkb%d", ub->ub_number);
+ disk->fops = &ub_fops;
+ disk->private_data = ub;
+
+ ub->dev_info.ublksrv_pid = ublksrv_pid;
+ ub->ub_disk = disk;
+
+ ret = ublk_apply_params(ub);
+ if (ret)
+ goto out_put_disk;
+
+ get_device(&ub->cdev_dev);
+ ret = add_disk(disk);
+ if (ret) {
+ /*
+ * Has to drop the reference since ->free_disk won't be
+ * called in case of add_disk failure.
+ */
+ ublk_put_device(ub);
+ goto out_put_disk;
+ }
+ set_bit(UB_STATE_USED, &ub->state);
+ ub->dev_info.state = UBLK_S_DEV_LIVE;
+out_put_disk:
+ if (ret)
+ put_disk(disk);
+out_unlock:
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_device *ub;
+ cpumask_var_t cpumask;
+ unsigned long queue;
+ unsigned int retlen;
+ unsigned int i;
+ int ret = -EINVAL;
+
+ if (header->len * BITS_PER_BYTE < nr_cpu_ids)
+ return -EINVAL;
+ if (header->len & (sizeof(unsigned long)-1))
+ return -EINVAL;
+ if (!header->addr)
+ return -EINVAL;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ queue = header->data[0];
+ if (queue >= ub->dev_info.nr_hw_queues)
+ goto out_put_device;
+
+ ret = -ENOMEM;
+ if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+ goto out_put_device;
+
+ for_each_possible_cpu(i) {
+ if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[i] == queue)
+ cpumask_set_cpu(i, cpumask);
+ }
+
+ ret = -EFAULT;
+ retlen = min_t(unsigned short, header->len, cpumask_size());
+ if (copy_to_user(argp, cpumask, retlen))
+ goto out_free_cpumask;
+ if (retlen != header->len &&
+ clear_user(argp + retlen, header->len - retlen))
+ goto out_free_cpumask;
+
+ ret = 0;
+out_free_cpumask:
+ free_cpumask_var(cpumask);
+out_put_device:
+ ublk_put_device(ub);
+ return ret;
+}
+
+static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
+{
+ pr_devel("%s: dev id %d flags %llx\n", __func__,
+ info->dev_id, info->flags);
+ pr_devel("\t nr_hw_queues %d queue_depth %d\n",
+ info->nr_hw_queues, info->queue_depth);
+}
+
+static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublksrv_ctrl_dev_info info;
+ struct ublk_device *ub;
+ int ret = -EINVAL;
+
+ if (header->len < sizeof(info) || !header->addr)
+ return -EINVAL;
+ if (header->queue_id != (u16)-1) {
+ pr_warn("%s: queue_id is wrong %x\n",
+ __func__, header->queue_id);
+ return -EINVAL;
+ }
+ if (copy_from_user(&info, argp, sizeof(info)))
+ return -EFAULT;
+ ublk_dump_dev_info(&info);
+ if (header->dev_id != info.dev_id) {
+ pr_warn("%s: dev id not match %u %u\n",
+ __func__, header->dev_id, info.dev_id);
+ return -EINVAL;
+ }
+
+ ret = mutex_lock_killable(&ublk_ctl_mutex);
+ if (ret)
+ return ret;
+
+ ret = -ENOMEM;
+ ub = kzalloc(sizeof(*ub), GFP_KERNEL);
+ if (!ub)
+ goto out_unlock;
+ mutex_init(&ub->mutex);
+ spin_lock_init(&ub->mm_lock);
+ INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn);
+ INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
+ INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);
+
+ ret = ublk_alloc_dev_number(ub, header->dev_id);
+ if (ret < 0)
+ goto out_free_ub;
+
+ memcpy(&ub->dev_info, &info, sizeof(info));
+
+ /* update device id */
+ ub->dev_info.dev_id = ub->ub_number;
+
+ /*
+ * 64bit flags will be copied back to userspace as feature
+ * negotiation result, so have to clear flags which driver
+ * doesn't support yet, then userspace can get correct flags
+ * (features) to handle.
+ */
+ ub->dev_info.flags &= UBLK_F_ALL;
+
+ if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
+ ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+
+ /* We are not ready to support zero copy */
+ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
+
+ ub->dev_info.nr_hw_queues = min_t(unsigned int,
+ ub->dev_info.nr_hw_queues, nr_cpu_ids);
+ ublk_align_max_io_size(ub);
+
+ ret = ublk_init_queues(ub);
+ if (ret)
+ goto out_free_dev_number;
+
+ ret = ublk_add_tag_set(ub);
+ if (ret)
+ goto out_deinit_queues;
+
+ ret = -EFAULT;
+ if (copy_to_user(argp, &ub->dev_info, sizeof(info)))
+ goto out_free_tag_set;
+
+ /*
+ * Add the char dev so that ublksrv daemon can be setup.
+ * ublk_add_chdev() will cleanup everything if it fails.
+ */
+ ret = ublk_add_chdev(ub);
+ goto out_unlock;
+
+out_free_tag_set:
+ blk_mq_free_tag_set(&ub->tag_set);
+out_deinit_queues:
+ ublk_deinit_queues(ub);
+out_free_dev_number:
+ ublk_free_dev_number(ub);
+out_free_ub:
+ mutex_destroy(&ub->mutex);
+ kfree(ub);
+out_unlock:
+ mutex_unlock(&ublk_ctl_mutex);
+ return ret;
+}
+
+static inline bool ublk_idr_freed(int id)
+{
+ void *ptr;
+
+ spin_lock(&ublk_idr_lock);
+ ptr = idr_find(&ublk_index_idr, id);
+ spin_unlock(&ublk_idr_lock);
+
+ return ptr == NULL;
+}
+
+static int ublk_ctrl_del_dev(int idx)
+{
+ struct ublk_device *ub;
+ int ret;
+
+ ret = mutex_lock_killable(&ublk_ctl_mutex);
+ if (ret)
+ return ret;
+
+ ub = ublk_get_device_from_id(idx);
+ if (ub) {
+ ublk_remove(ub);
+ ublk_put_device(ub);
+ ret = 0;
+ } else {
+ ret = -ENODEV;
+ }
+
+ /*
+ * Wait until the idr is removed, then it can be reused after
+ * DEL_DEV command is returned.
+ */
+ if (!ret)
+ wait_event(ublk_idr_wq, ublk_idr_freed(idx));
+ mutex_unlock(&ublk_ctl_mutex);
+
+ return ret;
+}
+
+static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+
+ pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
+ __func__, cmd->cmd_op, header->dev_id, header->queue_id,
+ header->data[0], header->addr, header->len);
+}
+
+static int ublk_ctrl_stop_dev(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ struct ublk_device *ub;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ ublk_stop_dev(ub);
+ cancel_work_sync(&ub->stop_work);
+ cancel_work_sync(&ub->quiesce_work);
+
+ ublk_put_device(ub);
+ return 0;
+}
+
+static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_device *ub;
+ int ret = 0;
+
+ if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
+ return -EINVAL;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info)))
+ ret = -EFAULT;
+ ublk_put_device(ub);
+
+ return ret;
+}
+
+static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_params_header ph;
+ struct ublk_device *ub;
+ int ret;
+
+ if (header->len <= sizeof(ph) || !header->addr)
+ return -EINVAL;
+
+ if (copy_from_user(&ph, argp, sizeof(ph)))
+ return -EFAULT;
+
+ if (ph.len > header->len || !ph.len)
+ return -EINVAL;
+
+ if (ph.len > sizeof(struct ublk_params))
+ ph.len = sizeof(struct ublk_params);
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ mutex_lock(&ub->mutex);
+ if (copy_to_user(argp, &ub->params, ph.len))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ mutex_unlock(&ub->mutex);
+
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_params_header ph;
+ struct ublk_device *ub;
+ int ret = -EFAULT;
+
+ if (header->len <= sizeof(ph) || !header->addr)
+ return -EINVAL;
+
+ if (copy_from_user(&ph, argp, sizeof(ph)))
+ return -EFAULT;
+
+ if (ph.len > header->len || !ph.len || !ph.types)
+ return -EINVAL;
+
+ if (ph.len > sizeof(struct ublk_params))
+ ph.len = sizeof(struct ublk_params);
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ /* parameters can only be changed when device isn't live */
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
+ ret = -EACCES;
+ } else if (copy_from_user(&ub->params, argp, ph.len)) {
+ ret = -EFAULT;
+ } else {
+ /* clear all we don't support yet */
+ ub->params.types &= UBLK_PARAM_TYPE_ALL;
+ ret = ublk_validate_params(ub);
+ }
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+
+ return ret;
+}
+
+static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
+{
+ int i;
+
+ WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
+ /* All old ioucmds have to be completed */
+ WARN_ON_ONCE(ubq->nr_io_ready);
+ /* old daemon is PF_EXITING, put it now */
+ put_task_struct(ubq->ubq_daemon);
+ /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
+ ubq->ubq_daemon = NULL;
+
+ for (i = 0; i < ubq->q_depth; i++) {
+ struct ublk_io *io = &ubq->ios[i];
+
+ /* forget everything now and be ready for new FETCH_REQ */
+ io->flags = 0;
+ io->cmd = NULL;
+ io->addr = 0;
+ }
+}
+
+static int ublk_ctrl_start_recovery(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ struct ublk_device *ub;
+ int ret = -EINVAL;
+ int i;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return ret;
+
+ mutex_lock(&ub->mutex);
+ if (!ublk_can_use_recovery(ub))
+ goto out_unlock;
+ /*
+ * START_RECOVERY is only allowd after:
+ *
+ * (1) UB_STATE_OPEN is not set, which means the dying process is exited
+ * and related io_uring ctx is freed so file struct of /dev/ublkcX is
+ * released.
+ *
+ * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work:
+ * (a)has quiesced request queue
+ * (b)has requeued every inflight rqs whose io_flags is ACTIVE
+ * (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE
+ * (d)has completed/camceled all ioucmds owned by ther dying process
+ */
+ if (test_bit(UB_STATE_OPEN, &ub->state) ||
+ ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id);
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+ ublk_queue_reinit(ub, ublk_get_queue(ub, i));
+ /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
+ ub->mm = NULL;
+ ub->nr_queues_ready = 0;
+ init_completion(&ub->completion);
+ ret = 0;
+ out_unlock:
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_end_recovery(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ int ublksrv_pid = (int)header->data[0];
+ struct ublk_device *ub;
+ int ret = -EINVAL;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return ret;
+
+ pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
+ __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+ /* wait until new ubq_daemon sending all FETCH_REQ */
+ wait_for_completion_interruptible(&ub->completion);
+ pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n",
+ __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+
+ mutex_lock(&ub->mutex);
+ if (!ublk_can_use_recovery(ub))
+ goto out_unlock;
+
+ if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ ub->dev_info.ublksrv_pid = ublksrv_pid;
+ pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
+ __func__, ublksrv_pid, header->dev_id);
+ blk_mq_unquiesce_queue(ub->ub_disk->queue);
+ pr_devel("%s: queue unquiesced, dev id %d.\n",
+ __func__, header->dev_id);
+ blk_mq_kick_requeue_list(ub->ub_disk->queue);
+ ub->dev_info.state = UBLK_S_DEV_LIVE;
+ schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
+ ret = 0;
+ out_unlock:
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ int ret = -EINVAL;
+
+ ublk_ctrl_cmd_dump(cmd);
+
+ if (!(issue_flags & IO_URING_F_SQE128))
+ goto out;
+
+ ret = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = -ENODEV;
+ switch (cmd->cmd_op) {
+ case UBLK_CMD_START_DEV:
+ ret = ublk_ctrl_start_dev(cmd);
+ break;
+ case UBLK_CMD_STOP_DEV:
+ ret = ublk_ctrl_stop_dev(cmd);
+ break;
+ case UBLK_CMD_GET_DEV_INFO:
+ ret = ublk_ctrl_get_dev_info(cmd);
+ break;
+ case UBLK_CMD_ADD_DEV:
+ ret = ublk_ctrl_add_dev(cmd);
+ break;
+ case UBLK_CMD_DEL_DEV:
+ ret = ublk_ctrl_del_dev(header->dev_id);
+ break;
+ case UBLK_CMD_GET_QUEUE_AFFINITY:
+ ret = ublk_ctrl_get_queue_affinity(cmd);
+ break;
+ case UBLK_CMD_GET_PARAMS:
+ ret = ublk_ctrl_get_params(cmd);
+ break;
+ case UBLK_CMD_SET_PARAMS:
+ ret = ublk_ctrl_set_params(cmd);
+ break;
+ case UBLK_CMD_START_USER_RECOVERY:
+ ret = ublk_ctrl_start_recovery(cmd);
+ break;
+ case UBLK_CMD_END_USER_RECOVERY:
+ ret = ublk_ctrl_end_recovery(cmd);
+ break;
+ default:
+ break;
+ }
+ out:
+ io_uring_cmd_done(cmd, ret, 0);
+ pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
+ __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
+ return -EIOCBQUEUED;
+}
+
+static const struct file_operations ublk_ctl_fops = {
+ .open = nonseekable_open,
+ .uring_cmd = ublk_ctrl_uring_cmd,
+ .owner = THIS_MODULE,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice ublk_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "ublk-control",
+ .fops = &ublk_ctl_fops,
+};
+
+static int __init ublk_init(void)
+{
+ int ret;
+
+ init_waitqueue_head(&ublk_idr_wq);
+
+ ret = misc_register(&ublk_misc);
+ if (ret)
+ return ret;
+
+ ret = alloc_chrdev_region(&ublk_chr_devt, 0, UBLK_MINORS, "ublk-char");
+ if (ret)
+ goto unregister_mis;
+
+ ublk_chr_class = class_create(THIS_MODULE, "ublk-char");
+ if (IS_ERR(ublk_chr_class)) {
+ ret = PTR_ERR(ublk_chr_class);
+ goto free_chrdev_region;
+ }
+ return 0;
+
+free_chrdev_region:
+ unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS);
+unregister_mis:
+ misc_deregister(&ublk_misc);
+ return ret;
+}
+
+static void __exit ublk_exit(void)
+{
+ struct ublk_device *ub;
+ int id;
+
+ class_destroy(ublk_chr_class);
+
+ misc_deregister(&ublk_misc);
+
+ idr_for_each_entry(&ublk_index_idr, ub, id)
+ ublk_remove(ub);
+
+ idr_destroy(&ublk_index_idr);
+ unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS);
+}
+
+module_init(ublk_init);
+module_exit(ublk_exit);
+
+MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6ae38776e30e..19da5defd734 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -37,6 +37,10 @@ MODULE_PARM_DESC(num_request_queues,
"0 for no limit. "
"Values > nr_cpu_ids truncated to nr_cpu_ids.");
+static unsigned int poll_queues;
+module_param(poll_queues, uint, 0644);
+MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O");
+
static int major;
static DEFINE_IDA(vd_index_ida);
@@ -69,21 +73,12 @@ struct virtio_blk {
/* Process context for config space updates */
struct work_struct config_work;
- /*
- * Tracks references from block_device_operations open/release and
- * virtio_driver probe/remove so this object can be freed once no
- * longer in use.
- */
- refcount_t refs;
-
- /* What host tells us, plus 2 for header & tailer. */
- unsigned int sg_elems;
-
/* Ida index - used to track minor number allocations. */
int index;
/* num of vqs */
int num_vqs;
+ int io_queues[HCTX_MAX_TYPES];
struct virtio_blk_vq *vqs;
};
@@ -106,8 +101,15 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
}
}
-static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
- struct scatterlist *data_sg, bool have_data)
+static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
+{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+
+ return vq;
+}
+
+static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{
struct scatterlist hdr, status, *sgs[3];
unsigned int num_out = 0, num_in = 0;
@@ -115,11 +117,11 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
sgs[num_out++] = &hdr;
- if (have_data) {
+ if (vbr->sg_table.nents) {
if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
- sgs[num_out++] = data_sg;
+ sgs[num_out++] = vbr->sg_table.sgl;
else
- sgs[num_out + num_in++] = data_sg;
+ sgs[num_out + num_in++] = vbr->sg_table.sgl;
}
sg_init_one(&status, &vbr->status, sizeof(vbr->status));
@@ -128,7 +130,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}
-static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
+static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool unmap)
{
unsigned short segments = blk_rq_nr_discard_segments(req);
unsigned short n = 0;
@@ -238,6 +240,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP);
break;
+ case REQ_OP_SECURE_ERASE:
+ type = VIRTIO_BLK_T_SECURE_ERASE;
+ break;
case REQ_OP_DRV_IN:
type = VIRTIO_BLK_T_GET_ID;
break;
@@ -249,8 +254,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
- if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
- if (virtblk_setup_discard_write_zeroes(req, unmap))
+ if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES ||
+ type == VIRTIO_BLK_T_SECURE_ERASE) {
+ if (virtblk_setup_discard_write_zeroes_erase(req, unmap))
return BLK_STS_RESOURCE;
}
@@ -309,6 +315,28 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
virtqueue_notify(vq->vq);
}
+static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx,
+ struct virtio_blk *vblk,
+ struct request *req,
+ struct virtblk_req *vbr)
+{
+ blk_status_t status;
+
+ status = virtblk_setup_cmd(vblk->vdev, req, vbr);
+ if (unlikely(status))
+ return status;
+
+ vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr);
+ if (unlikely(vbr->sg_table.nents < 0)) {
+ virtblk_cleanup_cmd(req);
+ return BLK_STS_RESOURCE;
+ }
+
+ blk_mq_start_request(req);
+
+ return BLK_STS_OK;
+}
+
static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -316,28 +344,17 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
struct request *req = bd->rq;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
unsigned long flags;
- int num;
int qid = hctx->queue_num;
bool notify = false;
blk_status_t status;
int err;
- BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
- status = virtblk_setup_cmd(vblk->vdev, req, vbr);
+ status = virtblk_prep_rq(hctx, vblk, req, vbr);
if (unlikely(status))
return status;
- blk_mq_start_request(req);
-
- num = virtblk_map_data(hctx, req, vbr);
- if (unlikely(num < 0)) {
- virtblk_cleanup_cmd(req);
- return BLK_STS_RESOURCE;
- }
-
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
- err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num);
+ err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
if (err) {
virtqueue_kick(vblk->vqs[qid].vq);
/* Don't stop the queue if -ENOMEM: we may have failed to
@@ -367,6 +384,74 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
+static bool virtblk_prep_rq_batch(struct request *req)
+{
+ struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
+ struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+
+ req->mq_hctx->tags->rqs[req->tag] = req;
+
+ return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
+}
+
+static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
+ struct request **rqlist)
+{
+ unsigned long flags;
+ int err;
+ bool kick;
+
+ spin_lock_irqsave(&vq->lock, flags);
+
+ while (!rq_list_empty(*rqlist)) {
+ struct request *req = rq_list_pop(rqlist);
+ struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+
+ err = virtblk_add_req(vq->vq, vbr);
+ if (err) {
+ virtblk_unmap_data(req, vbr);
+ virtblk_cleanup_cmd(req);
+ blk_mq_requeue_request(req, true);
+ }
+ }
+
+ kick = virtqueue_kick_prepare(vq->vq);
+ spin_unlock_irqrestore(&vq->lock, flags);
+
+ return kick;
+}
+
+static void virtio_queue_rqs(struct request **rqlist)
+{
+ struct request *req, *next, *prev = NULL;
+ struct request *requeue_list = NULL;
+
+ rq_list_for_each_safe(rqlist, req, next) {
+ struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
+ bool kick;
+
+ if (!virtblk_prep_rq_batch(req)) {
+ rq_list_move(rqlist, &requeue_list, req, prev);
+ req = prev;
+ if (!req)
+ continue;
+ }
+
+ if (!next || req->mq_hctx != next->mq_hctx) {
+ req->rq_next = NULL;
+ kick = virtblk_add_req_batch(vq, rqlist);
+ if (kick)
+ virtqueue_notify(vq->vq);
+
+ *rqlist = next;
+ prev = NULL;
+ } else
+ prev = req;
+ }
+
+ *rqlist = requeue_list;
+}
+
/* return id (s/n) string for *disk to *id_str
*/
static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -384,50 +469,13 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
if (err)
goto out;
- blk_execute_rq(vblk->disk, req, false);
+ blk_execute_rq(req, false);
err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
blk_mq_free_request(req);
return err;
}
-static void virtblk_get(struct virtio_blk *vblk)
-{
- refcount_inc(&vblk->refs);
-}
-
-static void virtblk_put(struct virtio_blk *vblk)
-{
- if (refcount_dec_and_test(&vblk->refs)) {
- ida_simple_remove(&vd_index_ida, vblk->index);
- mutex_destroy(&vblk->vdev_mutex);
- kfree(vblk);
- }
-}
-
-static int virtblk_open(struct block_device *bd, fmode_t mode)
-{
- struct virtio_blk *vblk = bd->bd_disk->private_data;
- int ret = 0;
-
- mutex_lock(&vblk->vdev_mutex);
-
- if (vblk->vdev)
- virtblk_get(vblk);
- else
- ret = -ENXIO;
-
- mutex_unlock(&vblk->vdev_mutex);
- return ret;
-}
-
-static void virtblk_release(struct gendisk *disk, fmode_t mode)
-{
- struct virtio_blk *vblk = disk->private_data;
-
- virtblk_put(vblk);
-}
-
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
@@ -460,11 +508,19 @@ out:
return ret;
}
+static void virtblk_free_disk(struct gendisk *disk)
+{
+ struct virtio_blk *vblk = disk->private_data;
+
+ ida_simple_remove(&vd_index_ida, vblk->index);
+ mutex_destroy(&vblk->vdev_mutex);
+ kfree(vblk);
+}
+
static const struct block_device_operations virtblk_fops = {
- .owner = THIS_MODULE,
- .open = virtblk_open,
- .release = virtblk_release,
- .getgeo = virtblk_getgeo,
+ .owner = THIS_MODULE,
+ .getgeo = virtblk_getgeo,
+ .free_disk = virtblk_free_disk,
};
static int index_to_minor(int index)
@@ -553,6 +609,7 @@ static int init_vq(struct virtio_blk *vblk)
const char **names;
struct virtqueue **vqs;
unsigned short num_vqs;
+ unsigned int num_poll_vqs;
struct virtio_device *vdev = vblk->vdev;
struct irq_affinity desc = { 0, };
@@ -561,6 +618,7 @@ static int init_vq(struct virtio_blk *vblk)
&num_vqs);
if (err)
num_vqs = 1;
+
if (!err && !num_vqs) {
dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
return -EINVAL;
@@ -570,6 +628,17 @@ static int init_vq(struct virtio_blk *vblk)
min_not_zero(num_request_queues, nr_cpu_ids),
num_vqs);
+ num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1);
+
+ vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs;
+ vblk->io_queues[HCTX_TYPE_READ] = 0;
+ vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs;
+
+ dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n",
+ vblk->io_queues[HCTX_TYPE_DEFAULT],
+ vblk->io_queues[HCTX_TYPE_READ],
+ vblk->io_queues[HCTX_TYPE_POLL]);
+
vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
if (!vblk->vqs)
return -ENOMEM;
@@ -582,12 +651,18 @@ static int init_vq(struct virtio_blk *vblk)
goto out;
}
- for (i = 0; i < num_vqs; i++) {
+ for (i = 0; i < num_vqs - num_poll_vqs; i++) {
callbacks[i] = virtblk_done;
snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
names[i] = vblk->vqs[i].name;
}
+ for (; i < num_vqs; i++) {
+ callbacks[i] = NULL;
+ snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i);
+ names[i] = vblk->vqs[i].name;
+ }
+
/* Discover virtqueues and write information to configuration. */
err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
if (err)
@@ -730,19 +805,79 @@ static const struct attribute_group *virtblk_attr_groups[] = {
NULL,
};
-static int virtblk_map_queues(struct blk_mq_tag_set *set)
+static void virtblk_map_queues(struct blk_mq_tag_set *set)
{
struct virtio_blk *vblk = set->driver_data;
+ int i, qoff;
- return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT],
- vblk->vdev, 0);
+ for (i = 0, qoff = 0; i < set->nr_maps; i++) {
+ struct blk_mq_queue_map *map = &set->map[i];
+
+ map->nr_queues = vblk->io_queues[i];
+ map->queue_offset = qoff;
+ qoff += map->nr_queues;
+
+ if (map->nr_queues == 0)
+ continue;
+
+ /*
+ * Regular queues have interrupts and hence CPU affinity is
+ * defined by the core virtio code, but polling queues have
+ * no interrupts so we let the block layer assign CPU affinity.
+ */
+ if (i == HCTX_TYPE_POLL)
+ blk_mq_map_queues(&set->map[i]);
+ else
+ blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0);
+ }
+}
+
+static void virtblk_complete_batch(struct io_comp_batch *iob)
+{
+ struct request *req;
+
+ rq_list_for_each(&iob->req_list, req) {
+ virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
+ virtblk_cleanup_cmd(req);
+ }
+ blk_mq_end_request_batch(iob);
+}
+
+static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
+{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
+ struct virtblk_req *vbr;
+ unsigned long flags;
+ unsigned int len;
+ int found = 0;
+
+ spin_lock_irqsave(&vq->lock, flags);
+
+ while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
+ struct request *req = blk_mq_rq_from_pdu(vbr);
+
+ found++;
+ if (!blk_mq_add_to_batch(req, iob, vbr->status,
+ virtblk_complete_batch))
+ blk_mq_complete_request(req);
+ }
+
+ if (found)
+ blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+
+ spin_unlock_irqrestore(&vq->lock, flags);
+
+ return found;
}
static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
+ .queue_rqs = virtio_queue_rqs,
.commit_rqs = virtio_commit_rqs,
.complete = virtblk_request_done,
.map_queues = virtblk_map_queues,
+ .poll = virtblk_poll,
};
static unsigned int virtblk_queue_depth;
@@ -755,6 +890,8 @@ static int virtblk_probe(struct virtio_device *vdev)
int err, index;
u32 v, blk_size, max_size, sg_elems, opt_io_size;
+ u32 max_discard_segs = 0;
+ u32 discard_granularity = 0;
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
unsigned int queue_depth;
@@ -783,20 +920,15 @@ static int virtblk_probe(struct virtio_device *vdev)
/* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
- /* We need extra sg elements at head and tail. */
- sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) {
err = -ENOMEM;
goto out_free_index;
}
- /* This reference is dropped in virtblk_remove(). */
- refcount_set(&vblk->refs, 1);
mutex_init(&vblk->vdev_mutex);
vblk->vdev = vdev;
- vblk->sg_elems = sg_elems;
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -824,6 +956,9 @@ static int virtblk_probe(struct virtio_device *vdev)
sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
vblk->tag_set.driver_data = vblk;
vblk->tag_set.nr_hw_queues = vblk->num_vqs;
+ vblk->tag_set.nr_maps = 1;
+ if (vblk->io_queues[HCTX_TYPE_POLL])
+ vblk->tag_set.nr_maps = 3;
err = blk_mq_alloc_tag_set(&vblk->tag_set);
if (err)
@@ -843,7 +978,6 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->disk->minors = 1 << PART_BITS;
vblk->disk->private_data = vblk;
vblk->disk->fops = &virtblk_fops;
- vblk->disk->flags |= GENHD_FL_EXT_DEVT;
vblk->index = index;
/* configure queue flush support */
@@ -854,7 +988,7 @@ static int virtblk_probe(struct virtio_device *vdev)
set_disk_ro(vblk->disk, 1);
/* We can handle whatever the host told us to handle. */
- blk_queue_max_segments(q, vblk->sg_elems-2);
+ blk_queue_max_segments(q, sg_elems);
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
@@ -914,23 +1048,15 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_queue_io_opt(q, blk_size * opt_io_size);
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
- q->limits.discard_granularity = blk_size;
-
virtio_cread(vdev, struct virtio_blk_config,
- discard_sector_alignment, &v);
- q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
+ discard_sector_alignment, &discard_granularity);
virtio_cread(vdev, struct virtio_blk_config,
max_discard_sectors, &v);
blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
- &v);
- blk_queue_max_discard_segments(q,
- min_not_zero(v,
- MAX_DISCARD_SEGMENTS));
-
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+ &max_discard_segs);
}
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
@@ -939,6 +1065,85 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
}
+ /* The discard and secure erase limits are combined since the Linux
+ * block layer uses the same limit for both commands.
+ *
+ * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
+ * are negotiated, we will use the minimum between the limits.
+ *
+ * discard sector alignment is set to the minimum between discard_sector_alignment
+ * and secure_erase_sector_alignment.
+ *
+ * max discard sectors is set to the minimum between max_discard_seg and
+ * max_secure_erase_seg.
+ */
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
+
+ virtio_cread(vdev, struct virtio_blk_config,
+ secure_erase_sector_alignment, &v);
+
+ /* secure_erase_sector_alignment should not be zero, the device should set a
+ * valid number of sectors.
+ */
+ if (!v) {
+ dev_err(&vdev->dev,
+ "virtio_blk: secure_erase_sector_alignment can't be 0\n");
+ err = -EINVAL;
+ goto out_cleanup_disk;
+ }
+
+ discard_granularity = min_not_zero(discard_granularity, v);
+
+ virtio_cread(vdev, struct virtio_blk_config,
+ max_secure_erase_sectors, &v);
+
+ /* max_secure_erase_sectors should not be zero, the device should set a
+ * valid number of sectors.
+ */
+ if (!v) {
+ dev_err(&vdev->dev,
+ "virtio_blk: max_secure_erase_sectors can't be 0\n");
+ err = -EINVAL;
+ goto out_cleanup_disk;
+ }
+
+ blk_queue_max_secure_erase_sectors(q, v);
+
+ virtio_cread(vdev, struct virtio_blk_config,
+ max_secure_erase_seg, &v);
+
+ /* max_secure_erase_seg should not be zero, the device should set a
+ * valid number of segments
+ */
+ if (!v) {
+ dev_err(&vdev->dev,
+ "virtio_blk: max_secure_erase_seg can't be 0\n");
+ err = -EINVAL;
+ goto out_cleanup_disk;
+ }
+
+ max_discard_segs = min_not_zero(max_discard_segs, v);
+ }
+
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
+ virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
+ /* max_discard_seg and discard_granularity will be 0 only
+ * if max_discard_seg and discard_sector_alignment fields in the virtio
+ * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated.
+ * In this case, we use default values.
+ */
+ if (!max_discard_segs)
+ max_discard_segs = sg_elems;
+
+ blk_queue_max_discard_segments(q,
+ min(max_discard_segs, MAX_DISCARD_SEGMENTS));
+
+ if (discard_granularity)
+ q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT;
+ else
+ q->limits.discard_granularity = blk_size;
+ }
+
virtblk_update_capacity(vblk, false);
virtio_device_ready(vdev);
@@ -949,7 +1154,7 @@ static int virtblk_probe(struct virtio_device *vdev)
return 0;
out_cleanup_disk:
- blk_cleanup_disk(vblk->disk);
+ put_disk(vblk->disk);
out_free_tags:
blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
@@ -971,13 +1176,12 @@ static void virtblk_remove(struct virtio_device *vdev)
flush_work(&vblk->config_work);
del_gendisk(vblk->disk);
- blk_cleanup_disk(vblk->disk);
blk_mq_free_tag_set(&vblk->tag_set);
mutex_lock(&vblk->vdev_mutex);
/* Stop all the virtqueues. */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
vblk->vdev = NULL;
@@ -987,7 +1191,7 @@ static void virtblk_remove(struct virtio_device *vdev)
mutex_unlock(&vblk->vdev_mutex);
- virtblk_put(vblk);
+ put_disk(vblk->disk);
}
#ifdef CONFIG_PM_SLEEP
@@ -996,7 +1200,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
struct virtio_blk *vblk = vdev->priv;
/* Ensure we don't receive any more interrupts */
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
/* Make sure no work handler is accessing the device. */
flush_work(&vblk->config_work);
@@ -1035,6 +1239,7 @@ static unsigned int features_legacy[] = {
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
+ VIRTIO_BLK_F_SECURE_ERASE,
}
;
static unsigned int features[] = {
@@ -1042,6 +1247,7 @@ static unsigned int features[] = {
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
+ VIRTIO_BLK_F_SECURE_ERASE,
};
static struct virtio_driver virtio_blk = {
@@ -1061,7 +1267,7 @@ static struct virtio_driver virtio_blk = {
#endif
};
-static int __init init(void)
+static int __init virtio_blk_init(void)
{
int error;
@@ -1087,14 +1293,14 @@ out_destroy_workqueue:
return error;
}
-static void __exit fini(void)
+static void __exit virtio_blk_fini(void)
{
unregister_virtio_driver(&virtio_blk);
unregister_blkdev(major, "virtblk");
destroy_workqueue(virtblk_wq);
}
-module_init(init);
-module_exit(fini);
+module_init(virtio_blk_init);
+module_exit(virtio_blk_fini);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 14e452896d04..a5cf7f1e871c 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -442,7 +442,7 @@ static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
* Routines for managing virtual block devices (vbds).
*/
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
- int operation)
+ enum req_op operation)
{
struct xen_vbd *vbd = &blkif->vbd;
int rc = -EACCES;
@@ -931,7 +931,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
if (rc)
goto unmap;
- for (n = 0, i = 0; n < nseg; n++) {
+ for (n = 0; n < nseg; n++) {
uint8_t first_sect, last_sect;
if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
@@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
int status = BLKIF_RSP_OKAY;
struct xen_blkif *blkif = ring->blkif;
struct block_device *bdev = blkif->vbd.bdev;
- unsigned long secure;
struct phys_req preq;
xen_blkif_get(blkif);
@@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
}
ring->st_ds_req++;
- secure = (blkif->vbd.discard_secure &&
- (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
- BLKDEV_DISCARD_SECURE : 0;
+ if (blkif->vbd.discard_secure &&
+ (req->u.discard.flag & BLKIF_DISCARD_SECURE))
+ err = blkdev_issue_secure_erase(bdev,
+ req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
+ else
+ err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
- err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
- req->u.discard.nr_sectors,
- GFP_KERNEL, secure);
fail_response:
if (err == -EOPNOTSUPP) {
pr_debug("discard op failed, not supported\n");
@@ -1192,8 +1193,8 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct bio *bio = NULL;
struct bio **biolist = pending_req->biolist;
int i, nbio = 0;
- int operation;
- int operation_flags = 0;
+ enum req_op operation;
+ blk_opf_t operation_flags = 0;
struct blk_plug plug;
bool drain = false;
struct grant_page **pages = pending_req->segments;
@@ -1326,16 +1327,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
pages[i]->page,
seg[i].nsec << 9,
seg[i].offset) == 0)) {
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
- if (unlikely(bio == NULL))
- goto fail_put_bio;
-
+ bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i),
+ operation | operation_flags,
+ GFP_KERNEL);
biolist[nbio++] = bio;
- bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
- bio_set_op_attrs(bio, operation, operation_flags);
}
preq.sector_number += seg[i].nsec;
@@ -1345,15 +1343,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
if (!bio) {
BUG_ON(operation_flags != REQ_PREFLUSH);
- bio = bio_alloc(GFP_KERNEL, 0);
- if (unlikely(bio == NULL))
- goto fail_put_bio;
-
+ bio = bio_alloc(preq.bdev, 0, operation | operation_flags,
+ GFP_KERNEL);
biolist[nbio++] = bio;
- bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
- bio_set_op_attrs(bio, operation, operation_flags);
}
atomic_set(&pending_req->pendcnt, nbio);
@@ -1381,14 +1375,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
free_req(ring, pending_req);
msleep(1); /* back off a bit */
return -EIO;
-
- fail_put_bio:
- for (i = 0; i < nbio; i++)
- bio_put(biolist[i]);
- atomic_set(&pending_req->pendcnt, 1);
- __end_block_io_op(pending_req, BLK_STS_RESOURCE);
- msleep(1); /* back off a bit */
- return -EIO;
}
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index bda5c815e441..a28473470e66 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -226,6 +226,9 @@ struct xen_vbd {
sector_t size;
unsigned int flush_support:1;
unsigned int discard_secure:1;
+ /* Connect-time cached feature_persistent parameter value */
+ unsigned int feature_gnt_persistent_parm:1;
+ /* Persistent grants feature negotiation result */
unsigned int feature_gnt_persistent:1;
unsigned int overflow_max_grants:1;
};
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 914587aabca0..c0227dfa4688 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/kthread.h>
+#include <linux/pagemap.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"
@@ -156,6 +157,11 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
return 0;
}
+/* Enable the persistent grants feature. */
+static bool feature_persistent = true;
+module_param(feature_persistent, bool, 0644);
+MODULE_PARM_DESC(feature_persistent, "Enables the persistent grants feature");
+
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
@@ -471,19 +477,12 @@ static void xen_vbd_free(struct xen_vbd *vbd)
vbd->bdev = NULL;
}
-/* Enable the persistent grants feature. */
-static bool feature_persistent = true;
-module_param(feature_persistent, bool, 0644);
-MODULE_PARM_DESC(feature_persistent,
- "Enables the persistent grants feature");
-
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
unsigned major, unsigned minor, int readonly,
int cdrom)
{
struct xen_vbd *vbd;
struct block_device *bdev;
- struct request_queue *q;
vbd = &blkif->vbd;
vbd->handle = handle;
@@ -510,20 +509,16 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
}
vbd->size = vbd_sz(vbd);
- if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
+ if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
vbd->type |= VDISK_CDROM;
if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
vbd->type |= VDISK_REMOVABLE;
- q = bdev_get_queue(bdev);
- if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (bdev_write_cache(bdev))
vbd->flush_support = true;
-
- if (q && blk_queue_secure_erase(q))
+ if (bdev_max_secure_erase_sectors(bdev))
vbd->discard_secure = true;
- vbd->feature_gnt_persistent = feature_persistent;
-
pr_debug("Successful creation of handle=%04x (dom=%u)\n",
handle, blkif->domid);
return 0;
@@ -577,22 +572,21 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
int err;
int state = 0;
struct block_device *bdev = be->blkif->vbd.bdev;
- struct request_queue *q = bdev_get_queue(bdev);
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
return;
- if (blk_queue_discard(q)) {
+ if (bdev_max_discard_sectors(bdev)) {
err = xenbus_printf(xbt, dev->nodename,
"discard-granularity", "%u",
- q->limits.discard_granularity);
+ bdev_discard_granularity(bdev));
if (err) {
dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
return;
}
err = xenbus_printf(xbt, dev->nodename,
"discard-alignment", "%u",
- q->limits.discard_alignment);
+ bdev_discard_alignment(bdev));
if (err) {
dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
return;
@@ -913,7 +907,7 @@ again:
xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
- be->blkif->vbd.feature_gnt_persistent);
+ be->blkif->vbd.feature_gnt_persistent_parm);
if (err) {
xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
dev->nodename);
@@ -1090,10 +1084,11 @@ static int connect_ring(struct backend_info *be)
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
return -ENOSYS;
}
- if (blkif->vbd.feature_gnt_persistent)
- blkif->vbd.feature_gnt_persistent =
- xenbus_read_unsigned(dev->otherend,
- "feature-persistent", 0);
+
+ blkif->vbd.feature_gnt_persistent_parm = feature_persistent;
+ blkif->vbd.feature_gnt_persistent =
+ blkif->vbd.feature_gnt_persistent_parm &&
+ xenbus_read_unsigned(dev->otherend, "feature-persistent", 0);
blkif->vbd.overflow_max_grants = 0;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8e3983e456f3..35b9bcad9db9 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -152,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
#define BLK_RING_SIZE(info) \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
@@ -198,6 +202,7 @@ struct blkfront_info
struct gendisk *gd;
u16 sector_size;
unsigned int physical_sector_size;
+ unsigned long vdisk_info;
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
@@ -208,7 +213,11 @@ struct blkfront_info
unsigned int feature_fua:1;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
+ /* Connect-time cached feature_persistent parameter */
+ unsigned int feature_persistent_parm:1;
+ /* Persistent grants feature negotiation result */
unsigned int feature_persistent:1;
+ unsigned int bounce:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
/* Number of 4KB segments handled */
@@ -228,8 +237,6 @@ static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
-#define GRANT_INVALID_REF 0
-
#define PARTS_PER_DISK 16
#define PARTS_PER_EXT_DISK 256
@@ -311,8 +318,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
if (!gnt_list_entry)
goto out_of_memory;
- if (info->feature_persistent) {
- granted_page = alloc_page(GFP_NOIO);
+ if (info->bounce) {
+ granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
if (!granted_page) {
kfree(gnt_list_entry);
goto out_of_memory;
@@ -320,7 +327,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
gnt_list_entry->page = granted_page;
}
- gnt_list_entry->gref = GRANT_INVALID_REF;
+ gnt_list_entry->gref = INVALID_GRANT_REF;
list_add(&gnt_list_entry->node, &rinfo->grants);
i++;
}
@@ -331,7 +338,7 @@ out_of_memory:
list_for_each_entry_safe(gnt_list_entry, n,
&rinfo->grants, node) {
list_del(&gnt_list_entry->node);
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(gnt_list_entry->page);
kfree(gnt_list_entry);
i--;
@@ -349,7 +356,7 @@ static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
node);
list_del(&gnt_list_entry->node);
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
rinfo->persistent_gnts_c--;
return gnt_list_entry;
@@ -371,13 +378,13 @@ static struct grant *get_grant(grant_ref_t *gref_head,
struct grant *gnt_list_entry = get_free_grant(rinfo);
struct blkfront_info *info = rinfo->dev_info;
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
return gnt_list_entry;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (info->feature_persistent)
+ if (info->bounce)
grant_foreign_access(gnt_list_entry, info);
else {
/* Grant access to the GFN passed by the caller */
@@ -395,13 +402,13 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
struct grant *gnt_list_entry = get_free_grant(rinfo);
struct blkfront_info *info = rinfo->dev_info;
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
return gnt_list_entry;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (!info->feature_persistent) {
+ if (!info->bounce) {
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
@@ -505,6 +512,7 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
unsigned command, unsigned long argument)
{
+ struct blkfront_info *info = bdev->bd_disk->private_data;
int i;
switch (command) {
@@ -514,9 +522,9 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
return -EFAULT;
return 0;
case CDROM_GET_CAPABILITY:
- if (bdev->bd_disk->flags & GENHD_FL_CD)
- return 0;
- return -EINVAL;
+ if (!(info->vdisk_info & VDISK_CDROM))
+ return -EINVAL;
+ return 0;
default:
return -EINVAL;
}
@@ -574,7 +582,7 @@ struct setup_rw_req {
struct blkif_request *ring_req;
grant_ref_t gref_head;
unsigned int id;
- /* Only used when persistent grant is used and it's a read request */
+ /* Only used when persistent grant is used and it's a write request */
bool need_copy;
unsigned int bvec_off;
char *bvec_data;
@@ -703,7 +711,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
.grant_idx = 0,
.segments = NULL,
.rinfo = rinfo,
- .need_copy = rq_data_dir(req) && info->feature_persistent,
+ .need_copy = rq_data_dir(req) && info->bounce,
};
/*
@@ -942,13 +950,13 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
if (info->feature_discard) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity ?:
info->physical_sector_size;
rq->limits.discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_max_secure_erase_sectors(rq,
+ get_capacity(gd));
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -981,11 +989,12 @@ static void xlvbd_flush(struct blkfront_info *info)
{
blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
info->feature_fua ? true : false);
- pr_info("blkfront: %s: %s %s %s %s %s\n",
+ pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info),
"persistent grants:", info->feature_persistent ?
"enabled;" : "disabled;", "indirect descriptors:",
- info->max_indirect_segments ? "enabled;" : "disabled;");
+ info->max_indirect_segments ? "enabled;" : "disabled;",
+ "bounce buffer:", info->bounce ? "enabled" : "disabled;");
}
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -1057,9 +1066,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info,
- u16 vdisk_info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info, u16 sector_size,
+ unsigned int physical_sector_size)
{
struct gendisk *gd;
int nr_minors = 1;
@@ -1157,15 +1165,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
xlvbd_flush(info);
- if (vdisk_info & VDISK_READONLY)
+ if (info->vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
-
- if (vdisk_info & VDISK_REMOVABLE)
+ if (info->vdisk_info & VDISK_REMOVABLE)
gd->flags |= GENHD_FL_REMOVABLE;
- if (vdisk_info & VDISK_CDROM)
- gd->flags |= GENHD_FL_CD;
-
return 0;
out_free_tag_set:
@@ -1212,7 +1216,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
- BUG_ON(info->feature_persistent);
+ BUG_ON(info->bounce);
list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
@@ -1224,12 +1228,12 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
list_for_each_entry_safe(persistent_gnt, n,
&rinfo->grants, node) {
list_del(&persistent_gnt->node);
- if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ if (persistent_gnt->gref != INVALID_GRANT_REF) {
gnttab_end_foreign_access(persistent_gnt->gref,
- 0, 0UL);
+ NULL);
rinfo->persistent_gnts_c--;
}
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1249,8 +1253,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
rinfo->shadow[i].req.u.rw.nr_segments;
for (j = 0; j < segs; j++) {
persistent_gnt = rinfo->shadow[i].grants_used[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
- if (info->feature_persistent)
+ gnttab_end_foreign_access(persistent_gnt->gref, NULL);
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1264,7 +1268,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
persistent_gnt = rinfo->shadow[i].indirect_grants[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ gnttab_end_foreign_access(persistent_gnt->gref, NULL);
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1285,14 +1289,8 @@ free_shadow:
flush_work(&rinfo->work);
/* Free resources associated with old device channel. */
- for (i = 0; i < info->nr_ring_pages; i++) {
- if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
- rinfo->ring_ref[i] = GRANT_INVALID_REF;
- }
- }
- free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
- rinfo->ring.sring = NULL;
+ xenbus_teardown_ring((void **)&rinfo->ring.sring, info->nr_ring_pages,
+ rinfo->ring_ref);
if (rinfo->irq)
unbind_from_irqhandler(rinfo->irq, rinfo);
@@ -1375,9 +1373,15 @@ static int blkif_get_final_status(enum blk_req_status s1,
return BLKIF_RSP_OKAY;
}
-static bool blkif_completion(unsigned long *id,
- struct blkfront_ring_info *rinfo,
- struct blkif_response *bret)
+/*
+ * Return values:
+ * 1 response processed.
+ * 0 missing further responses.
+ * -1 error while processing.
+ */
+static int blkif_completion(unsigned long *id,
+ struct blkfront_ring_info *rinfo,
+ struct blkif_response *bret)
{
int i = 0;
struct scatterlist *sg;
@@ -1400,7 +1404,7 @@ static bool blkif_completion(unsigned long *id,
/* Wait the second response if not yet here. */
if (s2->status < REQ_DONE)
- return false;
+ return 0;
bret->status = blkif_get_final_status(s->status,
s2->status);
@@ -1433,7 +1437,7 @@ static bool blkif_completion(unsigned long *id,
data.s = s;
num_sg = s->num_sg;
- if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+ if (bret->operation == BLKIF_OP_READ && info->bounce) {
for_each_sg(s->sg, sg, num_sg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
@@ -1451,57 +1455,58 @@ static bool blkif_completion(unsigned long *id,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < num_grant; i++) {
- if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
+ if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
/*
* If the grant is still mapped by the backend (the
* backend has chosen to make this grant persistent)
* we add it at the head of the list, so it will be
* reused first.
*/
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->grants_used[i]->gref);
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->grants_used[i]->gref);
+ return -1;
+ }
list_add(&s->grants_used[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
/*
- * If the grant is not mapped by the backend we end the
- * foreign access and add it to the tail of the list,
- * so it will not be picked again unless we run out of
- * persistent grants.
+ * If the grant is not mapped by the backend we add it
+ * to the tail of the list, so it will not be picked
+ * again unless we run out of persistent grants.
*/
- gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
- s->grants_used[i]->gref = GRANT_INVALID_REF;
+ s->grants_used[i]->gref = INVALID_GRANT_REF;
list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
- if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->indirect_grants[i]->gref);
+ if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->indirect_grants[i]->gref);
+ return -1;
+ }
list_add(&s->indirect_grants[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
struct page *indirect_page;
- gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
/*
* Add the used indirect page back to the list of
* available pages for indirect grefs.
*/
- if (!info->feature_persistent) {
+ if (!info->bounce) {
indirect_page = s->indirect_grants[i]->page;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
- s->indirect_grants[i]->gref = GRANT_INVALID_REF;
+ s->indirect_grants[i]->gref = INVALID_GRANT_REF;
list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
}
}
}
- return true;
+ return 1;
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1512,9 +1517,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
unsigned long flags;
struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
struct blkfront_info *info = rinfo->dev_info;
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
return IRQ_HANDLED;
+ }
spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
@@ -1530,6 +1538,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
unsigned long id;
unsigned int op;
+ eoiflag = 0;
+
RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
id = bret.id;
@@ -1562,12 +1572,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
}
if (bret.operation != BLKIF_OP_DISCARD) {
+ int ret;
+
/*
* We may need to wait for an extra response if the
* I/O request is split in 2
*/
- if (!blkif_completion(&id, rinfo, &bret))
+ ret = blkif_completion(&id, rinfo, &bret);
+ if (!ret)
continue;
+ if (unlikely(ret < 0))
+ goto err;
}
if (add_id_to_freelist(rinfo, id)) {
@@ -1591,8 +1606,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
- blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_max_discard_sectors(rq, 0);
+ blk_queue_max_secure_erase_sectors(rq, 0);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
@@ -1646,6 +1661,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
err:
@@ -1653,6 +1670,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ /* No EOI in order to avoid further interrupts. */
+
pr_alert("%s disabled for further use\n", info->gd->disk_name);
return IRQ_HANDLED;
}
@@ -1662,38 +1681,23 @@ static int setup_blkring(struct xenbus_device *dev,
struct blkfront_ring_info *rinfo)
{
struct blkif_sring *sring;
- int err, i;
+ int err;
struct blkfront_info *info = rinfo->dev_info;
unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
- grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
-
- for (i = 0; i < info->nr_ring_pages; i++)
- rinfo->ring_ref[i] = GRANT_INVALID_REF;
-
- sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
- get_order(ring_size));
- if (!sring) {
- xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
- return -ENOMEM;
- }
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
- if (err < 0) {
- free_pages((unsigned long)sring, get_order(ring_size));
- rinfo->ring.sring = NULL;
+ err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring,
+ info->nr_ring_pages, rinfo->ring_ref);
+ if (err)
goto fail;
- }
- for (i = 0; i < info->nr_ring_pages; i++)
- rinfo->ring_ref[i] = gref[i];
+
+ XEN_FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
- "blkif", rinfo);
+ err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
+ 0, "blkif", rinfo);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");
@@ -1755,6 +1759,12 @@ abort_transaction:
return err;
}
+/* Enable the persistent grants feature. */
+static bool feature_persistent = true;
+module_param(feature_persistent, bool, 0644);
+MODULE_PARM_DESC(feature_persistent,
+ "Enables the persistent grants feature");
+
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
@@ -1769,6 +1779,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
if (!info)
return -ENODEV;
+ /* Check if backend is trusted. */
+ info->bounce = !xen_blkif_trusted ||
+ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
"max-ring-page-order", 0);
ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
@@ -1842,8 +1856,9 @@ again:
message = "writing protocol";
goto abort_transaction;
}
+ info->feature_persistent_parm = feature_persistent;
err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
- info->feature_persistent);
+ info->feature_persistent_parm);
if (err)
dev_warn(&dev->dev,
"writing persistent grants feature to xenbus");
@@ -1911,12 +1926,6 @@ static int negotiate_mq(struct blkfront_info *info)
return 0;
}
-/* Enable the persistent grants feature. */
-static bool feature_persistent = true;
-module_param(feature_persistent, bool, 0644);
-MODULE_PARM_DESC(feature_persistent,
- "Enables the persistent grants feature");
-
/*
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffer for communication with the backend, and
@@ -1983,8 +1992,6 @@ static int blkfront_probe(struct xenbus_device *dev,
info->vdevice = vdevice;
info->connected = BLKIF_STATE_DISCONNECTED;
- info->feature_persistent = feature_persistent;
-
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
@@ -2119,9 +2126,11 @@ static void blkfront_closing(struct blkfront_info *info)
return;
/* No more blkif_request(). */
- blk_mq_stop_hw_queues(info->rq);
- blk_set_queue_dying(info->rq);
- set_capacity(info->gd, 0);
+ if (info->rq && info->gd) {
+ blk_mq_stop_hw_queues(info->rq);
+ blk_mark_disk_dead(info->gd);
+ set_capacity(info->gd, 0);
+ }
for_each_rinfo(info, rinfo, i) {
/* No more gnttab callback work. */
@@ -2176,17 +2185,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
if (err)
goto out_of_memory;
- if (!info->feature_persistent && info->max_indirect_segments) {
+ if (!info->bounce && info->max_indirect_segments) {
/*
- * We are using indirect descriptors but not persistent
- * grants, we need to allocate a set of pages that can be
+ * We are using indirect descriptors but don't have a bounce
+ * buffer, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&rinfo->indirect_pages));
for (i = 0; i < num; i++) {
- struct page *indirect_page = alloc_page(GFP_KERNEL);
+ struct page *indirect_page = alloc_page(GFP_KERNEL |
+ __GFP_ZERO);
if (!indirect_page)
goto out_of_memory;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
@@ -2275,10 +2285,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0))
blkfront_setup_discard(info);
- if (info->feature_persistent)
+ if (info->feature_persistent_parm)
info->feature_persistent =
!!xenbus_read_unsigned(info->xbdev->otherend,
"feature-persistent", 0);
+ if (info->feature_persistent)
+ info->bounce = true;
indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
"feature-max-indirect-segments", 0);
@@ -2304,7 +2316,6 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long long sectors;
unsigned long sector_size;
unsigned int physical_sector_size;
- unsigned int binfo;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2342,7 +2353,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
- "info", "%u", &binfo,
+ "info", "%u", &info->vdisk_info,
"sector-size", "%lu", &sector_size,
NULL);
if (err) {
@@ -2371,7 +2382,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+ err = xlvbd_alloc_gendisk(sectors, info, sector_size,
physical_sector_size);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -2388,7 +2399,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = device_add_disk(&info->xbdev->dev, info->gd, NULL);
if (err) {
- blk_cleanup_disk(info->gd);
+ put_disk(info->gd);
blk_mq_free_tag_set(&info->tag_set);
info->rq = NULL;
goto fail;
@@ -2463,16 +2474,19 @@ static int blkfront_remove(struct xenbus_device *xbdev)
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
- del_gendisk(info->gd);
+ if (info->gd)
+ del_gendisk(info->gd);
mutex_lock(&blkfront_mutex);
list_del(&info->info_list);
mutex_unlock(&blkfront_mutex);
blkif_free(info, 0);
- xlbd_release_minors(info->gd->first_minor, info->gd->minors);
- blk_cleanup_disk(info->gd);
- blk_mq_free_tag_set(&info->tag_set);
+ if (info->gd) {
+ xlbd_release_minors(info->gd->first_minor, info->gd->minors);
+ put_disk(info->gd);
+ blk_mq_free_tag_set(&info->tag_set);
+ }
kfree(info);
return 0;
@@ -2516,6 +2530,7 @@ static void purge_persistent_grants(struct blkfront_info *info)
for_each_rinfo(info, rinfo, i) {
struct grant *gnt_list_entry, *tmp;
+ LIST_HEAD(grants);
spin_lock_irqsave(&rinfo->ring_lock, flags);
@@ -2526,17 +2541,18 @@ static void purge_persistent_grants(struct blkfront_info *info)
list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
node) {
- if (gnt_list_entry->gref == GRANT_INVALID_REF ||
- gnttab_query_foreign_access(gnt_list_entry->gref))
+ if (gnt_list_entry->gref == INVALID_GRANT_REF ||
+ !gnttab_try_end_foreign_access(gnt_list_entry->gref))
continue;
list_del(&gnt_list_entry->node);
- gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
rinfo->persistent_gnts_c--;
- gnt_list_entry->gref = GRANT_INVALID_REF;
- list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+ gnt_list_entry->gref = INVALID_GRANT_REF;
+ list_add_tail(&gnt_list_entry->node, &grants);
}
+ list_splice_tail(&grants, &rinfo->grants);
+
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
}
}
@@ -2546,6 +2562,13 @@ static void blkfront_delay_work(struct work_struct *work)
struct blkfront_info *info;
bool need_schedule_work = false;
+ /*
+ * Note that when using bounce buffers but not persistent grants
+ * there's no need to run blkfront_delay_work because grants are
+ * revoked in blkif_completion or else an error is reported and the
+ * connection is closed.
+ */
+
mutex_lock(&blkfront_mutex);
list_for_each_entry(info, &info_list, info_list) {
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index ccc52c935faf..c1e85f356e4d 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -327,6 +327,7 @@ static int z2ram_register_disk(int minor)
disk->major = Z2RAM_MAJOR;
disk->first_minor = minor;
disk->minors = 1;
+ disk->flags |= GENHD_FL_NO_PART;
disk->fops = &z2_fops;
if (minor)
sprintf(disk->disk_name, "z2ram%d", minor);
@@ -336,7 +337,7 @@ static int z2ram_register_disk(int minor)
z2ram_gendisk[minor] = disk;
err = add_disk(disk);
if (err)
- blk_cleanup_disk(disk);
+ put_disk(disk);
return err;
}
@@ -383,7 +384,6 @@ static void __exit z2_exit(void)
for (i = 0; i < Z2MINOR_COUNT; i++) {
del_gendisk(z2ram_gendisk[i]);
- blk_cleanup_queue(z2ram_gendisk[i]->queue);
put_disk(z2ram_gendisk[i]);
}
blk_mq_free_tag_set(&tag_set);
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 668c6bf2554d..d4100b0c083e 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
config ZRAM
tristate "Compressed RAM block device support"
- depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO
+ depends on BLOCK && SYSFS && MMU
depends on CRYPTO_LZO || CRYPTO_ZSTD || CRYPTO_LZ4 || CRYPTO_LZ4HC || CRYPTO_842
+ select ZSMALLOC
help
Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
Pages written to these disks are compressed and stored in memory
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 052aa3f65514..0916de952e09 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -63,12 +63,6 @@ static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp)
bool zcomp_available_algorithm(const char *comp)
{
- int i;
-
- i = sysfs_match_string(backends, comp);
- if (i >= 0)
- return true;
-
/*
* Crypto does not ignore a trailing new line symbol,
* so make sure you don't supply a string containing
@@ -217,6 +211,11 @@ struct zcomp *zcomp_create(const char *compress)
struct zcomp *comp;
int error;
+ /*
+ * Crypto API will execute /sbin/modprobe if the compression module
+ * is not loaded yet. We must do it here, otherwise we are about to
+ * call /sbin/modprobe under CPU hot-plug lock.
+ */
if (!zcomp_available_algorithm(compress))
return ERR_PTR(-EINVAL);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 25071126995b..966aab902d19 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -22,7 +22,6 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
-#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@@ -53,7 +52,6 @@ static unsigned int num_devices = 1;
static size_t huge_class_size;
static const struct block_device_operations zram_devops;
-static const struct block_device_operations zram_wb_devops;
static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
@@ -328,8 +326,8 @@ static ssize_t idle_store(struct device *dev,
if (!sysfs_streq(buf, "all")) {
/*
- * If it did not parse as 'all' try to treat it as an integer when
- * we have memory tracking enabled.
+ * If it did not parse as 'all' try to treat it as an integer
+ * when we have memory tracking enabled.
*/
u64 age_sec;
@@ -344,7 +342,10 @@ static ssize_t idle_store(struct device *dev,
if (!init_done(zram))
goto out_unlock;
- /* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */
+ /*
+ * A cutoff_time of 0 marks everything as idle, this is the
+ * "all" behavior.
+ */
mark_idle(zram, cutoff_time);
rv = len;
@@ -498,7 +499,7 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- strlcpy(file_name, buf, PATH_MAX);
+ strscpy(file_name, buf, PATH_MAX);
/* ignore trailing newline */
sz = strlen(file_name);
if (sz > 0 && file_name[sz - 1] == '\n')
@@ -542,17 +543,6 @@ static ssize_t backing_dev_store(struct device *dev,
zram->backing_dev = backing_dev;
zram->bitmap = bitmap;
zram->nr_pages = nr_pages;
- /*
- * With writeback feature, zram does asynchronous IO so it's no longer
- * synchronous device so let's remove synchronous io flag. Othewise,
- * upper layer(e.g., swap) could wait IO completion rather than
- * (submit and return), which will cause system sluggish.
- * Furthermore, when the IO function returns(e.g., swap_readpage),
- * upper layer expects IO was done so it could deallocate the page
- * freely but in fact, IO is going on so finally could cause
- * use-after-free when the IO is really done.
- */
- zram->disk->fops = &zram_wb_devops;
up_write(&zram->init_lock);
pr_info("setup backing device %s\n", file_name);
@@ -617,24 +607,21 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
{
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, 1);
+ bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
+ GFP_NOIO);
if (!bio)
return -ENOMEM;
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
- bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
bio_put(bio);
return -EIO;
}
- if (!parent) {
- bio->bi_opf = REQ_OP_READ;
+ if (!parent)
bio->bi_end_io = zram_page_end_io;
- } else {
- bio->bi_opf = parent->bi_opf;
+ else
bio_chain(bio, parent);
- }
submit_bio(bio);
return 1;
@@ -643,8 +630,8 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
#define PAGE_WB_SIG "page_index="
#define PAGE_WRITEBACK 0
-#define HUGE_WRITEBACK 1
-#define IDLE_WRITEBACK 2
+#define HUGE_WRITEBACK (1<<0)
+#define IDLE_WRITEBACK (1<<1)
static ssize_t writeback_store(struct device *dev,
@@ -664,6 +651,8 @@ static ssize_t writeback_store(struct device *dev,
mode = IDLE_WRITEBACK;
else if (sysfs_streq(buf, "huge"))
mode = HUGE_WRITEBACK;
+ else if (sysfs_streq(buf, "huge_idle"))
+ mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
else {
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
return -EINVAL;
@@ -725,10 +714,10 @@ static ssize_t writeback_store(struct device *dev,
zram_test_flag(zram, index, ZRAM_UNDER_WB))
goto next;
- if (mode == IDLE_WRITEBACK &&
+ if (mode & IDLE_WRITEBACK &&
!zram_test_flag(zram, index, ZRAM_IDLE))
goto next;
- if (mode == HUGE_WRITEBACK &&
+ if (mode & HUGE_WRITEBACK &&
!zram_test_flag(zram, index, ZRAM_HUGE))
goto next;
/*
@@ -747,10 +736,9 @@ static ssize_t writeback_store(struct device *dev,
continue;
}
- bio_init(&bio, &bio_vec, 1);
- bio_set_dev(&bio, zram->bdev);
+ bio_init(&bio, zram->bdev, &bio_vec, 1,
+ REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
- bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
bvec.bv_offset);
@@ -1032,7 +1020,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
char compressor[ARRAY_SIZE(zram->compressor)];
size_t sz;
- strlcpy(compressor, buf, sizeof(compressor));
+ strscpy(compressor, buf, sizeof(compressor));
/* ignore trailing newline */
sz = strlen(compressor);
if (sz > 0 && compressor[sz - 1] == '\n')
@@ -1268,6 +1256,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
struct bio_vec bvec;
zram_slot_unlock(zram, index);
+ /* A null bio means rw_page was used, we must fallback to bio */
+ if (!bio)
+ return -EOPNOTSUPP;
bvec.bv_page = page;
bvec.bv_len = PAGE_SIZE;
@@ -1336,12 +1327,10 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
goto out;
if (is_partial_io(bvec)) {
- void *dst = kmap_atomic(bvec->bv_page);
void *src = kmap_atomic(page);
- memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
+ memcpy_to_bvec(bvec, src + offset);
kunmap_atomic(src);
- kunmap_atomic(dst);
}
out:
if (is_partial_io(bvec))
@@ -1355,7 +1344,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
{
int ret = 0;
unsigned long alloced_pages;
- unsigned long handle = 0;
+ unsigned long handle = -ENOMEM;
unsigned int comp_len = 0;
void *src, *dst, *mem;
struct zcomp_strm *zstrm;
@@ -1401,21 +1390,31 @@ compress_again:
* if we have a 'non-null' handle here then we are coming
* from the slow path and handle has already been allocated.
*/
- if (!handle)
+ if (IS_ERR((void *)handle))
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
__GFP_HIGHMEM |
__GFP_MOVABLE);
- if (!handle) {
+ if (IS_ERR((void *)handle)) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- if (handle)
+ if (IS_ERR((void *)handle))
+ return PTR_ERR((void *)handle);
+
+ if (comp_len != PAGE_SIZE)
goto compress_again;
- return -ENOMEM;
+ /*
+ * If the page is not compressible, you need to acquire the
+ * lock and execute the code below. The zcomp_stream_get()
+ * call is needed to disable the cpu hotplug and grab the
+ * zstrm buffer back. It is necessary that the dereferencing
+ * of the zstrm variable below occurs correctly.
+ */
+ zstrm = zcomp_stream_get(zram->comp);
}
alloced_pages = zs_get_total_pages(zram->mem_pool);
@@ -1472,7 +1471,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
{
int ret;
struct page *page = NULL;
- void *src;
struct bio_vec vec;
vec = *bvec;
@@ -1490,11 +1488,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (ret)
goto out;
- src = kmap_atomic(bvec->bv_page);
dst = kmap_atomic(page);
- memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
+ memcpy_from_bvec(dst + offset, bvec);
kunmap_atomic(dst);
- kunmap_atomic(src);
vec.bv_page = page;
vec.bv_len = PAGE_SIZE;
@@ -1552,7 +1548,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
* Returns 1 if IO request was successfully submitted.
*/
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, unsigned int op, struct bio *bio)
+ int offset, enum req_op op, struct bio *bio)
{
int ret;
@@ -1660,7 +1656,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
- struct page *page, unsigned int op)
+ struct page *page, enum req_op op)
{
int offset, ret;
u32 index;
@@ -1685,9 +1681,10 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
+ start_time = bdev_start_io_acct(bdev->bd_disk->part0,
+ SECTORS_PER_PAGE, op, jiffies);
ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
- disk_end_io_acct(bdev->bd_disk, op, start_time);
+ bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -1715,9 +1712,6 @@ out:
static void zram_reset_device(struct zram *zram)
{
- struct zcomp *comp;
- u64 disksize;
-
down_write(&zram->init_lock);
zram->limit_pages = 0;
@@ -1727,17 +1721,15 @@ static void zram_reset_device(struct zram *zram)
return;
}
- comp = zram->comp;
- disksize = zram->disksize;
- zram->disksize = 0;
-
set_capacity_and_notify(zram->disk, 0);
part_stat_set_all(zram->disk->part0, 0);
/* I/O operation under all of CPU are done so let's free */
- zram_meta_free(zram, disksize);
+ zram_meta_free(zram, zram->disksize);
+ zram->disksize = 0;
memset(&zram->stats, 0, sizeof(zram->stats));
- zcomp_destroy(comp);
+ zcomp_destroy(zram->comp);
+ zram->comp = NULL;
reset_bdev(zram);
up_write(&zram->init_lock);
@@ -1796,7 +1788,7 @@ static ssize_t reset_store(struct device *dev,
int ret;
unsigned short do_reset;
struct zram *zram;
- struct block_device *bdev;
+ struct gendisk *disk;
ret = kstrtou16(buf, 10, &do_reset);
if (ret)
@@ -1806,26 +1798,26 @@ static ssize_t reset_store(struct device *dev,
return -EINVAL;
zram = dev_to_zram(dev);
- bdev = zram->disk->part0;
+ disk = zram->disk;
- mutex_lock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&disk->open_mutex);
/* Do not reset an active device or claimed device */
- if (bdev->bd_openers || zram->claim) {
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ if (disk_openers(disk) || zram->claim) {
+ mutex_unlock(&disk->open_mutex);
return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
/* Make sure all the pending I/O are finished */
- sync_blockdev(bdev);
+ sync_blockdev(disk->part0);
zram_reset_device(zram);
- mutex_lock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&disk->open_mutex);
zram->claim = false;
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
return len;
}
@@ -1853,15 +1845,6 @@ static const struct block_device_operations zram_devops = {
.owner = THIS_MODULE
};
-#ifdef CONFIG_ZRAM_WRITEBACK
-static const struct block_device_operations zram_wb_devops = {
- .open = zram_open,
- .submit_bio = zram_submit_bio,
- .swap_slot_free_notify = zram_slot_free_notify,
- .owner = THIS_MODULE
-};
-#endif
-
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
@@ -1903,14 +1886,7 @@ static struct attribute *zram_disk_attrs[] = {
NULL,
};
-static const struct attribute_group zram_disk_attr_group = {
- .attrs = zram_disk_attrs,
-};
-
-static const struct attribute_group *zram_disk_attr_groups[] = {
- &zram_disk_attr_group,
- NULL,
-};
+ATTRIBUTE_GROUPS(zram_disk);
/*
* Allocate and initialize new zram device. the function returns
@@ -1947,6 +1923,7 @@ static int zram_add(void)
zram->disk->major = zram_major;
zram->disk->first_minor = device_id;
zram->disk->minors = 1;
+ zram->disk->flags |= GENHD_FL_NO_PART;
zram->disk->fops = &zram_devops;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
@@ -1968,7 +1945,6 @@ static int zram_add(void)
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
/*
* zram_bio_discard() will clear all logical blocks if logical block
@@ -1982,18 +1958,18 @@ static int zram_add(void)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
- ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
+ ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
- strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
+ strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
out_cleanup_disk:
- blk_cleanup_disk(zram->disk);
+ put_disk(zram->disk);
out_free_idr:
idr_remove(&zram_index_idr, device_id);
out_free_dev:
@@ -2003,19 +1979,18 @@ out_free_dev:
static int zram_remove(struct zram *zram)
{
- struct block_device *bdev = zram->disk->part0;
bool claimed;
- mutex_lock(&bdev->bd_disk->open_mutex);
- if (bdev->bd_openers) {
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&zram->disk->open_mutex);
+ if (disk_openers(zram->disk)) {
+ mutex_unlock(&zram->disk->open_mutex);
return -EBUSY;
}
claimed = zram->claim;
if (!claimed)
zram->claim = true;
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&zram->disk->open_mutex);
zram_debugfs_unregister(zram);
@@ -2027,7 +2002,7 @@ static int zram_remove(struct zram *zram)
;
} else {
/* Make sure all the pending I/O are finished */
- sync_blockdev(bdev);
+ sync_blockdev(zram->disk->part0);
zram_reset_device(zram);
}
@@ -2045,7 +2020,7 @@ static int zram_remove(struct zram *zram)
*/
zram_reset_device(zram);
- blk_cleanup_disk(zram->disk);
+ put_disk(zram->disk);
kfree(zram);
return 0;
}
@@ -2139,6 +2114,8 @@ static int __init zram_init(void)
{
int ret;
+ BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);
+
ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
zcomp_cpu_up_prepare, zcomp_cpu_dead);
if (ret < 0)
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 80c3b43b4828..a2bda53020fd 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -30,16 +30,15 @@
/*
- * The lower ZRAM_FLAG_SHIFT bits of table.flags is for
- * object size (excluding header), the higher bits is for
- * zram_pageflags.
+ * ZRAM is mainly used for memory efficiency so we want to keep memory
+ * footprint small and thus squeeze size and zram pageflags into a flags
+ * member. The lower ZRAM_FLAG_SHIFT bits is for object size (excluding
+ * header), which cannot be larger than PAGE_SIZE (requiring PAGE_SHIFT
+ * bits), the higher bits are for zram_pageflags.
*
- * zram is mainly used for memory efficiency so we want to keep memory
- * footprint small so we can squeeze size and flags into a field.
- * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header),
- * the higher bits is for zram_pageflags.
+ * We use BUILD_BUG_ON() to make sure that zram pageflags don't overflow.
*/
-#define ZRAM_FLAG_SHIFT 24
+#define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1)
/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {