aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/DAC960.c12
-rw-r--r--drivers/block/Kconfig30
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/brd.c11
-rw-r--r--drivers/block/cciss.c5415
-rw-r--r--drivers/block/cciss.h433
-rw-r--r--drivers/block/cciss_cmd.h269
-rw-r--r--drivers/block/cciss_scsi.c1653
-rw-r--r--drivers/block/cciss_scsi.h79
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_int.h31
-rw-r--r--drivers/block/drbd/drbd_main.c113
-rw-r--r--drivers/block/drbd/drbd_nl.c60
-rw-r--r--drivers/block/drbd/drbd_proc.c10
-rw-r--r--drivers/block/drbd/drbd_receiver.c60
-rw-r--r--drivers/block/drbd/drbd_req.c86
-rw-r--r--drivers/block/drbd/drbd_req.h6
-rw-r--r--drivers/block/drbd/drbd_state.c48
-rw-r--r--drivers/block/drbd/drbd_state.h8
-rw-r--r--drivers/block/drbd/drbd_worker.c48
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c249
-rw-r--r--drivers/block/loop.h9
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/block/null_blk.c1311
-rw-r--r--drivers/block/pktcdvd.c11
-rw-r--r--drivers/block/ps3vram.c10
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/block/rsxx/dev.c6
-rw-r--r--drivers/block/skd_main.c3164
-rw-r--r--drivers/block/skd_s1120.h38
-rw-r--r--drivers/block/virtio_blk.c18
-rw-r--r--drivers/block/xen-blkback/blkback.c9
-rw-r--r--drivers/block/xen-blkback/xenbus.c13
-rw-r--r--drivers/block/xen-blkfront.c8
-rw-r--r--drivers/block/zram/Kconfig12
-rw-r--r--drivers/block/zram/zram_drv.c558
-rw-r--r--drivers/block/zram/zram_drv.h11
39 files changed, 3035 insertions, 10788 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 245a879b036e..255591ab3716 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1678,9 +1678,12 @@ static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T
Enquiry2->FirmwareID.FirmwareType = '0';
Enquiry2->FirmwareID.TurnID = 0;
}
- sprintf(Controller->FirmwareVersion, "%d.%02d-%c-%02d",
- Enquiry2->FirmwareID.MajorVersion, Enquiry2->FirmwareID.MinorVersion,
- Enquiry2->FirmwareID.FirmwareType, Enquiry2->FirmwareID.TurnID);
+ snprintf(Controller->FirmwareVersion, sizeof(Controller->FirmwareVersion),
+ "%d.%02d-%c-%02d",
+ Enquiry2->FirmwareID.MajorVersion,
+ Enquiry2->FirmwareID.MinorVersion,
+ Enquiry2->FirmwareID.FirmwareType,
+ Enquiry2->FirmwareID.TurnID);
if (!((Controller->FirmwareVersion[0] == '5' &&
strcmp(Controller->FirmwareVersion, "5.06") >= 0) ||
(Controller->FirmwareVersion[0] == '4' &&
@@ -6588,7 +6591,8 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
&dac960_proc_fops);
}
- sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
+ snprintf(Controller->ControllerName, sizeof(Controller->ControllerName),
+ "c%d", Controller->ControllerNumber);
ControllerProcEntry = proc_mkdir(Controller->ControllerName,
DAC960_ProcDirectoryEntry);
proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 8ddc98279c8f..4a438b8abe27 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -17,6 +17,7 @@ if BLK_DEV
config BLK_DEV_NULL_BLK
tristate "Null test block driver"
+ depends on CONFIGFS_FS
config BLK_DEV_FD
tristate "Normal floppy disk support"
@@ -111,33 +112,6 @@ source "drivers/block/mtip32xx/Kconfig"
source "drivers/block/zram/Kconfig"
-config BLK_CPQ_CISS_DA
- tristate "Compaq Smart Array 5xxx support"
- depends on PCI
- select CHECK_SIGNATURE
- select BLK_SCSI_REQUEST
- help
- This is the driver for Compaq Smart Array 5xxx controllers.
- Everyone using these boards should say Y here.
- See <file:Documentation/blockdev/cciss.txt> for the current list of
- boards supported by this driver, and for further information
- on the use of this driver.
-
-config CISS_SCSI_TAPE
- bool "SCSI tape drive support for Smart Array 5xxx"
- depends on BLK_CPQ_CISS_DA && PROC_FS
- depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA
- help
- When enabled (Y), this option allows SCSI tape drives and SCSI medium
- changers (tape robots) to be accessed via a Compaq 5xxx array
- controller. (See <file:Documentation/blockdev/cciss.txt> for more details.)
-
- "SCSI support" and "SCSI tape support" must also be enabled for this
- option to work.
-
- When this option is disabled (N), the SCSI portion of the driver
- is not compiled.
-
config BLK_DEV_DAC960
tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
depends on PCI
@@ -470,7 +444,7 @@ config VIRTIO_BLK
depends on VIRTIO
---help---
This is the virtual block driver for virtio. It can be used with
- lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+ QEMU based VMMs (like KVM or Xen). Say Y or M.
config VIRTIO_BLK_SCSI
bool "SCSI passthrough request for the Virtio block driver"
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index ec8c36897b75..1f456d86a190 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
-obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 104b71c0490d..bbd0d186cfc0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -294,14 +294,13 @@ out:
static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
{
- struct block_device *bdev = bio->bi_bdev;
- struct brd_device *brd = bdev->bd_disk->private_data;
+ struct brd_device *brd = bio->bi_disk->private_data;
struct bio_vec bvec;
sector_t sector;
struct bvec_iter iter;
sector = bio->bi_iter.bi_sector;
- if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
+ if (bio_end_sector(bio) > get_capacity(bio->bi_disk))
goto io_error;
bio_for_each_segment(bvec, bio, iter) {
@@ -326,7 +325,11 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct brd_device *brd = bdev->bd_disk->private_data;
- int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
+ int err;
+
+ if (PageTransHuge(page))
+ return -ENOTSUPP;
+ err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
page_endio(page, is_write, err);
return err;
}
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
deleted file mode 100644
index 678af946be30..000000000000
--- a/drivers/block/cciss.c
+++ /dev/null
@@ -1,5415 +0,0 @@
-/*
- * Disk Array driver for HP Smart Array controllers.
- * (C) Copyright 2000, 2007 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- * 02111-1307, USA.
- *
- * Questions/Comments/Bugfixes to iss_storagedev@hp.com
- *
- */
-
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/pci-aspm.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/major.h>
-#include <linux/fs.h>
-#include <linux/bio.h>
-#include <linux/blkpg.h>
-#include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/jiffies.h>
-#include <linux/hdreg.h>
-#include <linux/spinlock.h>
-#include <linux/compat.h>
-#include <linux/mutex.h>
-#include <linux/bitmap.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-
-#include <linux/dma-mapping.h>
-#include <linux/blkdev.h>
-#include <linux/genhd.h>
-#include <linux/completion.h>
-#include <scsi/scsi.h>
-#include <scsi/sg.h>
-#include <scsi/scsi_ioctl.h>
-#include <scsi/scsi_request.h>
-#include <linux/cdrom.h>
-#include <linux/scatterlist.h>
-#include <linux/kthread.h>
-
-#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 3.6.26)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 26)
-
-/* Embedded module documentation macros - see modules.h */
-MODULE_AUTHOR("Hewlett-Packard Company");
-MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
-MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
-MODULE_VERSION("3.6.26");
-MODULE_LICENSE("GPL");
-static int cciss_tape_cmds = 6;
-module_param(cciss_tape_cmds, int, 0644);
-MODULE_PARM_DESC(cciss_tape_cmds,
- "number of commands to allocate for tape devices (default: 6)");
-static int cciss_simple_mode;
-module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(cciss_simple_mode,
- "Use 'simple mode' rather than 'performant mode'");
-
-static int cciss_allow_hpsa;
-module_param(cciss_allow_hpsa, int, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(cciss_allow_hpsa,
- "Prevent cciss driver from accessing hardware known to be "
- " supported by the hpsa driver");
-
-static DEFINE_MUTEX(cciss_mutex);
-static struct proc_dir_entry *proc_cciss;
-
-#include "cciss_cmd.h"
-#include "cciss.h"
-#include <linux/cciss_ioctl.h>
-
-/* define the PCI info for the cards we can control */
-static const struct pci_device_id cciss_pci_device_id[] = {
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS, 0x0E11, 0x4070},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
- {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSA, 0x103C, 0x3225},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3223},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3234},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3235},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3211},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3212},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3213},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3214},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3215},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3237},
- {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x323D},
- {0,}
-};
-
-MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
-
-/* board_id = Subsystem Device ID & Vendor ID
- * product = Marketing Name for the board
- * access = Address of the struct of function pointers
- */
-static struct board_type products[] = {
- {0x40700E11, "Smart Array 5300", &SA5_access},
- {0x40800E11, "Smart Array 5i", &SA5B_access},
- {0x40820E11, "Smart Array 532", &SA5B_access},
- {0x40830E11, "Smart Array 5312", &SA5B_access},
- {0x409A0E11, "Smart Array 641", &SA5_access},
- {0x409B0E11, "Smart Array 642", &SA5_access},
- {0x409C0E11, "Smart Array 6400", &SA5_access},
- {0x409D0E11, "Smart Array 6400 EM", &SA5_access},
- {0x40910E11, "Smart Array 6i", &SA5_access},
- {0x3225103C, "Smart Array P600", &SA5_access},
- {0x3223103C, "Smart Array P800", &SA5_access},
- {0x3234103C, "Smart Array P400", &SA5_access},
- {0x3235103C, "Smart Array P400i", &SA5_access},
- {0x3211103C, "Smart Array E200i", &SA5_access},
- {0x3212103C, "Smart Array E200", &SA5_access},
- {0x3213103C, "Smart Array E200i", &SA5_access},
- {0x3214103C, "Smart Array E200i", &SA5_access},
- {0x3215103C, "Smart Array E200i", &SA5_access},
- {0x3237103C, "Smart Array E500", &SA5_access},
- {0x323D103C, "Smart Array P700m", &SA5_access},
-};
-
-/* How long to wait (in milliseconds) for board to go into simple mode */
-#define MAX_CONFIG_WAIT 30000
-#define MAX_IOCTL_CONFIG_WAIT 1000
-
-/*define how many times we will try a command because of bus resets */
-#define MAX_CMD_RETRIES 3
-
-#define MAX_CTLR 32
-
-/* Originally cciss driver only supports 8 major numbers */
-#define MAX_CTLR_ORIG 8
-
-static ctlr_info_t *hba[MAX_CTLR];
-
-static struct task_struct *cciss_scan_thread;
-static DEFINE_MUTEX(scan_mutex);
-static LIST_HEAD(scan_q);
-
-static void do_cciss_request(struct request_queue *q);
-static irqreturn_t do_cciss_intx(int irq, void *dev_id);
-static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id);
-static int cciss_open(struct block_device *bdev, fmode_t mode);
-static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode);
-static void cciss_release(struct gendisk *disk, fmode_t mode);
-static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg);
-static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-
-static int cciss_revalidate(struct gendisk *disk);
-static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl);
-static int deregister_disk(ctlr_info_t *h, int drv_index,
- int clear_all, int via_ioctl);
-
-static void cciss_read_capacity(ctlr_info_t *h, int logvol,
- sector_t *total_size, unsigned int *block_size);
-static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
- sector_t *total_size, unsigned int *block_size);
-static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
- sector_t total_size,
- unsigned int block_size, InquiryData_struct *inq_buff,
- drive_info_struct *drv);
-static void cciss_interrupt_mode(ctlr_info_t *);
-static int cciss_enter_simple_mode(struct ctlr_info *h);
-static void start_io(ctlr_info_t *h);
-static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
- __u8 page_code, unsigned char scsi3addr[],
- int cmd_type);
-static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
- int attempt_retry);
-static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
-
-static int add_to_scan_list(struct ctlr_info *h);
-static int scan_thread(void *data);
-static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
-static void cciss_hba_release(struct device *dev);
-static void cciss_device_release(struct device *dev);
-static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
-static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
-static inline u32 next_command(ctlr_info_t *h);
-static int cciss_find_cfg_addrs(struct pci_dev *pdev, void __iomem *vaddr,
- u32 *cfg_base_addr, u64 *cfg_base_addr_index,
- u64 *cfg_offset);
-static int cciss_pci_find_memory_BAR(struct pci_dev *pdev,
- unsigned long *memory_bar);
-static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
-static int write_driver_ver_to_cfgtable(CfgTable_struct __iomem *cfgtable);
-
-/* performant mode helper functions */
-static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
- int *bucket_map);
-static void cciss_put_controller_into_performant_mode(ctlr_info_t *h);
-
-#ifdef CONFIG_PROC_FS
-static void cciss_procinit(ctlr_info_t *h);
-#else
-static void cciss_procinit(ctlr_info_t *h)
-{
-}
-#endif /* CONFIG_PROC_FS */
-
-#ifdef CONFIG_COMPAT
-static int cciss_compat_ioctl(struct block_device *, fmode_t,
- unsigned, unsigned long);
-#endif
-
-static const struct block_device_operations cciss_fops = {
- .owner = THIS_MODULE,
- .open = cciss_unlocked_open,
- .release = cciss_release,
- .ioctl = cciss_ioctl,
- .getgeo = cciss_getgeo,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = cciss_compat_ioctl,
-#endif
- .revalidate_disk = cciss_revalidate,
-};
-
-/* set_performant_mode: Modify the tag for cciss performant
- * set bit 0 for pull model, bits 3-1 for block fetch
- * register number
- */
-static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
-{
- if (likely(h->transMethod & CFGTBL_Trans_Performant))
- c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
-}
-
-/*
- * Enqueuing and dequeuing functions for cmdlists.
- */
-static inline void addQ(struct list_head *list, CommandList_struct *c)
-{
- list_add_tail(&c->list, list);
-}
-
-static inline void removeQ(CommandList_struct *c)
-{
- /*
- * After kexec/dump some commands might still
- * be in flight, which the firmware will try
- * to complete. Resetting the firmware doesn't work
- * with old fw revisions, so we have to mark
- * them off as 'stale' to prevent the driver from
- * falling over.
- */
- if (WARN_ON(list_empty(&c->list))) {
- c->cmd_type = CMD_MSG_STALE;
- return;
- }
-
- list_del_init(&c->list);
-}
-
-static void enqueue_cmd_and_start_io(ctlr_info_t *h,
- CommandList_struct *c)
-{
- unsigned long flags;
- set_performant_mode(h, c);
- spin_lock_irqsave(&h->lock, flags);
- addQ(&h->reqQ, c);
- h->Qdepth++;
- if (h->Qdepth > h->maxQsinceinit)
- h->maxQsinceinit = h->Qdepth;
- start_io(h);
- spin_unlock_irqrestore(&h->lock, flags);
-}
-
-static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list,
- int nr_cmds)
-{
- int i;
-
- if (!cmd_sg_list)
- return;
- for (i = 0; i < nr_cmds; i++) {
- kfree(cmd_sg_list[i]);
- cmd_sg_list[i] = NULL;
- }
- kfree(cmd_sg_list);
-}
-
-static SGDescriptor_struct **cciss_allocate_sg_chain_blocks(
- ctlr_info_t *h, int chainsize, int nr_cmds)
-{
- int j;
- SGDescriptor_struct **cmd_sg_list;
-
- if (chainsize <= 0)
- return NULL;
-
- cmd_sg_list = kmalloc(sizeof(*cmd_sg_list) * nr_cmds, GFP_KERNEL);
- if (!cmd_sg_list)
- return NULL;
-
- /* Build up chain blocks for each command */
- for (j = 0; j < nr_cmds; j++) {
- /* Need a block of chainsized s/g elements. */
- cmd_sg_list[j] = kmalloc((chainsize *
- sizeof(*cmd_sg_list[j])), GFP_KERNEL);
- if (!cmd_sg_list[j]) {
- dev_err(&h->pdev->dev, "Cannot get memory "
- "for s/g chains.\n");
- goto clean;
- }
- }
- return cmd_sg_list;
-clean:
- cciss_free_sg_chain_blocks(cmd_sg_list, nr_cmds);
- return NULL;
-}
-
-static void cciss_unmap_sg_chain_block(ctlr_info_t *h, CommandList_struct *c)
-{
- SGDescriptor_struct *chain_sg;
- u64bit temp64;
-
- if (c->Header.SGTotal <= h->max_cmd_sgentries)
- return;
-
- chain_sg = &c->SG[h->max_cmd_sgentries - 1];
- temp64.val32.lower = chain_sg->Addr.lower;
- temp64.val32.upper = chain_sg->Addr.upper;
- pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE);
-}
-
-static int cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c,
- SGDescriptor_struct *chain_block, int len)
-{
- SGDescriptor_struct *chain_sg;
- u64bit temp64;
-
- chain_sg = &c->SG[h->max_cmd_sgentries - 1];
- chain_sg->Ext = CCISS_SG_CHAIN;
- chain_sg->Len = len;
- temp64.val = pci_map_single(h->pdev, chain_block, len,
- PCI_DMA_TODEVICE);
- if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
- dev_warn(&h->pdev->dev,
- "%s: error mapping chain block for DMA\n",
- __func__);
- return -1;
- }
- chain_sg->Addr.lower = temp64.val32.lower;
- chain_sg->Addr.upper = temp64.val32.upper;
-
- return 0;
-}
-
-#include "cciss_scsi.c" /* For SCSI tape support */
-
-static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
- "UNKNOWN"
-};
-#define RAID_UNKNOWN (ARRAY_SIZE(raid_label)-1)
-
-#ifdef CONFIG_PROC_FS
-
-/*
- * Report information about this controller.
- */
-#define ENG_GIG 1000000000
-#define ENG_GIG_FACTOR (ENG_GIG/512)
-#define ENGAGE_SCSI "engage scsi"
-
-static void cciss_seq_show_header(struct seq_file *seq)
-{
- ctlr_info_t *h = seq->private;
-
- seq_printf(seq, "%s: HP %s Controller\n"
- "Board ID: 0x%08lx\n"
- "Firmware Version: %c%c%c%c\n"
- "IRQ: %d\n"
- "Logical drives: %d\n"
- "Current Q depth: %d\n"
- "Current # commands on controller: %d\n"
- "Max Q depth since init: %d\n"
- "Max # commands on controller since init: %d\n"
- "Max SG entries since init: %d\n",
- h->devname,
- h->product_name,
- (unsigned long)h->board_id,
- h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
- h->firm_ver[3], (unsigned int)h->intr[h->intr_mode],
- h->num_luns,
- h->Qdepth, h->commands_outstanding,
- h->maxQsinceinit, h->max_outstanding, h->maxSG);
-
-#ifdef CONFIG_CISS_SCSI_TAPE
- cciss_seq_tape_report(seq, h);
-#endif /* CONFIG_CISS_SCSI_TAPE */
-}
-
-static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
-{
- ctlr_info_t *h = seq->private;
- unsigned long flags;
-
- /* prevent displaying bogus info during configuration
- * or deconfiguration of a logical volume
- */
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring) {
- spin_unlock_irqrestore(&h->lock, flags);
- return ERR_PTR(-EBUSY);
- }
- h->busy_configuring = 1;
- spin_unlock_irqrestore(&h->lock, flags);
-
- if (*pos == 0)
- cciss_seq_show_header(seq);
-
- return pos;
-}
-
-static int cciss_seq_show(struct seq_file *seq, void *v)
-{
- sector_t vol_sz, vol_sz_frac;
- ctlr_info_t *h = seq->private;
- unsigned ctlr = h->ctlr;
- loff_t *pos = v;
- drive_info_struct *drv = h->drv[*pos];
-
- if (*pos > h->highest_lun)
- return 0;
-
- if (drv == NULL) /* it's possible for h->drv[] to have holes. */
- return 0;
-
- if (drv->heads == 0)
- return 0;
-
- vol_sz = drv->nr_blocks;
- vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
- vol_sz_frac *= 100;
- sector_div(vol_sz_frac, ENG_GIG_FACTOR);
-
- if (drv->raid_level < 0 || drv->raid_level > RAID_UNKNOWN)
- drv->raid_level = RAID_UNKNOWN;
- seq_printf(seq, "cciss/c%dd%d:"
- "\t%4u.%02uGB\tRAID %s\n",
- ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac,
- raid_label[drv->raid_level]);
- return 0;
-}
-
-static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- ctlr_info_t *h = seq->private;
-
- if (*pos > h->highest_lun)
- return NULL;
- *pos += 1;
-
- return pos;
-}
-
-static void cciss_seq_stop(struct seq_file *seq, void *v)
-{
- ctlr_info_t *h = seq->private;
-
- /* Only reset h->busy_configuring if we succeeded in setting
- * it during cciss_seq_start. */
- if (v == ERR_PTR(-EBUSY))
- return;
-
- h->busy_configuring = 0;
-}
-
-static const struct seq_operations cciss_seq_ops = {
- .start = cciss_seq_start,
- .show = cciss_seq_show,
- .next = cciss_seq_next,
- .stop = cciss_seq_stop,
-};
-
-static int cciss_seq_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &cciss_seq_ops);
- struct seq_file *seq = file->private_data;
-
- if (!ret)
- seq->private = PDE_DATA(inode);
-
- return ret;
-}
-
-static ssize_t
-cciss_proc_write(struct file *file, const char __user *buf,
- size_t length, loff_t *ppos)
-{
- int err;
- char *buffer;
-
-#ifndef CONFIG_CISS_SCSI_TAPE
- return -EINVAL;
-#endif
-
- if (!buf || length > PAGE_SIZE - 1)
- return -EINVAL;
-
- buffer = memdup_user_nul(buf, length);
- if (IS_ERR(buffer))
- return PTR_ERR(buffer);
-
-#ifdef CONFIG_CISS_SCSI_TAPE
- if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
- struct seq_file *seq = file->private_data;
- ctlr_info_t *h = seq->private;
-
- err = cciss_engage_scsi(h);
- if (err == 0)
- err = length;
- } else
-#endif /* CONFIG_CISS_SCSI_TAPE */
- err = -EINVAL;
- /* might be nice to have "disengage" too, but it's not
- safely possible. (only 1 module use count, lock issues.) */
-
- kfree(buffer);
- return err;
-}
-
-static const struct file_operations cciss_proc_fops = {
- .owner = THIS_MODULE,
- .open = cciss_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
- .write = cciss_proc_write,
-};
-
-static void cciss_procinit(ctlr_info_t *h)
-{
- struct proc_dir_entry *pde;
-
- if (proc_cciss == NULL)
- proc_cciss = proc_mkdir("driver/cciss", NULL);
- if (!proc_cciss)
- return;
- pde = proc_create_data(h->devname, S_IWUSR | S_IRUSR | S_IRGRP |
- S_IROTH, proc_cciss,
- &cciss_proc_fops, h);
-}
-#endif /* CONFIG_PROC_FS */
-
-#define MAX_PRODUCT_NAME_LEN 19
-
-#define to_hba(n) container_of(n, struct ctlr_info, dev)
-#define to_drv(n) container_of(n, drive_info_struct, dev)
-
-/* List of controllers which cannot be hard reset on kexec with reset_devices */
-static u32 unresettable_controller[] = {
- 0x3223103C, /* Smart Array P800 */
- 0x3234103C, /* Smart Array P400 */
- 0x3235103C, /* Smart Array P400i */
- 0x3211103C, /* Smart Array E200i */
- 0x3212103C, /* Smart Array E200 */
- 0x3213103C, /* Smart Array E200i */
- 0x3214103C, /* Smart Array E200i */
- 0x3215103C, /* Smart Array E200i */
- 0x3237103C, /* Smart Array E500 */
- 0x323D103C, /* Smart Array P700m */
- 0x40800E11, /* Smart Array 5i */
- 0x409C0E11, /* Smart Array 6400 */
- 0x409D0E11, /* Smart Array 6400 EM */
- 0x40700E11, /* Smart Array 5300 */
- 0x40820E11, /* Smart Array 532 */
- 0x40830E11, /* Smart Array 5312 */
- 0x409A0E11, /* Smart Array 641 */
- 0x409B0E11, /* Smart Array 642 */
- 0x40910E11, /* Smart Array 6i */
-};
-
-/* List of controllers which cannot even be soft reset */
-static u32 soft_unresettable_controller[] = {
- 0x40800E11, /* Smart Array 5i */
- 0x40700E11, /* Smart Array 5300 */
- 0x40820E11, /* Smart Array 532 */
- 0x40830E11, /* Smart Array 5312 */
- 0x409A0E11, /* Smart Array 641 */
- 0x409B0E11, /* Smart Array 642 */
- 0x40910E11, /* Smart Array 6i */
- /* Exclude 640x boards. These are two pci devices in one slot
- * which share a battery backed cache module. One controls the
- * cache, the other accesses the cache through the one that controls
- * it. If we reset the one controlling the cache, the other will
- * likely not be happy. Just forbid resetting this conjoined mess.
- */
- 0x409C0E11, /* Smart Array 6400 */
- 0x409D0E11, /* Smart Array 6400 EM */
-};
-
-static int ctlr_is_hard_resettable(u32 board_id)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
- if (unresettable_controller[i] == board_id)
- return 0;
- return 1;
-}
-
-static int ctlr_is_soft_resettable(u32 board_id)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
- if (soft_unresettable_controller[i] == board_id)
- return 0;
- return 1;
-}
-
-static int ctlr_is_resettable(u32 board_id)
-{
- return ctlr_is_hard_resettable(board_id) ||
- ctlr_is_soft_resettable(board_id);
-}
-
-static ssize_t host_show_resettable(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct ctlr_info *h = to_hba(dev);
-
- return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id));
-}
-static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
-
-static ssize_t host_store_rescan(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct ctlr_info *h = to_hba(dev);
-
- add_to_scan_list(h);
- wake_up_process(cciss_scan_thread);
- wait_for_completion_interruptible(&h->scan_wait);
-
- return count;
-}
-static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
-
-static ssize_t host_show_transport_mode(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct ctlr_info *h = to_hba(dev);
-
- return snprintf(buf, 20, "%s\n",
- h->transMethod & CFGTBL_Trans_Performant ?
- "performant" : "simple");
-}
-static DEVICE_ATTR(transport_mode, S_IRUGO, host_show_transport_mode, NULL);
-
-static ssize_t dev_show_unique_id(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- __u8 sn[16];
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring)
- ret = -EBUSY;
- else
- memcpy(sn, drv->serial_no, sizeof(sn));
- spin_unlock_irqrestore(&h->lock, flags);
-
- if (ret)
- return ret;
- else
- return snprintf(buf, 16 * 2 + 2,
- "%02X%02X%02X%02X%02X%02X%02X%02X"
- "%02X%02X%02X%02X%02X%02X%02X%02X\n",
- sn[0], sn[1], sn[2], sn[3],
- sn[4], sn[5], sn[6], sn[7],
- sn[8], sn[9], sn[10], sn[11],
- sn[12], sn[13], sn[14], sn[15]);
-}
-static DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL);
-
-static ssize_t dev_show_vendor(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- char vendor[VENDOR_LEN + 1];
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring)
- ret = -EBUSY;
- else
- memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
- spin_unlock_irqrestore(&h->lock, flags);
-
- if (ret)
- return ret;
- else
- return snprintf(buf, sizeof(vendor) + 1, "%s\n", drv->vendor);
-}
-static DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL);
-
-static ssize_t dev_show_model(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- char model[MODEL_LEN + 1];
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring)
- ret = -EBUSY;
- else
- memcpy(model, drv->model, MODEL_LEN + 1);
- spin_unlock_irqrestore(&h->lock, flags);
-
- if (ret)
- return ret;
- else
- return snprintf(buf, sizeof(model) + 1, "%s\n", drv->model);
-}
-static DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL);
-
-static ssize_t dev_show_rev(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- char rev[REV_LEN + 1];
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring)
- ret = -EBUSY;
- else
- memcpy(rev, drv->rev, REV_LEN + 1);
- spin_unlock_irqrestore(&h->lock, flags);
-
- if (ret)
- return ret;
- else
- return snprintf(buf, sizeof(rev) + 1, "%s\n", drv->rev);
-}
-static DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
-
-static ssize_t cciss_show_lunid(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- unsigned long flags;
- unsigned char lunid[8];
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring) {
- spin_unlock_irqrestore(&h->lock, flags);
- return -EBUSY;
- }
- if (!drv->heads) {
- spin_unlock_irqrestore(&h->lock, flags);
- return -ENOTTY;
- }
- memcpy(lunid, drv->LunID, sizeof(lunid));
- spin_unlock_irqrestore(&h->lock, flags);
- return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- lunid[0], lunid[1], lunid[2], lunid[3],
- lunid[4], lunid[5], lunid[6], lunid[7]);
-}
-static DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL);
-
-static ssize_t cciss_show_raid_level(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- int raid;
- unsigned long flags;
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring) {
- spin_unlock_irqrestore(&h->lock, flags);
- return -EBUSY;
- }
- raid = drv->raid_level;
- spin_unlock_irqrestore(&h->lock, flags);
- if (raid < 0 || raid > RAID_UNKNOWN)
- raid = RAID_UNKNOWN;
-
- return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n",
- raid_label[raid]);
-}
-static DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL);
-
-static ssize_t cciss_show_usage_count(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- drive_info_struct *drv = to_drv(dev);
- struct ctlr_info *h = to_hba(drv->dev.parent);
- unsigned long flags;
- int count;
-
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring) {
- spin_unlock_irqrestore(&h->lock, flags);
- return -EBUSY;
- }
- count = drv->usage_count;
- spin_unlock_irqrestore(&h->lock, flags);
- return snprintf(buf, 20, "%d\n", count);
-}
-static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
-
-static struct attribute *cciss_host_attrs[] = {
- &dev_attr_rescan.attr,
- &dev_attr_resettable.attr,
- &dev_attr_transport_mode.attr,
- NULL
-};
-
-static struct attribute_group cciss_host_attr_group = {
- .attrs = cciss_host_attrs,
-};
-
-static const struct attribute_group *cciss_host_attr_groups[] = {
- &cciss_host_attr_group,
- NULL
-};
-
-static struct device_type cciss_host_type = {
- .name = "cciss_host",
- .groups = cciss_host_attr_groups,
- .release = cciss_hba_release,
-};
-
-static struct attribute *cciss_dev_attrs[] = {
- &dev_attr_unique_id.attr,
- &dev_attr_model.attr,
- &dev_attr_vendor.attr,
- &dev_attr_rev.attr,
- &dev_attr_lunid.attr,
- &dev_attr_raid_level.attr,
- &dev_attr_usage_count.attr,
- NULL
-};
-
-static struct attribute_group cciss_dev_attr_group = {
- .attrs = cciss_dev_attrs,
-};
-
-static const struct attribute_group *cciss_dev_attr_groups[] = {
- &cciss_dev_attr_group,
- NULL
-};
-
-static struct device_type cciss_dev_type = {
- .name = "cciss_device",
- .groups = cciss_dev_attr_groups,
- .release = cciss_device_release,
-};
-
-static struct bus_type cciss_bus_type = {
- .name = "cciss",
-};
-
-/*
- * cciss_hba_release is called when the reference count
- * of h->dev goes to zero.
- */
-static void cciss_hba_release(struct device *dev)
-{
- /*
- * nothing to do, but need this to avoid a warning
- * about not having a release handler from lib/kref.c.
- */
-}
-
-/*
- * Initialize sysfs entry for each controller. This sets up and registers
- * the 'cciss#' directory for each individual controller under
- * /sys/bus/pci/devices/<dev>/.
- */
-static int cciss_create_hba_sysfs_entry(struct ctlr_info *h)
-{
- device_initialize(&h->dev);
- h->dev.type = &cciss_host_type;
- h->dev.bus = &cciss_bus_type;
- dev_set_name(&h->dev, "%s", h->devname);
- h->dev.parent = &h->pdev->dev;
-
- return device_add(&h->dev);
-}
-
-/*
- * Remove sysfs entries for an hba.
- */
-static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
-{
- device_del(&h->dev);
- put_device(&h->dev); /* final put. */
-}
-
-/* cciss_device_release is called when the reference count
- * of h->drv[x]dev goes to zero.
- */
-static void cciss_device_release(struct device *dev)
-{
- drive_info_struct *drv = to_drv(dev);
- kfree(drv);
-}
-
-/*
- * Initialize sysfs for each logical drive. This sets up and registers
- * the 'c#d#' directory for each individual logical drive under
- * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from
- * /sys/block/cciss!c#d# to this entry.
- */
-static long cciss_create_ld_sysfs_entry(struct ctlr_info *h,
- int drv_index)
-{
- struct device *dev;
-
- if (h->drv[drv_index]->device_initialized)
- return 0;
-
- dev = &h->drv[drv_index]->dev;
- device_initialize(dev);
- dev->type = &cciss_dev_type;
- dev->bus = &cciss_bus_type;
- dev_set_name(dev, "c%dd%d", h->ctlr, drv_index);
- dev->parent = &h->dev;
- h->drv[drv_index]->device_initialized = 1;
- return device_add(dev);
-}
-
-/*
- * Remove sysfs entries for a logical drive.
- */
-static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index,
- int ctlr_exiting)
-{
- struct device *dev = &h->drv[drv_index]->dev;
-
- /* special case for c*d0, we only destroy it on controller exit */
- if (drv_index == 0 && !ctlr_exiting)
- return;
-
- device_del(dev);
- put_device(dev); /* the "final" put. */
- h->drv[drv_index] = NULL;
-}
-
-/*
- * For operations that cannot sleep, a command block is allocated at init,
- * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
- * which ones are free or in use.
- */
-static CommandList_struct *cmd_alloc(ctlr_info_t *h)
-{
- CommandList_struct *c;
- int i;
- u64bit temp64;
- dma_addr_t cmd_dma_handle, err_dma_handle;
-
- do {
- i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
- if (i == h->nr_cmds)
- return NULL;
- } while (test_and_set_bit(i, h->cmd_pool_bits) != 0);
- c = h->cmd_pool + i;
- memset(c, 0, sizeof(CommandList_struct));
- cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
- c->err_info = h->errinfo_pool + i;
- memset(c->err_info, 0, sizeof(ErrorInfo_struct));
- err_dma_handle = h->errinfo_pool_dhandle
- + i * sizeof(ErrorInfo_struct);
- h->nr_allocs++;
-
- c->cmdindex = i;
-
- INIT_LIST_HEAD(&c->list);
- c->busaddr = (__u32) cmd_dma_handle;
- temp64.val = (__u64) err_dma_handle;
- c->ErrDesc.Addr.lower = temp64.val32.lower;
- c->ErrDesc.Addr.upper = temp64.val32.upper;
- c->ErrDesc.Len = sizeof(ErrorInfo_struct);
-
- c->ctlr = h->ctlr;
- return c;
-}
-
-/* allocate a command using pci_alloc_consistent, used for ioctls,
- * etc., not for the main i/o path.
- */
-static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
-{
- CommandList_struct *c;
- u64bit temp64;
- dma_addr_t cmd_dma_handle, err_dma_handle;
-
- c = pci_zalloc_consistent(h->pdev, sizeof(CommandList_struct),
- &cmd_dma_handle);
- if (c == NULL)
- return NULL;
-
- c->cmdindex = -1;
-
- c->err_info = pci_zalloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
- &err_dma_handle);
-
- if (c->err_info == NULL) {
- pci_free_consistent(h->pdev,
- sizeof(CommandList_struct), c, cmd_dma_handle);
- return NULL;
- }
-
- INIT_LIST_HEAD(&c->list);
- c->busaddr = (__u32) cmd_dma_handle;
- temp64.val = (__u64) err_dma_handle;
- c->ErrDesc.Addr.lower = temp64.val32.lower;
- c->ErrDesc.Addr.upper = temp64.val32.upper;
- c->ErrDesc.Len = sizeof(ErrorInfo_struct);
-
- c->ctlr = h->ctlr;
- return c;
-}
-
-static void cmd_free(ctlr_info_t *h, CommandList_struct *c)
-{
- int i;
-
- i = c - h->cmd_pool;
- clear_bit(i, h->cmd_pool_bits);
- h->nr_frees++;
-}
-
-static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
-{
- u64bit temp64;
-
- temp64.val32.lower = c->ErrDesc.Addr.lower;
- temp64.val32.upper = c->ErrDesc.Addr.upper;
- pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
- c->err_info, (dma_addr_t) temp64.val);
- pci_free_consistent(h->pdev, sizeof(CommandList_struct), c,
- (dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr));
-}
-
-static inline ctlr_info_t *get_host(struct gendisk *disk)
-{
- return disk->queue->queuedata;
-}
-
-static inline drive_info_struct *get_drv(struct gendisk *disk)
-{
- return disk->private_data;
-}
-
-/*
- * Open. Make sure the device is really there.
- */
-static int cciss_open(struct block_device *bdev, fmode_t mode)
-{
- ctlr_info_t *h = get_host(bdev->bd_disk);
- drive_info_struct *drv = get_drv(bdev->bd_disk);
-
- dev_dbg(&h->pdev->dev, "cciss_open %s\n", bdev->bd_disk->disk_name);
- if (drv->busy_configuring)
- return -EBUSY;
- /*
- * Root is allowed to open raw volume zero even if it's not configured
- * so array config can still work. Root is also allowed to open any
- * volume that has a LUN ID, so it can issue IOCTL to reread the
- * disk information. I don't think I really like this
- * but I'm already using way to many device nodes to claim another one
- * for "raw controller".
- */
- if (drv->heads == 0) {
- if (MINOR(bdev->bd_dev) != 0) { /* not node 0? */
- /* if not node 0 make sure it is a partition = 0 */
- if (MINOR(bdev->bd_dev) & 0x0f) {
- return -ENXIO;
- /* if it is, make sure we have a LUN ID */
- } else if (memcmp(drv->LunID, CTLR_LUNID,
- sizeof(drv->LunID))) {
- return -ENXIO;
- }
- }
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- }
- drv->usage_count++;
- h->usage_count++;
- return 0;
-}
-
-static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode)
-{
- int ret;
-
- mutex_lock(&cciss_mutex);
- ret = cciss_open(bdev, mode);
- mutex_unlock(&cciss_mutex);
-
- return ret;
-}
-
-/*
- * Close. Sync first.
- */
-static void cciss_release(struct gendisk *disk, fmode_t mode)
-{
- ctlr_info_t *h;
- drive_info_struct *drv;
-
- mutex_lock(&cciss_mutex);
- h = get_host(disk);
- drv = get_drv(disk);
- dev_dbg(&h->pdev->dev, "cciss_release %s\n", disk->disk_name);
- drv->usage_count--;
- h->usage_count--;
- mutex_unlock(&cciss_mutex);
-}
-
-#ifdef CONFIG_COMPAT
-
-static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg);
-static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg);
-
-static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg)
-{
- switch (cmd) {
- case CCISS_GETPCIINFO:
- case CCISS_GETINTINFO:
- case CCISS_SETINTINFO:
- case CCISS_GETNODENAME:
- case CCISS_SETNODENAME:
- case CCISS_GETHEARTBEAT:
- case CCISS_GETBUSTYPES:
- case CCISS_GETFIRMVER:
- case CCISS_GETDRIVVER:
- case CCISS_REVALIDVOLS:
- case CCISS_DEREGDISK:
- case CCISS_REGNEWDISK:
- case CCISS_REGNEWD:
- case CCISS_RESCANDISK:
- case CCISS_GETLUNINFO:
- return cciss_ioctl(bdev, mode, cmd, arg);
-
- case CCISS_PASSTHRU32:
- return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
- case CCISS_BIG_PASSTHRU32:
- return cciss_ioctl32_big_passthru(bdev, mode, cmd, arg);
-
- default:
- return -ENOIOCTLCMD;
- }
-}
-
-static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg)
-{
- IOCTL32_Command_struct __user *arg32 =
- (IOCTL32_Command_struct __user *) arg;
- IOCTL_Command_struct arg64;
- IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
- int err;
- u32 cp;
-
- memset(&arg64, 0, sizeof(arg64));
- err = 0;
- err |=
- copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
- sizeof(arg64.LUN_info));
- err |=
- copy_from_user(&arg64.Request, &arg32->Request,
- sizeof(arg64.Request));
- err |=
- copy_from_user(&arg64.error_info, &arg32->error_info,
- sizeof(arg64.error_info));
- err |= get_user(arg64.buf_size, &arg32->buf_size);
- err |= get_user(cp, &arg32->buf);
- arg64.buf = compat_ptr(cp);
- err |= copy_to_user(p, &arg64, sizeof(arg64));
-
- if (err)
- return -EFAULT;
-
- err = cciss_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
- if (err)
- return err;
- err |=
- copy_in_user(&arg32->error_info, &p->error_info,
- sizeof(arg32->error_info));
- if (err)
- return -EFAULT;
- return err;
-}
-
-static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg)
-{
- BIG_IOCTL32_Command_struct __user *arg32 =
- (BIG_IOCTL32_Command_struct __user *) arg;
- BIG_IOCTL_Command_struct arg64;
- BIG_IOCTL_Command_struct __user *p =
- compat_alloc_user_space(sizeof(arg64));
- int err;
- u32 cp;
-
- memset(&arg64, 0, sizeof(arg64));
- err = 0;
- err |=
- copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
- sizeof(arg64.LUN_info));
- err |=
- copy_from_user(&arg64.Request, &arg32->Request,
- sizeof(arg64.Request));
- err |=
- copy_from_user(&arg64.error_info, &arg32->error_info,
- sizeof(arg64.error_info));
- err |= get_user(arg64.buf_size, &arg32->buf_size);
- err |= get_user(arg64.malloc_size, &arg32->malloc_size);
- err |= get_user(cp, &arg32->buf);
- arg64.buf = compat_ptr(cp);
- err |= copy_to_user(p, &arg64, sizeof(arg64));
-
- if (err)
- return -EFAULT;
-
- err = cciss_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
- if (err)
- return err;
- err |=
- copy_in_user(&arg32->error_info, &p->error_info,
- sizeof(arg32->error_info));
- if (err)
- return -EFAULT;
- return err;
-}
-#endif
-
-static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- drive_info_struct *drv = get_drv(bdev->bd_disk);
-
- if (!drv->cylinders)
- return -ENXIO;
-
- geo->heads = drv->heads;
- geo->sectors = drv->sectors;
- geo->cylinders = drv->cylinders;
- return 0;
-}
-
-static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
-{
- if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
- c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
- (void)check_for_unit_attention(h, c);
-}
-
-static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp)
-{
- cciss_pci_info_struct pciinfo;
-
- if (!argp)
- return -EINVAL;
- pciinfo.domain = pci_domain_nr(h->pdev->bus);
- pciinfo.bus = h->pdev->bus->number;
- pciinfo.dev_fn = h->pdev->devfn;
- pciinfo.board_id = h->board_id;
- if (copy_to_user(argp, &pciinfo, sizeof(cciss_pci_info_struct)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_getintinfo(ctlr_info_t *h, void __user *argp)
-{
- cciss_coalint_struct intinfo;
- unsigned long flags;
-
- if (!argp)
- return -EINVAL;
- spin_lock_irqsave(&h->lock, flags);
- intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay);
- intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount);
- spin_unlock_irqrestore(&h->lock, flags);
- if (copy_to_user
- (argp, &intinfo, sizeof(cciss_coalint_struct)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_setintinfo(ctlr_info_t *h, void __user *argp)
-{
- cciss_coalint_struct intinfo;
- unsigned long flags;
- int i;
-
- if (!argp)
- return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (copy_from_user(&intinfo, argp, sizeof(intinfo)))
- return -EFAULT;
- if ((intinfo.delay == 0) && (intinfo.count == 0))
- return -EINVAL;
- spin_lock_irqsave(&h->lock, flags);
- /* Update the field, and then ring the doorbell */
- writel(intinfo.delay, &(h->cfgtable->HostWrite.CoalIntDelay));
- writel(intinfo.count, &(h->cfgtable->HostWrite.CoalIntCount));
- writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-
- for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
- if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
- break;
- udelay(1000); /* delay and try again */
- }
- spin_unlock_irqrestore(&h->lock, flags);
- if (i >= MAX_IOCTL_CONFIG_WAIT)
- return -EAGAIN;
- return 0;
-}
-
-static int cciss_getnodename(ctlr_info_t *h, void __user *argp)
-{
- NodeName_type NodeName;
- unsigned long flags;
- int i;
-
- if (!argp)
- return -EINVAL;
- spin_lock_irqsave(&h->lock, flags);
- for (i = 0; i < 16; i++)
- NodeName[i] = readb(&h->cfgtable->ServerName[i]);
- spin_unlock_irqrestore(&h->lock, flags);
- if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_setnodename(ctlr_info_t *h, void __user *argp)
-{
- NodeName_type NodeName;
- unsigned long flags;
- int i;
-
- if (!argp)
- return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (copy_from_user(NodeName, argp, sizeof(NodeName_type)))
- return -EFAULT;
- spin_lock_irqsave(&h->lock, flags);
- /* Update the field, and then ring the doorbell */
- for (i = 0; i < 16; i++)
- writeb(NodeName[i], &h->cfgtable->ServerName[i]);
- writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
- for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
- if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
- break;
- udelay(1000); /* delay and try again */
- }
- spin_unlock_irqrestore(&h->lock, flags);
- if (i >= MAX_IOCTL_CONFIG_WAIT)
- return -EAGAIN;
- return 0;
-}
-
-static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
-{
- Heartbeat_type heartbeat;
- unsigned long flags;
-
- if (!argp)
- return -EINVAL;
- spin_lock_irqsave(&h->lock, flags);
- heartbeat = readl(&h->cfgtable->HeartBeat);
- spin_unlock_irqrestore(&h->lock, flags);
- if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_getbustypes(ctlr_info_t *h, void __user *argp)
-{
- BusTypes_type BusTypes;
- unsigned long flags;
-
- if (!argp)
- return -EINVAL;
- spin_lock_irqsave(&h->lock, flags);
- BusTypes = readl(&h->cfgtable->BusTypes);
- spin_unlock_irqrestore(&h->lock, flags);
- if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_getfirmver(ctlr_info_t *h, void __user *argp)
-{
- FirmwareVer_type firmware;
-
- if (!argp)
- return -EINVAL;
- memcpy(firmware, h->firm_ver, 4);
-
- if (copy_to_user
- (argp, firmware, sizeof(FirmwareVer_type)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_getdrivver(ctlr_info_t *h, void __user *argp)
-{
- DriverVer_type DriverVer = DRIVER_VERSION;
-
- if (!argp)
- return -EINVAL;
- if (copy_to_user(argp, &DriverVer, sizeof(DriverVer_type)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_getluninfo(ctlr_info_t *h,
- struct gendisk *disk, void __user *argp)
-{
- LogvolInfo_struct luninfo;
- drive_info_struct *drv = get_drv(disk);
-
- if (!argp)
- return -EINVAL;
- memcpy(&luninfo.LunID, drv->LunID, sizeof(luninfo.LunID));
- luninfo.num_opens = drv->usage_count;
- luninfo.num_parts = 0;
- if (copy_to_user(argp, &luninfo, sizeof(LogvolInfo_struct)))
- return -EFAULT;
- return 0;
-}
-
-static int cciss_passthru(ctlr_info_t *h, void __user *argp)
-{
- IOCTL_Command_struct iocommand;
- CommandList_struct *c;
- char *buff = NULL;
- u64bit temp64;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- if (!argp)
- return -EINVAL;
-
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- if (copy_from_user
- (&iocommand, argp, sizeof(IOCTL_Command_struct)))
- return -EFAULT;
- if ((iocommand.buf_size < 1) &&
- (iocommand.Request.Type.Direction != XFER_NONE)) {
- return -EINVAL;
- }
- if (iocommand.buf_size > 0) {
- buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
- if (buff == NULL)
- return -EFAULT;
- }
- if (iocommand.Request.Type.Direction == XFER_WRITE) {
- /* Copy the data into the buffer we created */
- if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) {
- kfree(buff);
- return -EFAULT;
- }
- } else {
- memset(buff, 0, iocommand.buf_size);
- }
- c = cmd_special_alloc(h);
- if (!c) {
- kfree(buff);
- return -ENOMEM;
- }
- /* Fill in the command type */
- c->cmd_type = CMD_IOCTL_PEND;
- /* Fill in Command Header */
- c->Header.ReplyQueue = 0; /* unused in simple mode */
- if (iocommand.buf_size > 0) { /* buffer to fill */
- c->Header.SGList = 1;
- c->Header.SGTotal = 1;
- } else { /* no buffers to fill */
- c->Header.SGList = 0;
- c->Header.SGTotal = 0;
- }
- c->Header.LUN = iocommand.LUN_info;
- /* use the kernel address the cmd block for tag */
- c->Header.Tag.lower = c->busaddr;
-
- /* Fill in Request block */
- c->Request = iocommand.Request;
-
- /* Fill in the scatter gather information */
- if (iocommand.buf_size > 0) {
- temp64.val = pci_map_single(h->pdev, buff,
- iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
- c->SG[0].Addr.lower = temp64.val32.lower;
- c->SG[0].Addr.upper = temp64.val32.upper;
- c->SG[0].Len = iocommand.buf_size;
- c->SG[0].Ext = 0; /* we are not chaining */
- }
- c->waiting = &wait;
-
- enqueue_cmd_and_start_io(h, c);
- wait_for_completion(&wait);
-
- /* unlock the buffers from DMA */
- temp64.val32.lower = c->SG[0].Addr.lower;
- temp64.val32.upper = c->SG[0].Addr.upper;
- pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, iocommand.buf_size,
- PCI_DMA_BIDIRECTIONAL);
- check_ioctl_unit_attention(h, c);
-
- /* Copy the error information out */
- iocommand.error_info = *(c->err_info);
- if (copy_to_user(argp, &iocommand, sizeof(IOCTL_Command_struct))) {
- kfree(buff);
- cmd_special_free(h, c);
- return -EFAULT;
- }
-
- if (iocommand.Request.Type.Direction == XFER_READ) {
- /* Copy the data out of the buffer we created */
- if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
- kfree(buff);
- cmd_special_free(h, c);
- return -EFAULT;
- }
- }
- kfree(buff);
- cmd_special_free(h, c);
- return 0;
-}
-
-static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
-{
- BIG_IOCTL_Command_struct *ioc;
- CommandList_struct *c;
- unsigned char **buff = NULL;
- int *buff_size = NULL;
- u64bit temp64;
- BYTE sg_used = 0;
- int status = 0;
- int i;
- DECLARE_COMPLETION_ONSTACK(wait);
- __u32 left;
- __u32 sz;
- BYTE __user *data_ptr;
-
- if (!argp)
- return -EINVAL;
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
- ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
- if (!ioc) {
- status = -ENOMEM;
- goto cleanup1;
- }
- if (copy_from_user(ioc, argp, sizeof(*ioc))) {
- status = -EFAULT;
- goto cleanup1;
- }
- if ((ioc->buf_size < 1) &&
- (ioc->Request.Type.Direction != XFER_NONE)) {
- status = -EINVAL;
- goto cleanup1;
- }
- /* Check kmalloc limits using all SGs */
- if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
- status = -EINVAL;
- goto cleanup1;
- }
- if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
- status = -EINVAL;
- goto cleanup1;
- }
- buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
- if (!buff) {
- status = -ENOMEM;
- goto cleanup1;
- }
- buff_size = kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL);
- if (!buff_size) {
- status = -ENOMEM;
- goto cleanup1;
- }
- left = ioc->buf_size;
- data_ptr = ioc->buf;
- while (left) {
- sz = (left > ioc->malloc_size) ? ioc->malloc_size : left;
- buff_size[sg_used] = sz;
- buff[sg_used] = kmalloc(sz, GFP_KERNEL);
- if (buff[sg_used] == NULL) {
- status = -ENOMEM;
- goto cleanup1;
- }
- if (ioc->Request.Type.Direction == XFER_WRITE) {
- if (copy_from_user(buff[sg_used], data_ptr, sz)) {
- status = -EFAULT;
- goto cleanup1;
- }
- } else {
- memset(buff[sg_used], 0, sz);
- }
- left -= sz;
- data_ptr += sz;
- sg_used++;
- }
- c = cmd_special_alloc(h);
- if (!c) {
- status = -ENOMEM;
- goto cleanup1;
- }
- c->cmd_type = CMD_IOCTL_PEND;
- c->Header.ReplyQueue = 0;
- c->Header.SGList = sg_used;
- c->Header.SGTotal = sg_used;
- c->Header.LUN = ioc->LUN_info;
- c->Header.Tag.lower = c->busaddr;
-
- c->Request = ioc->Request;
- for (i = 0; i < sg_used; i++) {
- temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i],
- PCI_DMA_BIDIRECTIONAL);
- c->SG[i].Addr.lower = temp64.val32.lower;
- c->SG[i].Addr.upper = temp64.val32.upper;
- c->SG[i].Len = buff_size[i];
- c->SG[i].Ext = 0; /* we are not chaining */
- }
- c->waiting = &wait;
- enqueue_cmd_and_start_io(h, c);
- wait_for_completion(&wait);
- /* unlock the buffers from DMA */
- for (i = 0; i < sg_used; i++) {
- temp64.val32.lower = c->SG[i].Addr.lower;
- temp64.val32.upper = c->SG[i].Addr.upper;
- pci_unmap_single(h->pdev,
- (dma_addr_t) temp64.val, buff_size[i],
- PCI_DMA_BIDIRECTIONAL);
- }
- check_ioctl_unit_attention(h, c);
- /* Copy the error information out */
- ioc->error_info = *(c->err_info);
- if (copy_to_user(argp, ioc, sizeof(*ioc))) {
- cmd_special_free(h, c);
- status = -EFAULT;
- goto cleanup1;
- }
- if (ioc->Request.Type.Direction == XFER_READ) {
- /* Copy the data out of the buffer we created */
- BYTE __user *ptr = ioc->buf;
- for (i = 0; i < sg_used; i++) {
- if (copy_to_user(ptr, buff[i], buff_size[i])) {
- cmd_special_free(h, c);
- status = -EFAULT;
- goto cleanup1;
- }
- ptr += buff_size[i];
- }
- }
- cmd_special_free(h, c);
- status = 0;
-cleanup1:
- if (buff) {
- for (i = 0; i < sg_used; i++)
- kfree(buff[i]);
- kfree(buff);
- }
- kfree(buff_size);
- kfree(ioc);
- return status;
-}
-
-static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct gendisk *disk = bdev->bd_disk;
- ctlr_info_t *h = get_host(disk);
- void __user *argp = (void __user *)arg;
-
- dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
- cmd, arg);
- switch (cmd) {
- case CCISS_GETPCIINFO:
- return cciss_getpciinfo(h, argp);
- case CCISS_GETINTINFO:
- return cciss_getintinfo(h, argp);
- case CCISS_SETINTINFO:
- return cciss_setintinfo(h, argp);
- case CCISS_GETNODENAME:
- return cciss_getnodename(h, argp);
- case CCISS_SETNODENAME:
- return cciss_setnodename(h, argp);
- case CCISS_GETHEARTBEAT:
- return cciss_getheartbeat(h, argp);
- case CCISS_GETBUSTYPES:
- return cciss_getbustypes(h, argp);
- case CCISS_GETFIRMVER:
- return cciss_getfirmver(h, argp);
- case CCISS_GETDRIVVER:
- return cciss_getdrivver(h, argp);
- case CCISS_DEREGDISK:
- case CCISS_REGNEWD:
- case CCISS_REVALIDVOLS:
- return rebuild_lun_table(h, 0, 1);
- case CCISS_GETLUNINFO:
- return cciss_getluninfo(h, disk, argp);
- case CCISS_PASSTHRU:
- return cciss_passthru(h, argp);
- case CCISS_BIG_PASSTHRU:
- return cciss_bigpassthru(h, argp);
-
- /* scsi_cmd_blk_ioctl handles these, below, though some are not */
- /* very meaningful for cciss. SG_IO is the main one people want. */
-
- case SG_GET_VERSION_NUM:
- case SG_SET_TIMEOUT:
- case SG_GET_TIMEOUT:
- case SG_GET_RESERVED_SIZE:
- case SG_SET_RESERVED_SIZE:
- case SG_EMULATED_HOST:
- case SG_IO:
- case SCSI_IOCTL_SEND_COMMAND:
- return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
-
- /* scsi_cmd_blk_ioctl would normally handle these, below, but */
- /* they aren't a good fit for cciss, as CD-ROMs are */
- /* not supported, and we don't have any bus/target/lun */
- /* which we present to the kernel. */
-
- case CDROM_SEND_PACKET:
- case CDROMCLOSETRAY:
- case CDROMEJECT:
- case SCSI_IOCTL_GET_IDLUN:
- case SCSI_IOCTL_GET_BUS_NUMBER:
- default:
- return -ENOTTY;
- }
-}
-
-static void cciss_check_queues(ctlr_info_t *h)
-{
- int start_queue = h->next_to_run;
- int i;
-
- /* check to see if we have maxed out the number of commands that can
- * be placed on the queue. If so then exit. We do this check here
- * in case the interrupt we serviced was from an ioctl and did not
- * free any new commands.
- */
- if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds)
- return;
-
- /* We have room on the queue for more commands. Now we need to queue
- * them up. We will also keep track of the next queue to run so
- * that every queue gets a chance to be started first.
- */
- for (i = 0; i < h->highest_lun + 1; i++) {
- int curr_queue = (start_queue + i) % (h->highest_lun + 1);
- /* make sure the disk has been added and the drive is real
- * because this can be called from the middle of init_one.
- */
- if (!h->drv[curr_queue])
- continue;
- if (!(h->drv[curr_queue]->queue) ||
- !(h->drv[curr_queue]->heads))
- continue;
- blk_start_queue(h->gendisk[curr_queue]->queue);
-
- /* check to see if we have maxed out the number of commands
- * that can be placed on the queue.
- */
- if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) {
- if (curr_queue == start_queue) {
- h->next_to_run =
- (start_queue + 1) % (h->highest_lun + 1);
- break;
- } else {
- h->next_to_run = curr_queue;
- break;
- }
- }
- }
-}
-
-static void cciss_softirq_done(struct request *rq)
-{
- CommandList_struct *c = rq->completion_data;
- ctlr_info_t *h = hba[c->ctlr];
- SGDescriptor_struct *curr_sg = c->SG;
- u64bit temp64;
- unsigned long flags;
- int i, ddir;
- int sg_index = 0;
-
- if (c->Request.Type.Direction == XFER_READ)
- ddir = PCI_DMA_FROMDEVICE;
- else
- ddir = PCI_DMA_TODEVICE;
-
- /* command did not need to be retried */
- /* unmap the DMA mapping for all the scatter gather elements */
- for (i = 0; i < c->Header.SGList; i++) {
- if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
- cciss_unmap_sg_chain_block(h, c);
- /* Point to the next block */
- curr_sg = h->cmd_sg_list[c->cmdindex];
- sg_index = 0;
- }
- temp64.val32.lower = curr_sg[sg_index].Addr.lower;
- temp64.val32.upper = curr_sg[sg_index].Addr.upper;
- pci_unmap_page(h->pdev, temp64.val, curr_sg[sg_index].Len,
- ddir);
- ++sg_index;
- }
-
- dev_dbg(&h->pdev->dev, "Done with %p\n", rq);
-
- /* set the residual count for pc requests */
- if (blk_rq_is_passthrough(rq))
- scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
- blk_end_request_all(rq, scsi_req(rq)->result ?
- BLK_STS_IOERR : BLK_STS_OK);
-
- spin_lock_irqsave(&h->lock, flags);
- cmd_free(h, c);
- cciss_check_queues(h);
- spin_unlock_irqrestore(&h->lock, flags);
-}
-
-static inline void log_unit_to_scsi3addr(ctlr_info_t *h,
- unsigned char scsi3addr[], uint32_t log_unit)
-{
- memcpy(scsi3addr, h->drv[log_unit]->LunID,
- sizeof(h->drv[log_unit]->LunID));
-}
-
-/* This function gets the SCSI vendor, model, and revision of a logical drive
- * via the inquiry page 0. Model, vendor, and rev are set to empty strings if
- * they cannot be read.
- */
-static void cciss_get_device_descr(ctlr_info_t *h, int logvol,
- char *vendor, char *model, char *rev)
-{
- int rc;
- InquiryData_struct *inq_buf;
- unsigned char scsi3addr[8];
-
- *vendor = '\0';
- *model = '\0';
- *rev = '\0';
-
- inq_buf = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
- if (!inq_buf)
- return;
-
- log_unit_to_scsi3addr(h, scsi3addr, logvol);
- rc = sendcmd_withirq(h, CISS_INQUIRY, inq_buf, sizeof(*inq_buf), 0,
- scsi3addr, TYPE_CMD);
- if (rc == IO_OK) {
- memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
- vendor[VENDOR_LEN] = '\0';
- memcpy(model, &inq_buf->data_byte[16], MODEL_LEN);
- model[MODEL_LEN] = '\0';
- memcpy(rev, &inq_buf->data_byte[32], REV_LEN);
- rev[REV_LEN] = '\0';
- }
-
- kfree(inq_buf);
- return;
-}
-
-/* This function gets the serial number of a logical drive via
- * inquiry page 0x83. Serial no. is 16 bytes. If the serial
- * number cannot be had, for whatever reason, 16 bytes of 0xff
- * are returned instead.
- */
-static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
- unsigned char *serial_no, int buflen)
-{
-#define PAGE_83_INQ_BYTES 64
- int rc;
- unsigned char *buf;
- unsigned char scsi3addr[8];
-
- if (buflen > 16)
- buflen = 16;
- memset(serial_no, 0xff, buflen);
- buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
- if (!buf)
- return;
- memset(serial_no, 0, buflen);
- log_unit_to_scsi3addr(h, scsi3addr, logvol);
- rc = sendcmd_withirq(h, CISS_INQUIRY, buf,
- PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
- if (rc == IO_OK)
- memcpy(serial_no, &buf[8], buflen);
- kfree(buf);
- return;
-}
-
-static void cciss_initialize_rq(struct request *rq)
-{
- struct scsi_request *sreq = blk_mq_rq_to_pdu(rq);
-
- scsi_req_init(sreq);
-}
-
-/*
- * cciss_add_disk sets up the block device queue for a logical drive
- */
-static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
- int drv_index)
-{
- disk->queue = blk_alloc_queue(GFP_KERNEL);
- if (!disk->queue)
- goto init_queue_failure;
-
- disk->queue->cmd_size = sizeof(struct scsi_request);
- disk->queue->request_fn = do_cciss_request;
- disk->queue->initialize_rq_fn = cciss_initialize_rq;
- disk->queue->queue_lock = &h->lock;
- queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
- if (blk_init_allocated_queue(disk->queue) < 0)
- goto cleanup_queue;
-
- sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
- disk->major = h->major;
- disk->first_minor = drv_index << NWD_SHIFT;
- disk->fops = &cciss_fops;
- if (cciss_create_ld_sysfs_entry(h, drv_index))
- goto cleanup_queue;
- disk->private_data = h->drv[drv_index];
-
- /* Set up queue information */
- blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
-
- /* This is a hardware imposed limit. */
- blk_queue_max_segments(disk->queue, h->maxsgentries);
-
- blk_queue_max_hw_sectors(disk->queue, h->cciss_max_sectors);
-
- blk_queue_softirq_done(disk->queue, cciss_softirq_done);
-
- disk->queue->queuedata = h;
-
- blk_queue_logical_block_size(disk->queue,
- h->drv[drv_index]->block_size);
-
- /* Make sure all queue data is written out before */
- /* setting h->drv[drv_index]->queue, as setting this */
- /* allows the interrupt handler to start the queue */
- wmb();
- h->drv[drv_index]->queue = disk->queue;
- device_add_disk(&h->drv[drv_index]->dev, disk);
- return 0;
-
-cleanup_queue:
- blk_cleanup_queue(disk->queue);
- disk->queue = NULL;
-init_queue_failure:
- return -1;
-}
-
-/* This function will check the usage_count of the drive to be updated/added.
- * If the usage_count is zero and it is a heretofore unknown drive, or,
- * the drive's capacity, geometry, or serial number has changed,
- * then the drive information will be updated and the disk will be
- * re-registered with the kernel. If these conditions don't hold,
- * then it will be left alone for the next reboot. The exception to this
- * is disk 0 which will always be left registered with the kernel since it
- * is also the controller node. Any changes to disk 0 will show up on
- * the next reboot.
- */
-static void cciss_update_drive_info(ctlr_info_t *h, int drv_index,
- int first_time, int via_ioctl)
-{
- struct gendisk *disk;
- InquiryData_struct *inq_buff = NULL;
- unsigned int block_size;
- sector_t total_size;
- unsigned long flags = 0;
- int ret = 0;
- drive_info_struct *drvinfo;
-
- /* Get information about the disk and modify the driver structure */
- inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
- drvinfo = kzalloc(sizeof(*drvinfo), GFP_KERNEL);
- if (inq_buff == NULL || drvinfo == NULL)
- goto mem_msg;
-
- /* testing to see if 16-byte CDBs are already being used */
- if (h->cciss_read == CCISS_READ_16) {
- cciss_read_capacity_16(h, drv_index,
- &total_size, &block_size);
-
- } else {
- cciss_read_capacity(h, drv_index, &total_size, &block_size);
- /* if read_capacity returns all F's this volume is >2TB */
- /* in size so we switch to 16-byte CDB's for all */
- /* read/write ops */
- if (total_size == 0xFFFFFFFFULL) {
- cciss_read_capacity_16(h, drv_index,
- &total_size, &block_size);
- h->cciss_read = CCISS_READ_16;
- h->cciss_write = CCISS_WRITE_16;
- } else {
- h->cciss_read = CCISS_READ_10;
- h->cciss_write = CCISS_WRITE_10;
- }
- }
-
- cciss_geometry_inquiry(h, drv_index, total_size, block_size,
- inq_buff, drvinfo);
- drvinfo->block_size = block_size;
- drvinfo->nr_blocks = total_size + 1;
-
- cciss_get_device_descr(h, drv_index, drvinfo->vendor,
- drvinfo->model, drvinfo->rev);
- cciss_get_serial_no(h, drv_index, drvinfo->serial_no,
- sizeof(drvinfo->serial_no));
- /* Save the lunid in case we deregister the disk, below. */
- memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
- sizeof(drvinfo->LunID));
-
- /* Is it the same disk we already know, and nothing's changed? */
- if (h->drv[drv_index]->raid_level != -1 &&
- ((memcmp(drvinfo->serial_no,
- h->drv[drv_index]->serial_no, 16) == 0) &&
- drvinfo->block_size == h->drv[drv_index]->block_size &&
- drvinfo->nr_blocks == h->drv[drv_index]->nr_blocks &&
- drvinfo->heads == h->drv[drv_index]->heads &&
- drvinfo->sectors == h->drv[drv_index]->sectors &&
- drvinfo->cylinders == h->drv[drv_index]->cylinders))
- /* The disk is unchanged, nothing to update */
- goto freeret;
-
- /* If we get here it's not the same disk, or something's changed,
- * so we need to * deregister it, and re-register it, if it's not
- * in use.
- * If the disk already exists then deregister it before proceeding
- * (unless it's the first disk (for the controller node).
- */
- if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) {
- dev_warn(&h->pdev->dev, "disk %d has changed.\n", drv_index);
- spin_lock_irqsave(&h->lock, flags);
- h->drv[drv_index]->busy_configuring = 1;
- spin_unlock_irqrestore(&h->lock, flags);
-
- /* deregister_disk sets h->drv[drv_index]->queue = NULL
- * which keeps the interrupt handler from starting
- * the queue.
- */
- ret = deregister_disk(h, drv_index, 0, via_ioctl);
- }
-
- /* If the disk is in use return */
- if (ret)
- goto freeret;
-
- /* Save the new information from cciss_geometry_inquiry
- * and serial number inquiry. If the disk was deregistered
- * above, then h->drv[drv_index] will be NULL.
- */
- if (h->drv[drv_index] == NULL) {
- drvinfo->device_initialized = 0;
- h->drv[drv_index] = drvinfo;
- drvinfo = NULL; /* so it won't be freed below. */
- } else {
- /* special case for cxd0 */
- h->drv[drv_index]->block_size = drvinfo->block_size;
- h->drv[drv_index]->nr_blocks = drvinfo->nr_blocks;
- h->drv[drv_index]->heads = drvinfo->heads;
- h->drv[drv_index]->sectors = drvinfo->sectors;
- h->drv[drv_index]->cylinders = drvinfo->cylinders;
- h->drv[drv_index]->raid_level = drvinfo->raid_level;
- memcpy(h->drv[drv_index]->serial_no, drvinfo->serial_no, 16);
- memcpy(h->drv[drv_index]->vendor, drvinfo->vendor,
- VENDOR_LEN + 1);
- memcpy(h->drv[drv_index]->model, drvinfo->model, MODEL_LEN + 1);
- memcpy(h->drv[drv_index]->rev, drvinfo->rev, REV_LEN + 1);
- }
-
- ++h->num_luns;
- disk = h->gendisk[drv_index];
- set_capacity(disk, h->drv[drv_index]->nr_blocks);
-
- /* If it's not disk 0 (drv_index != 0)
- * or if it was disk 0, but there was previously
- * no actual corresponding configured logical drive
- * (raid_leve == -1) then we want to update the
- * logical drive's information.
- */
- if (drv_index || first_time) {
- if (cciss_add_disk(h, disk, drv_index) != 0) {
- cciss_free_gendisk(h, drv_index);
- cciss_free_drive_info(h, drv_index);
- dev_warn(&h->pdev->dev, "could not update disk %d\n",
- drv_index);
- --h->num_luns;
- }
- }
-
-freeret:
- kfree(inq_buff);
- kfree(drvinfo);
- return;
-mem_msg:
- dev_err(&h->pdev->dev, "out of memory\n");
- goto freeret;
-}
-
-/* This function will find the first index of the controllers drive array
- * that has a null drv pointer and allocate the drive info struct and
- * will return that index This is where new drives will be added.
- * If the index to be returned is greater than the highest_lun index for
- * the controller then highest_lun is set * to this new index.
- * If there are no available indexes or if tha allocation fails, then -1
- * is returned. * "controller_node" is used to know if this is a real
- * logical drive, or just the controller node, which determines if this
- * counts towards highest_lun.
- */
-static int cciss_alloc_drive_info(ctlr_info_t *h, int controller_node)
-{
- int i;
- drive_info_struct *drv;
-
- /* Search for an empty slot for our drive info */
- for (i = 0; i < CISS_MAX_LUN; i++) {
-
- /* if not cxd0 case, and it's occupied, skip it. */
- if (h->drv[i] && i != 0)
- continue;
- /*
- * If it's cxd0 case, and drv is alloc'ed already, and a
- * disk is configured there, skip it.
- */
- if (i == 0 && h->drv[i] && h->drv[i]->raid_level != -1)
- continue;
-
- /*
- * We've found an empty slot. Update highest_lun
- * provided this isn't just the fake cxd0 controller node.
- */
- if (i > h->highest_lun && !controller_node)
- h->highest_lun = i;
-
- /* If adding a real disk at cxd0, and it's already alloc'ed */
- if (i == 0 && h->drv[i] != NULL)
- return i;
-
- /*
- * Found an empty slot, not already alloc'ed. Allocate it.
- * Mark it with raid_level == -1, so we know it's new later on.
- */
- drv = kzalloc(sizeof(*drv), GFP_KERNEL);
- if (!drv)
- return -1;
- drv->raid_level = -1; /* so we know it's new */
- h->drv[i] = drv;
- return i;
- }
- return -1;
-}
-
-static void cciss_free_drive_info(ctlr_info_t *h, int drv_index)
-{
- kfree(h->drv[drv_index]);
- h->drv[drv_index] = NULL;
-}
-
-static void cciss_free_gendisk(ctlr_info_t *h, int drv_index)
-{
- put_disk(h->gendisk[drv_index]);
- h->gendisk[drv_index] = NULL;
-}
-
-/* cciss_add_gendisk finds a free hba[]->drv structure
- * and allocates a gendisk if needed, and sets the lunid
- * in the drvinfo structure. It returns the index into
- * the ->drv[] array, or -1 if none are free.
- * is_controller_node indicates whether highest_lun should
- * count this disk, or if it's only being added to provide
- * a means to talk to the controller in case no logical
- * drives have yet been configured.
- */
-static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[],
- int controller_node)
-{
- int drv_index;
-
- drv_index = cciss_alloc_drive_info(h, controller_node);
- if (drv_index == -1)
- return -1;
-
- /*Check if the gendisk needs to be allocated */
- if (!h->gendisk[drv_index]) {
- h->gendisk[drv_index] =
- alloc_disk(1 << NWD_SHIFT);
- if (!h->gendisk[drv_index]) {
- dev_err(&h->pdev->dev,
- "could not allocate a new disk %d\n",
- drv_index);
- goto err_free_drive_info;
- }
- }
- memcpy(h->drv[drv_index]->LunID, lunid,
- sizeof(h->drv[drv_index]->LunID));
- if (cciss_create_ld_sysfs_entry(h, drv_index))
- goto err_free_disk;
- /* Don't need to mark this busy because nobody */
- /* else knows about this disk yet to contend */
- /* for access to it. */
- h->drv[drv_index]->busy_configuring = 0;
- wmb();
- return drv_index;
-
-err_free_disk:
- cciss_free_gendisk(h, drv_index);
-err_free_drive_info:
- cciss_free_drive_info(h, drv_index);
- return -1;
-}
-
-/* This is for the special case of a controller which
- * has no logical drives. In this case, we still need
- * to register a disk so the controller can be accessed
- * by the Array Config Utility.
- */
-static void cciss_add_controller_node(ctlr_info_t *h)
-{
- struct gendisk *disk;
- int drv_index;
-
- if (h->gendisk[0] != NULL) /* already did this? Then bail. */
- return;
-
- drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1);
- if (drv_index == -1)
- goto error;
- h->drv[drv_index]->block_size = 512;
- h->drv[drv_index]->nr_blocks = 0;
- h->drv[drv_index]->heads = 0;
- h->drv[drv_index]->sectors = 0;
- h->drv[drv_index]->cylinders = 0;
- h->drv[drv_index]->raid_level = -1;
- memset(h->drv[drv_index]->serial_no, 0, 16);
- disk = h->gendisk[drv_index];
- if (cciss_add_disk(h, disk, drv_index) == 0)
- return;
- cciss_free_gendisk(h, drv_index);
- cciss_free_drive_info(h, drv_index);
-error:
- dev_warn(&h->pdev->dev, "could not add disk 0.\n");
- return;
-}
-
-/* This function will add and remove logical drives from the Logical
- * drive array of the controller and maintain persistency of ordering
- * so that mount points are preserved until the next reboot. This allows
- * for the removal of logical drives in the middle of the drive array
- * without a re-ordering of those drives.
- * INPUT
- * h = The controller to perform the operations on
- */
-static int rebuild_lun_table(ctlr_info_t *h, int first_time,
- int via_ioctl)
-{
- int num_luns;
- ReportLunData_struct *ld_buff = NULL;
- int return_code;
- int listlength = 0;
- int i;
- int drv_found;
- int drv_index = 0;
- unsigned char lunid[8] = CTLR_LUNID;
- unsigned long flags;
-
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- /* Set busy_configuring flag for this operation */
- spin_lock_irqsave(&h->lock, flags);
- if (h->busy_configuring) {
- spin_unlock_irqrestore(&h->lock, flags);
- return -EBUSY;
- }
- h->busy_configuring = 1;
- spin_unlock_irqrestore(&h->lock, flags);
-
- ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
- if (ld_buff == NULL)
- goto mem_msg;
-
- return_code = sendcmd_withirq(h, CISS_REPORT_LOG, ld_buff,
- sizeof(ReportLunData_struct),
- 0, CTLR_LUNID, TYPE_CMD);
-
- if (return_code == IO_OK)
- listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
- else { /* reading number of logical volumes failed */
- dev_warn(&h->pdev->dev,
- "report logical volume command failed\n");
- listlength = 0;
- goto freeret;
- }
-
- num_luns = listlength / 8; /* 8 bytes per entry */
- if (num_luns > CISS_MAX_LUN) {
- num_luns = CISS_MAX_LUN;
- dev_warn(&h->pdev->dev, "more luns configured"
- " on controller than can be handled by"
- " this driver.\n");
- }
-
- if (num_luns == 0)
- cciss_add_controller_node(h);
-
- /* Compare controller drive array to driver's drive array
- * to see if any drives are missing on the controller due
- * to action of Array Config Utility (user deletes drive)
- * and deregister logical drives which have disappeared.
- */
- for (i = 0; i <= h->highest_lun; i++) {
- int j;
- drv_found = 0;
-
- /* skip holes in the array from already deleted drives */
- if (h->drv[i] == NULL)
- continue;
-
- for (j = 0; j < num_luns; j++) {
- memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid));
- if (memcmp(h->drv[i]->LunID, lunid,
- sizeof(lunid)) == 0) {
- drv_found = 1;
- break;
- }
- }
- if (!drv_found) {
- /* Deregister it from the OS, it's gone. */
- spin_lock_irqsave(&h->lock, flags);
- h->drv[i]->busy_configuring = 1;
- spin_unlock_irqrestore(&h->lock, flags);
- return_code = deregister_disk(h, i, 1, via_ioctl);
- if (h->drv[i] != NULL)
- h->drv[i]->busy_configuring = 0;
- }
- }
-
- /* Compare controller drive array to driver's drive array.
- * Check for updates in the drive information and any new drives
- * on the controller due to ACU adding logical drives, or changing
- * a logical drive's size, etc. Reregister any new/changed drives
- */
- for (i = 0; i < num_luns; i++) {
- int j;
-
- drv_found = 0;
-
- memcpy(lunid, &ld_buff->LUN[i][0], sizeof(lunid));
- /* Find if the LUN is already in the drive array
- * of the driver. If so then update its info
- * if not in use. If it does not exist then find
- * the first free index and add it.
- */
- for (j = 0; j <= h->highest_lun; j++) {
- if (h->drv[j] != NULL &&
- memcmp(h->drv[j]->LunID, lunid,
- sizeof(h->drv[j]->LunID)) == 0) {
- drv_index = j;
- drv_found = 1;
- break;
- }
- }
-
- /* check if the drive was found already in the array */
- if (!drv_found) {
- drv_index = cciss_add_gendisk(h, lunid, 0);
- if (drv_index == -1)
- goto freeret;
- }
- cciss_update_drive_info(h, drv_index, first_time, via_ioctl);
- } /* end for */
-
-freeret:
- kfree(ld_buff);
- h->busy_configuring = 0;
- /* We return -1 here to tell the ACU that we have registered/updated
- * all of the drives that we can and to keep it from calling us
- * additional times.
- */
- return -1;
-mem_msg:
- dev_err(&h->pdev->dev, "out of memory\n");
- h->busy_configuring = 0;
- goto freeret;
-}
-
-static void cciss_clear_drive_info(drive_info_struct *drive_info)
-{
- /* zero out the disk size info */
- drive_info->nr_blocks = 0;
- drive_info->block_size = 0;
- drive_info->heads = 0;
- drive_info->sectors = 0;
- drive_info->cylinders = 0;
- drive_info->raid_level = -1;
- memset(drive_info->serial_no, 0, sizeof(drive_info->serial_no));
- memset(drive_info->model, 0, sizeof(drive_info->model));
- memset(drive_info->rev, 0, sizeof(drive_info->rev));
- memset(drive_info->vendor, 0, sizeof(drive_info->vendor));
- /*
- * don't clear the LUNID though, we need to remember which
- * one this one is.
- */
-}
-
-/* This function will deregister the disk and it's queue from the
- * kernel. It must be called with the controller lock held and the
- * drv structures busy_configuring flag set. It's parameters are:
- *
- * disk = This is the disk to be deregistered
- * drv = This is the drive_info_struct associated with the disk to be
- * deregistered. It contains information about the disk used
- * by the driver.
- * clear_all = This flag determines whether or not the disk information
- * is going to be completely cleared out and the highest_lun
- * reset. Sometimes we want to clear out information about
- * the disk in preparation for re-adding it. In this case
- * the highest_lun should be left unchanged and the LunID
- * should not be cleared.
- * via_ioctl
- * This indicates whether we've reached this path via ioctl.
- * This affects the maximum usage count allowed for c0d0 to be messed with.
- * If this path is reached via ioctl(), then the max_usage_count will
- * be 1, as the process calling ioctl() has got to have the device open.
- * If we get here via sysfs, then the max usage count will be zero.
-*/
-static int deregister_disk(ctlr_info_t *h, int drv_index,
- int clear_all, int via_ioctl)
-{
- int i;
- struct gendisk *disk;
- drive_info_struct *drv;
- int recalculate_highest_lun;
-
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- drv = h->drv[drv_index];
- disk = h->gendisk[drv_index];
-
- /* make sure logical volume is NOT is use */
- if (clear_all || (h->gendisk[0] == disk)) {
- if (drv->usage_count > via_ioctl)
- return -EBUSY;
- } else if (drv->usage_count > 0)
- return -EBUSY;
-
- recalculate_highest_lun = (drv == h->drv[h->highest_lun]);
-
- /* invalidate the devices and deregister the disk. If it is disk
- * zero do not deregister it but just zero out it's values. This
- * allows us to delete disk zero but keep the controller registered.
- */
- if (h->gendisk[0] != disk) {
- struct request_queue *q = disk->queue;
- if (disk->flags & GENHD_FL_UP) {
- cciss_destroy_ld_sysfs_entry(h, drv_index, 0);
- del_gendisk(disk);
- }
- if (q)
- blk_cleanup_queue(q);
- /* If clear_all is set then we are deleting the logical
- * drive, not just refreshing its info. For drives
- * other than disk 0 we will call put_disk. We do not
- * do this for disk 0 as we need it to be able to
- * configure the controller.
- */
- if (clear_all){
- /* This isn't pretty, but we need to find the
- * disk in our array and NULL our the pointer.
- * This is so that we will call alloc_disk if
- * this index is used again later.
- */
- for (i=0; i < CISS_MAX_LUN; i++){
- if (h->gendisk[i] == disk) {
- h->gendisk[i] = NULL;
- break;
- }
- }
- put_disk(disk);
- }
- } else {
- set_capacity(disk, 0);
- cciss_clear_drive_info(drv);
- }
-
- --h->num_luns;
-
- /* if it was the last disk, find the new hightest lun */
- if (clear_all && recalculate_highest_lun) {
- int newhighest = -1;
- for (i = 0; i <= h->highest_lun; i++) {
- /* if the disk has size > 0, it is available */
- if (h->drv[i] && h->drv[i]->heads)
- newhighest = i;
- }
- h->highest_lun = newhighest;
- }
- return 0;
-}
-
-static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
- size_t size, __u8 page_code, unsigned char *scsi3addr,
- int cmd_type)
-{
- u64bit buff_dma_handle;
- int status = IO_OK;
-
- c->cmd_type = CMD_IOCTL_PEND;
- c->Header.ReplyQueue = 0;
- if (buff != NULL) {
- c->Header.SGList = 1;
- c->Header.SGTotal = 1;
- } else {
- c->Header.SGList = 0;
- c->Header.SGTotal = 0;
- }
- c->Header.Tag.lower = c->busaddr;
- memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
-
- c->Request.Type.Type = cmd_type;
- if (cmd_type == TYPE_CMD) {
- switch (cmd) {
- case CISS_INQUIRY:
- /* are we trying to read a vital product page */
- if (page_code != 0) {
- c->Request.CDB[1] = 0x01;
- c->Request.CDB[2] = page_code;
- }
- c->Request.CDBLen = 6;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_READ;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = CISS_INQUIRY;
- c->Request.CDB[4] = size & 0xFF;
- break;
- case CISS_REPORT_LOG:
- case CISS_REPORT_PHYS:
- /* Talking to controller so It's a physical command
- mode = 00 target = 0. Nothing to write.
- */
- c->Request.CDBLen = 12;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_READ;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = cmd;
- c->Request.CDB[6] = (size >> 24) & 0xFF; /* MSB */
- c->Request.CDB[7] = (size >> 16) & 0xFF;
- c->Request.CDB[8] = (size >> 8) & 0xFF;
- c->Request.CDB[9] = size & 0xFF;
- break;
-
- case CCISS_READ_CAPACITY:
- c->Request.CDBLen = 10;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_READ;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = cmd;
- break;
- case CCISS_READ_CAPACITY_16:
- c->Request.CDBLen = 16;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_READ;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = cmd;
- c->Request.CDB[1] = 0x10;
- c->Request.CDB[10] = (size >> 24) & 0xFF;
- c->Request.CDB[11] = (size >> 16) & 0xFF;
- c->Request.CDB[12] = (size >> 8) & 0xFF;
- c->Request.CDB[13] = size & 0xFF;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = cmd;
- break;
- case CCISS_CACHE_FLUSH:
- c->Request.CDBLen = 12;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_WRITE;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = BMIC_WRITE;
- c->Request.CDB[6] = BMIC_CACHE_FLUSH;
- c->Request.CDB[7] = (size >> 8) & 0xFF;
- c->Request.CDB[8] = size & 0xFF;
- break;
- case TEST_UNIT_READY:
- c->Request.CDBLen = 6;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_NONE;
- c->Request.Timeout = 0;
- break;
- default:
- dev_warn(&h->pdev->dev, "Unknown Command 0x%c\n", cmd);
- return IO_ERROR;
- }
- } else if (cmd_type == TYPE_MSG) {
- switch (cmd) {
- case CCISS_ABORT_MSG:
- c->Request.CDBLen = 12;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_WRITE;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = cmd; /* abort */
- c->Request.CDB[1] = 0; /* abort a command */
- /* buff contains the tag of the command to abort */
- memcpy(&c->Request.CDB[4], buff, 8);
- break;
- case CCISS_RESET_MSG:
- c->Request.CDBLen = 16;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_NONE;
- c->Request.Timeout = 0;
- memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
- c->Request.CDB[0] = cmd; /* reset */
- c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET;
- break;
- case CCISS_NOOP_MSG:
- c->Request.CDBLen = 1;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = XFER_WRITE;
- c->Request.Timeout = 0;
- c->Request.CDB[0] = cmd;
- break;
- default:
- dev_warn(&h->pdev->dev,
- "unknown message type %d\n", cmd);
- return IO_ERROR;
- }
- } else {
- dev_warn(&h->pdev->dev, "unknown command type %d\n", cmd_type);
- return IO_ERROR;
- }
- /* Fill in the scatter gather information */
- if (size > 0) {
- buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
- buff, size,
- PCI_DMA_BIDIRECTIONAL);
- c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
- c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
- c->SG[0].Len = size;
- c->SG[0].Ext = 0; /* we are not chaining */
- }
- return status;
-}
-
-static int cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr,
- u8 reset_type)
-{
- CommandList_struct *c;
- int return_status;
-
- c = cmd_alloc(h);
- if (!c)
- return -ENOMEM;
- return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0,
- CTLR_LUNID, TYPE_MSG);
- c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
- if (return_status != IO_OK) {
- cmd_special_free(h, c);
- return return_status;
- }
- c->waiting = NULL;
- enqueue_cmd_and_start_io(h, c);
- /* Don't wait for completion, the reset won't complete. Don't free
- * the command either. This is the last command we will send before
- * re-initializing everything, so it doesn't matter and won't leak.
- */
- return 0;
-}
-
-static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
-{
- switch (c->err_info->ScsiStatus) {
- case SAM_STAT_GOOD:
- return IO_OK;
- case SAM_STAT_CHECK_CONDITION:
- switch (0xf & c->err_info->SenseInfo[2]) {
- case 0: return IO_OK; /* no sense */
- case 1: return IO_OK; /* recovered error */
- default:
- if (check_for_unit_attention(h, c))
- return IO_NEEDS_RETRY;
- dev_warn(&h->pdev->dev, "cmd 0x%02x "
- "check condition, sense key = 0x%02x\n",
- c->Request.CDB[0], c->err_info->SenseInfo[2]);
- }
- break;
- default:
- dev_warn(&h->pdev->dev, "cmd 0x%02x"
- "scsi status = 0x%02x\n",
- c->Request.CDB[0], c->err_info->ScsiStatus);
- break;
- }
- return IO_ERROR;
-}
-
-static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
-{
- int return_status = IO_OK;
-
- if (c->err_info->CommandStatus == CMD_SUCCESS)
- return IO_OK;
-
- switch (c->err_info->CommandStatus) {
- case CMD_TARGET_STATUS:
- return_status = check_target_status(h, c);
- break;
- case CMD_DATA_UNDERRUN:
- case CMD_DATA_OVERRUN:
- /* expected for inquiry and report lun commands */
- break;
- case CMD_INVALID:
- dev_warn(&h->pdev->dev, "cmd 0x%02x is "
- "reported invalid\n", c->Request.CDB[0]);
- return_status = IO_ERROR;
- break;
- case CMD_PROTOCOL_ERR:
- dev_warn(&h->pdev->dev, "cmd 0x%02x has "
- "protocol error\n", c->Request.CDB[0]);
- return_status = IO_ERROR;
- break;
- case CMD_HARDWARE_ERR:
- dev_warn(&h->pdev->dev, "cmd 0x%02x had "
- " hardware error\n", c->Request.CDB[0]);
- return_status = IO_ERROR;
- break;
- case CMD_CONNECTION_LOST:
- dev_warn(&h->pdev->dev, "cmd 0x%02x had "
- "connection lost\n", c->Request.CDB[0]);
- return_status = IO_ERROR;
- break;
- case CMD_ABORTED:
- dev_warn(&h->pdev->dev, "cmd 0x%02x was "
- "aborted\n", c->Request.CDB[0]);
- return_status = IO_ERROR;
- break;
- case CMD_ABORT_FAILED:
- dev_warn(&h->pdev->dev, "cmd 0x%02x reports "
- "abort failed\n", c->Request.CDB[0]);
- return_status = IO_ERROR;
- break;
- case CMD_UNSOLICITED_ABORT:
- dev_warn(&h->pdev->dev, "unsolicited abort 0x%02x\n",
- c->Request.CDB[0]);
- return_status = IO_NEEDS_RETRY;
- break;
- case CMD_UNABORTABLE:
- dev_warn(&h->pdev->dev, "cmd unabortable\n");
- return_status = IO_ERROR;
- break;
- default:
- dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
- "unknown status %x\n", c->Request.CDB[0],
- c->err_info->CommandStatus);
- return_status = IO_ERROR;
- }
- return return_status;
-}
-
-static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
- int attempt_retry)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- u64bit buff_dma_handle;
- int return_status = IO_OK;
-
-resend_cmd2:
- c->waiting = &wait;
- enqueue_cmd_and_start_io(h, c);
-
- wait_for_completion(&wait);
-
- if (c->err_info->CommandStatus == 0 || !attempt_retry)
- goto command_done;
-
- return_status = process_sendcmd_error(h, c);
-
- if (return_status == IO_NEEDS_RETRY &&
- c->retry_count < MAX_CMD_RETRIES) {
- dev_warn(&h->pdev->dev, "retrying 0x%02x\n",
- c->Request.CDB[0]);
- c->retry_count++;
- /* erase the old error information */
- memset(c->err_info, 0, sizeof(ErrorInfo_struct));
- return_status = IO_OK;
- reinit_completion(&wait);
- goto resend_cmd2;
- }
-
-command_done:
- /* unlock the buffers from DMA */
- buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
- buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
- pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
- c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
- return return_status;
-}
-
-static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
- __u8 page_code, unsigned char scsi3addr[],
- int cmd_type)
-{
- CommandList_struct *c;
- int return_status;
-
- c = cmd_special_alloc(h);
- if (!c)
- return -ENOMEM;
- return_status = fill_cmd(h, c, cmd, buff, size, page_code,
- scsi3addr, cmd_type);
- if (return_status == IO_OK)
- return_status = sendcmd_withirq_core(h, c, 1);
-
- cmd_special_free(h, c);
- return return_status;
-}
-
-static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
- sector_t total_size,
- unsigned int block_size,
- InquiryData_struct *inq_buff,
- drive_info_struct *drv)
-{
- int return_code;
- unsigned long t;
- unsigned char scsi3addr[8];
-
- memset(inq_buff, 0, sizeof(InquiryData_struct));
- log_unit_to_scsi3addr(h, scsi3addr, logvol);
- return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
- sizeof(*inq_buff), 0xC1, scsi3addr, TYPE_CMD);
- if (return_code == IO_OK) {
- if (inq_buff->data_byte[8] == 0xFF) {
- dev_warn(&h->pdev->dev,
- "reading geometry failed, volume "
- "does not support reading geometry\n");
- drv->heads = 255;
- drv->sectors = 32; /* Sectors per track */
- drv->cylinders = total_size + 1;
- drv->raid_level = RAID_UNKNOWN;
- } else {
- drv->heads = inq_buff->data_byte[6];
- drv->sectors = inq_buff->data_byte[7];
- drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
- drv->cylinders += inq_buff->data_byte[5];
- drv->raid_level = inq_buff->data_byte[8];
- }
- drv->block_size = block_size;
- drv->nr_blocks = total_size + 1;
- t = drv->heads * drv->sectors;
- if (t > 1) {
- sector_t real_size = total_size + 1;
- unsigned long rem = sector_div(real_size, t);
- if (rem)
- real_size++;
- drv->cylinders = real_size;
- }
- } else { /* Get geometry failed */
- dev_warn(&h->pdev->dev, "reading geometry failed\n");
- }
-}
-
-static void
-cciss_read_capacity(ctlr_info_t *h, int logvol, sector_t *total_size,
- unsigned int *block_size)
-{
- ReadCapdata_struct *buf;
- int return_code;
- unsigned char scsi3addr[8];
-
- buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
- if (!buf) {
- dev_warn(&h->pdev->dev, "out of memory\n");
- return;
- }
-
- log_unit_to_scsi3addr(h, scsi3addr, logvol);
- return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY, buf,
- sizeof(ReadCapdata_struct), 0, scsi3addr, TYPE_CMD);
- if (return_code == IO_OK) {
- *total_size = be32_to_cpu(*(__be32 *) buf->total_size);
- *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
- } else { /* read capacity command failed */
- dev_warn(&h->pdev->dev, "read capacity failed\n");
- *total_size = 0;
- *block_size = BLOCK_SIZE;
- }
- kfree(buf);
-}
-
-static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
- sector_t *total_size, unsigned int *block_size)
-{
- ReadCapdata_struct_16 *buf;
- int return_code;
- unsigned char scsi3addr[8];
-
- buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
- if (!buf) {
- dev_warn(&h->pdev->dev, "out of memory\n");
- return;
- }
-
- log_unit_to_scsi3addr(h, scsi3addr, logvol);
- return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY_16,
- buf, sizeof(ReadCapdata_struct_16),
- 0, scsi3addr, TYPE_CMD);
- if (return_code == IO_OK) {
- *total_size = be64_to_cpu(*(__be64 *) buf->total_size);
- *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
- } else { /* read capacity command failed */
- dev_warn(&h->pdev->dev, "read capacity failed\n");
- *total_size = 0;
- *block_size = BLOCK_SIZE;
- }
- dev_info(&h->pdev->dev, " blocks= %llu block_size= %d\n",
- (unsigned long long)*total_size+1, *block_size);
- kfree(buf);
-}
-
-static int cciss_revalidate(struct gendisk *disk)
-{
- ctlr_info_t *h = get_host(disk);
- drive_info_struct *drv = get_drv(disk);
- int logvol;
- int FOUND = 0;
- unsigned int block_size;
- sector_t total_size;
- InquiryData_struct *inq_buff = NULL;
-
- for (logvol = 0; logvol <= h->highest_lun; logvol++) {
- if (!h->drv[logvol])
- continue;
- if (memcmp(h->drv[logvol]->LunID, drv->LunID,
- sizeof(drv->LunID)) == 0) {
- FOUND = 1;
- break;
- }
- }
-
- if (!FOUND)
- return 1;
-
- inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
- if (inq_buff == NULL) {
- dev_warn(&h->pdev->dev, "out of memory\n");
- return 1;
- }
- if (h->cciss_read == CCISS_READ_10) {
- cciss_read_capacity(h, logvol,
- &total_size, &block_size);
- } else {
- cciss_read_capacity_16(h, logvol,
- &total_size, &block_size);
- }
- cciss_geometry_inquiry(h, logvol, total_size, block_size,
- inq_buff, drv);
-
- blk_queue_logical_block_size(drv->queue, drv->block_size);
- set_capacity(disk, drv->nr_blocks);
-
- kfree(inq_buff);
- return 0;
-}
-
-/*
- * Map (physical) PCI mem into (virtual) kernel space
- */
-static void __iomem *remap_pci_mem(ulong base, ulong size)
-{
- ulong page_base = ((ulong) base) & PAGE_MASK;
- ulong page_offs = ((ulong) base) - page_base;
- void __iomem *page_remapped = ioremap(page_base, page_offs + size);
-
- return page_remapped ? (page_remapped + page_offs) : NULL;
-}
-
-/*
- * Takes jobs of the Q and sends them to the hardware, then puts it on
- * the Q to wait for completion.
- */
-static void start_io(ctlr_info_t *h)
-{
- CommandList_struct *c;
-
- while (!list_empty(&h->reqQ)) {
- c = list_entry(h->reqQ.next, CommandList_struct, list);
- /* can't do anything if fifo is full */
- if ((h->access.fifo_full(h))) {
- dev_warn(&h->pdev->dev, "fifo full\n");
- break;
- }
-
- /* Get the first entry from the Request Q */
- removeQ(c);
- h->Qdepth--;
-
- /* Tell the controller execute command */
- h->access.submit_command(h, c);
-
- /* Put job onto the completed Q */
- addQ(&h->cmpQ, c);
- }
-}
-
-/* Assumes that h->lock is held. */
-/* Zeros out the error record and then resends the command back */
-/* to the controller */
-static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
-{
- /* erase the old error information */
- memset(c->err_info, 0, sizeof(ErrorInfo_struct));
-
- /* add it to software queue and then send it to the controller */
- addQ(&h->reqQ, c);
- h->Qdepth++;
- if (h->Qdepth > h->maxQsinceinit)
- h->maxQsinceinit = h->Qdepth;
-
- start_io(h);
-}
-
-static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
- unsigned int msg_byte, unsigned int host_byte,
- unsigned int driver_byte)
-{
- /* inverse of macros in scsi.h */
- return (scsi_status_byte & 0xff) |
- ((msg_byte & 0xff) << 8) |
- ((host_byte & 0xff) << 16) |
- ((driver_byte & 0xff) << 24);
-}
-
-static inline int evaluate_target_status(ctlr_info_t *h,
- CommandList_struct *cmd, int *retry_cmd)
-{
- unsigned char sense_key;
- unsigned char status_byte, msg_byte, host_byte, driver_byte;
- int error_value;
-
- *retry_cmd = 0;
- /* If we get in here, it means we got "target status", that is, scsi status */
- status_byte = cmd->err_info->ScsiStatus;
- driver_byte = DRIVER_OK;
- msg_byte = cmd->err_info->CommandStatus; /* correct? seems too device specific */
-
- if (blk_rq_is_passthrough(cmd->rq))
- host_byte = DID_PASSTHROUGH;
- else
- host_byte = DID_OK;
-
- error_value = make_status_bytes(status_byte, msg_byte,
- host_byte, driver_byte);
-
- if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
- if (!blk_rq_is_passthrough(cmd->rq))
- dev_warn(&h->pdev->dev, "cmd %p "
- "has SCSI Status 0x%x\n",
- cmd, cmd->err_info->ScsiStatus);
- return error_value;
- }
-
- /* check the sense key */
- sense_key = 0xf & cmd->err_info->SenseInfo[2];
- /* no status or recovered error */
- if (((sense_key == 0x0) || (sense_key == 0x1)) &&
- !blk_rq_is_passthrough(cmd->rq))
- error_value = 0;
-
- if (check_for_unit_attention(h, cmd)) {
- *retry_cmd = !blk_rq_is_passthrough(cmd->rq);
- return 0;
- }
-
- /* Not SG_IO or similar? */
- if (!blk_rq_is_passthrough(cmd->rq)) {
- if (error_value != 0)
- dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION"
- " sense key = 0x%x\n", cmd, sense_key);
- return error_value;
- }
-
- scsi_req(cmd->rq)->sense_len = cmd->err_info->SenseLen;
- return error_value;
-}
-
-/* checks the status of the job and calls complete buffers to mark all
- * buffers for the completed job. Note that this function does not need
- * to hold the hba/queue lock.
- */
-static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
- int timeout)
-{
- int retry_cmd = 0;
- struct request *rq = cmd->rq;
- struct scsi_request *sreq = scsi_req(rq);
-
- sreq->result = 0;
-
- if (timeout)
- sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
-
- if (cmd->err_info->CommandStatus == 0) /* no error has occurred */
- goto after_error_processing;
-
- switch (cmd->err_info->CommandStatus) {
- case CMD_TARGET_STATUS:
- sreq->result = evaluate_target_status(h, cmd, &retry_cmd);
- break;
- case CMD_DATA_UNDERRUN:
- if (!blk_rq_is_passthrough(cmd->rq)) {
- dev_warn(&h->pdev->dev, "cmd %p has"
- " completed with data underrun "
- "reported\n", cmd);
- }
- break;
- case CMD_DATA_OVERRUN:
- if (!blk_rq_is_passthrough(cmd->rq))
- dev_warn(&h->pdev->dev, "cciss: cmd %p has"
- " completed with data overrun "
- "reported\n", cmd);
- break;
- case CMD_INVALID:
- dev_warn(&h->pdev->dev, "cciss: cmd %p is "
- "reported invalid\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- case CMD_PROTOCOL_ERR:
- dev_warn(&h->pdev->dev, "cciss: cmd %p has "
- "protocol error\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- case CMD_HARDWARE_ERR:
- dev_warn(&h->pdev->dev, "cciss: cmd %p had "
- " hardware error\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- case CMD_CONNECTION_LOST:
- dev_warn(&h->pdev->dev, "cciss: cmd %p had "
- "connection lost\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- case CMD_ABORTED:
- dev_warn(&h->pdev->dev, "cciss: cmd %p was "
- "aborted\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ABORT);
- break;
- case CMD_ABORT_FAILED:
- dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
- "abort failed\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- case CMD_UNSOLICITED_ABORT:
- dev_warn(&h->pdev->dev, "cciss%d: unsolicited "
- "abort %p\n", h->ctlr, cmd);
- if (cmd->retry_count < MAX_CMD_RETRIES) {
- retry_cmd = 1;
- dev_warn(&h->pdev->dev, "retrying %p\n", cmd);
- cmd->retry_count++;
- } else
- dev_warn(&h->pdev->dev,
- "%p retried too many times\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ABORT);
- break;
- case CMD_TIMEOUT:
- dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- case CMD_UNABORTABLE:
- dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- break;
- default:
- dev_warn(&h->pdev->dev, "cmd %p returned "
- "unknown status %x\n", cmd,
- cmd->err_info->CommandStatus);
- sreq->result = make_status_bytes(SAM_STAT_GOOD,
- cmd->err_info->CommandStatus, DRIVER_OK,
- blk_rq_is_passthrough(cmd->rq) ?
- DID_PASSTHROUGH : DID_ERROR);
- }
-
-after_error_processing:
-
- /* We need to return this command */
- if (retry_cmd) {
- resend_cciss_cmd(h, cmd);
- return;
- }
- cmd->rq->completion_data = cmd;
- blk_complete_request(cmd->rq);
-}
-
-static inline u32 cciss_tag_contains_index(u32 tag)
-{
-#define DIRECT_LOOKUP_BIT 0x10
- return tag & DIRECT_LOOKUP_BIT;
-}
-
-static inline u32 cciss_tag_to_index(u32 tag)
-{
-#define DIRECT_LOOKUP_SHIFT 5
- return tag >> DIRECT_LOOKUP_SHIFT;
-}
-
-static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag)
-{
-#define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1)
-#define CCISS_SIMPLE_ERROR_BITS 0x03
- if (likely(h->transMethod & CFGTBL_Trans_Performant))
- return tag & ~CCISS_PERF_ERROR_BITS;
- return tag & ~CCISS_SIMPLE_ERROR_BITS;
-}
-
-static inline void cciss_mark_tag_indexed(u32 *tag)
-{
- *tag |= DIRECT_LOOKUP_BIT;
-}
-
-static inline void cciss_set_tag_index(u32 *tag, u32 index)
-{
- *tag |= (index << DIRECT_LOOKUP_SHIFT);
-}
-
-/*
- * Get a request and submit it to the controller.
- */
-static void do_cciss_request(struct request_queue *q)
-{
- ctlr_info_t *h = q->queuedata;
- CommandList_struct *c;
- sector_t start_blk;
- int seg;
- struct request *creq;
- u64bit temp64;
- struct scatterlist *tmp_sg;
- SGDescriptor_struct *curr_sg;
- drive_info_struct *drv;
- int i, dir;
- int sg_index = 0;
- int chained = 0;
-
- queue:
- creq = blk_peek_request(q);
- if (!creq)
- goto startio;
-
- BUG_ON(creq->nr_phys_segments > h->maxsgentries);
-
- c = cmd_alloc(h);
- if (!c)
- goto full;
-
- blk_start_request(creq);
-
- tmp_sg = h->scatter_list[c->cmdindex];
- spin_unlock_irq(q->queue_lock);
-
- c->cmd_type = CMD_RWREQ;
- c->rq = creq;
-
- /* fill in the request */
- drv = creq->rq_disk->private_data;
- c->Header.ReplyQueue = 0; /* unused in simple mode */
- /* got command from pool, so use the command block index instead */
- /* for direct lookups. */
- /* The first 2 bits are reserved for controller error reporting. */
- cciss_set_tag_index(&c->Header.Tag.lower, c->cmdindex);
- cciss_mark_tag_indexed(&c->Header.Tag.lower);
- memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
- c->Request.CDBLen = 10; /* 12 byte commands not in FW yet; */
- c->Request.Type.Type = TYPE_CMD; /* It is a command. */
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction =
- (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
- c->Request.Timeout = 0; /* Don't time out */
- c->Request.CDB[0] =
- (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
- start_blk = blk_rq_pos(creq);
- dev_dbg(&h->pdev->dev, "sector =%d nr_sectors=%d\n",
- (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
- sg_init_table(tmp_sg, h->maxsgentries);
- seg = blk_rq_map_sg(q, creq, tmp_sg);
-
- /* get the DMA records for the setup */
- if (c->Request.Type.Direction == XFER_READ)
- dir = PCI_DMA_FROMDEVICE;
- else
- dir = PCI_DMA_TODEVICE;
-
- curr_sg = c->SG;
- sg_index = 0;
- chained = 0;
-
- for (i = 0; i < seg; i++) {
- if (((sg_index+1) == (h->max_cmd_sgentries)) &&
- !chained && ((seg - i) > 1)) {
- /* Point to next chain block. */
- curr_sg = h->cmd_sg_list[c->cmdindex];
- sg_index = 0;
- chained = 1;
- }
- curr_sg[sg_index].Len = tmp_sg[i].length;
- temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
- tmp_sg[i].offset,
- tmp_sg[i].length, dir);
- if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
- dev_warn(&h->pdev->dev,
- "%s: error mapping page for DMA\n", __func__);
- scsi_req(creq)->result =
- make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
- DID_SOFT_ERROR);
- cmd_free(h, c);
- return;
- }
- curr_sg[sg_index].Addr.lower = temp64.val32.lower;
- curr_sg[sg_index].Addr.upper = temp64.val32.upper;
- curr_sg[sg_index].Ext = 0; /* we are not chaining */
- ++sg_index;
- }
- if (chained) {
- if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
- (seg - (h->max_cmd_sgentries - 1)) *
- sizeof(SGDescriptor_struct))) {
- scsi_req(creq)->result =
- make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
- DID_SOFT_ERROR);
- cmd_free(h, c);
- return;
- }
- }
-
- /* track how many SG entries we are using */
- if (seg > h->maxSG)
- h->maxSG = seg;
-
- dev_dbg(&h->pdev->dev, "Submitting %u sectors in %d segments "
- "chained[%d]\n",
- blk_rq_sectors(creq), seg, chained);
-
- c->Header.SGTotal = seg + chained;
- if (seg <= h->max_cmd_sgentries)
- c->Header.SGList = c->Header.SGTotal;
- else
- c->Header.SGList = h->max_cmd_sgentries;
- set_performant_mode(h, c);
-
- switch (req_op(creq)) {
- case REQ_OP_READ:
- case REQ_OP_WRITE:
- if(h->cciss_read == CCISS_READ_10) {
- c->Request.CDB[1] = 0;
- c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */
- c->Request.CDB[3] = (start_blk >> 16) & 0xff;
- c->Request.CDB[4] = (start_blk >> 8) & 0xff;
- c->Request.CDB[5] = start_blk & 0xff;
- c->Request.CDB[6] = 0; /* (sect >> 24) & 0xff; MSB */
- c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
- c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
- c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
- } else {
- u32 upper32 = upper_32_bits(start_blk);
-
- c->Request.CDBLen = 16;
- c->Request.CDB[1]= 0;
- c->Request.CDB[2]= (upper32 >> 24) & 0xff; /* MSB */
- c->Request.CDB[3]= (upper32 >> 16) & 0xff;
- c->Request.CDB[4]= (upper32 >> 8) & 0xff;
- c->Request.CDB[5]= upper32 & 0xff;
- c->Request.CDB[6]= (start_blk >> 24) & 0xff;
- c->Request.CDB[7]= (start_blk >> 16) & 0xff;
- c->Request.CDB[8]= (start_blk >> 8) & 0xff;
- c->Request.CDB[9]= start_blk & 0xff;
- c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff;
- c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff;
- c->Request.CDB[12]= (blk_rq_sectors(creq) >> 8) & 0xff;
- c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
- c->Request.CDB[14] = c->Request.CDB[15] = 0;
- }
- break;
- case REQ_OP_SCSI_IN:
- case REQ_OP_SCSI_OUT:
- c->Request.CDBLen = scsi_req(creq)->cmd_len;
- memcpy(c->Request.CDB, scsi_req(creq)->cmd, BLK_MAX_CDB);
- scsi_req(creq)->sense = c->err_info->SenseInfo;
- break;
- default:
- dev_warn(&h->pdev->dev, "bad request type %d\n",
- creq->cmd_flags);
- BUG();
- }
-
- spin_lock_irq(q->queue_lock);
-
- addQ(&h->reqQ, c);
- h->Qdepth++;
- if (h->Qdepth > h->maxQsinceinit)
- h->maxQsinceinit = h->Qdepth;
-
- goto queue;
-full:
- blk_stop_queue(q);
-startio:
- /* We will already have the driver lock here so not need
- * to lock it.
- */
- start_io(h);
-}
-
-static inline unsigned long get_next_completion(ctlr_info_t *h)
-{
- return h->access.command_completed(h);
-}
-
-static inline int interrupt_pending(ctlr_info_t *h)
-{
- return h->access.intr_pending(h);
-}
-
-static inline long interrupt_not_for_us(ctlr_info_t *h)
-{
- return ((h->access.intr_pending(h) == 0) ||
- (h->interrupts_enabled == 0));
-}
-
-static inline int bad_tag(ctlr_info_t *h, u32 tag_index,
- u32 raw_tag)
-{
- if (unlikely(tag_index >= h->nr_cmds)) {
- dev_warn(&h->pdev->dev, "bad tag 0x%08x ignored.\n", raw_tag);
- return 1;
- }
- return 0;
-}
-
-static inline void finish_cmd(ctlr_info_t *h, CommandList_struct *c,
- u32 raw_tag)
-{
- removeQ(c);
- if (likely(c->cmd_type == CMD_RWREQ))
- complete_command(h, c, 0);
- else if (c->cmd_type == CMD_IOCTL_PEND)
- complete(c->waiting);
-#ifdef CONFIG_CISS_SCSI_TAPE
- else if (c->cmd_type == CMD_SCSI)
- complete_scsi_command(c, 0, raw_tag);
-#endif
-}
-
-static inline u32 next_command(ctlr_info_t *h)
-{
- u32 a;
-
- if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
- return h->access.command_completed(h);
-
- if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
- a = *(h->reply_pool_head); /* Next cmd in ring buffer */
- (h->reply_pool_head)++;
- h->commands_outstanding--;
- } else {
- a = FIFO_EMPTY;
- }
- /* Check for wraparound */
- if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
- h->reply_pool_head = h->reply_pool;
- h->reply_pool_wraparound ^= 1;
- }
- return a;
-}
-
-/* process completion of an indexed ("direct lookup") command */
-static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
-{
- u32 tag_index;
- CommandList_struct *c;
-
- tag_index = cciss_tag_to_index(raw_tag);
- if (bad_tag(h, tag_index, raw_tag))
- return next_command(h);
- c = h->cmd_pool + tag_index;
- finish_cmd(h, c, raw_tag);
- return next_command(h);
-}
-
-/* process completion of a non-indexed command */
-static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
-{
- CommandList_struct *c = NULL;
- __u32 busaddr_masked, tag_masked;
-
- tag_masked = cciss_tag_discard_error_bits(h, raw_tag);
- list_for_each_entry(c, &h->cmpQ, list) {
- busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr);
- if (busaddr_masked == tag_masked) {
- finish_cmd(h, c, raw_tag);
- return next_command(h);
- }
- }
- bad_tag(h, h->nr_cmds + 1, raw_tag);
- return next_command(h);
-}
-
-/* Some controllers, like p400, will give us one interrupt
- * after a soft reset, even if we turned interrupts off.
- * Only need to check for this in the cciss_xxx_discard_completions
- * functions.
- */
-static int ignore_bogus_interrupt(ctlr_info_t *h)
-{
- if (likely(!reset_devices))
- return 0;
-
- if (likely(h->interrupts_enabled))
- return 0;
-
- dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
- "(known firmware bug.) Ignoring.\n");
-
- return 1;
-}
-
-static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id)
-{
- ctlr_info_t *h = dev_id;
- unsigned long flags;
- u32 raw_tag;
-
- if (ignore_bogus_interrupt(h))
- return IRQ_NONE;
-
- if (interrupt_not_for_us(h))
- return IRQ_NONE;
- spin_lock_irqsave(&h->lock, flags);
- while (interrupt_pending(h)) {
- raw_tag = get_next_completion(h);
- while (raw_tag != FIFO_EMPTY)
- raw_tag = next_command(h);
- }
- spin_unlock_irqrestore(&h->lock, flags);
- return IRQ_HANDLED;
-}
-
-static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id)
-{
- ctlr_info_t *h = dev_id;
- unsigned long flags;
- u32 raw_tag;
-
- if (ignore_bogus_interrupt(h))
- return IRQ_NONE;
-
- spin_lock_irqsave(&h->lock, flags);
- raw_tag = get_next_completion(h);
- while (raw_tag != FIFO_EMPTY)
- raw_tag = next_command(h);
- spin_unlock_irqrestore(&h->lock, flags);
- return IRQ_HANDLED;
-}
-
-static irqreturn_t do_cciss_intx(int irq, void *dev_id)
-{
- ctlr_info_t *h = dev_id;
- unsigned long flags;
- u32 raw_tag;
-
- if (interrupt_not_for_us(h))
- return IRQ_NONE;
- spin_lock_irqsave(&h->lock, flags);
- while (interrupt_pending(h)) {
- raw_tag = get_next_completion(h);
- while (raw_tag != FIFO_EMPTY) {
- if (cciss_tag_contains_index(raw_tag))
- raw_tag = process_indexed_cmd(h, raw_tag);
- else
- raw_tag = process_nonindexed_cmd(h, raw_tag);
- }
- }
- spin_unlock_irqrestore(&h->lock, flags);
- return IRQ_HANDLED;
-}
-
-/* Add a second interrupt handler for MSI/MSI-X mode. In this mode we never
- * check the interrupt pending register because it is not set.
- */
-static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id)
-{
- ctlr_info_t *h = dev_id;
- unsigned long flags;
- u32 raw_tag;
-
- spin_lock_irqsave(&h->lock, flags);
- raw_tag = get_next_completion(h);
- while (raw_tag != FIFO_EMPTY) {
- if (cciss_tag_contains_index(raw_tag))
- raw_tag = process_indexed_cmd(h, raw_tag);
- else
- raw_tag = process_nonindexed_cmd(h, raw_tag);
- }
- spin_unlock_irqrestore(&h->lock, flags);
- return IRQ_HANDLED;
-}
-
-/**
- * add_to_scan_list() - add controller to rescan queue
- * @h: Pointer to the controller.
- *
- * Adds the controller to the rescan queue if not already on the queue.
- *
- * returns 1 if added to the queue, 0 if skipped (could be on the
- * queue already, or the controller could be initializing or shutting
- * down).
- **/
-static int add_to_scan_list(struct ctlr_info *h)
-{
- struct ctlr_info *test_h;
- int found = 0;
- int ret = 0;
-
- if (h->busy_initializing)
- return 0;
-
- if (!mutex_trylock(&h->busy_shutting_down))
- return 0;
-
- mutex_lock(&scan_mutex);
- list_for_each_entry(test_h, &scan_q, scan_list) {
- if (test_h == h) {
- found = 1;
- break;
- }
- }
- if (!found && !h->busy_scanning) {
- reinit_completion(&h->scan_wait);
- list_add_tail(&h->scan_list, &scan_q);
- ret = 1;
- }
- mutex_unlock(&scan_mutex);
- mutex_unlock(&h->busy_shutting_down);
-
- return ret;
-}
-
-/**
- * remove_from_scan_list() - remove controller from rescan queue
- * @h: Pointer to the controller.
- *
- * Removes the controller from the rescan queue if present. Blocks if
- * the controller is currently conducting a rescan. The controller
- * can be in one of three states:
- * 1. Doesn't need a scan
- * 2. On the scan list, but not scanning yet (we remove it)
- * 3. Busy scanning (and not on the list). In this case we want to wait for
- * the scan to complete to make sure the scanning thread for this
- * controller is completely idle.
- **/
-static void remove_from_scan_list(struct ctlr_info *h)
-{
- struct ctlr_info *test_h, *tmp_h;
-
- mutex_lock(&scan_mutex);
- list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) {
- if (test_h == h) { /* state 2. */
- list_del(&h->scan_list);
- complete_all(&h->scan_wait);
- mutex_unlock(&scan_mutex);
- return;
- }
- }
- if (h->busy_scanning) { /* state 3. */
- mutex_unlock(&scan_mutex);
- wait_for_completion(&h->scan_wait);
- } else { /* state 1, nothing to do. */
- mutex_unlock(&scan_mutex);
- }
-}
-
-/**
- * scan_thread() - kernel thread used to rescan controllers
- * @data: Ignored.
- *
- * A kernel thread used scan for drive topology changes on
- * controllers. The thread processes only one controller at a time
- * using a queue. Controllers are added to the queue using
- * add_to_scan_list() and removed from the queue either after done
- * processing or using remove_from_scan_list().
- *
- * returns 0.
- **/
-static int scan_thread(void *data)
-{
- struct ctlr_info *h;
-
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- if (kthread_should_stop())
- break;
-
- while (1) {
- mutex_lock(&scan_mutex);
- if (list_empty(&scan_q)) {
- mutex_unlock(&scan_mutex);
- break;
- }
-
- h = list_entry(scan_q.next,
- struct ctlr_info,
- scan_list);
- list_del(&h->scan_list);
- h->busy_scanning = 1;
- mutex_unlock(&scan_mutex);
-
- rebuild_lun_table(h, 0, 0);
- complete_all(&h->scan_wait);
- mutex_lock(&scan_mutex);
- h->busy_scanning = 0;
- mutex_unlock(&scan_mutex);
- }
- }
-
- return 0;
-}
-
-static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
-{
- if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
- return 0;
-
- switch (c->err_info->SenseInfo[12]) {
- case STATE_CHANGED:
- dev_warn(&h->pdev->dev, "a state change "
- "detected, command retried\n");
- return 1;
- break;
- case LUN_FAILED:
- dev_warn(&h->pdev->dev, "LUN failure "
- "detected, action required\n");
- return 1;
- break;
- case REPORT_LUNS_CHANGED:
- dev_warn(&h->pdev->dev, "report LUN data changed\n");
- /*
- * Here, we could call add_to_scan_list and wake up the scan thread,
- * except that it's quite likely that we will get more than one
- * REPORT_LUNS_CHANGED condition in quick succession, which means
- * that those which occur after the first one will likely happen
- * *during* the scan_thread's rescan. And the rescan code is not
- * robust enough to restart in the middle, undoing what it has already
- * done, and it's not clear that it's even possible to do this, since
- * part of what it does is notify the block layer, which starts
- * doing it's own i/o to read partition tables and so on, and the
- * driver doesn't have visibility to know what might need undoing.
- * In any event, if possible, it is horribly complicated to get right
- * so we just don't do it for now.
- *
- * Note: this REPORT_LUNS_CHANGED condition only occurs on the MSA2012.
- */
- return 1;
- break;
- case POWER_OR_RESET:
- dev_warn(&h->pdev->dev,
- "a power on or device reset detected\n");
- return 1;
- break;
- case UNIT_ATTENTION_CLEARED:
- dev_warn(&h->pdev->dev,
- "unit attention cleared by another initiator\n");
- return 1;
- break;
- default:
- dev_warn(&h->pdev->dev, "unknown unit attention detected\n");
- return 1;
- }
-}
-
-/*
- * We cannot read the structure directly, for portability we must use
- * the io functions.
- * This is for debug only.
- */
-static void print_cfg_table(ctlr_info_t *h)
-{
- int i;
- char temp_name[17];
- CfgTable_struct *tb = h->cfgtable;
-
- dev_dbg(&h->pdev->dev, "Controller Configuration information\n");
- dev_dbg(&h->pdev->dev, "------------------------------------\n");
- for (i = 0; i < 4; i++)
- temp_name[i] = readb(&(tb->Signature[i]));
- temp_name[4] = '\0';
- dev_dbg(&h->pdev->dev, " Signature = %s\n", temp_name);
- dev_dbg(&h->pdev->dev, " Spec Number = %d\n",
- readl(&(tb->SpecValence)));
- dev_dbg(&h->pdev->dev, " Transport methods supported = 0x%x\n",
- readl(&(tb->TransportSupport)));
- dev_dbg(&h->pdev->dev, " Transport methods active = 0x%x\n",
- readl(&(tb->TransportActive)));
- dev_dbg(&h->pdev->dev, " Requested transport Method = 0x%x\n",
- readl(&(tb->HostWrite.TransportRequest)));
- dev_dbg(&h->pdev->dev, " Coalesce Interrupt Delay = 0x%x\n",
- readl(&(tb->HostWrite.CoalIntDelay)));
- dev_dbg(&h->pdev->dev, " Coalesce Interrupt Count = 0x%x\n",
- readl(&(tb->HostWrite.CoalIntCount)));
- dev_dbg(&h->pdev->dev, " Max outstanding commands = 0x%x\n",
- readl(&(tb->CmdsOutMax)));
- dev_dbg(&h->pdev->dev, " Bus Types = 0x%x\n",
- readl(&(tb->BusTypes)));
- for (i = 0; i < 16; i++)
- temp_name[i] = readb(&(tb->ServerName[i]));
- temp_name[16] = '\0';
- dev_dbg(&h->pdev->dev, " Server Name = %s\n", temp_name);
- dev_dbg(&h->pdev->dev, " Heartbeat Counter = 0x%x\n\n\n",
- readl(&(tb->HeartBeat)));
-}
-
-static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
-{
- int i, offset, mem_type, bar_type;
- if (pci_bar_addr == PCI_BASE_ADDRESS_0) /* looking for BAR zero? */
- return 0;
- offset = 0;
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
- if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
- offset += 4;
- else {
- mem_type = pci_resource_flags(pdev, i) &
- PCI_BASE_ADDRESS_MEM_TYPE_MASK;
- switch (mem_type) {
- case PCI_BASE_ADDRESS_MEM_TYPE_32:
- case PCI_BASE_ADDRESS_MEM_TYPE_1M:
- offset += 4; /* 32 bit */
- break;
- case PCI_BASE_ADDRESS_MEM_TYPE_64:
- offset += 8;
- break;
- default: /* reserved in PCI 2.2 */
- dev_warn(&pdev->dev,
- "Base address is invalid\n");
- return -1;
- break;
- }
- }
- if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
- return i + 1;
- }
- return -1;
-}
-
-/* Fill in bucket_map[], given nsgs (the max number of
- * scatter gather elements supported) and bucket[],
- * which is an array of 8 integers. The bucket[] array
- * contains 8 different DMA transfer sizes (in 16
- * byte increments) which the controller uses to fetch
- * commands. This function fills in bucket_map[], which
- * maps a given number of scatter gather elements to one of
- * the 8 DMA transfer sizes. The point of it is to allow the
- * controller to only do as much DMA as needed to fetch the
- * command, with the DMA transfer size encoded in the lower
- * bits of the command address.
- */
-static void calc_bucket_map(int bucket[], int num_buckets,
- int nsgs, int *bucket_map)
-{
- int i, j, b, size;
-
- /* even a command with 0 SGs requires 4 blocks */
-#define MINIMUM_TRANSFER_BLOCKS 4
-#define NUM_BUCKETS 8
- /* Note, bucket_map must have nsgs+1 entries. */
- for (i = 0; i <= nsgs; i++) {
- /* Compute size of a command with i SG entries */
- size = i + MINIMUM_TRANSFER_BLOCKS;
- b = num_buckets; /* Assume the biggest bucket */
- /* Find the bucket that is just big enough */
- for (j = 0; j < 8; j++) {
- if (bucket[j] >= size) {
- b = j;
- break;
- }
- }
- /* for a command with i SG entries, use bucket b. */
- bucket_map[i] = b;
- }
-}
-
-static void cciss_wait_for_mode_change_ack(ctlr_info_t *h)
-{
- int i;
-
- /* under certain very rare conditions, this can take awhile.
- * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
- * as we enter this code.) */
- for (i = 0; i < MAX_CONFIG_WAIT; i++) {
- if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
- break;
- usleep_range(10000, 20000);
- }
-}
-
-static void cciss_enter_performant_mode(ctlr_info_t *h, u32 use_short_tags)
-{
- /* This is a bit complicated. There are 8 registers on
- * the controller which we write to to tell it 8 different
- * sizes of commands which there may be. It's a way of
- * reducing the DMA done to fetch each command. Encoded into
- * each command's tag are 3 bits which communicate to the controller
- * which of the eight sizes that command fits within. The size of
- * each command depends on how many scatter gather entries there are.
- * Each SG entry requires 16 bytes. The eight registers are programmed
- * with the number of 16-byte blocks a command of that size requires.
- * The smallest command possible requires 5 such 16 byte blocks.
- * the largest command possible requires MAXSGENTRIES + 4 16-byte
- * blocks. Note, this only extends to the SG entries contained
- * within the command block, and does not extend to chained blocks
- * of SG elements. bft[] contains the eight values we write to
- * the registers. They are not evenly distributed, but have more
- * sizes for small commands, and fewer sizes for larger commands.
- */
- __u32 trans_offset;
- int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4};
- /*
- * 5 = 1 s/g entry or 4k
- * 6 = 2 s/g entry or 8k
- * 8 = 4 s/g entry or 16k
- * 10 = 6 s/g entry or 24k
- */
- unsigned long register_value;
- BUILD_BUG_ON(28 > MAXSGENTRIES + 4);
-
- h->reply_pool_wraparound = 1; /* spec: init to 1 */
-
- /* Controller spec: zero out this buffer. */
- memset(h->reply_pool, 0, h->max_commands * sizeof(__u64));
- h->reply_pool_head = h->reply_pool;
-
- trans_offset = readl(&(h->cfgtable->TransMethodOffset));
- calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries,
- h->blockFetchTable);
- writel(bft[0], &h->transtable->BlockFetch0);
- writel(bft[1], &h->transtable->BlockFetch1);
- writel(bft[2], &h->transtable->BlockFetch2);
- writel(bft[3], &h->transtable->BlockFetch3);
- writel(bft[4], &h->transtable->BlockFetch4);
- writel(bft[5], &h->transtable->BlockFetch5);
- writel(bft[6], &h->transtable->BlockFetch6);
- writel(bft[7], &h->transtable->BlockFetch7);
-
- /* size of controller ring buffer */
- writel(h->max_commands, &h->transtable->RepQSize);
- writel(1, &h->transtable->RepQCount);
- writel(0, &h->transtable->RepQCtrAddrLow32);
- writel(0, &h->transtable->RepQCtrAddrHigh32);
- writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
- writel(0, &h->transtable->RepQAddr0High32);
- writel(CFGTBL_Trans_Performant | use_short_tags,
- &(h->cfgtable->HostWrite.TransportRequest));
-
- writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
- cciss_wait_for_mode_change_ack(h);
- register_value = readl(&(h->cfgtable->TransportActive));
- if (!(register_value & CFGTBL_Trans_Performant))
- dev_warn(&h->pdev->dev, "cciss: unable to get board into"
- " performant mode\n");
-}
-
-static void cciss_put_controller_into_performant_mode(ctlr_info_t *h)
-{
- __u32 trans_support;
-
- if (cciss_simple_mode)
- return;
-
- dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
- /* Attempt to put controller into performant mode if supported */
- /* Does board support performant mode? */
- trans_support = readl(&(h->cfgtable->TransportSupport));
- if (!(trans_support & PERFORMANT_MODE))
- return;
-
- dev_dbg(&h->pdev->dev, "Placing controller into performant mode\n");
- /* Performant mode demands commands on a 32 byte boundary
- * pci_alloc_consistent aligns on page boundarys already.
- * Just need to check if divisible by 32
- */
- if ((sizeof(CommandList_struct) % 32) != 0) {
- dev_warn(&h->pdev->dev, "%s %d %s\n",
- "cciss info: command size[",
- (int)sizeof(CommandList_struct),
- "] not divisible by 32, no performant mode..\n");
- return;
- }
-
- /* Performant mode ring buffer and supporting data structures */
- h->reply_pool = (__u64 *)pci_alloc_consistent(
- h->pdev, h->max_commands * sizeof(__u64),
- &(h->reply_pool_dhandle));
-
- /* Need a block fetch table for performant mode */
- h->blockFetchTable = kmalloc(((h->maxsgentries+1) *
- sizeof(__u32)), GFP_KERNEL);
-
- if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
- goto clean_up;
-
- cciss_enter_performant_mode(h,
- trans_support & CFGTBL_Trans_use_short_tags);
-
- /* Change the access methods to the performant access methods */
- h->access = SA5_performant_access;
- h->transMethod = CFGTBL_Trans_Performant;
-
- return;
-clean_up:
- kfree(h->blockFetchTable);
- if (h->reply_pool)
- pci_free_consistent(h->pdev,
- h->max_commands * sizeof(__u64),
- h->reply_pool,
- h->reply_pool_dhandle);
- return;
-
-} /* cciss_put_controller_into_performant_mode */
-
-/* If MSI/MSI-X is supported by the kernel we will try to enable it on
- * controllers that are capable. If not, we use IO-APIC mode.
- */
-
-static void cciss_interrupt_mode(ctlr_info_t *h)
-{
- int ret;
-
- /* Some boards advertise MSI but don't really support it */
- if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
- (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
- goto default_int_mode;
-
- ret = pci_alloc_irq_vectors(h->pdev, 4, 4, PCI_IRQ_MSIX);
- if (ret >= 0) {
- h->intr[0] = pci_irq_vector(h->pdev, 0);
- h->intr[1] = pci_irq_vector(h->pdev, 1);
- h->intr[2] = pci_irq_vector(h->pdev, 2);
- h->intr[3] = pci_irq_vector(h->pdev, 3);
- return;
- }
-
- ret = pci_alloc_irq_vectors(h->pdev, 1, 1, PCI_IRQ_MSI);
-
-default_int_mode:
- /* if we get here we're going to use the default interrupt mode */
- h->intr[h->intr_mode] = pci_irq_vector(h->pdev, 0);
- return;
-}
-
-static int cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
-{
- int i;
- u32 subsystem_vendor_id, subsystem_device_id;
-
- subsystem_vendor_id = pdev->subsystem_vendor;
- subsystem_device_id = pdev->subsystem_device;
- *board_id = ((subsystem_device_id << 16) & 0xffff0000) |
- subsystem_vendor_id;
-
- for (i = 0; i < ARRAY_SIZE(products); i++) {
- /* Stand aside for hpsa driver on request */
- if (cciss_allow_hpsa)
- return -ENODEV;
- if (*board_id == products[i].board_id)
- return i;
- }
- dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n",
- *board_id);
- return -ENODEV;
-}
-
-static inline bool cciss_board_disabled(ctlr_info_t *h)
-{
- u16 command;
-
- (void) pci_read_config_word(h->pdev, PCI_COMMAND, &command);
- return ((command & PCI_COMMAND_MEMORY) == 0);
-}
-
-static int cciss_pci_find_memory_BAR(struct pci_dev *pdev,
- unsigned long *memory_bar)
-{
- int i;
-
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
- if (pci_resource_flags(pdev, i) & IORESOURCE_MEM) {
- /* addressing mode bits already removed */
- *memory_bar = pci_resource_start(pdev, i);
- dev_dbg(&pdev->dev, "memory BAR = %lx\n",
- *memory_bar);
- return 0;
- }
- dev_warn(&pdev->dev, "no memory BAR found\n");
- return -ENODEV;
-}
-
-static int cciss_wait_for_board_state(struct pci_dev *pdev,
- void __iomem *vaddr, int wait_for_ready)
-#define BOARD_READY 1
-#define BOARD_NOT_READY 0
-{
- int i, iterations;
- u32 scratchpad;
-
- if (wait_for_ready)
- iterations = CCISS_BOARD_READY_ITERATIONS;
- else
- iterations = CCISS_BOARD_NOT_READY_ITERATIONS;
-
- for (i = 0; i < iterations; i++) {
- scratchpad = readl(vaddr + SA5_SCRATCHPAD_OFFSET);
- if (wait_for_ready) {
- if (scratchpad == CCISS_FIRMWARE_READY)
- return 0;
- } else {
- if (scratchpad != CCISS_FIRMWARE_READY)
- return 0;
- }
- msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS);
- }
- dev_warn(&pdev->dev, "board not ready, timed out.\n");
- return -ENODEV;
-}
-
-static int cciss_find_cfg_addrs(struct pci_dev *pdev, void __iomem *vaddr,
- u32 *cfg_base_addr, u64 *cfg_base_addr_index,
- u64 *cfg_offset)
-{
- *cfg_base_addr = readl(vaddr + SA5_CTCFG_OFFSET);
- *cfg_offset = readl(vaddr + SA5_CTMEM_OFFSET);
- *cfg_base_addr &= (u32) 0x0000ffff;
- *cfg_base_addr_index = find_PCI_BAR_index(pdev, *cfg_base_addr);
- if (*cfg_base_addr_index == -1) {
- dev_warn(&pdev->dev, "cannot find cfg_base_addr_index, "
- "*cfg_base_addr = 0x%08x\n", *cfg_base_addr);
- return -ENODEV;
- }
- return 0;
-}
-
-static int cciss_find_cfgtables(ctlr_info_t *h)
-{
- u64 cfg_offset;
- u32 cfg_base_addr;
- u64 cfg_base_addr_index;
- u32 trans_offset;
- int rc;
-
- rc = cciss_find_cfg_addrs(h->pdev, h->vaddr, &cfg_base_addr,
- &cfg_base_addr_index, &cfg_offset);
- if (rc)
- return rc;
- h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
- cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
- if (!h->cfgtable)
- return -ENOMEM;
- rc = write_driver_ver_to_cfgtable(h->cfgtable);
- if (rc)
- return rc;
- /* Find performant mode table. */
- trans_offset = readl(&h->cfgtable->TransMethodOffset);
- h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
- cfg_base_addr_index)+cfg_offset+trans_offset,
- sizeof(*h->transtable));
- if (!h->transtable)
- return -ENOMEM;
- return 0;
-}
-
-static void cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
-{
- h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands));
-
- /* Limit commands in memory limited kdump scenario. */
- if (reset_devices && h->max_commands > 32)
- h->max_commands = 32;
-
- if (h->max_commands < 16) {
- dev_warn(&h->pdev->dev, "Controller reports "
- "max supported commands of %d, an obvious lie. "
- "Using 16. Ensure that firmware is up to date.\n",
- h->max_commands);
- h->max_commands = 16;
- }
-}
-
-/* Interrogate the hardware for some limits:
- * max commands, max SG elements without chaining, and with chaining,
- * SG chain block size, etc.
- */
-static void cciss_find_board_params(ctlr_info_t *h)
-{
- cciss_get_max_perf_mode_cmds(h);
- h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
- h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
- /*
- * The P600 may exhibit poor performnace under some workloads
- * if we use the value in the configuration table. Limit this
- * controller to MAXSGENTRIES (32) instead.
- */
- if (h->board_id == 0x3225103C)
- h->maxsgentries = MAXSGENTRIES;
- /*
- * Limit in-command s/g elements to 32 save dma'able memory.
- * Howvever spec says if 0, use 31
- */
- h->max_cmd_sgentries = 31;
- if (h->maxsgentries > 512) {
- h->max_cmd_sgentries = 32;
- h->chainsize = h->maxsgentries - h->max_cmd_sgentries + 1;
- h->maxsgentries--; /* save one for chain pointer */
- } else {
- h->maxsgentries = 31; /* default to traditional values */
- h->chainsize = 0;
- }
-}
-
-static inline bool CISS_signature_present(ctlr_info_t *h)
-{
- if (!check_signature(h->cfgtable->Signature, "CISS", 4)) {
- dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
- return false;
- }
- return true;
-}
-
-/* Need to enable prefetch in the SCSI core for 6400 in x86 */
-static inline void cciss_enable_scsi_prefetch(ctlr_info_t *h)
-{
-#ifdef CONFIG_X86
- u32 prefetch;
-
- prefetch = readl(&(h->cfgtable->SCSI_Prefetch));
- prefetch |= 0x100;
- writel(prefetch, &(h->cfgtable->SCSI_Prefetch));
-#endif
-}
-
-/* Disable DMA prefetch for the P600. Otherwise an ASIC bug may result
- * in a prefetch beyond physical memory.
- */
-static inline void cciss_p600_dma_prefetch_quirk(ctlr_info_t *h)
-{
- u32 dma_prefetch;
- __u32 dma_refetch;
-
- if (h->board_id != 0x3225103C)
- return;
- dma_prefetch = readl(h->vaddr + I2O_DMA1_CFG);
- dma_prefetch |= 0x8000;
- writel(dma_prefetch, h->vaddr + I2O_DMA1_CFG);
- pci_read_config_dword(h->pdev, PCI_COMMAND_PARITY, &dma_refetch);
- dma_refetch |= 0x1;
- pci_write_config_dword(h->pdev, PCI_COMMAND_PARITY, dma_refetch);
-}
-
-static int cciss_pci_init(ctlr_info_t *h)
-{
- int prod_index, err;
-
- prod_index = cciss_lookup_board_id(h->pdev, &h->board_id);
- if (prod_index < 0)
- return -ENODEV;
- h->product_name = products[prod_index].product_name;
- h->access = *(products[prod_index].access);
-
- if (cciss_board_disabled(h)) {
- dev_warn(&h->pdev->dev, "controller appears to be disabled\n");
- return -ENODEV;
- }
-
- pci_disable_link_state(h->pdev, PCIE_LINK_STATE_L0S |
- PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM);
-
- err = pci_enable_device(h->pdev);
- if (err) {
- dev_warn(&h->pdev->dev, "Unable to Enable PCI device\n");
- return err;
- }
-
- err = pci_request_regions(h->pdev, "cciss");
- if (err) {
- dev_warn(&h->pdev->dev,
- "Cannot obtain PCI resources, aborting\n");
- return err;
- }
-
- dev_dbg(&h->pdev->dev, "irq = %x\n", h->pdev->irq);
- dev_dbg(&h->pdev->dev, "board_id = %x\n", h->board_id);
-
-/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
- * else we use the IO-APIC interrupt assigned to us by system ROM.
- */
- cciss_interrupt_mode(h);
- err = cciss_pci_find_memory_BAR(h->pdev, &h->paddr);
- if (err)
- goto err_out_free_res;
- h->vaddr = remap_pci_mem(h->paddr, 0x250);
- if (!h->vaddr) {
- err = -ENOMEM;
- goto err_out_free_res;
- }
- err = cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY);
- if (err)
- goto err_out_free_res;
- err = cciss_find_cfgtables(h);
- if (err)
- goto err_out_free_res;
- print_cfg_table(h);
- cciss_find_board_params(h);
-
- if (!CISS_signature_present(h)) {
- err = -ENODEV;
- goto err_out_free_res;
- }
- cciss_enable_scsi_prefetch(h);
- cciss_p600_dma_prefetch_quirk(h);
- err = cciss_enter_simple_mode(h);
- if (err)
- goto err_out_free_res;
- cciss_put_controller_into_performant_mode(h);
- return 0;
-
-err_out_free_res:
- /*
- * Deliberately omit pci_disable_device(): it does something nasty to
- * Smart Array controllers that pci_enable_device does not undo
- */
- if (h->transtable)
- iounmap(h->transtable);
- if (h->cfgtable)
- iounmap(h->cfgtable);
- if (h->vaddr)
- iounmap(h->vaddr);
- pci_release_regions(h->pdev);
- return err;
-}
-
-/* Function to find the first free pointer into our hba[] array
- * Returns -1 if no free entries are left.
- */
-static int alloc_cciss_hba(struct pci_dev *pdev)
-{
- int i;
-
- for (i = 0; i < MAX_CTLR; i++) {
- if (!hba[i]) {
- ctlr_info_t *h;
-
- h = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
- if (!h)
- goto Enomem;
- hba[i] = h;
- return i;
- }
- }
- dev_warn(&pdev->dev, "This driver supports a maximum"
- " of %d controllers.\n", MAX_CTLR);
- return -1;
-Enomem:
- dev_warn(&pdev->dev, "out of memory.\n");
- return -1;
-}
-
-static void free_hba(ctlr_info_t *h)
-{
- int i;
-
- hba[h->ctlr] = NULL;
- for (i = 0; i < h->highest_lun + 1; i++)
- if (h->gendisk[i] != NULL)
- put_disk(h->gendisk[i]);
- kfree(h);
-}
-
-/* Send a message CDB to the firmware. */
-static int cciss_message(struct pci_dev *pdev, unsigned char opcode,
- unsigned char type)
-{
- typedef struct {
- CommandListHeader_struct CommandHeader;
- RequestBlock_struct Request;
- ErrDescriptor_struct ErrorDescriptor;
- } Command;
- static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
- Command *cmd;
- dma_addr_t paddr64;
- uint32_t paddr32, tag;
- void __iomem *vaddr;
- int i, err;
-
- vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
- if (vaddr == NULL)
- return -ENOMEM;
-
- /* The Inbound Post Queue only accepts 32-bit physical addresses for the
- CCISS commands, so they must be allocated from the lower 4GiB of
- memory. */
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- iounmap(vaddr);
- return -ENOMEM;
- }
-
- cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
- if (cmd == NULL) {
- iounmap(vaddr);
- return -ENOMEM;
- }
-
- /* This must fit, because of the 32-bit consistent DMA mask. Also,
- although there's no guarantee, we assume that the address is at
- least 4-byte aligned (most likely, it's page-aligned). */
- paddr32 = paddr64;
-
- cmd->CommandHeader.ReplyQueue = 0;
- cmd->CommandHeader.SGList = 0;
- cmd->CommandHeader.SGTotal = 0;
- cmd->CommandHeader.Tag.lower = paddr32;
- cmd->CommandHeader.Tag.upper = 0;
- memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
-
- cmd->Request.CDBLen = 16;
- cmd->Request.Type.Type = TYPE_MSG;
- cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
- cmd->Request.Type.Direction = XFER_NONE;
- cmd->Request.Timeout = 0; /* Don't time out */
- cmd->Request.CDB[0] = opcode;
- cmd->Request.CDB[1] = type;
- memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
-
- cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
- cmd->ErrorDescriptor.Addr.upper = 0;
- cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
-
- writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
-
- for (i = 0; i < 10; i++) {
- tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
- if ((tag & ~3) == paddr32)
- break;
- msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS);
- }
-
- iounmap(vaddr);
-
- /* we leak the DMA buffer here ... no choice since the controller could
- still complete the command. */
- if (i == 10) {
- dev_err(&pdev->dev,
- "controller message %02x:%02x timed out\n",
- opcode, type);
- return -ETIMEDOUT;
- }
-
- pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
-
- if (tag & 2) {
- dev_err(&pdev->dev, "controller message %02x:%02x failed\n",
- opcode, type);
- return -EIO;
- }
-
- dev_info(&pdev->dev, "controller message %02x:%02x succeeded\n",
- opcode, type);
- return 0;
-}
-
-#define cciss_noop(p) cciss_message(p, 3, 0)
-
-static int cciss_controller_hard_reset(struct pci_dev *pdev,
- void * __iomem vaddr, u32 use_doorbell)
-{
- u16 pmcsr;
- int pos;
-
- if (use_doorbell) {
- /* For everything after the P600, the PCI power state method
- * of resetting the controller doesn't work, so we have this
- * other way using the doorbell register.
- */
- dev_info(&pdev->dev, "using doorbell to reset controller\n");
- writel(use_doorbell, vaddr + SA5_DOORBELL);
- } else { /* Try to do it the PCI power state way */
-
- /* Quoting from the Open CISS Specification: "The Power
- * Management Control/Status Register (CSR) controls the power
- * state of the device. The normal operating state is D0,
- * CSR=00h. The software off state is D3, CSR=03h. To reset
- * the controller, place the interface device in D3 then to D0,
- * this causes a secondary PCI reset which will reset the
- * controller." */
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
- if (pos == 0) {
- dev_err(&pdev->dev,
- "cciss_controller_hard_reset: "
- "PCI PM not supported\n");
- return -ENODEV;
- }
- dev_info(&pdev->dev, "using PCI PM to reset controller\n");
- /* enter the D3hot power management state */
- pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
- pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
- pmcsr |= PCI_D3hot;
- pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
- msleep(500);
-
- /* enter the D0 power management state */
- pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
- pmcsr |= PCI_D0;
- pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
- /*
- * The P600 requires a small delay when changing states.
- * Otherwise we may think the board did not reset and we bail.
- * This for kdump only and is particular to the P600.
- */
- msleep(500);
- }
- return 0;
-}
-
-static void init_driver_version(char *driver_version, int len)
-{
- memset(driver_version, 0, len);
- strncpy(driver_version, "cciss " DRIVER_NAME, len - 1);
-}
-
-static int write_driver_ver_to_cfgtable(CfgTable_struct __iomem *cfgtable)
-{
- char *driver_version;
- int i, size = sizeof(cfgtable->driver_version);
-
- driver_version = kmalloc(size, GFP_KERNEL);
- if (!driver_version)
- return -ENOMEM;
-
- init_driver_version(driver_version, size);
- for (i = 0; i < size; i++)
- writeb(driver_version[i], &cfgtable->driver_version[i]);
- kfree(driver_version);
- return 0;
-}
-
-static void read_driver_ver_from_cfgtable(CfgTable_struct __iomem *cfgtable,
- unsigned char *driver_ver)
-{
- int i;
-
- for (i = 0; i < sizeof(cfgtable->driver_version); i++)
- driver_ver[i] = readb(&cfgtable->driver_version[i]);
-}
-
-static int controller_reset_failed(CfgTable_struct __iomem *cfgtable)
-{
-
- char *driver_ver, *old_driver_ver;
- int rc, size = sizeof(cfgtable->driver_version);
-
- old_driver_ver = kmalloc(2 * size, GFP_KERNEL);
- if (!old_driver_ver)
- return -ENOMEM;
- driver_ver = old_driver_ver + size;
-
- /* After a reset, the 32 bytes of "driver version" in the cfgtable
- * should have been changed, otherwise we know the reset failed.
- */
- init_driver_version(old_driver_ver, size);
- read_driver_ver_from_cfgtable(cfgtable, driver_ver);
- rc = !memcmp(driver_ver, old_driver_ver, size);
- kfree(old_driver_ver);
- return rc;
-}
-
-/* This does a hard reset of the controller using PCI power management
- * states or using the doorbell register. */
-static int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
-{
- u64 cfg_offset;
- u32 cfg_base_addr;
- u64 cfg_base_addr_index;
- void __iomem *vaddr;
- unsigned long paddr;
- u32 misc_fw_support;
- int rc;
- CfgTable_struct __iomem *cfgtable;
- u32 use_doorbell;
- u32 board_id;
- u16 command_register;
-
- /* For controllers as old a the p600, this is very nearly
- * the same thing as
- *
- * pci_save_state(pci_dev);
- * pci_set_power_state(pci_dev, PCI_D3hot);
- * pci_set_power_state(pci_dev, PCI_D0);
- * pci_restore_state(pci_dev);
- *
- * For controllers newer than the P600, the pci power state
- * method of resetting doesn't work so we have another way
- * using the doorbell register.
- */
-
- /* Exclude 640x boards. These are two pci devices in one slot
- * which share a battery backed cache module. One controls the
- * cache, the other accesses the cache through the one that controls
- * it. If we reset the one controlling the cache, the other will
- * likely not be happy. Just forbid resetting this conjoined mess.
- */
- cciss_lookup_board_id(pdev, &board_id);
- if (!ctlr_is_resettable(board_id)) {
- dev_warn(&pdev->dev, "Controller not resettable\n");
- return -ENODEV;
- }
-
- /* if controller is soft- but not hard resettable... */
- if (!ctlr_is_hard_resettable(board_id))
- return -ENOTSUPP; /* try soft reset later. */
-
- /* Save the PCI command register */
- pci_read_config_word(pdev, 4, &command_register);
- /* Turn the board off. This is so that later pci_restore_state()
- * won't turn the board on before the rest of config space is ready.
- */
- pci_disable_device(pdev);
- pci_save_state(pdev);
-
- /* find the first memory BAR, so we can find the cfg table */
- rc = cciss_pci_find_memory_BAR(pdev, &paddr);
- if (rc)
- return rc;
- vaddr = remap_pci_mem(paddr, 0x250);
- if (!vaddr)
- return -ENOMEM;
-
- /* find cfgtable in order to check if reset via doorbell is supported */
- rc = cciss_find_cfg_addrs(pdev, vaddr, &cfg_base_addr,
- &cfg_base_addr_index, &cfg_offset);
- if (rc)
- goto unmap_vaddr;
- cfgtable = remap_pci_mem(pci_resource_start(pdev,
- cfg_base_addr_index) + cfg_offset, sizeof(*cfgtable));
- if (!cfgtable) {
- rc = -ENOMEM;
- goto unmap_vaddr;
- }
- rc = write_driver_ver_to_cfgtable(cfgtable);
- if (rc)
- goto unmap_vaddr;
-
- /* If reset via doorbell register is supported, use that.
- * There are two such methods. Favor the newest method.
- */
- misc_fw_support = readl(&cfgtable->misc_fw_support);
- use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2;
- if (use_doorbell) {
- use_doorbell = DOORBELL_CTLR_RESET2;
- } else {
- use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
- if (use_doorbell) {
- dev_warn(&pdev->dev, "Controller claims that "
- "'Bit 2 doorbell reset' is "
- "supported, but not 'bit 5 doorbell reset'. "
- "Firmware update is recommended.\n");
- rc = -ENOTSUPP; /* use the soft reset */
- goto unmap_cfgtable;
- }
- }
-
- rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
- if (rc)
- goto unmap_cfgtable;
- pci_restore_state(pdev);
- rc = pci_enable_device(pdev);
- if (rc) {
- dev_warn(&pdev->dev, "failed to enable device.\n");
- goto unmap_cfgtable;
- }
- pci_write_config_word(pdev, 4, command_register);
-
- /* Some devices (notably the HP Smart Array 5i Controller)
- need a little pause here */
- msleep(CCISS_POST_RESET_PAUSE_MSECS);
-
- /* Wait for board to become not ready, then ready. */
- dev_info(&pdev->dev, "Waiting for board to reset.\n");
- rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
- if (rc) {
- dev_warn(&pdev->dev, "Failed waiting for board to hard reset."
- " Will try soft reset.\n");
- rc = -ENOTSUPP; /* Not expected, but try soft reset later */
- goto unmap_cfgtable;
- }
- rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
- if (rc) {
- dev_warn(&pdev->dev,
- "failed waiting for board to become ready "
- "after hard reset\n");
- goto unmap_cfgtable;
- }
-
- rc = controller_reset_failed(vaddr);
- if (rc < 0)
- goto unmap_cfgtable;
- if (rc) {
- dev_warn(&pdev->dev, "Unable to successfully hard reset "
- "controller. Will try soft reset.\n");
- rc = -ENOTSUPP; /* Not expected, but try soft reset later */
- } else {
- dev_info(&pdev->dev, "Board ready after hard reset.\n");
- }
-
-unmap_cfgtable:
- iounmap(cfgtable);
-
-unmap_vaddr:
- iounmap(vaddr);
- return rc;
-}
-
-static int cciss_init_reset_devices(struct pci_dev *pdev)
-{
- int rc, i;
-
- if (!reset_devices)
- return 0;
-
- /* Reset the controller with a PCI power-cycle or via doorbell */
- rc = cciss_kdump_hard_reset_controller(pdev);
-
- /* -ENOTSUPP here means we cannot reset the controller
- * but it's already (and still) up and running in
- * "performant mode". Or, it might be 640x, which can't reset
- * due to concerns about shared bbwc between 6402/6404 pair.
- */
- if (rc == -ENOTSUPP)
- return rc; /* just try to do the kdump anyhow. */
- if (rc)
- return -ENODEV;
-
- /* Now try to get the controller to respond to a no-op */
- dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
- for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
- if (cciss_noop(pdev) == 0)
- break;
- else
- dev_warn(&pdev->dev, "no-op failed%s\n",
- (i < CCISS_POST_RESET_NOOP_RETRIES - 1 ?
- "; re-trying" : ""));
- msleep(CCISS_POST_RESET_NOOP_INTERVAL_MSECS);
- }
- return 0;
-}
-
-static int cciss_allocate_cmd_pool(ctlr_info_t *h)
-{
- h->cmd_pool_bits = kmalloc(BITS_TO_LONGS(h->nr_cmds) *
- sizeof(unsigned long), GFP_KERNEL);
- h->cmd_pool = pci_alloc_consistent(h->pdev,
- h->nr_cmds * sizeof(CommandList_struct),
- &(h->cmd_pool_dhandle));
- h->errinfo_pool = pci_alloc_consistent(h->pdev,
- h->nr_cmds * sizeof(ErrorInfo_struct),
- &(h->errinfo_pool_dhandle));
- if ((h->cmd_pool_bits == NULL)
- || (h->cmd_pool == NULL)
- || (h->errinfo_pool == NULL)) {
- dev_err(&h->pdev->dev, "out of memory");
- return -ENOMEM;
- }
- return 0;
-}
-
-static int cciss_allocate_scatterlists(ctlr_info_t *h)
-{
- int i;
-
- /* zero it, so that on free we need not know how many were alloc'ed */
- h->scatter_list = kzalloc(h->max_commands *
- sizeof(struct scatterlist *), GFP_KERNEL);
- if (!h->scatter_list)
- return -ENOMEM;
-
- for (i = 0; i < h->nr_cmds; i++) {
- h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) *
- h->maxsgentries, GFP_KERNEL);
- if (h->scatter_list[i] == NULL) {
- dev_err(&h->pdev->dev, "could not allocate "
- "s/g lists\n");
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-static void cciss_free_scatterlists(ctlr_info_t *h)
-{
- int i;
-
- if (h->scatter_list) {
- for (i = 0; i < h->nr_cmds; i++)
- kfree(h->scatter_list[i]);
- kfree(h->scatter_list);
- }
-}
-
-static void cciss_free_cmd_pool(ctlr_info_t *h)
-{
- kfree(h->cmd_pool_bits);
- if (h->cmd_pool)
- pci_free_consistent(h->pdev,
- h->nr_cmds * sizeof(CommandList_struct),
- h->cmd_pool, h->cmd_pool_dhandle);
- if (h->errinfo_pool)
- pci_free_consistent(h->pdev,
- h->nr_cmds * sizeof(ErrorInfo_struct),
- h->errinfo_pool, h->errinfo_pool_dhandle);
-}
-
-static int cciss_request_irq(ctlr_info_t *h,
- irqreturn_t (*msixhandler)(int, void *),
- irqreturn_t (*intxhandler)(int, void *))
-{
- if (h->pdev->msi_enabled || h->pdev->msix_enabled) {
- if (!request_irq(h->intr[h->intr_mode], msixhandler,
- 0, h->devname, h))
- return 0;
- dev_err(&h->pdev->dev, "Unable to get msi irq %d"
- " for %s\n", h->intr[h->intr_mode],
- h->devname);
- return -1;
- }
-
- if (!request_irq(h->intr[h->intr_mode], intxhandler,
- IRQF_SHARED, h->devname, h))
- return 0;
- dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
- h->intr[h->intr_mode], h->devname);
- return -1;
-}
-
-static int cciss_kdump_soft_reset(ctlr_info_t *h)
-{
- if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) {
- dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
- return -EIO;
- }
-
- dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
- if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
- dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
- return -1;
- }
-
- dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
- if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
- dev_warn(&h->pdev->dev, "Board failed to become ready "
- "after soft reset.\n");
- return -1;
- }
-
- return 0;
-}
-
-static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
-{
- int ctlr = h->ctlr;
-
- free_irq(h->intr[h->intr_mode], h);
- pci_free_irq_vectors(h->pdev);
- cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
- cciss_free_scatterlists(h);
- cciss_free_cmd_pool(h);
- kfree(h->blockFetchTable);
- if (h->reply_pool)
- pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
- h->reply_pool, h->reply_pool_dhandle);
- if (h->transtable)
- iounmap(h->transtable);
- if (h->cfgtable)
- iounmap(h->cfgtable);
- if (h->vaddr)
- iounmap(h->vaddr);
- unregister_blkdev(h->major, h->devname);
- cciss_destroy_hba_sysfs_entry(h);
- pci_release_regions(h->pdev);
- kfree(h);
- hba[ctlr] = NULL;
-}
-
-/*
- * This is it. Find all the controllers and register them. I really hate
- * stealing all these major device numbers.
- * returns the number of block devices registered.
- */
-static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- int i;
- int j = 0;
- int rc;
- int try_soft_reset = 0;
- int dac, return_code;
- InquiryData_struct *inq_buff;
- ctlr_info_t *h;
- unsigned long flags;
-
- /*
- * By default the cciss driver is used for all older HP Smart Array
- * controllers. There are module paramaters that allow a user to
- * override this behavior and instead use the hpsa SCSI driver. If
- * this is the case cciss may be loaded first from the kdump initrd
- * image and cause a kernel panic. So if reset_devices is true and
- * cciss_allow_hpsa is set just bail.
- */
- if ((reset_devices) && (cciss_allow_hpsa == 1))
- return -ENODEV;
- rc = cciss_init_reset_devices(pdev);
- if (rc) {
- if (rc != -ENOTSUPP)
- return rc;
- /* If the reset fails in a particular way (it has no way to do
- * a proper hard reset, so returns -ENOTSUPP) we can try to do
- * a soft reset once we get the controller configured up to the
- * point that it can accept a command.
- */
- try_soft_reset = 1;
- rc = 0;
- }
-
-reinit_after_soft_reset:
-
- i = alloc_cciss_hba(pdev);
- if (i < 0)
- return -ENOMEM;
-
- h = hba[i];
- h->pdev = pdev;
- h->busy_initializing = 1;
- h->intr_mode = cciss_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
- INIT_LIST_HEAD(&h->cmpQ);
- INIT_LIST_HEAD(&h->reqQ);
- mutex_init(&h->busy_shutting_down);
-
- if (cciss_pci_init(h) != 0)
- goto clean_no_release_regions;
-
- sprintf(h->devname, "cciss%d", i);
- h->ctlr = i;
-
- if (cciss_tape_cmds < 2)
- cciss_tape_cmds = 2;
- if (cciss_tape_cmds > 16)
- cciss_tape_cmds = 16;
-
- init_completion(&h->scan_wait);
-
- if (cciss_create_hba_sysfs_entry(h))
- goto clean0;
-
- /* configure PCI DMA stuff */
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
- dac = 1;
- else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
- dac = 0;
- else {
- dev_err(&h->pdev->dev, "no suitable DMA available\n");
- goto clean1;
- }
-
- /*
- * register with the major number, or get a dynamic major number
- * by passing 0 as argument. This is done for greater than
- * 8 controller support.
- */
- if (i < MAX_CTLR_ORIG)
- h->major = COMPAQ_CISS_MAJOR + i;
- rc = register_blkdev(h->major, h->devname);
- if (rc == -EBUSY || rc == -EINVAL) {
- dev_err(&h->pdev->dev,
- "Unable to get major number %d for %s "
- "on hba %d\n", h->major, h->devname, i);
- goto clean1;
- } else {
- if (i >= MAX_CTLR_ORIG)
- h->major = rc;
- }
-
- /* make sure the board interrupts are off */
- h->access.set_intr_mask(h, CCISS_INTR_OFF);
- rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx);
- if (rc)
- goto clean2;
-
- dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
- h->devname, pdev->device, pci_name(pdev),
- h->intr[h->intr_mode], dac ? "" : " not");
-
- if (cciss_allocate_cmd_pool(h))
- goto clean4;
-
- if (cciss_allocate_scatterlists(h))
- goto clean4;
-
- h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
- h->chainsize, h->nr_cmds);
- if (!h->cmd_sg_list && h->chainsize > 0)
- goto clean4;
-
- spin_lock_init(&h->lock);
-
- /* Initialize the pdev driver private data.
- have it point to h. */
- pci_set_drvdata(pdev, h);
- /* command and error info recs zeroed out before
- they are used */
- bitmap_zero(h->cmd_pool_bits, h->nr_cmds);
-
- h->num_luns = 0;
- h->highest_lun = -1;
- for (j = 0; j < CISS_MAX_LUN; j++) {
- h->drv[j] = NULL;
- h->gendisk[j] = NULL;
- }
-
- /* At this point, the controller is ready to take commands.
- * Now, if reset_devices and the hard reset didn't work, try
- * the soft reset and see if that works.
- */
- if (try_soft_reset) {
-
- /* This is kind of gross. We may or may not get a completion
- * from the soft reset command, and if we do, then the value
- * from the fifo may or may not be valid. So, we wait 10 secs
- * after the reset throwing away any completions we get during
- * that time. Unregister the interrupt handler and register
- * fake ones to scoop up any residual completions.
- */
- spin_lock_irqsave(&h->lock, flags);
- h->access.set_intr_mask(h, CCISS_INTR_OFF);
- spin_unlock_irqrestore(&h->lock, flags);
- free_irq(h->intr[h->intr_mode], h);
- rc = cciss_request_irq(h, cciss_msix_discard_completions,
- cciss_intx_discard_completions);
- if (rc) {
- dev_warn(&h->pdev->dev, "Failed to request_irq after "
- "soft reset.\n");
- goto clean4;
- }
-
- rc = cciss_kdump_soft_reset(h);
- if (rc) {
- dev_warn(&h->pdev->dev, "Soft reset failed.\n");
- goto clean4;
- }
-
- dev_info(&h->pdev->dev, "Board READY.\n");
- dev_info(&h->pdev->dev,
- "Waiting for stale completions to drain.\n");
- h->access.set_intr_mask(h, CCISS_INTR_ON);
- msleep(10000);
- h->access.set_intr_mask(h, CCISS_INTR_OFF);
-
- rc = controller_reset_failed(h->cfgtable);
- if (rc)
- dev_info(&h->pdev->dev,
- "Soft reset appears to have failed.\n");
-
- /* since the controller's reset, we have to go back and re-init
- * everything. Easiest to just forget what we've done and do it
- * all over again.
- */
- cciss_undo_allocations_after_kdump_soft_reset(h);
- try_soft_reset = 0;
- if (rc)
- /* don't go to clean4, we already unallocated */
- return -ENODEV;
-
- goto reinit_after_soft_reset;
- }
-
- cciss_scsi_setup(h);
-
- /* Turn the interrupts on so we can service requests */
- h->access.set_intr_mask(h, CCISS_INTR_ON);
-
- /* Get the firmware version */
- inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
- if (inq_buff == NULL) {
- dev_err(&h->pdev->dev, "out of memory\n");
- goto clean4;
- }
-
- return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
- sizeof(InquiryData_struct), 0, CTLR_LUNID, TYPE_CMD);
- if (return_code == IO_OK) {
- h->firm_ver[0] = inq_buff->data_byte[32];
- h->firm_ver[1] = inq_buff->data_byte[33];
- h->firm_ver[2] = inq_buff->data_byte[34];
- h->firm_ver[3] = inq_buff->data_byte[35];
- } else { /* send command failed */
- dev_warn(&h->pdev->dev, "unable to determine firmware"
- " version of controller\n");
- }
- kfree(inq_buff);
-
- cciss_procinit(h);
-
- h->cciss_max_sectors = 8192;
-
- rebuild_lun_table(h, 1, 0);
- cciss_engage_scsi(h);
- h->busy_initializing = 0;
- return 0;
-
-clean4:
- cciss_free_cmd_pool(h);
- cciss_free_scatterlists(h);
- cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
- free_irq(h->intr[h->intr_mode], h);
-clean2:
- unregister_blkdev(h->major, h->devname);
-clean1:
- cciss_destroy_hba_sysfs_entry(h);
-clean0:
- pci_release_regions(pdev);
-clean_no_release_regions:
- h->busy_initializing = 0;
-
- /*
- * Deliberately omit pci_disable_device(): it does something nasty to
- * Smart Array controllers that pci_enable_device does not undo
- */
- pci_set_drvdata(pdev, NULL);
- free_hba(h);
- return -ENODEV;
-}
-
-static void cciss_shutdown(struct pci_dev *pdev)
-{
- ctlr_info_t *h;
- char *flush_buf;
- int return_code;
-
- h = pci_get_drvdata(pdev);
- flush_buf = kzalloc(4, GFP_KERNEL);
- if (!flush_buf) {
- dev_warn(&h->pdev->dev, "cache not flushed, out of memory.\n");
- return;
- }
- /* write all data in the battery backed cache to disk */
- return_code = sendcmd_withirq(h, CCISS_CACHE_FLUSH, flush_buf,
- 4, 0, CTLR_LUNID, TYPE_CMD);
- kfree(flush_buf);
- if (return_code != IO_OK)
- dev_warn(&h->pdev->dev, "Error flushing cache\n");
- h->access.set_intr_mask(h, CCISS_INTR_OFF);
- free_irq(h->intr[h->intr_mode], h);
-}
-
-static int cciss_enter_simple_mode(struct ctlr_info *h)
-{
- u32 trans_support;
-
- trans_support = readl(&(h->cfgtable->TransportSupport));
- if (!(trans_support & SIMPLE_MODE))
- return -ENOTSUPP;
-
- h->max_commands = readl(&(h->cfgtable->CmdsOutMax));
- writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
- writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
- cciss_wait_for_mode_change_ack(h);
- print_cfg_table(h);
- if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
- dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
- return -ENODEV;
- }
- h->transMethod = CFGTBL_Trans_Simple;
- return 0;
-}
-
-
-static void cciss_remove_one(struct pci_dev *pdev)
-{
- ctlr_info_t *h;
- int i, j;
-
- if (pci_get_drvdata(pdev) == NULL) {
- dev_err(&pdev->dev, "Unable to remove device\n");
- return;
- }
-
- h = pci_get_drvdata(pdev);
- i = h->ctlr;
- if (hba[i] == NULL) {
- dev_err(&pdev->dev, "device appears to already be removed\n");
- return;
- }
-
- mutex_lock(&h->busy_shutting_down);
-
- remove_from_scan_list(h);
- remove_proc_entry(h->devname, proc_cciss);
- unregister_blkdev(h->major, h->devname);
-
- /* remove it from the disk list */
- for (j = 0; j < CISS_MAX_LUN; j++) {
- struct gendisk *disk = h->gendisk[j];
- if (disk) {
- struct request_queue *q = disk->queue;
-
- if (disk->flags & GENHD_FL_UP) {
- cciss_destroy_ld_sysfs_entry(h, j, 1);
- del_gendisk(disk);
- }
- if (q)
- blk_cleanup_queue(q);
- }
- }
-
-#ifdef CONFIG_CISS_SCSI_TAPE
- cciss_unregister_scsi(h); /* unhook from SCSI subsystem */
-#endif
-
- cciss_shutdown(pdev);
-
- pci_free_irq_vectors(h->pdev);
-
- iounmap(h->transtable);
- iounmap(h->cfgtable);
- iounmap(h->vaddr);
-
- cciss_free_cmd_pool(h);
- /* Free up sg elements */
- for (j = 0; j < h->nr_cmds; j++)
- kfree(h->scatter_list[j]);
- kfree(h->scatter_list);
- cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
- kfree(h->blockFetchTable);
- if (h->reply_pool)
- pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
- h->reply_pool, h->reply_pool_dhandle);
- /*
- * Deliberately omit pci_disable_device(): it does something nasty to
- * Smart Array controllers that pci_enable_device does not undo
- */
- pci_release_regions(pdev);
- pci_set_drvdata(pdev, NULL);
- cciss_destroy_hba_sysfs_entry(h);
- mutex_unlock(&h->busy_shutting_down);
- free_hba(h);
-}
-
-static struct pci_driver cciss_pci_driver = {
- .name = "cciss",
- .probe = cciss_init_one,
- .remove = cciss_remove_one,
- .id_table = cciss_pci_device_id, /* id_table */
- .shutdown = cciss_shutdown,
-};
-
-/*
- * This is it. Register the PCI driver information for the cards we control
- * the OS will call our registered routines when it finds one of our cards.
- */
-static int __init cciss_init(void)
-{
- int err;
-
- /*
- * The hardware requires that commands are aligned on a 64-bit
- * boundary. Given that we use pci_alloc_consistent() to allocate an
- * array of them, the size must be a multiple of 8 bytes.
- */
- BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT);
- printk(KERN_INFO DRIVER_NAME "\n");
-
- err = bus_register(&cciss_bus_type);
- if (err)
- return err;
-
- /* Start the scan thread */
- cciss_scan_thread = kthread_run(scan_thread, NULL, "cciss_scan");
- if (IS_ERR(cciss_scan_thread)) {
- err = PTR_ERR(cciss_scan_thread);
- goto err_bus_unregister;
- }
-
- /* Register for our PCI devices */
- err = pci_register_driver(&cciss_pci_driver);
- if (err)
- goto err_thread_stop;
-
- return err;
-
-err_thread_stop:
- kthread_stop(cciss_scan_thread);
-err_bus_unregister:
- bus_unregister(&cciss_bus_type);
-
- return err;
-}
-
-static void __exit cciss_cleanup(void)
-{
- int i;
-
- pci_unregister_driver(&cciss_pci_driver);
- /* double check that all controller entrys have been removed */
- for (i = 0; i < MAX_CTLR; i++) {
- if (hba[i] != NULL) {
- dev_warn(&hba[i]->pdev->dev,
- "had to remove controller\n");
- cciss_remove_one(hba[i]->pdev);
- }
- }
- kthread_stop(cciss_scan_thread);
- if (proc_cciss)
- remove_proc_entry("driver/cciss", NULL);
- bus_unregister(&cciss_bus_type);
-}
-
-module_init(cciss_init);
-module_exit(cciss_cleanup);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
deleted file mode 100644
index 24b5fd75501a..000000000000
--- a/drivers/block/cciss.h
+++ /dev/null
@@ -1,433 +0,0 @@
-#ifndef CCISS_H
-#define CCISS_H
-
-#include <linux/genhd.h>
-#include <linux/mutex.h>
-
-#include "cciss_cmd.h"
-
-
-#define NWD_SHIFT 4
-#define MAX_PART (1 << NWD_SHIFT)
-
-#define IO_OK 0
-#define IO_ERROR 1
-#define IO_NEEDS_RETRY 3
-
-#define VENDOR_LEN 8
-#define MODEL_LEN 16
-#define REV_LEN 4
-
-struct ctlr_info;
-typedef struct ctlr_info ctlr_info_t;
-
-struct access_method {
- void (*submit_command)(ctlr_info_t *h, CommandList_struct *c);
- void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
- unsigned long (*fifo_full)(ctlr_info_t *h);
- bool (*intr_pending)(ctlr_info_t *h);
- unsigned long (*command_completed)(ctlr_info_t *h);
-};
-typedef struct _drive_info_struct
-{
- unsigned char LunID[8];
- int usage_count;
- struct request_queue *queue;
- sector_t nr_blocks;
- int block_size;
- int heads;
- int sectors;
- int cylinders;
- int raid_level; /* set to -1 to indicate that
- * the drive is not in use/configured
- */
- int busy_configuring; /* This is set when a drive is being removed
- * to prevent it from being opened or it's
- * queue from being started.
- */
- struct device dev;
- __u8 serial_no[16]; /* from inquiry page 0x83,
- * not necc. null terminated.
- */
- char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */
- char model[MODEL_LEN + 1]; /* SCSI model string */
- char rev[REV_LEN + 1]; /* SCSI revision string */
- char device_initialized; /* indicates whether dev is initialized */
-} drive_info_struct;
-
-struct ctlr_info
-{
- int ctlr;
- char devname[8];
- char *product_name;
- char firm_ver[4]; /* Firmware version */
- struct pci_dev *pdev;
- __u32 board_id;
- void __iomem *vaddr;
- unsigned long paddr;
- int nr_cmds; /* Number of commands allowed on this controller */
- CfgTable_struct __iomem *cfgtable;
- int interrupts_enabled;
- int major;
- int max_commands;
- int commands_outstanding;
- int max_outstanding; /* Debug */
- int num_luns;
- int highest_lun;
- int usage_count; /* number of opens all all minor devices */
- /* Need space for temp sg list
- * number of scatter/gathers supported
- * number of scatter/gathers in chained block
- */
- struct scatterlist **scatter_list;
- int maxsgentries;
- int chainsize;
- int max_cmd_sgentries;
- SGDescriptor_struct **cmd_sg_list;
-
-# define PERF_MODE_INT 0
-# define DOORBELL_INT 1
-# define SIMPLE_MODE_INT 2
-# define MEMQ_MODE_INT 3
- unsigned int intr[4];
- int intr_mode;
- int cciss_max_sectors;
- BYTE cciss_read;
- BYTE cciss_write;
- BYTE cciss_read_capacity;
-
- /* information about each logical volume */
- drive_info_struct *drv[CISS_MAX_LUN];
-
- struct access_method access;
-
- /* queue and queue Info */
- struct list_head reqQ;
- struct list_head cmpQ;
- unsigned int Qdepth;
- unsigned int maxQsinceinit;
- unsigned int maxSG;
- spinlock_t lock;
-
- /* pointers to command and error info pool */
- CommandList_struct *cmd_pool;
- dma_addr_t cmd_pool_dhandle;
- ErrorInfo_struct *errinfo_pool;
- dma_addr_t errinfo_pool_dhandle;
- unsigned long *cmd_pool_bits;
- int nr_allocs;
- int nr_frees;
- int busy_configuring;
- int busy_initializing;
- int busy_scanning;
- struct mutex busy_shutting_down;
-
- /* This element holds the zero based queue number of the last
- * queue to be started. It is used for fairness.
- */
- int next_to_run;
-
- /* Disk structures we need to pass back */
- struct gendisk *gendisk[CISS_MAX_LUN];
-#ifdef CONFIG_CISS_SCSI_TAPE
- struct cciss_scsi_adapter_data_t *scsi_ctlr;
-#endif
- unsigned char alive;
- struct list_head scan_list;
- struct completion scan_wait;
- struct device dev;
- /*
- * Performant mode tables.
- */
- u32 trans_support;
- u32 trans_offset;
- struct TransTable_struct *transtable;
- unsigned long transMethod;
-
- /*
- * Performant mode completion buffer
- */
- u64 *reply_pool;
- dma_addr_t reply_pool_dhandle;
- u64 *reply_pool_head;
- size_t reply_pool_size;
- unsigned char reply_pool_wraparound;
- u32 *blockFetchTable;
-};
-
-/* Defining the diffent access_methods
- *
- * Memory mapped FIFO interface (SMART 53xx cards)
- */
-#define SA5_DOORBELL 0x20
-#define SA5_REQUEST_PORT_OFFSET 0x40
-#define SA5_REPLY_INTR_MASK_OFFSET 0x34
-#define SA5_REPLY_PORT_OFFSET 0x44
-#define SA5_INTR_STATUS 0x30
-#define SA5_SCRATCHPAD_OFFSET 0xB0
-
-#define SA5_CTCFG_OFFSET 0xB4
-#define SA5_CTMEM_OFFSET 0xB8
-
-#define SA5_INTR_OFF 0x08
-#define SA5B_INTR_OFF 0x04
-#define SA5_INTR_PENDING 0x08
-#define SA5B_INTR_PENDING 0x04
-#define FIFO_EMPTY 0xffffffff
-#define CCISS_FIRMWARE_READY 0xffff0000 /* value in scratchpad register */
-/* Perf. mode flags */
-#define SA5_PERF_INTR_PENDING 0x04
-#define SA5_PERF_INTR_OFF 0x05
-#define SA5_OUTDB_STATUS_PERF_BIT 0x01
-#define SA5_OUTDB_CLEAR_PERF_BIT 0x01
-#define SA5_OUTDB_CLEAR 0xA0
-#define SA5_OUTDB_CLEAR_PERF_BIT 0x01
-#define SA5_OUTDB_STATUS 0x9C
-
-
-#define CISS_ERROR_BIT 0x02
-
-#define CCISS_INTR_ON 1
-#define CCISS_INTR_OFF 0
-
-
-/* CCISS_BOARD_READY_WAIT_SECS is how long to wait for a board
- * to become ready, in seconds, before giving up on it.
- * CCISS_BOARD_READY_POLL_INTERVAL_MSECS * is how long to wait
- * between polling the board to see if it is ready, in
- * milliseconds. CCISS_BOARD_READY_ITERATIONS is derived
- * the above.
- */
-#define CCISS_BOARD_READY_WAIT_SECS (120)
-#define CCISS_BOARD_NOT_READY_WAIT_SECS (100)
-#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
-#define CCISS_BOARD_READY_ITERATIONS \
- ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
- CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
-#define CCISS_BOARD_NOT_READY_ITERATIONS \
- ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \
- CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
-#define CCISS_POST_RESET_PAUSE_MSECS (3000)
-#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000)
-#define CCISS_POST_RESET_NOOP_RETRIES (12)
-#define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000)
-
-/*
- Send the command to the hardware
-*/
-static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c)
-{
-#ifdef CCISS_DEBUG
- printk(KERN_WARNING "cciss%d: Sending %08x - down to controller\n",
- h->ctlr, c->busaddr);
-#endif /* CCISS_DEBUG */
- writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
- readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
- h->commands_outstanding++;
- if ( h->commands_outstanding > h->max_outstanding)
- h->max_outstanding = h->commands_outstanding;
-}
-
-/*
- * This card is the opposite of the other cards.
- * 0 turns interrupts on...
- * 0x08 turns them off...
- */
-static void SA5_intr_mask(ctlr_info_t *h, unsigned long val)
-{
- if (val)
- { /* Turn interrupts on */
- h->interrupts_enabled = 1;
- writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- } else /* Turn them off */
- {
- h->interrupts_enabled = 0;
- writel( SA5_INTR_OFF,
- h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- }
-}
-/*
- * This card is the opposite of the other cards.
- * 0 turns interrupts on...
- * 0x04 turns them off...
- */
-static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val)
-{
- if (val)
- { /* Turn interrupts on */
- h->interrupts_enabled = 1;
- writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- } else /* Turn them off */
- {
- h->interrupts_enabled = 0;
- writel( SA5B_INTR_OFF,
- h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- }
-}
-
-/* Performant mode intr_mask */
-static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
-{
- if (val) { /* turn on interrupts */
- h->interrupts_enabled = 1;
- writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- } else {
- h->interrupts_enabled = 0;
- writel(SA5_PERF_INTR_OFF,
- h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
- }
-}
-
-/*
- * Returns true if fifo is full.
- *
- */
-static unsigned long SA5_fifo_full(ctlr_info_t *h)
-{
- if( h->commands_outstanding >= h->max_commands)
- return(1);
- else
- return(0);
-
-}
-/*
- * returns value read from hardware.
- * returns FIFO_EMPTY if there is nothing to read
- */
-static unsigned long SA5_completed(ctlr_info_t *h)
-{
- unsigned long register_value
- = readl(h->vaddr + SA5_REPLY_PORT_OFFSET);
- if(register_value != FIFO_EMPTY)
- {
- h->commands_outstanding--;
-#ifdef CCISS_DEBUG
- printk("cciss: Read %lx back from board\n", register_value);
-#endif /* CCISS_DEBUG */
- }
-#ifdef CCISS_DEBUG
- else
- {
- printk("cciss: FIFO Empty read\n");
- }
-#endif
- return ( register_value);
-
-}
-
-/* Performant mode command completed */
-static unsigned long SA5_performant_completed(ctlr_info_t *h)
-{
- unsigned long register_value = FIFO_EMPTY;
-
- /* flush the controller write of the reply queue by reading
- * outbound doorbell status register.
- */
- register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
- /* msi auto clears the interrupt pending bit. */
- if (!(h->pdev->msi_enabled || h->pdev->msix_enabled)) {
- writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
- /* Do a read in order to flush the write to the controller
- * (as per spec.)
- */
- register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
- }
-
- if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
- register_value = *(h->reply_pool_head);
- (h->reply_pool_head)++;
- h->commands_outstanding--;
- } else {
- register_value = FIFO_EMPTY;
- }
- /* Check for wraparound */
- if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
- h->reply_pool_head = h->reply_pool;
- h->reply_pool_wraparound ^= 1;
- }
-
- return register_value;
-}
-/*
- * Returns true if an interrupt is pending..
- */
-static bool SA5_intr_pending(ctlr_info_t *h)
-{
- unsigned long register_value =
- readl(h->vaddr + SA5_INTR_STATUS);
-#ifdef CCISS_DEBUG
- printk("cciss: intr_pending %lx\n", register_value);
-#endif /* CCISS_DEBUG */
- if( register_value & SA5_INTR_PENDING)
- return 1;
- return 0 ;
-}
-
-/*
- * Returns true if an interrupt is pending..
- */
-static bool SA5B_intr_pending(ctlr_info_t *h)
-{
- unsigned long register_value =
- readl(h->vaddr + SA5_INTR_STATUS);
-#ifdef CCISS_DEBUG
- printk("cciss: intr_pending %lx\n", register_value);
-#endif /* CCISS_DEBUG */
- if( register_value & SA5B_INTR_PENDING)
- return 1;
- return 0 ;
-}
-
-static bool SA5_performant_intr_pending(ctlr_info_t *h)
-{
- unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS);
-
- if (!register_value)
- return false;
-
- if (h->pdev->msi_enabled || h->pdev->msix_enabled)
- return true;
-
- /* Read outbound doorbell to flush */
- register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
- return register_value & SA5_OUTDB_STATUS_PERF_BIT;
-}
-
-static struct access_method SA5_access = {
- .submit_command = SA5_submit_command,
- .set_intr_mask = SA5_intr_mask,
- .fifo_full = SA5_fifo_full,
- .intr_pending = SA5_intr_pending,
- .command_completed = SA5_completed,
-};
-
-static struct access_method SA5B_access = {
- .submit_command = SA5_submit_command,
- .set_intr_mask = SA5B_intr_mask,
- .fifo_full = SA5_fifo_full,
- .intr_pending = SA5B_intr_pending,
- .command_completed = SA5_completed,
-};
-
-static struct access_method SA5_performant_access = {
- .submit_command = SA5_submit_command,
- .set_intr_mask = SA5_performant_intr_mask,
- .fifo_full = SA5_fifo_full,
- .intr_pending = SA5_performant_intr_pending,
- .command_completed = SA5_performant_completed,
-};
-
-struct board_type {
- __u32 board_id;
- char *product_name;
- struct access_method *access;
- int nr_cmds; /* Max cmds this kind of ctlr can handle. */
-};
-
-#endif /* CCISS_H */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
deleted file mode 100644
index d9be6b4d49a6..000000000000
--- a/drivers/block/cciss_cmd.h
+++ /dev/null
@@ -1,269 +0,0 @@
-#ifndef CCISS_CMD_H
-#define CCISS_CMD_H
-
-#include <linux/cciss_defs.h>
-
-/* DEFINES */
-#define CISS_VERSION "1.00"
-
-/* general boundary definitions */
-#define MAXSGENTRIES 32
-#define CCISS_SG_CHAIN 0x80000000
-#define MAXREPLYQS 256
-
-/* Unit Attentions ASC's as defined for the MSA2012sa */
-#define POWER_OR_RESET 0x29
-#define STATE_CHANGED 0x2a
-#define UNIT_ATTENTION_CLEARED 0x2f
-#define LUN_FAILED 0x3e
-#define REPORT_LUNS_CHANGED 0x3f
-
-/* Unit Attentions ASCQ's as defined for the MSA2012sa */
-
- /* These ASCQ's defined for ASC = POWER_OR_RESET */
-#define POWER_ON_RESET 0x00
-#define POWER_ON_REBOOT 0x01
-#define SCSI_BUS_RESET 0x02
-#define MSA_TARGET_RESET 0x03
-#define CONTROLLER_FAILOVER 0x04
-#define TRANSCEIVER_SE 0x05
-#define TRANSCEIVER_LVD 0x06
-
- /* These ASCQ's defined for ASC = STATE_CHANGED */
-#define RESERVATION_PREEMPTED 0x03
-#define ASYM_ACCESS_CHANGED 0x06
-#define LUN_CAPACITY_CHANGED 0x09
-
-/* config space register offsets */
-#define CFG_VENDORID 0x00
-#define CFG_DEVICEID 0x02
-#define CFG_I2OBAR 0x10
-#define CFG_MEM1BAR 0x14
-
-/* i2o space register offsets */
-#define I2O_IBDB_SET 0x20
-#define I2O_IBDB_CLEAR 0x70
-#define I2O_INT_STATUS 0x30
-#define I2O_INT_MASK 0x34
-#define I2O_IBPOST_Q 0x40
-#define I2O_OBPOST_Q 0x44
-#define I2O_DMA1_CFG 0x214
-
-/* Configuration Table */
-#define CFGTBL_ChangeReq 0x00000001l
-#define CFGTBL_AccCmds 0x00000001l
-#define DOORBELL_CTLR_RESET 0x00000004l
-#define DOORBELL_CTLR_RESET2 0x00000020l
-
-#define CFGTBL_Trans_Simple 0x00000002l
-#define CFGTBL_Trans_Performant 0x00000004l
-#define CFGTBL_Trans_use_short_tags 0x20000000l
-
-#define CFGTBL_BusType_Ultra2 0x00000001l
-#define CFGTBL_BusType_Ultra3 0x00000002l
-#define CFGTBL_BusType_Fibre1G 0x00000100l
-#define CFGTBL_BusType_Fibre2G 0x00000200l
-typedef struct _vals32
-{
- __u32 lower;
- __u32 upper;
-} vals32;
-
-typedef union _u64bit
-{
- vals32 val32;
- __u64 val;
-} u64bit;
-
-/* Type defs used in the following structs */
-#define QWORD vals32
-
-/* STRUCTURES */
-#define CISS_MAX_PHYS_LUN 1024
-/* SCSI-3 Cmmands */
-
-#pragma pack(1)
-
-#define CISS_INQUIRY 0x12
-/* Date returned */
-typedef struct _InquiryData_struct
-{
- BYTE data_byte[36];
-} InquiryData_struct;
-
-#define CISS_REPORT_LOG 0xc2 /* Report Logical LUNs */
-#define CISS_REPORT_PHYS 0xc3 /* Report Physical LUNs */
-/* Data returned */
-typedef struct _ReportLUNdata_struct
-{
- BYTE LUNListLength[4];
- DWORD reserved;
- BYTE LUN[CISS_MAX_LUN][8];
-} ReportLunData_struct;
-
-#define CCISS_READ_CAPACITY 0x25 /* Read Capacity */
-typedef struct _ReadCapdata_struct
-{
- BYTE total_size[4]; /* Total size in blocks */
- BYTE block_size[4]; /* Size of blocks in bytes */
-} ReadCapdata_struct;
-
-#define CCISS_READ_CAPACITY_16 0x9e /* Read Capacity 16 */
-
-/* service action to differentiate a 16 byte read capacity from
- other commands that use the 0x9e SCSI op code */
-
-#define CCISS_READ_CAPACITY_16_SERVICE_ACT 0x10
-
-typedef struct _ReadCapdata_struct_16
-{
- BYTE total_size[8]; /* Total size in blocks */
- BYTE block_size[4]; /* Size of blocks in bytes */
- BYTE prot_en:1; /* protection enable bit */
- BYTE rto_en:1; /* reference tag own enable bit */
- BYTE reserved:6; /* reserved bits */
- BYTE reserved2[18]; /* reserved bytes per spec */
-} ReadCapdata_struct_16;
-
-/* Define the supported read/write commands for cciss based controllers */
-
-#define CCISS_READ_10 0x28 /* Read(10) */
-#define CCISS_WRITE_10 0x2a /* Write(10) */
-#define CCISS_READ_16 0x88 /* Read(16) */
-#define CCISS_WRITE_16 0x8a /* Write(16) */
-
-/* Define the CDB lengths supported by cciss based controllers */
-
-#define CDB_LEN10 10
-#define CDB_LEN16 16
-
-/* BMIC commands */
-#define BMIC_READ 0x26
-#define BMIC_WRITE 0x27
-#define BMIC_CACHE_FLUSH 0xc2
-#define CCISS_CACHE_FLUSH 0x01 /* C2 was already being used by CCISS */
-
-#define CCISS_ABORT_MSG 0x00
-#define CCISS_RESET_MSG 0x01
-#define CCISS_RESET_TYPE_CONTROLLER 0x00
-#define CCISS_RESET_TYPE_BUS 0x01
-#define CCISS_RESET_TYPE_TARGET 0x03
-#define CCISS_RESET_TYPE_LUN 0x04
-#define CCISS_NOOP_MSG 0x03
-
-/* Command List Structure */
-#define CTLR_LUNID "\0\0\0\0\0\0\0\0"
-
-typedef struct _CommandListHeader_struct {
- BYTE ReplyQueue;
- BYTE SGList;
- HWORD SGTotal;
- QWORD Tag;
- LUNAddr_struct LUN;
-} CommandListHeader_struct;
-typedef struct _ErrDescriptor_struct {
- QWORD Addr;
- DWORD Len;
-} ErrDescriptor_struct;
-typedef struct _SGDescriptor_struct {
- QWORD Addr;
- DWORD Len;
- DWORD Ext;
-} SGDescriptor_struct;
-
-/* Command types */
-#define CMD_RWREQ 0x00
-#define CMD_IOCTL_PEND 0x01
-#define CMD_SCSI 0x03
-#define CMD_MSG_DONE 0x04
-#define CMD_MSG_TIMEOUT 0x05
-#define CMD_MSG_STALE 0xff
-
-/* This structure needs to be divisible by COMMANDLIST_ALIGNMENT
- * because low bits of the address are used to to indicate that
- * whether the tag contains an index or an address. PAD_32 and
- * PAD_64 can be adjusted independently as needed for 32-bit
- * and 64-bits systems.
- */
-#define COMMANDLIST_ALIGNMENT (32)
-#define IS_64_BIT ((sizeof(long) - 4)/4)
-#define IS_32_BIT (!IS_64_BIT)
-#define PAD_32 (0)
-#define PAD_64 (4)
-#define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
-#define DIRECT_LOOKUP_BIT 0x10
-#define DIRECT_LOOKUP_SHIFT 5
-
-typedef struct _CommandList_struct {
- CommandListHeader_struct Header;
- RequestBlock_struct Request;
- ErrDescriptor_struct ErrDesc;
- SGDescriptor_struct SG[MAXSGENTRIES];
- /* information associated with the command */
- __u32 busaddr; /* physical address of this record */
- ErrorInfo_struct * err_info; /* pointer to the allocated mem */
- int ctlr;
- int cmd_type;
- long cmdindex;
- struct list_head list;
- struct request * rq;
- struct completion *waiting;
- int retry_count;
- void * scsi_cmd;
- char pad[PADSIZE];
-} CommandList_struct;
-
-/* Configuration Table Structure */
-typedef struct _HostWrite_struct {
- DWORD TransportRequest;
- DWORD Reserved;
- DWORD CoalIntDelay;
- DWORD CoalIntCount;
-} HostWrite_struct;
-
-typedef struct _CfgTable_struct {
- BYTE Signature[4];
- DWORD SpecValence;
-#define SIMPLE_MODE 0x02
-#define PERFORMANT_MODE 0x04
-#define MEMQ_MODE 0x08
- DWORD TransportSupport;
- DWORD TransportActive;
- HostWrite_struct HostWrite;
- DWORD CmdsOutMax;
- DWORD BusTypes;
- DWORD TransMethodOffset;
- BYTE ServerName[16];
- DWORD HeartBeat;
- DWORD SCSI_Prefetch;
- DWORD MaxSGElements;
- DWORD MaxLogicalUnits;
- DWORD MaxPhysicalDrives;
- DWORD MaxPhysicalDrivesPerLogicalUnit;
- DWORD MaxPerformantModeCommands;
- u8 reserved[0x78 - 0x58];
- u32 misc_fw_support; /* offset 0x78 */
-#define MISC_FW_DOORBELL_RESET (0x02)
-#define MISC_FW_DOORBELL_RESET2 (0x10)
- u8 driver_version[32];
-} CfgTable_struct;
-
-struct TransTable_struct {
- u32 BlockFetch0;
- u32 BlockFetch1;
- u32 BlockFetch2;
- u32 BlockFetch3;
- u32 BlockFetch4;
- u32 BlockFetch5;
- u32 BlockFetch6;
- u32 BlockFetch7;
- u32 RepQSize;
- u32 RepQCount;
- u32 RepQCtrAddrLow32;
- u32 RepQCtrAddrHigh32;
- u32 RepQAddr0Low32;
- u32 RepQAddr0High32;
-};
-
-#pragma pack()
-#endif /* CCISS_CMD_H */
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
deleted file mode 100644
index 01a1f7e24978..000000000000
--- a/drivers/block/cciss_scsi.c
+++ /dev/null
@@ -1,1653 +0,0 @@
-/*
- * Disk Array driver for HP Smart Array controllers, SCSI Tape module.
- * (C) Copyright 2001, 2007 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 300, Boston, MA
- * 02111-1307, USA.
- *
- * Questions/Comments/Bugfixes to iss_storagedev@hp.com
- *
- * Author: Stephen M. Cameron
- */
-#ifdef CONFIG_CISS_SCSI_TAPE
-
-/* Here we have code to present the driver as a scsi driver
- as it is simultaneously presented as a block driver. The
- reason for doing this is to allow access to SCSI tape drives
- through the array controller. Note in particular, neither
- physical nor logical disks are presented through the scsi layer. */
-
-#include <linux/timer.h>
-#include <linux/completion.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-
-#include <linux/atomic.h>
-
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_host.h>
-
-#include "cciss_scsi.h"
-
-#define CCISS_ABORT_MSG 0x00
-#define CCISS_RESET_MSG 0x01
-
-static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
- size_t size,
- __u8 page_code, unsigned char *scsi3addr,
- int cmd_type);
-
-static CommandList_struct *cmd_alloc(ctlr_info_t *h);
-static CommandList_struct *cmd_special_alloc(ctlr_info_t *h);
-static void cmd_free(ctlr_info_t *h, CommandList_struct *c);
-static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c);
-
-static int cciss_scsi_write_info(struct Scsi_Host *sh,
- char *buffer, /* data buffer */
- int length); /* length of data in buffer */
-static int cciss_scsi_show_info(struct seq_file *m,
- struct Scsi_Host *sh);
-
-static int cciss_scsi_queue_command (struct Scsi_Host *h,
- struct scsi_cmnd *cmd);
-static int cciss_eh_device_reset_handler(struct scsi_cmnd *);
-static int cciss_eh_abort_handler(struct scsi_cmnd *);
-
-static struct cciss_scsi_hba_t ccissscsi[MAX_CTLR] = {
- { .name = "cciss0", .ndevices = 0 },
- { .name = "cciss1", .ndevices = 0 },
- { .name = "cciss2", .ndevices = 0 },
- { .name = "cciss3", .ndevices = 0 },
- { .name = "cciss4", .ndevices = 0 },
- { .name = "cciss5", .ndevices = 0 },
- { .name = "cciss6", .ndevices = 0 },
- { .name = "cciss7", .ndevices = 0 },
-};
-
-static struct scsi_host_template cciss_driver_template = {
- .module = THIS_MODULE,
- .name = "cciss",
- .proc_name = "cciss",
- .write_info = cciss_scsi_write_info,
- .show_info = cciss_scsi_show_info,
- .queuecommand = cciss_scsi_queue_command,
- .this_id = 7,
- .use_clustering = DISABLE_CLUSTERING,
- /* Can't have eh_bus_reset_handler or eh_host_reset_handler for cciss */
- .eh_device_reset_handler= cciss_eh_device_reset_handler,
- .eh_abort_handler = cciss_eh_abort_handler,
-};
-
-#pragma pack(1)
-
-#define SCSI_PAD_32 8
-#define SCSI_PAD_64 8
-
-struct cciss_scsi_cmd_stack_elem_t {
- CommandList_struct cmd;
- ErrorInfo_struct Err;
- __u32 busaddr;
- int cmdindex;
- u8 pad[IS_32_BIT * SCSI_PAD_32 + IS_64_BIT * SCSI_PAD_64];
-};
-
-#pragma pack()
-
-#pragma pack(1)
-struct cciss_scsi_cmd_stack_t {
- struct cciss_scsi_cmd_stack_elem_t *pool;
- struct cciss_scsi_cmd_stack_elem_t **elem;
- dma_addr_t cmd_pool_handle;
- int top;
- int nelems;
-};
-#pragma pack()
-
-struct cciss_scsi_adapter_data_t {
- struct Scsi_Host *scsi_host;
- struct cciss_scsi_cmd_stack_t cmd_stack;
- SGDescriptor_struct **cmd_sg_list;
- int registered;
- spinlock_t lock; // to protect ccissscsi[ctlr];
-};
-
-#define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
- &h->scsi_ctlr->lock, flags);
-#define CPQ_TAPE_UNLOCK(h, flags) spin_unlock_irqrestore( \
- &h->scsi_ctlr->lock, flags);
-
-static CommandList_struct *
-scsi_cmd_alloc(ctlr_info_t *h)
-{
- /* assume only one process in here at a time, locking done by caller. */
- /* use h->lock */
- /* might be better to rewrite how we allocate scsi commands in a way that */
- /* needs no locking at all. */
-
- /* take the top memory chunk off the stack and return it, if any. */
- struct cciss_scsi_cmd_stack_elem_t *c;
- struct cciss_scsi_adapter_data_t *sa;
- struct cciss_scsi_cmd_stack_t *stk;
- u64bit temp64;
-
- sa = h->scsi_ctlr;
- stk = &sa->cmd_stack;
-
- if (stk->top < 0)
- return NULL;
- c = stk->elem[stk->top];
- /* memset(c, 0, sizeof(*c)); */
- memset(&c->cmd, 0, sizeof(c->cmd));
- memset(&c->Err, 0, sizeof(c->Err));
- /* set physical addr of cmd and addr of scsi parameters */
- c->cmd.busaddr = c->busaddr;
- c->cmd.cmdindex = c->cmdindex;
- /* (__u32) (stk->cmd_pool_handle +
- (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */
-
- temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct));
- /* (__u64) (stk->cmd_pool_handle +
- (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) +
- sizeof(CommandList_struct)); */
- stk->top--;
- c->cmd.ErrDesc.Addr.lower = temp64.val32.lower;
- c->cmd.ErrDesc.Addr.upper = temp64.val32.upper;
- c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct);
-
- c->cmd.ctlr = h->ctlr;
- c->cmd.err_info = &c->Err;
-
- return (CommandList_struct *) c;
-}
-
-static void
-scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
-{
- /* assume only one process in here at a time, locking done by caller. */
- /* use h->lock */
- /* drop the free memory chunk on top of the stack. */
-
- struct cciss_scsi_adapter_data_t *sa;
- struct cciss_scsi_cmd_stack_t *stk;
-
- sa = h->scsi_ctlr;
- stk = &sa->cmd_stack;
- stk->top++;
- if (stk->top >= stk->nelems) {
- dev_err(&h->pdev->dev,
- "scsi_cmd_free called too many times.\n");
- BUG();
- }
- stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) c;
-}
-
-static int
-scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
-{
- int i;
- struct cciss_scsi_cmd_stack_t *stk;
- size_t size;
-
- stk = &sa->cmd_stack;
- stk->nelems = cciss_tape_cmds + 2;
- sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
- h->chainsize, stk->nelems);
- if (!sa->cmd_sg_list && h->chainsize > 0)
- return -ENOMEM;
-
- size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
-
- /* Check alignment, see cciss_cmd.h near CommandList_struct def. */
- BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
- /* pci_alloc_consistent guarantees 32-bit DMA address will be used */
- stk->pool = (struct cciss_scsi_cmd_stack_elem_t *)
- pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
-
- if (stk->pool == NULL) {
- cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
- sa->cmd_sg_list = NULL;
- return -ENOMEM;
- }
- stk->elem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL);
- if (!stk->elem) {
- pci_free_consistent(h->pdev, size, stk->pool,
- stk->cmd_pool_handle);
- return -1;
- }
- for (i = 0; i < stk->nelems; i++) {
- stk->elem[i] = &stk->pool[i];
- stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
- (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
- stk->elem[i]->cmdindex = i;
- }
- stk->top = stk->nelems-1;
- return 0;
-}
-
-static void
-scsi_cmd_stack_free(ctlr_info_t *h)
-{
- struct cciss_scsi_adapter_data_t *sa;
- struct cciss_scsi_cmd_stack_t *stk;
- size_t size;
-
- sa = h->scsi_ctlr;
- stk = &sa->cmd_stack;
- if (stk->top != stk->nelems-1) {
- dev_warn(&h->pdev->dev,
- "bug: %d scsi commands are still outstanding.\n",
- stk->nelems - stk->top);
- }
- size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
-
- pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
- stk->pool = NULL;
- cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
- kfree(stk->elem);
- stk->elem = NULL;
-}
-
-#if 0
-static void
-print_cmd(CommandList_struct *cp)
-{
- printk("queue:%d\n", cp->Header.ReplyQueue);
- printk("sglist:%d\n", cp->Header.SGList);
- printk("sgtot:%d\n", cp->Header.SGTotal);
- printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper,
- cp->Header.Tag.lower);
- printk("LUN:0x%8phN\n", cp->Header.LUN.LunAddrBytes);
- printk("CDBLen:%d\n", cp->Request.CDBLen);
- printk("Type:%d\n",cp->Request.Type.Type);
- printk("Attr:%d\n",cp->Request.Type.Attribute);
- printk(" Dir:%d\n",cp->Request.Type.Direction);
- printk("Timeout:%d\n",cp->Request.Timeout);
- printk("CDB: %16ph\n", cp->Request.CDB);
- printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n",
- cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower,
- cp->ErrDesc.Len);
- printk("sgs..........Errorinfo:\n");
- printk("scsistatus:%d\n", cp->err_info->ScsiStatus);
- printk("senselen:%d\n", cp->err_info->SenseLen);
- printk("cmd status:%d\n", cp->err_info->CommandStatus);
- printk("resid cnt:%d\n", cp->err_info->ResidualCnt);
- printk("offense size:%d\n", cp->err_info->MoreErrInfo.Invalid_Cmd.offense_size);
- printk("offense byte:%d\n", cp->err_info->MoreErrInfo.Invalid_Cmd.offense_num);
- printk("offense value:%d\n", cp->err_info->MoreErrInfo.Invalid_Cmd.offense_value);
-}
-#endif
-
-static int
-find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
-{
- /* finds an unused bus, target, lun for a new device */
- /* assumes h->scsi_ctlr->lock is held */
- int i, found=0;
- unsigned char target_taken[CCISS_MAX_SCSI_DEVS_PER_HBA];
-
- memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
-
- target_taken[SELF_SCSI_ID] = 1;
- for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
- target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
-
- for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
- if (!target_taken[i]) {
- *bus = 0; *target=i; *lun = 0; found=1;
- break;
- }
- }
- return (!found);
-}
-struct scsi2map {
- char scsi3addr[8];
- int bus, target, lun;
-};
-
-static int
-cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
- struct cciss_scsi_dev_t *device,
- struct scsi2map *added, int *nadded)
-{
- /* assumes h->scsi_ctlr->lock is held */
- int n = ccissscsi[h->ctlr].ndevices;
- struct cciss_scsi_dev_t *sd;
- int i, bus, target, lun;
- unsigned char addr1[8], addr2[8];
-
- if (n >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
- dev_warn(&h->pdev->dev, "Too many devices, "
- "some will be inaccessible.\n");
- return -1;
- }
-
- bus = target = -1;
- lun = 0;
- /* Is this device a non-zero lun of a multi-lun device */
- /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */
- if (device->scsi3addr[4] != 0) {
- /* Search through our list and find the device which */
- /* has the same 8 byte LUN address, excepting byte 4. */
- /* Assign the same bus and target for this new LUN. */
- /* Use the logical unit number from the firmware. */
- memcpy(addr1, device->scsi3addr, 8);
- addr1[4] = 0;
- for (i = 0; i < n; i++) {
- sd = &ccissscsi[h->ctlr].dev[i];
- memcpy(addr2, sd->scsi3addr, 8);
- addr2[4] = 0;
- /* differ only in byte 4? */
- if (memcmp(addr1, addr2, 8) == 0) {
- bus = sd->bus;
- target = sd->target;
- lun = device->scsi3addr[4];
- break;
- }
- }
- }
-
- sd = &ccissscsi[h->ctlr].dev[n];
- if (lun == 0) {
- if (find_bus_target_lun(h,
- &sd->bus, &sd->target, &sd->lun) != 0)
- return -1;
- } else {
- sd->bus = bus;
- sd->target = target;
- sd->lun = lun;
- }
- added[*nadded].bus = sd->bus;
- added[*nadded].target = sd->target;
- added[*nadded].lun = sd->lun;
- (*nadded)++;
-
- memcpy(sd->scsi3addr, device->scsi3addr, 8);
- memcpy(sd->vendor, device->vendor, sizeof(sd->vendor));
- memcpy(sd->revision, device->revision, sizeof(sd->revision));
- memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
- sd->devtype = device->devtype;
-
- ccissscsi[h->ctlr].ndevices++;
-
- /* initially, (before registering with scsi layer) we don't
- know our hostno and we don't want to print anything first
- time anyway (the scsi layer's inquiries will show that info) */
- if (hostno != -1)
- dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
- scsi_device_type(sd->devtype), hostno,
- sd->bus, sd->target, sd->lun);
- return 0;
-}
-
-static void
-cciss_scsi_remove_entry(ctlr_info_t *h, int hostno, int entry,
- struct scsi2map *removed, int *nremoved)
-{
- /* assumes h->ctlr]->scsi_ctlr->lock is held */
- int i;
- struct cciss_scsi_dev_t sd;
-
- if (entry < 0 || entry >= CCISS_MAX_SCSI_DEVS_PER_HBA) return;
- sd = ccissscsi[h->ctlr].dev[entry];
- removed[*nremoved].bus = sd.bus;
- removed[*nremoved].target = sd.target;
- removed[*nremoved].lun = sd.lun;
- (*nremoved)++;
- for (i = entry; i < ccissscsi[h->ctlr].ndevices-1; i++)
- ccissscsi[h->ctlr].dev[i] = ccissscsi[h->ctlr].dev[i+1];
- ccissscsi[h->ctlr].ndevices--;
- dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
- scsi_device_type(sd.devtype), hostno,
- sd.bus, sd.target, sd.lun);
-}
-
-
-#define SCSI3ADDR_EQ(a,b) ( \
- (a)[7] == (b)[7] && \
- (a)[6] == (b)[6] && \
- (a)[5] == (b)[5] && \
- (a)[4] == (b)[4] && \
- (a)[3] == (b)[3] && \
- (a)[2] == (b)[2] && \
- (a)[1] == (b)[1] && \
- (a)[0] == (b)[0])
-
-static void fixup_botched_add(ctlr_info_t *h, char *scsi3addr)
-{
- /* called when scsi_add_device fails in order to re-adjust */
- /* ccissscsi[] to match the mid layer's view. */
- unsigned long flags;
- int i, j;
- CPQ_TAPE_LOCK(h, flags);
- for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
- if (memcmp(scsi3addr,
- ccissscsi[h->ctlr].dev[i].scsi3addr, 8) == 0) {
- for (j = i; j < ccissscsi[h->ctlr].ndevices-1; j++)
- ccissscsi[h->ctlr].dev[j] =
- ccissscsi[h->ctlr].dev[j+1];
- ccissscsi[h->ctlr].ndevices--;
- break;
- }
- }
- CPQ_TAPE_UNLOCK(h, flags);
-}
-
-static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
- struct cciss_scsi_dev_t *dev2)
-{
- return dev1->devtype == dev2->devtype &&
- memcmp(dev1->scsi3addr, dev2->scsi3addr,
- sizeof(dev1->scsi3addr)) == 0 &&
- memcmp(dev1->device_id, dev2->device_id,
- sizeof(dev1->device_id)) == 0 &&
- memcmp(dev1->vendor, dev2->vendor,
- sizeof(dev1->vendor)) == 0 &&
- memcmp(dev1->model, dev2->model,
- sizeof(dev1->model)) == 0 &&
- memcmp(dev1->revision, dev2->revision,
- sizeof(dev1->revision)) == 0;
-}
-
-static int
-adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
- struct cciss_scsi_dev_t sd[], int nsds)
-{
- /* sd contains scsi3 addresses and devtypes, but
- bus target and lun are not filled in. This funciton
- takes what's in sd to be the current and adjusts
- ccissscsi[] to be in line with what's in sd. */
-
- int i,j, found, changes=0;
- struct cciss_scsi_dev_t *csd;
- unsigned long flags;
- struct scsi2map *added, *removed;
- int nadded, nremoved;
- struct Scsi_Host *sh = NULL;
-
- added = kzalloc(sizeof(*added) * CCISS_MAX_SCSI_DEVS_PER_HBA,
- GFP_KERNEL);
- removed = kzalloc(sizeof(*removed) * CCISS_MAX_SCSI_DEVS_PER_HBA,
- GFP_KERNEL);
-
- if (!added || !removed) {
- dev_warn(&h->pdev->dev,
- "Out of memory in adjust_cciss_scsi_table\n");
- goto free_and_out;
- }
-
- CPQ_TAPE_LOCK(h, flags);
-
- if (hostno != -1) /* if it's not the first time... */
- sh = h->scsi_ctlr->scsi_host;
-
- /* find any devices in ccissscsi[] that are not in
- sd[] and remove them from ccissscsi[] */
-
- i = 0;
- nremoved = 0;
- nadded = 0;
- while (i < ccissscsi[h->ctlr].ndevices) {
- csd = &ccissscsi[h->ctlr].dev[i];
- found=0;
- for (j=0;j<nsds;j++) {
- if (SCSI3ADDR_EQ(sd[j].scsi3addr,
- csd->scsi3addr)) {
- if (device_is_the_same(&sd[j], csd))
- found=2;
- else
- found=1;
- break;
- }
- }
-
- if (found == 0) { /* device no longer present. */
- changes++;
- cciss_scsi_remove_entry(h, hostno, i,
- removed, &nremoved);
- /* remove ^^^, hence i not incremented */
- } else if (found == 1) { /* device is different in some way */
- changes++;
- dev_info(&h->pdev->dev,
- "device c%db%dt%dl%d has changed.\n",
- hostno, csd->bus, csd->target, csd->lun);
- cciss_scsi_remove_entry(h, hostno, i,
- removed, &nremoved);
- /* remove ^^^, hence i not incremented */
- if (cciss_scsi_add_entry(h, hostno, &sd[j],
- added, &nadded) != 0)
- /* we just removed one, so add can't fail. */
- BUG();
- csd->devtype = sd[j].devtype;
- memcpy(csd->device_id, sd[j].device_id,
- sizeof(csd->device_id));
- memcpy(csd->vendor, sd[j].vendor,
- sizeof(csd->vendor));
- memcpy(csd->model, sd[j].model,
- sizeof(csd->model));
- memcpy(csd->revision, sd[j].revision,
- sizeof(csd->revision));
- } else /* device is same as it ever was, */
- i++; /* so just move along. */
- }
-
- /* Now, make sure every device listed in sd[] is also
- listed in ccissscsi[], adding them if they aren't found */
-
- for (i=0;i<nsds;i++) {
- found=0;
- for (j = 0; j < ccissscsi[h->ctlr].ndevices; j++) {
- csd = &ccissscsi[h->ctlr].dev[j];
- if (SCSI3ADDR_EQ(sd[i].scsi3addr,
- csd->scsi3addr)) {
- if (device_is_the_same(&sd[i], csd))
- found=2; /* found device */
- else
- found=1; /* found a bug. */
- break;
- }
- }
- if (!found) {
- changes++;
- if (cciss_scsi_add_entry(h, hostno, &sd[i],
- added, &nadded) != 0)
- break;
- } else if (found == 1) {
- /* should never happen... */
- changes++;
- dev_warn(&h->pdev->dev,
- "device unexpectedly changed\n");
- /* but if it does happen, we just ignore that device */
- }
- }
- CPQ_TAPE_UNLOCK(h, flags);
-
- /* Don't notify scsi mid layer of any changes the first time through */
- /* (or if there are no changes) scsi_scan_host will do it later the */
- /* first time through. */
- if (hostno == -1 || !changes)
- goto free_and_out;
-
- /* Notify scsi mid layer of any removed devices */
- for (i = 0; i < nremoved; i++) {
- struct scsi_device *sdev =
- scsi_device_lookup(sh, removed[i].bus,
- removed[i].target, removed[i].lun);
- if (sdev != NULL) {
- scsi_remove_device(sdev);
- scsi_device_put(sdev);
- } else {
- /* We don't expect to get here. */
- /* future cmds to this device will get selection */
- /* timeout as if the device was gone. */
- dev_warn(&h->pdev->dev, "didn't find "
- "c%db%dt%dl%d\n for removal.",
- hostno, removed[i].bus,
- removed[i].target, removed[i].lun);
- }
- }
-
- /* Notify scsi mid layer of any added devices */
- for (i = 0; i < nadded; i++) {
- int rc;
- rc = scsi_add_device(sh, added[i].bus,
- added[i].target, added[i].lun);
- if (rc == 0)
- continue;
- dev_warn(&h->pdev->dev, "scsi_add_device "
- "c%db%dt%dl%d failed, device not added.\n",
- hostno, added[i].bus, added[i].target, added[i].lun);
- /* now we have to remove it from ccissscsi, */
- /* since it didn't get added to scsi mid layer */
- fixup_botched_add(h, added[i].scsi3addr);
- }
-
-free_and_out:
- kfree(added);
- kfree(removed);
- return 0;
-}
-
-static int
-lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
-{
- int i;
- struct cciss_scsi_dev_t *sd;
- unsigned long flags;
-
- CPQ_TAPE_LOCK(h, flags);
- for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
- sd = &ccissscsi[h->ctlr].dev[i];
- if (sd->bus == bus &&
- sd->target == target &&
- sd->lun == lun) {
- memcpy(scsi3addr, &sd->scsi3addr[0], 8);
- CPQ_TAPE_UNLOCK(h, flags);
- return 0;
- }
- }
- CPQ_TAPE_UNLOCK(h, flags);
- return -1;
-}
-
-static void
-cciss_scsi_setup(ctlr_info_t *h)
-{
- struct cciss_scsi_adapter_data_t * shba;
-
- ccissscsi[h->ctlr].ndevices = 0;
- shba = kmalloc(sizeof(*shba), GFP_KERNEL);
- if (shba == NULL)
- return;
- shba->scsi_host = NULL;
- spin_lock_init(&shba->lock);
- shba->registered = 0;
- if (scsi_cmd_stack_setup(h, shba) != 0) {
- kfree(shba);
- shba = NULL;
- }
- h->scsi_ctlr = shba;
- return;
-}
-
-static void complete_scsi_command(CommandList_struct *c, int timeout,
- __u32 tag)
-{
- struct scsi_cmnd *cmd;
- ctlr_info_t *h;
- ErrorInfo_struct *ei;
-
- ei = c->err_info;
-
- /* First, see if it was a message rather than a command */
- if (c->Request.Type.Type == TYPE_MSG) {
- c->cmd_type = CMD_MSG_DONE;
- return;
- }
-
- cmd = (struct scsi_cmnd *) c->scsi_cmd;
- h = hba[c->ctlr];
-
- scsi_dma_unmap(cmd);
- if (c->Header.SGTotal > h->max_cmd_sgentries)
- cciss_unmap_sg_chain_block(h, c);
-
- cmd->result = (DID_OK << 16); /* host byte */
- cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */
- /* cmd->result |= (GOOD < 1); */ /* status byte */
-
- cmd->result |= (ei->ScsiStatus);
- /* printk("Scsistatus is 0x%02x\n", ei->ScsiStatus); */
-
- /* copy the sense data whether we need to or not. */
-
- memcpy(cmd->sense_buffer, ei->SenseInfo,
- ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
- SCSI_SENSE_BUFFERSIZE :
- ei->SenseLen);
- scsi_set_resid(cmd, ei->ResidualCnt);
-
- if (ei->CommandStatus != 0) { /* an error has occurred */
- switch (ei->CommandStatus) {
- case CMD_TARGET_STATUS:
- /* Pass it up to the upper layers... */
- if (!ei->ScsiStatus) {
-
- /* Ordinarily, this case should never happen, but there is a bug
- in some released firmware revisions that allows it to happen
- if, for example, a 4100 backplane loses power and the tape
- drive is in it. We assume that it's a fatal error of some
- kind because we can't show that it wasn't. We will make it
- look like selection timeout since that is the most common
- reason for this to occur, and it's severe enough. */
-
- cmd->result = DID_NO_CONNECT << 16;
- }
- break;
- case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
- break;
- case CMD_DATA_OVERRUN:
- dev_warn(&h->pdev->dev, "%p has"
- " completed with data overrun "
- "reported\n", c);
- break;
- case CMD_INVALID: {
- /*
- print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, c, sizeof(*c), false);
- print_cmd(c);
- */
- /* We get CMD_INVALID if you address a non-existent tape drive instead
- of a selection timeout (no response). You will see this if you yank
- out a tape drive, then try to access it. This is kind of a shame
- because it means that any other CMD_INVALID (e.g. driver bug) will
- get interpreted as a missing target. */
- cmd->result = DID_NO_CONNECT << 16;
- }
- break;
- case CMD_PROTOCOL_ERR:
- cmd->result = DID_ERROR << 16;
- dev_warn(&h->pdev->dev,
- "%p has protocol error\n", c);
- break;
- case CMD_HARDWARE_ERR:
- cmd->result = DID_ERROR << 16;
- dev_warn(&h->pdev->dev,
- "%p had hardware error\n", c);
- break;
- case CMD_CONNECTION_LOST:
- cmd->result = DID_ERROR << 16;
- dev_warn(&h->pdev->dev,
- "%p had connection lost\n", c);
- break;
- case CMD_ABORTED:
- cmd->result = DID_ABORT << 16;
- dev_warn(&h->pdev->dev, "%p was aborted\n", c);
- break;
- case CMD_ABORT_FAILED:
- cmd->result = DID_ERROR << 16;
- dev_warn(&h->pdev->dev,
- "%p reports abort failed\n", c);
- break;
- case CMD_UNSOLICITED_ABORT:
- cmd->result = DID_ABORT << 16;
- dev_warn(&h->pdev->dev, "%p aborted due to an "
- "unsolicited abort\n", c);
- break;
- case CMD_TIMEOUT:
- cmd->result = DID_TIME_OUT << 16;
- dev_warn(&h->pdev->dev, "%p timedout\n", c);
- break;
- case CMD_UNABORTABLE:
- cmd->result = DID_ERROR << 16;
- dev_warn(&h->pdev->dev, "c %p command "
- "unabortable\n", c);
- break;
- default:
- cmd->result = DID_ERROR << 16;
- dev_warn(&h->pdev->dev,
- "%p returned unknown status %x\n", c,
- ei->CommandStatus);
- }
- }
- cmd->scsi_done(cmd);
- scsi_cmd_free(h, c);
-}
-
-static int
-cciss_scsi_detect(ctlr_info_t *h)
-{
- struct Scsi_Host *sh;
- int error;
-
- sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *));
- if (sh == NULL)
- goto fail;
- sh->io_port = 0; // good enough? FIXME,
- sh->n_io_port = 0; // I don't think we use these two...
- sh->this_id = SELF_SCSI_ID;
- sh->can_queue = cciss_tape_cmds;
- sh->sg_tablesize = h->maxsgentries;
- sh->max_cmd_len = MAX_COMMAND_SIZE;
- sh->max_sectors = h->cciss_max_sectors;
-
- ((struct cciss_scsi_adapter_data_t *)
- h->scsi_ctlr)->scsi_host = sh;
- sh->hostdata[0] = (unsigned long) h;
- sh->irq = h->intr[SIMPLE_MODE_INT];
- sh->unique_id = sh->irq;
- error = scsi_add_host(sh, &h->pdev->dev);
- if (error)
- goto fail_host_put;
- scsi_scan_host(sh);
- return 1;
-
- fail_host_put:
- scsi_host_put(sh);
- fail:
- return 0;
-}
-
-static void
-cciss_unmap_one(struct pci_dev *pdev,
- CommandList_struct *c,
- size_t buflen,
- int data_direction)
-{
- u64bit addr64;
-
- addr64.val32.lower = c->SG[0].Addr.lower;
- addr64.val32.upper = c->SG[0].Addr.upper;
- pci_unmap_single(pdev, (dma_addr_t) addr64.val, buflen, data_direction);
-}
-
-static void
-cciss_map_one(struct pci_dev *pdev,
- CommandList_struct *c,
- unsigned char *buf,
- size_t buflen,
- int data_direction)
-{
- __u64 addr64;
-
- addr64 = (__u64) pci_map_single(pdev, buf, buflen, data_direction);
- c->SG[0].Addr.lower =
- (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
- c->SG[0].Addr.upper =
- (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
- c->SG[0].Len = buflen;
- c->Header.SGList = (__u8) 1; /* no. SGs contig in this cmd */
- c->Header.SGTotal = (__u16) 1; /* total sgs in this cmd list */
-}
-
-static int
-cciss_scsi_do_simple_cmd(ctlr_info_t *h,
- CommandList_struct *c,
- unsigned char *scsi3addr,
- unsigned char *cdb,
- unsigned char cdblen,
- unsigned char *buf, int bufsize,
- int direction)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
-
- c->cmd_type = CMD_IOCTL_PEND; /* treat this like an ioctl */
- c->scsi_cmd = NULL;
- c->Header.ReplyQueue = 0; /* unused in simple mode */
- memcpy(&c->Header.LUN, scsi3addr, sizeof(c->Header.LUN));
- c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */
- // Fill in the request block...
-
- /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n",
- scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
- scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
-
- memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
- memcpy(c->Request.CDB, cdb, cdblen);
- c->Request.Timeout = 0;
- c->Request.CDBLen = cdblen;
- c->Request.Type.Type = TYPE_CMD;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- c->Request.Type.Direction = direction;
-
- /* Fill in the SG list and do dma mapping */
- cciss_map_one(h->pdev, c, (unsigned char *) buf,
- bufsize, DMA_FROM_DEVICE);
-
- c->waiting = &wait;
- enqueue_cmd_and_start_io(h, c);
- wait_for_completion(&wait);
-
- /* undo the dma mapping */
- cciss_unmap_one(h->pdev, c, bufsize, DMA_FROM_DEVICE);
- return(0);
-}
-
-static void
-cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
-{
- ErrorInfo_struct *ei;
-
- ei = c->err_info;
- switch (ei->CommandStatus) {
- case CMD_TARGET_STATUS:
- dev_warn(&h->pdev->dev,
- "cmd %p has completed with errors\n", c);
- dev_warn(&h->pdev->dev,
- "cmd %p has SCSI Status = %x\n",
- c, ei->ScsiStatus);
- if (ei->ScsiStatus == 0)
- dev_warn(&h->pdev->dev,
- "SCSI status is abnormally zero. "
- "(probably indicates selection timeout "
- "reported incorrectly due to a known "
- "firmware bug, circa July, 2001.)\n");
- break;
- case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
- dev_info(&h->pdev->dev, "UNDERRUN\n");
- break;
- case CMD_DATA_OVERRUN:
- dev_warn(&h->pdev->dev, "%p has"
- " completed with data overrun "
- "reported\n", c);
- break;
- case CMD_INVALID: {
- /* controller unfortunately reports SCSI passthru's */
- /* to non-existent targets as invalid commands. */
- dev_warn(&h->pdev->dev,
- "%p is reported invalid (probably means "
- "target device no longer present)\n", c);
- /*
- print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, c, sizeof(*c), false);
- print_cmd(c);
- */
- }
- break;
- case CMD_PROTOCOL_ERR:
- dev_warn(&h->pdev->dev, "%p has protocol error\n", c);
- break;
- case CMD_HARDWARE_ERR:
- /* cmd->result = DID_ERROR << 16; */
- dev_warn(&h->pdev->dev, "%p had hardware error\n", c);
- break;
- case CMD_CONNECTION_LOST:
- dev_warn(&h->pdev->dev, "%p had connection lost\n", c);
- break;
- case CMD_ABORTED:
- dev_warn(&h->pdev->dev, "%p was aborted\n", c);
- break;
- case CMD_ABORT_FAILED:
- dev_warn(&h->pdev->dev,
- "%p reports abort failed\n", c);
- break;
- case CMD_UNSOLICITED_ABORT:
- dev_warn(&h->pdev->dev,
- "%p aborted due to an unsolicited abort\n", c);
- break;
- case CMD_TIMEOUT:
- dev_warn(&h->pdev->dev, "%p timedout\n", c);
- break;
- case CMD_UNABORTABLE:
- dev_warn(&h->pdev->dev,
- "%p unabortable\n", c);
- break;
- default:
- dev_warn(&h->pdev->dev,
- "%p returned unknown status %x\n",
- c, ei->CommandStatus);
- }
-}
-
-static int
-cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
- unsigned char page, unsigned char *buf,
- unsigned char bufsize)
-{
- int rc;
- CommandList_struct *c;
- char cdb[6];
- ErrorInfo_struct *ei;
- unsigned long flags;
-
- spin_lock_irqsave(&h->lock, flags);
- c = scsi_cmd_alloc(h);
- spin_unlock_irqrestore(&h->lock, flags);
-
- if (c == NULL) { /* trouble... */
- printk("cmd_alloc returned NULL!\n");
- return -1;
- }
-
- ei = c->err_info;
-
- cdb[0] = CISS_INQUIRY;
- cdb[1] = (page != 0);
- cdb[2] = page;
- cdb[3] = 0;
- cdb[4] = bufsize;
- cdb[5] = 0;
- rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr, cdb,
- 6, buf, bufsize, XFER_READ);
-
- if (rc != 0) return rc; /* something went wrong */
-
- if (ei->CommandStatus != 0 &&
- ei->CommandStatus != CMD_DATA_UNDERRUN) {
- cciss_scsi_interpret_error(h, c);
- rc = -1;
- }
- spin_lock_irqsave(&h->lock, flags);
- scsi_cmd_free(h, c);
- spin_unlock_irqrestore(&h->lock, flags);
- return rc;
-}
-
-/* Get the device id from inquiry page 0x83 */
-static int cciss_scsi_get_device_id(ctlr_info_t *h, unsigned char *scsi3addr,
- unsigned char *device_id, int buflen)
-{
- int rc;
- unsigned char *buf;
-
- if (buflen > 16)
- buflen = 16;
- buf = kzalloc(64, GFP_KERNEL);
- if (!buf)
- return -1;
- rc = cciss_scsi_do_inquiry(h, scsi3addr, 0x83, buf, 64);
- if (rc == 0)
- memcpy(device_id, &buf[8], buflen);
- kfree(buf);
- return rc != 0;
-}
-
-static int
-cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
- ReportLunData_struct *buf, int bufsize)
-{
- int rc;
- CommandList_struct *c;
- unsigned char cdb[12];
- unsigned char scsi3addr[8];
- ErrorInfo_struct *ei;
- unsigned long flags;
-
- spin_lock_irqsave(&h->lock, flags);
- c = scsi_cmd_alloc(h);
- spin_unlock_irqrestore(&h->lock, flags);
- if (c == NULL) { /* trouble... */
- printk("cmd_alloc returned NULL!\n");
- return -1;
- }
-
- memset(&scsi3addr[0], 0, 8); /* address the controller */
- cdb[0] = CISS_REPORT_PHYS;
- cdb[1] = 0;
- cdb[2] = 0;
- cdb[3] = 0;
- cdb[4] = 0;
- cdb[5] = 0;
- cdb[6] = (bufsize >> 24) & 0xFF; //MSB
- cdb[7] = (bufsize >> 16) & 0xFF;
- cdb[8] = (bufsize >> 8) & 0xFF;
- cdb[9] = bufsize & 0xFF;
- cdb[10] = 0;
- cdb[11] = 0;
-
- rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
- cdb, 12,
- (unsigned char *) buf,
- bufsize, XFER_READ);
-
- if (rc != 0) return rc; /* something went wrong */
-
- ei = c->err_info;
- if (ei->CommandStatus != 0 &&
- ei->CommandStatus != CMD_DATA_UNDERRUN) {
- cciss_scsi_interpret_error(h, c);
- rc = -1;
- }
- spin_lock_irqsave(&h->lock, flags);
- scsi_cmd_free(h, c);
- spin_unlock_irqrestore(&h->lock, flags);
- return rc;
-}
-
-static void
-cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
-{
- /* the idea here is we could get notified from /proc
- that some devices have changed, so we do a report
- physical luns cmd, and adjust our list of devices
- accordingly. (We can't rely on the scsi-mid layer just
- doing inquiries, because the "busses" that the scsi
- mid-layer probes are totally fabricated by this driver,
- so new devices wouldn't show up.
-
- the scsi3addr's of devices won't change so long as the
- adapter is not reset. That means we can rescan and
- tell which devices we already know about, vs. new
- devices, vs. disappearing devices.
-
- Also, if you yank out a tape drive, then put in a disk
- in it's place, (say, a configured volume from another
- array controller for instance) _don't_ poke this driver
- (so it thinks it's still a tape, but _do_ poke the scsi
- mid layer, so it does an inquiry... the scsi mid layer
- will see the physical disk. This would be bad. Need to
- think about how to prevent that. One idea would be to
- snoop all scsi responses and if an inquiry repsonse comes
- back that reports a disk, chuck it an return selection
- timeout instead and adjust our table... Not sure i like
- that though.
-
- */
-#define OBDR_TAPE_INQ_SIZE 49
-#define OBDR_TAPE_SIG "$DR-10"
- ReportLunData_struct *ld_buff;
- unsigned char *inq_buff;
- unsigned char scsi3addr[8];
- __u32 num_luns=0;
- unsigned char *ch;
- struct cciss_scsi_dev_t *currentsd, *this_device;
- int ncurrent=0;
- int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
- int i;
-
- ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
- inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
- currentsd = kzalloc(sizeof(*currentsd) *
- (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL);
- if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) {
- printk(KERN_ERR "cciss: out of memory\n");
- goto out;
- }
- this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
- if (cciss_scsi_do_report_phys_luns(h, ld_buff, reportlunsize) == 0) {
- ch = &ld_buff->LUNListLength[0];
- num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
- if (num_luns > CISS_MAX_PHYS_LUN) {
- printk(KERN_WARNING
- "cciss: Maximum physical LUNs (%d) exceeded. "
- "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN,
- num_luns - CISS_MAX_PHYS_LUN);
- num_luns = CISS_MAX_PHYS_LUN;
- }
- }
- else {
- printk(KERN_ERR "cciss: Report physical LUNs failed.\n");
- goto out;
- }
-
-
- /* adjust our table of devices */
- for (i = 0; i < num_luns; i++) {
- /* for each physical lun, do an inquiry */
- if (ld_buff->LUN[i][3] & 0xC0) continue;
- memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
- memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
-
- if (cciss_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
- (unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
- /* Inquiry failed (msg printed already) */
- continue; /* so we will skip this device. */
-
- this_device->devtype = (inq_buff[0] & 0x1f);
- this_device->bus = -1;
- this_device->target = -1;
- this_device->lun = -1;
- memcpy(this_device->scsi3addr, scsi3addr, 8);
- memcpy(this_device->vendor, &inq_buff[8],
- sizeof(this_device->vendor));
- memcpy(this_device->model, &inq_buff[16],
- sizeof(this_device->model));
- memcpy(this_device->revision, &inq_buff[32],
- sizeof(this_device->revision));
- memset(this_device->device_id, 0,
- sizeof(this_device->device_id));
- cciss_scsi_get_device_id(h, scsi3addr,
- this_device->device_id, sizeof(this_device->device_id));
-
- switch (this_device->devtype) {
- case 0x05: /* CD-ROM */ {
-
- /* We don't *really* support actual CD-ROM devices,
- * just this "One Button Disaster Recovery" tape drive
- * which temporarily pretends to be a CD-ROM drive.
- * So we check that the device is really an OBDR tape
- * device by checking for "$DR-10" in bytes 43-48 of
- * the inquiry data.
- */
- char obdr_sig[7];
-
- strncpy(obdr_sig, &inq_buff[43], 6);
- obdr_sig[6] = '\0';
- if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
- /* Not OBDR device, ignore it. */
- break;
- }
- /* fall through . . . */
- case 0x01: /* sequential access, (tape) */
- case 0x08: /* medium changer */
- if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
- printk(KERN_INFO "cciss%d: %s ignored, "
- "too many devices.\n", h->ctlr,
- scsi_device_type(this_device->devtype));
- break;
- }
- currentsd[ncurrent] = *this_device;
- ncurrent++;
- break;
- default:
- break;
- }
- }
-
- adjust_cciss_scsi_table(h, hostno, currentsd, ncurrent);
-out:
- kfree(inq_buff);
- kfree(ld_buff);
- kfree(currentsd);
- return;
-}
-
-static int
-is_keyword(char *ptr, int len, char *verb) // Thanks to ncr53c8xx.c
-{
- int verb_len = strlen(verb);
- if (len >= verb_len && !memcmp(verb,ptr,verb_len))
- return verb_len;
- else
- return 0;
-}
-
-static int
-cciss_scsi_user_command(ctlr_info_t *h, int hostno, char *buffer, int length)
-{
- int arg_len;
-
- if ((arg_len = is_keyword(buffer, length, "rescan")) != 0)
- cciss_update_non_disk_devices(h, hostno);
- else
- return -EINVAL;
- return length;
-}
-
-static int
-cciss_scsi_write_info(struct Scsi_Host *sh,
- char *buffer, /* data buffer */
- int length) /* length of data in buffer */
-{
- ctlr_info_t *h = (ctlr_info_t *) sh->hostdata[0];
- if (h == NULL) /* This really shouldn't ever happen. */
- return -EINVAL;
-
- return cciss_scsi_user_command(h, sh->host_no,
- buffer, length);
-}
-
-static int
-cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
-{
-
- ctlr_info_t *h = (ctlr_info_t *) sh->hostdata[0];
- int i;
-
- if (h == NULL) /* This really shouldn't ever happen. */
- return -EINVAL;
-
- seq_printf(m, "cciss%d: SCSI host: %d\n",
- h->ctlr, sh->host_no);
-
- /* this information is needed by apps to know which cciss
- device corresponds to which scsi host number without
- having to open a scsi target device node. The device
- information is not a duplicate of /proc/scsi/scsi because
- the two may be out of sync due to scsi hotplug, rather
- this info is for an app to be able to use to know how to
- get them back in sync. */
-
- for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
- struct cciss_scsi_dev_t *sd =
- &ccissscsi[h->ctlr].dev[i];
- seq_printf(m, "c%db%dt%dl%d %02d "
- "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- sh->host_no, sd->bus, sd->target, sd->lun,
- sd->devtype,
- sd->scsi3addr[0], sd->scsi3addr[1],
- sd->scsi3addr[2], sd->scsi3addr[3],
- sd->scsi3addr[4], sd->scsi3addr[5],
- sd->scsi3addr[6], sd->scsi3addr[7]);
- }
- return 0;
-}
-
-/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
- dma mapping and fills in the scatter gather entries of the
- cciss command, c. */
-
-static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
- struct scsi_cmnd *cmd)
-{
- unsigned int len;
- struct scatterlist *sg;
- __u64 addr64;
- int request_nsgs, i, chained, sg_index;
- struct cciss_scsi_adapter_data_t *sa = h->scsi_ctlr;
- SGDescriptor_struct *curr_sg;
-
- BUG_ON(scsi_sg_count(cmd) > h->maxsgentries);
-
- chained = 0;
- sg_index = 0;
- curr_sg = c->SG;
- request_nsgs = scsi_dma_map(cmd);
- if (request_nsgs) {
- scsi_for_each_sg(cmd, sg, request_nsgs, i) {
- if (sg_index + 1 == h->max_cmd_sgentries &&
- !chained && request_nsgs - i > 1) {
- chained = 1;
- sg_index = 0;
- curr_sg = sa->cmd_sg_list[c->cmdindex];
- }
- addr64 = (__u64) sg_dma_address(sg);
- len = sg_dma_len(sg);
- curr_sg[sg_index].Addr.lower =
- (__u32) (addr64 & 0x0FFFFFFFFULL);
- curr_sg[sg_index].Addr.upper =
- (__u32) ((addr64 >> 32) & 0x0FFFFFFFFULL);
- curr_sg[sg_index].Len = len;
- curr_sg[sg_index].Ext = 0;
- ++sg_index;
- }
- if (chained)
- cciss_map_sg_chain_block(h, c,
- sa->cmd_sg_list[c->cmdindex],
- (request_nsgs - (h->max_cmd_sgentries - 1)) *
- sizeof(SGDescriptor_struct));
- }
- /* track how many SG entries we are using */
- if (request_nsgs > h->maxSG)
- h->maxSG = request_nsgs;
- c->Header.SGTotal = (u16) request_nsgs + chained;
- if (request_nsgs > h->max_cmd_sgentries)
- c->Header.SGList = h->max_cmd_sgentries;
- else
- c->Header.SGList = c->Header.SGTotal;
- return;
-}
-
-
-static int
-cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
-{
- ctlr_info_t *h;
- int rc;
- unsigned char scsi3addr[8];
- CommandList_struct *c;
- unsigned long flags;
-
- // Get the ptr to our adapter structure (hba[i]) out of cmd->host.
- // We violate cmd->host privacy here. (Is there another way?)
- h = (ctlr_info_t *) cmd->device->host->hostdata[0];
-
- rc = lookup_scsi3addr(h, cmd->device->channel, cmd->device->id,
- cmd->device->lun, scsi3addr);
- if (rc != 0) {
- /* the scsi nexus does not match any that we presented... */
- /* pretend to mid layer that we got selection timeout */
- cmd->result = DID_NO_CONNECT << 16;
- done(cmd);
- /* we might want to think about registering controller itself
- as a processor device on the bus so sg binds to it. */
- return 0;
- }
-
- /* Ok, we have a reasonable scsi nexus, so send the cmd down, and
- see what the device thinks of it. */
-
- spin_lock_irqsave(&h->lock, flags);
- c = scsi_cmd_alloc(h);
- spin_unlock_irqrestore(&h->lock, flags);
- if (c == NULL) { /* trouble... */
- dev_warn(&h->pdev->dev, "scsi_cmd_alloc returned NULL!\n");
- /* FIXME: next 3 lines are -> BAD! <- */
- cmd->result = DID_NO_CONNECT << 16;
- done(cmd);
- return 0;
- }
-
- // Fill in the command list header
-
- cmd->scsi_done = done; // save this for use by completion code
-
- /* save c in case we have to abort it */
- cmd->host_scribble = (unsigned char *) c;
-
- c->cmd_type = CMD_SCSI;
- c->scsi_cmd = cmd;
- c->Header.ReplyQueue = 0; /* unused in simple mode */
- memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
- c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */
-
- // Fill in the request block...
-
- c->Request.Timeout = 0;
- memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
- BUG_ON(cmd->cmd_len > sizeof(c->Request.CDB));
- c->Request.CDBLen = cmd->cmd_len;
- memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
- c->Request.Type.Type = TYPE_CMD;
- c->Request.Type.Attribute = ATTR_SIMPLE;
- switch (cmd->sc_data_direction) {
- case DMA_TO_DEVICE:
- c->Request.Type.Direction = XFER_WRITE;
- break;
- case DMA_FROM_DEVICE:
- c->Request.Type.Direction = XFER_READ;
- break;
- case DMA_NONE:
- c->Request.Type.Direction = XFER_NONE;
- break;
- case DMA_BIDIRECTIONAL:
- // This can happen if a buggy application does a scsi passthru
- // and sets both inlen and outlen to non-zero. ( see
- // ../scsi/scsi_ioctl.c:scsi_ioctl_send_command() )
-
- c->Request.Type.Direction = XFER_RSVD;
- // This is technically wrong, and cciss controllers should
- // reject it with CMD_INVALID, which is the most correct
- // response, but non-fibre backends appear to let it
- // slide by, and give the same results as if this field
- // were set correctly. Either way is acceptable for
- // our purposes here.
-
- break;
-
- default:
- dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
- cmd->sc_data_direction);
- BUG();
- break;
- }
- cciss_scatter_gather(h, c, cmd);
- enqueue_cmd_and_start_io(h, c);
- /* the cmd'll come back via intr handler in complete_scsi_command() */
- return 0;
-}
-
-static DEF_SCSI_QCMD(cciss_scsi_queue_command)
-
-static void cciss_unregister_scsi(ctlr_info_t *h)
-{
- struct cciss_scsi_adapter_data_t *sa;
- struct cciss_scsi_cmd_stack_t *stk;
- unsigned long flags;
-
- /* we are being forcibly unloaded, and may not refuse. */
-
- spin_lock_irqsave(&h->lock, flags);
- sa = h->scsi_ctlr;
- stk = &sa->cmd_stack;
-
- /* if we weren't ever actually registered, don't unregister */
- if (sa->registered) {
- spin_unlock_irqrestore(&h->lock, flags);
- scsi_remove_host(sa->scsi_host);
- scsi_host_put(sa->scsi_host);
- spin_lock_irqsave(&h->lock, flags);
- }
-
- /* set scsi_host to NULL so our detect routine will
- find us on register */
- sa->scsi_host = NULL;
- spin_unlock_irqrestore(&h->lock, flags);
- scsi_cmd_stack_free(h);
- kfree(sa);
-}
-
-static int cciss_engage_scsi(ctlr_info_t *h)
-{
- struct cciss_scsi_adapter_data_t *sa;
- struct cciss_scsi_cmd_stack_t *stk;
- unsigned long flags;
-
- spin_lock_irqsave(&h->lock, flags);
- sa = h->scsi_ctlr;
- stk = &sa->cmd_stack;
-
- if (sa->registered) {
- dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
- spin_unlock_irqrestore(&h->lock, flags);
- return -ENXIO;
- }
- sa->registered = 1;
- spin_unlock_irqrestore(&h->lock, flags);
- cciss_update_non_disk_devices(h, -1);
- cciss_scsi_detect(h);
- return 0;
-}
-
-static void
-cciss_seq_tape_report(struct seq_file *seq, ctlr_info_t *h)
-{
- unsigned long flags;
-
- CPQ_TAPE_LOCK(h, flags);
- seq_printf(seq,
- "Sequential access devices: %d\n\n",
- ccissscsi[h->ctlr].ndevices);
- CPQ_TAPE_UNLOCK(h, flags);
-}
-
-static int wait_for_device_to_become_ready(ctlr_info_t *h,
- unsigned char lunaddr[])
-{
- int rc;
- int count = 0;
- int waittime = HZ;
- CommandList_struct *c;
-
- c = cmd_alloc(h);
- if (!c) {
- dev_warn(&h->pdev->dev, "out of memory in "
- "wait_for_device_to_become_ready.\n");
- return IO_ERROR;
- }
-
- /* Send test unit ready until device ready, or give up. */
- while (count < 20) {
-
- /* Wait for a bit. do this first, because if we send
- * the TUR right away, the reset will just abort it.
- */
- schedule_timeout_uninterruptible(waittime);
- count++;
-
- /* Increase wait time with each try, up to a point. */
- if (waittime < (HZ * 30))
- waittime = waittime * 2;
-
- /* Send the Test Unit Ready */
- rc = fill_cmd(h, c, TEST_UNIT_READY, NULL, 0, 0,
- lunaddr, TYPE_CMD);
- if (rc == 0)
- rc = sendcmd_withirq_core(h, c, 0);
-
- (void) process_sendcmd_error(h, c);
-
- if (rc != 0)
- goto retry_tur;
-
- if (c->err_info->CommandStatus == CMD_SUCCESS)
- break;
-
- if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
- c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION) {
- if (c->err_info->SenseInfo[2] == NO_SENSE)
- break;
- if (c->err_info->SenseInfo[2] == UNIT_ATTENTION) {
- unsigned char asc;
- asc = c->err_info->SenseInfo[12];
- check_for_unit_attention(h, c);
- if (asc == POWER_OR_RESET)
- break;
- }
- }
-retry_tur:
- dev_warn(&h->pdev->dev, "Waiting %d secs "
- "for device to become ready.\n",
- waittime / HZ);
- rc = 1; /* device not ready. */
- }
-
- if (rc)
- dev_warn(&h->pdev->dev, "giving up on device.\n");
- else
- dev_warn(&h->pdev->dev, "device is ready.\n");
-
- cmd_free(h, c);
- return rc;
-}
-
-/* Need at least one of these error handlers to keep ../scsi/hosts.c from
- * complaining. Doing a host- or bus-reset can't do anything good here.
- * Despite what it might say in scsi_error.c, there may well be commands
- * on the controller, as the cciss driver registers twice, once as a block
- * device for the logical drives, and once as a scsi device, for any tape
- * drives. So we know there are no commands out on the tape drives, but we
- * don't know there are no commands on the controller, and it is likely
- * that there probably are, as the cciss block device is most commonly used
- * as a boot device (embedded controller on HP/Compaq systems.)
-*/
-
-static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
-{
- int rc;
- CommandList_struct *cmd_in_trouble;
- unsigned char lunaddr[8];
- ctlr_info_t *h;
-
- /* find the controller to which the command to be aborted was sent */
- h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
- if (h == NULL) /* paranoia */
- return FAILED;
- dev_warn(&h->pdev->dev, "resetting tape drive or medium changer.\n");
- /* find the command that's giving us trouble */
- cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
- if (cmd_in_trouble == NULL) /* paranoia */
- return FAILED;
- memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
- /* send a reset to the SCSI LUN which the command was sent to */
- rc = sendcmd_withirq(h, CCISS_RESET_MSG, NULL, 0, 0, lunaddr,
- TYPE_MSG);
- if (rc == 0 && wait_for_device_to_become_ready(h, lunaddr) == 0)
- return SUCCESS;
- dev_warn(&h->pdev->dev, "resetting device failed.\n");
- return FAILED;
-}
-
-static int cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
-{
- int rc;
- CommandList_struct *cmd_to_abort;
- unsigned char lunaddr[8];
- ctlr_info_t *h;
-
- /* find the controller to which the command to be aborted was sent */
- h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
- if (h == NULL) /* paranoia */
- return FAILED;
- dev_warn(&h->pdev->dev, "aborting tardy SCSI cmd\n");
-
- /* find the command to be aborted */
- cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
- if (cmd_to_abort == NULL) /* paranoia */
- return FAILED;
- memcpy(lunaddr, &cmd_to_abort->Header.LUN.LunAddrBytes[0], 8);
- rc = sendcmd_withirq(h, CCISS_ABORT_MSG, &cmd_to_abort->Header.Tag,
- 0, 0, lunaddr, TYPE_MSG);
- if (rc == 0)
- return SUCCESS;
- return FAILED;
-
-}
-
-#else /* no CONFIG_CISS_SCSI_TAPE */
-
-/* If no tape support, then these become defined out of existence */
-
-#define cciss_scsi_setup(cntl_num)
-#define cciss_engage_scsi(h)
-
-#endif /* CONFIG_CISS_SCSI_TAPE */
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
deleted file mode 100644
index e71d986727ca..000000000000
--- a/drivers/block/cciss_scsi.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Disk Array driver for HP Smart Array controllers, SCSI Tape module.
- * (C) Copyright 2001, 2007 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 300, Boston, MA
- * 02111-1307, USA.
- *
- * Questions/Comments/Bugfixes to iss_storagedev@hp.com
- *
- */
-#ifdef CONFIG_CISS_SCSI_TAPE
-#ifndef _CCISS_SCSI_H_
-#define _CCISS_SCSI_H_
-
-#include <scsi/scsicam.h> /* possibly irrelevant, since we don't show disks */
-
- /* the scsi id of the adapter... */
-#define SELF_SCSI_ID 15
- /* 15 is somewhat arbitrary, since the scsi-2 bus
- that's presented by the driver to the OS is
- fabricated. The "real" scsi-3 bus the
- hardware presents is fabricated too.
- The actual, honest-to-goodness physical
- bus that the devices are attached to is not
- addressible natively, and may in fact turn
- out to be not scsi at all. */
-
-
-/*
-
-If the upper scsi layer tries to track how many commands we have
-outstanding, it will be operating under the misapprehension that it is
-the only one sending us requests. We also have the block interface,
-which is where most requests must surely come from, so the upper layer's
-notion of how many requests we have outstanding will be wrong most or
-all of the time.
-
-Note, the normal SCSI mid-layer error handling doesn't work well
-for this driver because 1) it takes the io_request_lock before
-calling error handlers and uses a local variable to store flags,
-so the io_request_lock cannot be released and interrupts enabled
-inside the error handlers, and, the error handlers cannot poll
-for command completion because they might get commands from the
-block half of the driver completing, and not know what to do
-with them. That's what we get for making a hybrid scsi/block
-driver, I suppose.
-
-*/
-
-struct cciss_scsi_dev_t {
- int devtype;
- int bus, target, lun; /* as presented to the OS */
- unsigned char scsi3addr[8]; /* as presented to the HW */
- unsigned char device_id[16]; /* from inquiry pg. 0x83 */
- unsigned char vendor[8]; /* bytes 8-15 of inquiry data */
- unsigned char model[16]; /* bytes 16-31 of inquiry data */
- unsigned char revision[4]; /* bytes 32-35 of inquiry data */
-};
-
-struct cciss_scsi_hba_t {
- char *name;
- int ndevices;
-#define CCISS_MAX_SCSI_DEVS_PER_HBA 16
- struct cciss_scsi_dev_t dev[CCISS_MAX_SCSI_DEVS_PER_HBA];
-};
-
-#endif /* _CCISS_SCSI_H_ */
-#endif /* CONFIG_CISS_SCSI_TAPE */
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e02c45cd3c5a..5f0eaee8c8a7 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -151,7 +151,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
op_flags |= REQ_SYNC;
bio = bio_alloc_drbd(GFP_NOIO);
- bio->bi_bdev = bdev->md_bdev;
+ bio_set_dev(bio, bdev->md_bdev);
bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 809fd245c3dc..bd97908c766f 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1019,7 +1019,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
- bio->bi_bdev = device->ldev->md_bdev;
+ bio_set_dev(bio, device->ldev->md_bdev);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d17b6e6393c7..7e8589ce631c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -63,19 +63,15 @@
# define __must_hold(x)
#endif
-/* module parameter, defined in drbd_main.c */
-extern unsigned int minor_count;
-extern bool disable_sendpage;
-extern bool allow_oos;
-void tl_abort_disk_io(struct drbd_device *device);
-
+/* shared module parameters, defined in drbd_main.c */
#ifdef CONFIG_DRBD_FAULT_INJECTION
-extern int enable_faults;
-extern int fault_rate;
-extern int fault_devs;
+extern int drbd_enable_faults;
+extern int drbd_fault_rate;
#endif
-extern char usermode_helper[];
+extern unsigned int drbd_minor_count;
+extern char drbd_usermode_helper[];
+extern int drbd_proc_details;
/* This is used to stop/restart our threads.
@@ -181,8 +177,8 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type);
static inline int
drbd_insert_fault(struct drbd_device *device, unsigned int type) {
#ifdef CONFIG_DRBD_FAULT_INJECTION
- return fault_rate &&
- (enable_faults & (1<<type)) &&
+ return drbd_fault_rate &&
+ (drbd_enable_faults & (1<<type)) &&
_drbd_insert_fault(device, type);
#else
return 0;
@@ -745,6 +741,8 @@ struct drbd_connection {
unsigned current_tle_writes; /* writes seen within this tl epoch */
unsigned long last_reconnect_jif;
+ /* empty member on older kernels without blk_start_plug() */
+ struct blk_plug receiver_plug;
struct drbd_thread receiver;
struct drbd_thread worker;
struct drbd_thread ack_receiver;
@@ -1131,7 +1129,8 @@ extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_sta
extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
-void drbd_print_uuids(struct drbd_device *device, const char *text);
+extern void drbd_print_uuids(struct drbd_device *device, const char *text);
+extern void drbd_queue_unplug(struct drbd_device *device);
extern void conn_md_sync(struct drbd_connection *connection);
extern void drbd_md_write(struct drbd_device *device, void *buffer);
@@ -1463,8 +1462,6 @@ extern struct drbd_resource *drbd_find_resource(const char *name);
extern void drbd_destroy_resource(struct kref *kref);
extern void conn_free_crypto(struct drbd_connection *connection);
-extern int proc_details;
-
/* drbd_req */
extern void do_submit(struct work_struct *ws);
extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
@@ -1628,8 +1625,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
int fault_type, struct bio *bio)
{
__release(local);
- if (!bio->bi_bdev) {
- drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
+ if (!bio->bi_disk) {
+ drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n");
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e2ed28d45ce1..8cb3791898ae 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -77,41 +77,41 @@ MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
#include <linux/moduleparam.h>
-/* allow_open_on_secondary */
-MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
- * this becomes the boot parameter drbd.minor_count */
-module_param(minor_count, uint, 0444);
-module_param(disable_sendpage, bool, 0644);
-module_param(allow_oos, bool, 0);
-module_param(proc_details, int, 0644);
+ * these become boot parameters (e.g., drbd.minor_count) */
#ifdef CONFIG_DRBD_FAULT_INJECTION
-int enable_faults;
-int fault_rate;
-static int fault_count;
-int fault_devs;
+int drbd_enable_faults;
+int drbd_fault_rate;
+static int drbd_fault_count;
+static int drbd_fault_devs;
/* bitmap of enabled faults */
-module_param(enable_faults, int, 0664);
+module_param_named(enable_faults, drbd_enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
-module_param(fault_rate, int, 0664);
+module_param_named(fault_rate, drbd_fault_rate, int, 0664);
/* count of faults inserted */
-module_param(fault_count, int, 0664);
+module_param_named(fault_count, drbd_fault_count, int, 0664);
/* bitmap of devices to insert faults on */
-module_param(fault_devs, int, 0644);
+module_param_named(fault_devs, drbd_fault_devs, int, 0644);
#endif
-/* module parameter, defined */
-unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-bool disable_sendpage;
-bool allow_oos;
-int proc_details; /* Detail level in proc drbd*/
-
+/* module parameters we can keep static */
+static bool drbd_allow_oos; /* allow_open_on_secondary */
+static bool drbd_disable_sendpage;
+MODULE_PARM_DESC(allow_oos, "DONT USE!");
+module_param_named(allow_oos, drbd_allow_oos, bool, 0);
+module_param_named(disable_sendpage, drbd_disable_sendpage, bool, 0644);
+
+/* module parameters we share */
+int drbd_proc_details; /* Detail level in proc drbd*/
+module_param_named(proc_details, drbd_proc_details, int, 0644);
+/* module parameters shared with defaults */
+unsigned int drbd_minor_count = DRBD_MINOR_COUNT_DEF;
/* Module parameter for setting the user mode helper program
* to run. Default is /sbin/drbdadm */
-char usermode_helper[80] = "/sbin/drbdadm";
-
-module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
+char drbd_usermode_helper[80] = "/sbin/drbdadm";
+module_param_named(minor_count, drbd_minor_count, uint, 0444);
+module_param_string(usermode_helper, drbd_usermode_helper, sizeof(drbd_usermode_helper), 0644);
/* in 2.6.x, our device mapping and config info contains our virtual gendisks
* as member "struct gendisk *vdisk;"
@@ -923,7 +923,9 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
/* communicated if (agreed_features & DRBD_FF_WSAME) */
-void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
+static void
+assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
+ struct request_queue *q)
{
if (q) {
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
@@ -1560,7 +1562,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
* put_page(); and would cause either a VM_BUG directly, or
* __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */
- if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+ if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page))
return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
msg_flags |= MSG_NOSIGNAL;
@@ -1932,7 +1934,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
if (device->state.role != R_PRIMARY) {
if (mode & FMODE_WRITE)
rv = -EROFS;
- else if (!allow_oos)
+ else if (!drbd_allow_oos)
rv = -EMEDIUMTYPE;
}
@@ -1952,6 +1954,19 @@ static void drbd_release(struct gendisk *gd, fmode_t mode)
mutex_unlock(&drbd_main_mutex);
}
+/* need to hold resource->req_lock */
+void drbd_queue_unplug(struct drbd_device *device)
+{
+ if (device->state.pdsk >= D_INCONSISTENT && device->state.conn >= C_CONNECTED) {
+ D_ASSERT(device, device->state.role == R_PRIMARY);
+ if (test_and_clear_bit(UNPLUG_REMOTE, &device->flags)) {
+ drbd_queue_work_if_unqueued(
+ &first_peer_device(device)->connection->sender_work,
+ &device->unplug_work);
+ }
+ }
+}
+
static void drbd_set_defaults(struct drbd_device *device)
{
/* Beware! The actual layout differs
@@ -2008,18 +2023,14 @@ void drbd_init_set_defaults(struct drbd_device *device)
device->unplug_work.cb = w_send_write_hint;
device->bm_io_work.w.cb = w_bitmap_io;
- init_timer(&device->resync_timer);
- init_timer(&device->md_sync_timer);
- init_timer(&device->start_resync_timer);
- init_timer(&device->request_timer);
- device->resync_timer.function = resync_timer_fn;
- device->resync_timer.data = (unsigned long) device;
- device->md_sync_timer.function = md_sync_timer_fn;
- device->md_sync_timer.data = (unsigned long) device;
- device->start_resync_timer.function = start_resync_timer_fn;
- device->start_resync_timer.data = (unsigned long) device;
- device->request_timer.function = request_timer_fn;
- device->request_timer.data = (unsigned long) device;
+ setup_timer(&device->resync_timer, resync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->md_sync_timer, md_sync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->start_resync_timer, start_resync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->request_timer, request_timer_fn,
+ (unsigned long)device);
init_waitqueue_head(&device->misc_wait);
init_waitqueue_head(&device->state_wait);
@@ -2131,7 +2142,7 @@ static void drbd_destroy_mempools(void)
static int drbd_create_mempools(void)
{
struct page *page;
- const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
+ const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
int i;
/* prepare our caches and mempools */
@@ -2167,13 +2178,12 @@ static int drbd_create_mempools(void)
goto Enomem;
/* mempools */
- drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+ drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
if (drbd_io_bio_set == NULL)
goto Enomem;
drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
- BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER);
+ BIOSET_NEED_BVECS);
if (drbd_md_io_bio_set == NULL)
goto Enomem;
@@ -2409,7 +2419,6 @@ static void drbd_cleanup(void)
destroy_workqueue(retry.wq);
drbd_genl_unregister();
- drbd_debugfs_cleanup();
idr_for_each_entry(&drbd_devices, device, i)
drbd_delete_device(device);
@@ -2420,6 +2429,8 @@ static void drbd_cleanup(void)
drbd_free_resource(resource);
}
+ drbd_debugfs_cleanup();
+
drbd_destroy_mempools();
unregister_blkdev(DRBD_MAJOR, "drbd");
@@ -2972,12 +2983,12 @@ static int __init drbd_init(void)
{
int err;
- if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
- pr_err("invalid minor_count (%d)\n", minor_count);
+ if (drbd_minor_count < DRBD_MINOR_COUNT_MIN || drbd_minor_count > DRBD_MINOR_COUNT_MAX) {
+ pr_err("invalid minor_count (%d)\n", drbd_minor_count);
#ifdef MODULE
return -EINVAL;
#else
- minor_count = DRBD_MINOR_COUNT_DEF;
+ drbd_minor_count = DRBD_MINOR_COUNT_DEF;
#endif
}
@@ -3900,12 +3911,12 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type)
static struct fault_random_state rrs = {0, 0};
unsigned int ret = (
- (fault_devs == 0 ||
- ((1 << device_to_minor(device)) & fault_devs) != 0) &&
- (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
+ (drbd_fault_devs == 0 ||
+ ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) &&
+ (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate));
if (ret) {
- fault_count++;
+ drbd_fault_count++;
if (__ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "***Simulating %s failure\n",
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ad0fcb43e45c..a12f77e6891e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -344,7 +344,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
(char[60]) { }, /* address */
NULL };
char mb[14];
- char *argv[] = {usermode_helper, cmd, mb, NULL };
+ char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
struct drbd_connection *connection = first_peer_device(device)->connection;
struct sib_info sib;
int ret;
@@ -359,19 +359,19 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
* write out any unsynced meta data changes now */
drbd_md_sync(device);
- drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
+ drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
sib.sib_reason = SIB_HELPER_PRE;
sib.helper_name = cmd;
drbd_bcast_event(device, &sib);
notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
- ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, mb,
+ drbd_usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
else
drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, mb,
+ drbd_usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
sib.sib_reason = SIB_HELPER_POST;
sib.helper_exit_code = ret;
@@ -396,24 +396,24 @@ enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
(char[60]) { }, /* address */
NULL };
char *resource_name = connection->resource->name;
- char *argv[] = {usermode_helper, cmd, resource_name, NULL };
+ char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
int ret;
setup_khelper_env(connection, envp);
conn_md_sync(connection);
- drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
+ drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
/* TODO: conn_bcast_event() ?? */
notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
- ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, resource_name,
+ drbd_usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
else
drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, resource_name,
+ drbd_usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
/* TODO: conn_bcast_event() ?? */
notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
@@ -1236,12 +1236,18 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
static void decide_on_write_same_support(struct drbd_device *device,
struct request_queue *q,
- struct request_queue *b, struct o_qlim *o)
+ struct request_queue *b, struct o_qlim *o,
+ bool disable_write_same)
{
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
bool can_do = b ? b->limits.max_write_same_sectors : true;
+ if (can_do && disable_write_same) {
+ can_do = false;
+ drbd_info(peer_device, "WRITE_SAME disabled by config\n");
+ }
+
if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
can_do = false;
drbd_info(peer_device, "peer does not support WRITE_SAME\n");
@@ -1302,6 +1308,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
struct request_queue *b = NULL;
struct disk_conf *dc;
bool discard_zeroes_if_aligned = true;
+ bool disable_write_same = false;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
@@ -1311,6 +1318,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
dc = rcu_dereference(device->ldev->disk_conf);
max_segments = dc->max_bio_bvecs;
discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
+ disable_write_same = dc->disable_write_same;
rcu_read_unlock();
blk_set_stacking_limits(&q->limits);
@@ -1321,7 +1329,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
- decide_on_write_same_support(device, q, b, o);
+ decide_on_write_same_support(device, q, b, o, disable_write_same);
if (b) {
blk_queue_stack_limits(q, b);
@@ -1612,7 +1620,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
if (write_ordering_changed(old_disk_conf, new_disk_conf))
drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
- if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
+ if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned
+ || old_disk_conf->disable_write_same != new_disk_conf->disable_write_same)
drbd_reconsider_queue_parameters(device, device->ldev, NULL);
drbd_md_sync(device);
@@ -2140,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
static int adm_detach(struct drbd_device *device, int force)
{
- enum drbd_state_rv retcode;
- void *buffer;
- int ret;
-
if (force) {
set_bit(FORCE_DETACH, &device->flags);
drbd_force_state(device, NS(disk, D_FAILED));
- retcode = SS_SUCCESS;
- goto out;
+ return SS_SUCCESS;
}
- drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
- if (buffer) {
- retcode = drbd_request_state(device, NS(disk, D_FAILED));
- drbd_md_put_buffer(device);
- } else /* already <= D_FAILED */
- retcode = SS_NOTHING_TO_DO;
- /* D_FAILED will transition to DISKLESS. */
- drbd_resume_io(device);
- ret = wait_event_interruptible(device->misc_wait,
- device->state.disk != D_FAILED);
- if ((int)retcode == (int)SS_IS_DISKLESS)
- retcode = SS_NOTHING_TO_DO;
- if (ret)
- retcode = ERR_INTR;
-out:
- return retcode;
+ return drbd_request_detach_interruptible(device);
}
/* Detaching the disk is a process in multiple stages. First we need to lock
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 8378142f7a55..582caeb0de86 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -127,7 +127,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_putc(seq, '=');
seq_putc(seq, '>');
for (i = 0; i < y; i++)
- seq_printf(seq, ".");
+ seq_putc(seq, '.');
seq_puts(seq, "] ");
if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
@@ -179,7 +179,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_printf_with_thousands_grouping(seq, dbdt);
seq_puts(seq, " (");
/* ------------------------- ~3s average ------------------------ */
- if (proc_details >= 1) {
+ if (drbd_proc_details >= 1) {
/* this is what drbd_rs_should_slow_down() uses */
i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
@@ -209,7 +209,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
- if (proc_details >= 1) {
+ if (drbd_proc_details >= 1) {
/* 64 bit:
* we convert to sectors in the display below. */
unsigned long bm_bits = drbd_bm_bits(device);
@@ -332,13 +332,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
state.conn == C_VERIFY_T)
drbd_syncer_progress(device, seq, state);
- if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
+ if (drbd_proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
lc_seq_printf_stats(seq, device->resync);
lc_seq_printf_stats(seq, device->act_log);
put_ldev(device);
}
- if (proc_details >= 2)
+ if (drbd_proc_details >= 2)
seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
}
rcu_read_unlock();
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c7e95e6380fb..796eaf347dc0 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -332,7 +332,7 @@ static void drbd_free_pages(struct drbd_device *device, struct page *page, int i
if (page == NULL)
return;
- if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
+ if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
i = page_chain_free(page);
else {
struct page *tmp;
@@ -1100,7 +1100,10 @@ randomize:
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
mutex_lock(peer_device->device->state_mutex);
+ /* avoid a race with conn_request_state( C_DISCONNECTING ) */
+ spin_lock_irq(&connection->resource->req_lock);
set_bit(STATE_SENT, &connection->flags);
+ spin_unlock_irq(&connection->resource->req_lock);
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
mutex_unlock(peer_device->device->state_mutex);
@@ -1194,6 +1197,14 @@ static int decode_header(struct drbd_connection *connection, void *header, struc
return 0;
}
+static void drbd_unplug_all_devices(struct drbd_connection *connection)
+{
+ if (current->plug == &connection->receiver_plug) {
+ blk_finish_plug(&connection->receiver_plug);
+ blk_start_plug(&connection->receiver_plug);
+ } /* else: maybe just schedule() ?? */
+}
+
static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
void *buffer = connection->data.rbuf;
@@ -1209,6 +1220,36 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in
return err;
}
+static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
+{
+ void *buffer = connection->data.rbuf;
+ unsigned int size = drbd_header_size(connection);
+ int err;
+
+ err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
+ if (err != size) {
+ /* If we have nothing in the receive buffer now, to reduce
+ * application latency, try to drain the backend queues as
+ * quickly as possible, and let remote TCP know what we have
+ * received so far. */
+ if (err == -EAGAIN) {
+ drbd_tcp_quickack(connection->data.socket);
+ drbd_unplug_all_devices(connection);
+ }
+ if (err > 0) {
+ buffer += err;
+ size -= err;
+ }
+ err = drbd_recv_all_warn(connection, buffer, size);
+ if (err)
+ return err;
+ }
+
+ err = decode_header(connection, connection->data.rbuf, pi);
+ connection->last_received = jiffies;
+
+ return err;
+}
/* This is blkdev_issue_flush, but asynchronous.
* We want to submit to all component volumes in parallel,
* then wait for all completions.
@@ -1223,7 +1264,7 @@ struct one_flush_context {
struct issue_flush_context *ctx;
};
-void one_flush_endio(struct bio *bio)
+static void one_flush_endio(struct bio *bio)
{
struct one_flush_context *octx = bio->bi_private;
struct drbd_device *device = octx->device;
@@ -1265,7 +1306,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -1548,7 +1589,7 @@ next_bio:
}
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@@ -4085,7 +4126,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
return config_unknown_volume(connection, pi);
device = peer_device->device;
- p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+ p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
if (!p_uuid) {
drbd_err(device, "kmalloc of p_uuid failed\n");
return false;
@@ -4882,8 +4923,8 @@ static void drbdd(struct drbd_connection *connection)
struct data_cmd const *cmd;
drbd_thread_current_set_cpu(&connection->receiver);
- update_receiver_timing_details(connection, drbd_recv_header);
- if (drbd_recv_header(connection, &pi))
+ update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
+ if (drbd_recv_header_maybe_unplug(connection, &pi))
goto err_out;
cmd = &drbd_cmd_handler[pi.cmd];
@@ -5375,8 +5416,11 @@ int drbd_receiver(struct drbd_thread *thi)
}
} while (h == 0);
- if (h > 0)
+ if (h > 0) {
+ blk_start_plug(&connection->receiver_plug);
drbdd(connection);
+ blk_finish_plug(&connection->receiver_plug);
+ }
conn_disconnect(connection);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f6e865b2d543..de8566e55334 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -36,14 +36,18 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector,
/* Update disk stats at start of I/O request */
static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req)
{
- generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9,
- &device->vdisk->part0);
+ struct request_queue *q = device->rq_queue;
+
+ generic_start_io_acct(q, bio_data_dir(req->master_bio),
+ req->i.size >> 9, &device->vdisk->part0);
}
/* Update disk stats when completing request upwards */
static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
{
- generic_end_io_acct(bio_data_dir(req->master_bio),
+ struct request_queue *q = device->rq_queue;
+
+ generic_end_io_acct(q, bio_data_dir(req->master_bio),
&device->vdisk->part0, req->start_jif);
}
@@ -1175,7 +1179,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
else
type = DRBD_FAULT_DT_RD;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
/* State may have changed since we grabbed our reference on the
* ->ldev member. Double check, and short-circuit to endio.
@@ -1275,6 +1279,57 @@ static bool may_do_writes(struct drbd_device *device)
return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
}
+struct drbd_plug_cb {
+ struct blk_plug_cb cb;
+ struct drbd_request *most_recent_req;
+ /* do we need more? */
+};
+
+static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+ struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
+ struct drbd_resource *resource = plug->cb.data;
+ struct drbd_request *req = plug->most_recent_req;
+
+ kfree(cb);
+ if (!req)
+ return;
+
+ spin_lock_irq(&resource->req_lock);
+ /* In case the sender did not process it yet, raise the flag to
+ * have it followed with P_UNPLUG_REMOTE just after. */
+ req->rq_state |= RQ_UNPLUG;
+ /* but also queue a generic unplug */
+ drbd_queue_unplug(req->device);
+ kref_put(&req->kref, drbd_req_destroy);
+ spin_unlock_irq(&resource->req_lock);
+}
+
+static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource)
+{
+ /* A lot of text to say
+ * return (struct drbd_plug_cb*)blk_check_plugged(); */
+ struct drbd_plug_cb *plug;
+ struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));
+
+ if (cb)
+ plug = container_of(cb, struct drbd_plug_cb, cb);
+ else
+ plug = NULL;
+ return plug;
+}
+
+static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
+{
+ struct drbd_request *tmp = plug->most_recent_req;
+ /* Will be sent to some peer.
+ * Remember to tag it with UNPLUG_REMOTE on unplug */
+ kref_get(&req->kref);
+ plug->most_recent_req = req;
+ if (tmp)
+ kref_put(&tmp->kref, drbd_req_destroy);
+}
+
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
struct drbd_resource *resource = device->resource;
@@ -1347,6 +1402,12 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
no_remote = true;
}
+ if (no_remote == false) {
+ struct drbd_plug_cb *plug = drbd_check_plugged(resource);
+ if (plug)
+ drbd_update_plug(plug, req);
+ }
+
/* If it took the fast path in drbd_request_prepare, add it here.
* The slow path has added it already. */
if (list_empty(&req->req_pending_master_completion))
@@ -1395,7 +1456,10 @@ void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned l
static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
{
+ struct blk_plug plug;
struct drbd_request *req, *tmp;
+
+ blk_start_plug(&plug);
list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
const int rw = bio_data_dir(req->master_bio);
@@ -1413,6 +1477,7 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+ blk_finish_plug(&plug);
}
static bool prepare_al_transaction_nonblock(struct drbd_device *device,
@@ -1420,12 +1485,12 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
struct list_head *pending,
struct list_head *later)
{
- struct drbd_request *req, *tmp;
+ struct drbd_request *req;
int wake = 0;
int err;
spin_lock_irq(&device->al_lock);
- list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+ while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
err = drbd_al_begin_io_nonblock(device, &req->i);
if (err == -ENOBUFS)
break;
@@ -1442,17 +1507,20 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
return !list_empty(pending);
}
-void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
{
- struct drbd_request *req, *tmp;
+ struct blk_plug plug;
+ struct drbd_request *req;
- list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+ blk_start_plug(&plug);
+ while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
req->rq_state |= RQ_IN_ACT_LOG;
req->in_actlog_jif = jiffies;
atomic_dec(&device->ap_actlog_cnt);
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+ blk_finish_plug(&plug);
}
void do_submit(struct work_struct *ws)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 9e1866ab238f..a2254f825601 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -212,6 +212,11 @@ enum drbd_req_state_bits {
/* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG,
+ /* This was the most recent request during some blk_finish_plug()
+ * or its implicit from-schedule equivalent.
+ * We may use it as hint to send a P_UNPLUG_REMOTE */
+ __RQ_UNPLUG,
+
/* The peer has sent a retry ACK */
__RQ_POSTPONED,
@@ -249,6 +254,7 @@ enum drbd_req_state_bits {
#define RQ_WSAME (1UL << __RQ_WSAME)
#define RQ_UNMAP (1UL << __RQ_UNMAP)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
+#define RQ_UNPLUG (1UL << __RQ_UNPLUG)
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index eea0c4aec978..0813c654c893 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -346,7 +346,7 @@ static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
enum drbd_role conn_highest_role(struct drbd_connection *connection)
{
- enum drbd_role role = R_UNKNOWN;
+ enum drbd_role role = R_SECONDARY;
struct drbd_peer_device *peer_device;
int vnr;
@@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
unsigned long flags;
union drbd_state os, ns;
enum drbd_state_rv rv;
+ void *buffer = NULL;
init_completion(&done);
if (f & CS_SERIALIZE)
mutex_lock(device->state_mutex);
+ if (f & CS_INHIBIT_MD_IO)
+ buffer = drbd_md_get_buffer(device, __func__);
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
@@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
}
abort:
+ if (buffer)
+ drbd_md_put_buffer(device);
if (f & CS_SERIALIZE)
mutex_unlock(device->state_mutex);
@@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
return rv;
}
+/*
+ * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
+ * there is IO in-flight: the transition into D_FAILED for detach purposes
+ * may get misinterpreted as actual IO error in a confused endio function.
+ *
+ * We wrap it all into wait_event(), to retry in case the drbd_req_state()
+ * returns SS_IN_TRANSIENT_STATE.
+ *
+ * To avoid potential deadlock with e.g. the receiver thread trying to grab
+ * drbd_md_get_buffer() while trying to get out of the "transient state", we
+ * need to grab and release the meta data buffer inside of that wait_event loop.
+ */
+static enum drbd_state_rv
+request_detach(struct drbd_device *device)
+{
+ return drbd_req_state(device, NS(disk, D_FAILED),
+ CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
+}
+
+enum drbd_state_rv
+drbd_request_detach_interruptible(struct drbd_device *device)
+{
+ enum drbd_state_rv rv;
+ int ret;
+
+ drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
+ wait_event_interruptible(device->state_wait,
+ (rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
+ drbd_resume_io(device);
+
+ ret = wait_event_interruptible(device->misc_wait,
+ device->state.disk != D_FAILED);
+
+ if (rv == SS_IS_DISKLESS)
+ rv = SS_NOTHING_TO_DO;
+ if (ret)
+ rv = ERR_INTR;
+
+ return rv;
+}
+
enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
union drbd_state val, enum chg_state_flags f)
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index 6c9d5d4a8a75..0276c98fbbdd 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -71,6 +71,10 @@ enum chg_state_flags {
CS_DC_SUSP = 1 << 10,
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
CS_IGN_OUTD_FAIL = 1 << 11,
+
+ /* Make sure no meta data IO is in flight, by calling
+ * drbd_md_get_buffer(). Used for graceful detach. */
+ CS_INHIBIT_MD_IO = 1 << 12,
};
/* drbd_dev_state and drbd_state are different types. This is to stress the
@@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device,
return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
}
+/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
+enum drbd_state_rv
+drbd_request_detach_interruptible(struct drbd_device *device);
+
enum drbd_role conn_highest_role(struct drbd_connection *connection);
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1d8726a8df34..03471b3fce86 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -65,6 +65,11 @@ void drbd_md_endio(struct bio *bio)
device = bio->bi_private;
device->md_io.error = blk_status_to_errno(bio->bi_status);
+ /* special case: drbd_md_read() during drbd_adm_attach() */
+ if (device->ldev)
+ put_ldev(device);
+ bio_put(bio);
+
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
* If this io completion runs after that timeout expired, this
@@ -79,9 +84,6 @@ void drbd_md_endio(struct bio *bio)
drbd_md_put_buffer(device);
device->md_io.done = 1;
wake_up(&device->misc_wait);
- bio_put(bio);
- if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
- put_ldev(device);
}
/* reads on behalf of the partner,
@@ -128,6 +130,14 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
block_id = peer_req->block_id;
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
+ if (peer_req->flags & EE_WAS_ERROR) {
+ /* In protocol != C, we usually do not send write acks.
+ * In case of a write error, send the neg ack anyways. */
+ if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
+ inc_unacked(device);
+ drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
+ }
+
spin_lock_irqsave(&device->resource->req_lock, flags);
device->writ_cnt += peer_req->i.size >> 9;
list_move_tail(&peer_req->w.list, &device->done_ee);
@@ -195,7 +205,8 @@ void drbd_peer_request_endio(struct bio *bio)
}
}
-void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
+static void
+drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
device->minor, device->resource->name, device->vnr);
@@ -1382,18 +1393,22 @@ static int drbd_send_barrier(struct drbd_connection *connection)
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}
+static int pd_send_unplug_remote(struct drbd_peer_device *pd)
+{
+ struct drbd_socket *sock = &pd->connection->data;
+ if (!drbd_prepare_command(pd, sock))
+ return -EIO;
+ return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
+}
+
int w_send_write_hint(struct drbd_work *w, int cancel)
{
struct drbd_device *device =
container_of(w, struct drbd_device, unplug_work);
- struct drbd_socket *sock;
if (cancel)
return 0;
- sock = &first_peer_device(device)->connection->data;
- if (!drbd_prepare_command(first_peer_device(device), sock))
- return -EIO;
- return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
+ return pd_send_unplug_remote(first_peer_device(device));
}
static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
@@ -1455,6 +1470,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
+ bool do_send_unplug = req->rq_state & RQ_UNPLUG;
int err;
if (unlikely(cancel)) {
@@ -1470,6 +1486,9 @@ int w_send_dblock(struct drbd_work *w, int cancel)
err = drbd_send_dblock(peer_device, req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ if (do_send_unplug && !err)
+ pd_send_unplug_remote(peer_device);
+
return err;
}
@@ -1484,6 +1503,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
+ bool do_send_unplug = req->rq_state & RQ_UNPLUG;
int err;
if (unlikely(cancel)) {
@@ -1501,6 +1521,9 @@ int w_send_read_req(struct drbd_work *w, int cancel)
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ if (do_send_unplug && !err)
+ pd_send_unplug_remote(peer_device);
+
return err;
}
@@ -1513,7 +1536,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
drbd_al_begin_io(device, &req->i);
drbd_req_make_private_bio(req, req->master_bio);
- req->private_bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(req->private_bio, device->ldev->backing_bdev);
generic_make_request(req->private_bio);
return 0;
@@ -1733,6 +1756,11 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
return;
}
+ if (!connection) {
+ drbd_err(device, "No connection to peer, aborting!\n");
+ return;
+ }
+
if (!test_bit(B_RS_H_DONE, &device->flags)) {
if (side == C_SYNC_TARGET) {
/* Since application IO was locked out during C_WF_BITMAP_T and
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 9c00f29e40c1..60c086a53609 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4134,7 +4134,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
bio_init(&bio, &bio_vec, 1);
- bio.bi_bdev = bdev;
+ bio_set_dev(&bio, bdev);
bio_add_page(&bio, page, size, 0);
bio.bi_iter.bi_sector = 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ef8334949b42..85de67334695 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,16 +213,18 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
*/
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
- if (use_dio)
+ if (use_dio) {
+ queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
- else
+ } else {
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+ }
blk_mq_unfreeze_queue(lo->lo_queue);
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
- loff_t logical_blocksize)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -234,12 +236,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
- lo->lo_logical_blocksize = logical_blocksize;
- blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
- blk_queue_logical_block_size(lo->lo_queue,
- lo->lo_logical_blocksize);
- }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -467,12 +463,21 @@ static void lo_complete_rq(struct request *rq)
blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
+static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
+{
+ if (!atomic_dec_and_test(&cmd->ref))
+ return;
+ kfree(cmd->bvec);
+ cmd->bvec = NULL;
+ blk_mq_complete_request(cmd->rq);
+}
+
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
cmd->ret = ret;
- blk_mq_complete_request(cmd->rq);
+ lo_rw_aio_do_completion(cmd);
}
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -480,22 +485,51 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
{
struct iov_iter iter;
struct bio_vec *bvec;
- struct bio *bio = cmd->rq->bio;
+ struct request *rq = cmd->rq;
+ struct bio *bio = rq->bio;
struct file *file = lo->lo_backing_file;
+ unsigned int offset;
+ int segments = 0;
int ret;
- /* nomerge for loop request queue */
- WARN_ON(cmd->rq->bio != cmd->rq->biotail);
+ if (rq->bio != rq->biotail) {
+ struct req_iterator iter;
+ struct bio_vec tmp;
+
+ __rq_for_each_bio(bio, rq)
+ segments += bio_segments(bio);
+ bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
+ if (!bvec)
+ return -EIO;
+ cmd->bvec = bvec;
+
+ /*
+ * The bios of the request may be started from the middle of
+ * the 'bvec' because of bio splitting, so we can't directly
+ * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+ * API will take care of all details for us.
+ */
+ rq_for_each_segment(tmp, rq, iter) {
+ *bvec = tmp;
+ bvec++;
+ }
+ bvec = cmd->bvec;
+ offset = 0;
+ } else {
+ /*
+ * Same here, this bio may be started from the middle of the
+ * 'bvec' because of bio splitting, so offset from the bvec
+ * must be passed to iov iterator
+ */
+ offset = bio->bi_iter.bi_bvec_done;
+ bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+ segments = bio_segments(bio);
+ }
+ atomic_set(&cmd->ref, 2);
- bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
- bio_segments(bio), blk_rq_bytes(cmd->rq));
- /*
- * This bio may be started from the middle of the 'bvec'
- * because of bio splitting, so offset from the bvec must
- * be passed to iov iterator
- */
- iter.iov_offset = bio->bi_iter.bi_bvec_done;
+ segments, blk_rq_bytes(rq));
+ iter.iov_offset = offset;
cmd->iocb.ki_pos = pos;
cmd->iocb.ki_filp = file;
@@ -507,6 +541,8 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
else
ret = call_read_iter(file, &cmd->iocb, &iter);
+ lo_rw_aio_do_completion(cmd);
+
if (ret != -EIOCBQUEUED)
cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
return 0;
@@ -553,74 +589,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
}
}
-struct switch_request {
- struct file *file;
- struct completion wait;
-};
-
static inline void loop_update_dio(struct loop_device *lo)
{
__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
lo->use_dio);
}
-/*
- * Do the actual switch; called from the BIO completion routine
- */
-static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
-{
- struct file *file = p->file;
- struct file *old_file = lo->lo_backing_file;
- struct address_space *mapping;
-
- /* if no new file, only flush of queued bios requested */
- if (!file)
- return;
-
- mapping = file->f_mapping;
- mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
- lo->lo_backing_file = file;
- lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
- mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
- lo->old_gfp_mask = mapping_gfp_mask(mapping);
- mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- loop_update_dio(lo);
-}
-
-/*
- * loop_switch performs the hard work of switching a backing store.
- * First it needs to flush existing IO, it does this by sending a magic
- * BIO down the pipe. The completion of this BIO does the actual switch.
- */
-static int loop_switch(struct loop_device *lo, struct file *file)
-{
- struct switch_request w;
-
- w.file = file;
-
- /* freeze queue and wait for completion of scheduled requests */
- blk_mq_freeze_queue(lo->lo_queue);
-
- /* do the switch action */
- do_loop_switch(lo, &w);
-
- /* unfreeze */
- blk_mq_unfreeze_queue(lo->lo_queue);
-
- return 0;
-}
-
-/*
- * Helper to flush the IOs in loop, but keeping loop thread running
- */
-static int loop_flush(struct loop_device *lo)
-{
- /* loop not yet configured, no running thread, nothing to flush */
- if (lo->lo_state != Lo_bound)
- return 0;
- return loop_switch(lo, NULL);
-}
-
static void loop_reread_partitions(struct loop_device *lo,
struct block_device *bdev)
{
@@ -685,9 +659,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_putf;
/* and ... switch */
- error = loop_switch(lo, file);
- if (error)
- goto out_putf;
+ blk_mq_freeze_queue(lo->lo_queue);
+ mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
+ lo->lo_backing_file = file;
+ lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
+ mapping_set_gfp_mask(file->f_mapping,
+ lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_update_dio(lo);
+ blk_mq_unfreeze_queue(lo->lo_queue);
fput(old_file);
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
@@ -820,7 +799,6 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
- int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
@@ -840,11 +818,9 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
- lo_bits = blksize_bits(lo->lo_logical_blocksize);
- blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -877,7 +853,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct file *file, *f;
struct inode *inode;
struct address_space *mapping;
- unsigned lo_blocksize;
int lo_flags = 0;
int error;
loff_t size;
@@ -921,9 +896,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
- lo_blocksize = S_ISBLK(inode->i_mode) ?
- inode->i_bdev->bd_block_size : PAGE_SIZE;
-
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
@@ -937,8 +909,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = false;
- lo->lo_blocksize = lo_blocksize;
- lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -958,7 +928,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
- set_blocksize(bdev, lo_blocksize);
+ set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
+ block_size(inode->i_bdev) : PAGE_SIZE);
lo->lo_state = Lo_bound;
if (part_shift)
@@ -1064,6 +1035,9 @@ static int loop_clr_fd(struct loop_device *lo)
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
+ blk_queue_logical_block_size(lo->lo_queue, 512);
+ blk_queue_physical_block_size(lo->lo_queue, 512);
+ blk_queue_io_min(lo->lo_queue, 512);
if (bdev) {
bdput(bdev);
invalidate_bdev(bdev);
@@ -1104,7 +1078,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
- int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1137,26 +1110,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto exit;
- if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
- if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
- lo->lo_logical_blocksize = 512;
- lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
- if (LO_INFO_BLOCKSIZE(info) != 512 &&
- LO_INFO_BLOCKSIZE(info) != 1024 &&
- LO_INFO_BLOCKSIZE(info) != 2048 &&
- LO_INFO_BLOCKSIZE(info) != 4096)
- return -EINVAL;
- if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
- return -EINVAL;
- }
-
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit ||
- lo->lo_flags != lo_flags ||
- ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
- lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
- LO_INFO_BLOCKSIZE(info))) {
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto exit;
}
@@ -1348,8 +1304,7 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
- lo->lo_logical_blocksize);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1366,6 +1321,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
return error;
}
+static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
+{
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+
+ if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
+ return -EINVAL;
+
+ blk_mq_freeze_queue(lo->lo_queue);
+
+ blk_queue_logical_block_size(lo->lo_queue, arg);
+ blk_queue_physical_block_size(lo->lo_queue, arg);
+ blk_queue_io_min(lo->lo_queue, arg);
+ loop_update_dio(lo);
+
+ blk_mq_unfreeze_queue(lo->lo_queue);
+
+ return 0;
+}
+
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -1414,6 +1389,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_dio(lo, arg);
break;
+ case LOOP_SET_BLOCK_SIZE:
+ err = -EPERM;
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ err = loop_set_block_size(lo, arg);
+ break;
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
@@ -1613,12 +1593,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
err = loop_clr_fd(lo);
if (!err)
return;
- } else {
+ } else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
* but flush possible ongoing bios in thread.
*/
- loop_flush(lo);
+ blk_mq_freeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue);
}
mutex_unlock(&lo->lo_ctl_mutex);
@@ -1800,9 +1781,13 @@ static int loop_add(struct loop_device **l, int i)
}
lo->lo_queue->queuedata = lo;
+ blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
+
/*
- * It doesn't make sense to enable merge because the I/O
- * submitted to backing file is handled page by page.
+ * By default, we do buffer IO, so it doesn't make sense to enable
+ * merge because the I/O submitted to backing file is handled page by
+ * page. For directio mode, merge does help to dispatch bigger request
+ * to underlayer disk. We will enable merge once directio is enabled.
*/
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
@@ -1996,10 +1981,6 @@ static int __init loop_init(void)
struct loop_device *lo;
int err;
- err = misc_register(&loop_misc);
- if (err < 0)
- return err;
-
part_shift = 0;
if (max_part > 0) {
part_shift = fls(max_part);
@@ -2017,12 +1998,12 @@ static int __init loop_init(void)
if ((1UL << part_shift) > DISK_MAX_PARTS) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
if (max_loop > 1UL << (MINORBITS - part_shift)) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
/*
@@ -2041,6 +2022,11 @@ static int __init loop_init(void)
range = 1UL << MINORBITS;
}
+ err = misc_register(&loop_misc);
+ if (err < 0)
+ goto err_out;
+
+
if (register_blkdev(LOOP_MAJOR, "loop")) {
err = -EIO;
goto misc_out;
@@ -2060,6 +2046,7 @@ static int __init loop_init(void)
misc_out:
misc_deregister(&loop_misc);
+err_out:
return err;
}
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 2c096b9a17b8..f68c1d50802f 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -48,8 +48,6 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
- unsigned lo_blocksize;
- unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
@@ -69,10 +67,13 @@ struct loop_device {
struct loop_cmd {
struct kthread_work work;
struct request *rq;
- struct list_head list;
- bool use_aio; /* use AIO interface to handle I/O */
+ union {
+ bool use_aio; /* use AIO interface to handle I/O */
+ atomic_t ref; /* only for aio */
+ };
long ret;
struct kiocb iocb;
+ struct bio_vec *bvec;
};
/* Support for loadable transfer modules */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5bdf923294a5..2aa87cbdede0 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -128,7 +128,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
static unsigned int nbds_max = 16;
-static int max_part;
+static int max_part = 16;
static struct workqueue_struct *recv_workqueue;
static int part_shift;
@@ -165,7 +165,7 @@ static ssize_t pid_show(struct device *dev,
return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}
-static struct device_attribute pid_attr = {
+static const struct device_attribute pid_attr = {
.attr = { .name = "pid", .mode = S_IRUGO},
.show = pid_show,
};
@@ -1584,6 +1584,15 @@ again:
}
} else {
nbd = idr_find(&nbd_index_idr, index);
+ if (!nbd) {
+ ret = nbd_dev_add(index);
+ if (ret < 0) {
+ mutex_unlock(&nbd_index_mutex);
+ printk(KERN_ERR "nbd: failed to add new device\n");
+ return ret;
+ }
+ nbd = idr_find(&nbd_index_idr, index);
+ }
}
if (!nbd) {
printk(KERN_ERR "nbd: couldn't find device at index %d\n",
@@ -2137,4 +2146,4 @@ MODULE_LICENSE("GPL");
module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
-MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
+MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 85c24cace973..8042c26ea9e6 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1,3 +1,7 @@
+/*
+ * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
+ * Shaohua Li <shli@fb.com>
+ */
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -9,27 +13,110 @@
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
#include <linux/lightnvm.h>
+#include <linux/configfs.h>
+#include <linux/badblocks.h>
+
+#define SECTOR_SHIFT 9
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
+#define FREE_BATCH 16
+
+#define TICKS_PER_SEC 50ULL
+#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
+
+static inline u64 mb_per_tick(int mbps)
+{
+ return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
+}
struct nullb_cmd {
struct list_head list;
struct llist_node ll_list;
- struct call_single_data csd;
+ call_single_data_t csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
struct nullb_queue *nq;
struct hrtimer timer;
+ blk_status_t error;
};
struct nullb_queue {
unsigned long *tag_map;
wait_queue_head_t wait;
unsigned int queue_depth;
+ struct nullb_device *dev;
struct nullb_cmd *cmds;
};
+/*
+ * Status flags for nullb_device.
+ *
+ * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
+ * UP: Device is currently on and visible in userspace.
+ * THROTTLED: Device is being throttled.
+ * CACHE: Device is using a write-back cache.
+ */
+enum nullb_device_flags {
+ NULLB_DEV_FL_CONFIGURED = 0,
+ NULLB_DEV_FL_UP = 1,
+ NULLB_DEV_FL_THROTTLED = 2,
+ NULLB_DEV_FL_CACHE = 3,
+};
+
+/*
+ * nullb_page is a page in memory for nullb devices.
+ *
+ * @page: The page holding the data.
+ * @bitmap: The bitmap represents which sector in the page has data.
+ * Each bit represents one block size. For example, sector 8
+ * will use the 7th bit
+ * The highest 2 bits of bitmap are for special purpose. LOCK means the cache
+ * page is being flushing to storage. FREE means the cache page is freed and
+ * should be skipped from flushing to storage. Please see
+ * null_make_cache_space
+ */
+struct nullb_page {
+ struct page *page;
+ unsigned long bitmap;
+};
+#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1)
+#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2)
+
+struct nullb_device {
+ struct nullb *nullb;
+ struct config_item item;
+ struct radix_tree_root data; /* data stored in the disk */
+ struct radix_tree_root cache; /* disk cache data */
+ unsigned long flags; /* device flags */
+ unsigned int curr_cache;
+ struct badblocks badblocks;
+
+ unsigned long size; /* device size in MB */
+ unsigned long completion_nsec; /* time in ns to complete a request */
+ unsigned long cache_size; /* disk cache size in MB */
+ unsigned int submit_queues; /* number of submission queues */
+ unsigned int home_node; /* home node for the device */
+ unsigned int queue_mode; /* block interface */
+ unsigned int blocksize; /* block size */
+ unsigned int irqmode; /* IRQ completion handler */
+ unsigned int hw_queue_depth; /* queue depth */
+ unsigned int index; /* index of the disk, only valid with a disk */
+ unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
+ bool use_lightnvm; /* register as a LightNVM device */
+ bool blocking; /* blocking blk-mq device */
+ bool use_per_node_hctx; /* use per-node allocation for hardware context */
+ bool power; /* power on/off the device */
+ bool memory_backed; /* if data is stored in memory */
+ bool discard; /* if support discard */
+};
+
struct nullb {
+ struct nullb_device *dev;
struct list_head list;
unsigned int index;
struct request_queue *q;
@@ -37,8 +124,10 @@ struct nullb {
struct nvm_dev *ndev;
struct blk_mq_tag_set *tag_set;
struct blk_mq_tag_set __tag_set;
- struct hrtimer timer;
unsigned int queue_depth;
+ atomic_long_t cur_bytes;
+ struct hrtimer bw_timer;
+ unsigned long cache_flush_pos;
spinlock_t lock;
struct nullb_queue *queues;
@@ -49,7 +138,7 @@ struct nullb {
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
-static int nullb_indexes;
+static DEFINE_IDA(nullb_indexes);
static struct kmem_cache *ppa_cache;
static struct blk_mq_tag_set tag_set;
@@ -65,15 +154,15 @@ enum {
NULL_Q_MQ = 2,
};
-static int submit_queues;
-module_param(submit_queues, int, S_IRUGO);
+static int g_submit_queues = 1;
+module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
-static int home_node = NUMA_NO_NODE;
-module_param(home_node, int, S_IRUGO);
+static int g_home_node = NUMA_NO_NODE;
+module_param_named(home_node, g_home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");
-static int queue_mode = NULL_Q_MQ;
+static int g_queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
{
@@ -92,7 +181,7 @@ static int null_param_store_val(const char *str, int *val, int min, int max)
static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
- return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
+ return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}
static const struct kernel_param_ops null_queue_mode_param_ops = {
@@ -100,38 +189,38 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
.get = param_get_int,
};
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
-static int gb = 250;
-module_param(gb, int, S_IRUGO);
+static int g_gb = 250;
+module_param_named(gb, g_gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");
-static int bs = 512;
-module_param(bs, int, S_IRUGO);
+static int g_bs = 512;
+module_param_named(bs, g_bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
-static bool use_lightnvm;
-module_param(use_lightnvm, bool, S_IRUGO);
+static bool g_use_lightnvm;
+module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO);
MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
-static bool blocking;
-module_param(blocking, bool, S_IRUGO);
+static bool g_blocking;
+module_param_named(blocking, g_blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
static bool shared_tags;
module_param(shared_tags, bool, S_IRUGO);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
-static int irqmode = NULL_IRQ_SOFTIRQ;
+static int g_irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
- return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
+ return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
NULL_IRQ_TIMER);
}
@@ -140,21 +229,358 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
.get = param_get_int,
};
-device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
-static unsigned long completion_nsec = 10000;
-module_param(completion_nsec, ulong, S_IRUGO);
+static unsigned long g_completion_nsec = 10000;
+module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
-static int hw_queue_depth = 64;
-module_param(hw_queue_depth, int, S_IRUGO);
+static int g_hw_queue_depth = 64;
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
-static bool use_per_node_hctx = false;
-module_param(use_per_node_hctx, bool, S_IRUGO);
+static bool g_use_per_node_hctx;
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+static struct nullb_device *null_alloc_dev(void);
+static void null_free_dev(struct nullb_device *dev);
+static void null_del_dev(struct nullb *nullb);
+static int null_add_dev(struct nullb_device *dev);
+static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
+
+static inline struct nullb_device *to_nullb_device(struct config_item *item)
+{
+ return item ? container_of(item, struct nullb_device, item) : NULL;
+}
+
+static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", val);
+}
+
+static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lu\n", val);
+}
+
+static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t nullb_device_uint_attr_store(unsigned int *val,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int result;
+
+ result = kstrtouint(page, 0, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
+ const char *page, size_t count)
+{
+ int result;
+ unsigned long tmp;
+
+ result = kstrtoul(page, 0, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
+ size_t count)
+{
+ bool tmp;
+ int result;
+
+ result = kstrtobool(page, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
+#define NULLB_DEVICE_ATTR(NAME, TYPE) \
+static ssize_t \
+nullb_device_##NAME##_show(struct config_item *item, char *page) \
+{ \
+ return nullb_device_##TYPE##_attr_show( \
+ to_nullb_device(item)->NAME, page); \
+} \
+static ssize_t \
+nullb_device_##NAME##_store(struct config_item *item, const char *page, \
+ size_t count) \
+{ \
+ if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \
+ return -EBUSY; \
+ return nullb_device_##TYPE##_attr_store( \
+ &to_nullb_device(item)->NAME, page, count); \
+} \
+CONFIGFS_ATTR(nullb_device_, NAME);
+
+NULLB_DEVICE_ATTR(size, ulong);
+NULLB_DEVICE_ATTR(completion_nsec, ulong);
+NULLB_DEVICE_ATTR(submit_queues, uint);
+NULLB_DEVICE_ATTR(home_node, uint);
+NULLB_DEVICE_ATTR(queue_mode, uint);
+NULLB_DEVICE_ATTR(blocksize, uint);
+NULLB_DEVICE_ATTR(irqmode, uint);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint);
+NULLB_DEVICE_ATTR(index, uint);
+NULLB_DEVICE_ATTR(use_lightnvm, bool);
+NULLB_DEVICE_ATTR(blocking, bool);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
+NULLB_DEVICE_ATTR(memory_backed, bool);
+NULLB_DEVICE_ATTR(discard, bool);
+NULLB_DEVICE_ATTR(mbps, uint);
+NULLB_DEVICE_ATTR(cache_size, ulong);
+
+static ssize_t nullb_device_power_show(struct config_item *item, char *page)
+{
+ return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
+}
+
+static ssize_t nullb_device_power_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+ bool newp = false;
+ ssize_t ret;
+
+ ret = nullb_device_bool_attr_store(&newp, page, count);
+ if (ret < 0)
+ return ret;
+
+ if (!dev->power && newp) {
+ if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
+ return count;
+ if (null_add_dev(dev)) {
+ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+ return -ENOMEM;
+ }
+
+ set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+ dev->power = newp;
+ } else if (dev->power && !newp) {
+ mutex_lock(&lock);
+ dev->power = newp;
+ null_del_dev(dev->nullb);
+ mutex_unlock(&lock);
+ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+ }
+
+ return count;
+}
+
+CONFIGFS_ATTR(nullb_device_, power);
+
+static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
+{
+ struct nullb_device *t_dev = to_nullb_device(item);
+
+ return badblocks_show(&t_dev->badblocks, page, 0);
+}
+
+static ssize_t nullb_device_badblocks_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *t_dev = to_nullb_device(item);
+ char *orig, *buf, *tmp;
+ u64 start, end;
+ int ret;
+
+ orig = kstrndup(page, count, GFP_KERNEL);
+ if (!orig)
+ return -ENOMEM;
+
+ buf = strstrip(orig);
+
+ ret = -EINVAL;
+ if (buf[0] != '+' && buf[0] != '-')
+ goto out;
+ tmp = strchr(&buf[1], '-');
+ if (!tmp)
+ goto out;
+ *tmp = '\0';
+ ret = kstrtoull(buf + 1, 0, &start);
+ if (ret)
+ goto out;
+ ret = kstrtoull(tmp + 1, 0, &end);
+ if (ret)
+ goto out;
+ ret = -EINVAL;
+ if (start > end)
+ goto out;
+ /* enable badblocks */
+ cmpxchg(&t_dev->badblocks.shift, -1, 0);
+ if (buf[0] == '+')
+ ret = badblocks_set(&t_dev->badblocks, start,
+ end - start + 1, 1);
+ else
+ ret = badblocks_clear(&t_dev->badblocks, start,
+ end - start + 1);
+ if (ret == 0)
+ ret = count;
+out:
+ kfree(orig);
+ return ret;
+}
+CONFIGFS_ATTR(nullb_device_, badblocks);
+
+static struct configfs_attribute *nullb_device_attrs[] = {
+ &nullb_device_attr_size,
+ &nullb_device_attr_completion_nsec,
+ &nullb_device_attr_submit_queues,
+ &nullb_device_attr_home_node,
+ &nullb_device_attr_queue_mode,
+ &nullb_device_attr_blocksize,
+ &nullb_device_attr_irqmode,
+ &nullb_device_attr_hw_queue_depth,
+ &nullb_device_attr_index,
+ &nullb_device_attr_use_lightnvm,
+ &nullb_device_attr_blocking,
+ &nullb_device_attr_use_per_node_hctx,
+ &nullb_device_attr_power,
+ &nullb_device_attr_memory_backed,
+ &nullb_device_attr_discard,
+ &nullb_device_attr_mbps,
+ &nullb_device_attr_cache_size,
+ &nullb_device_attr_badblocks,
+ NULL,
+};
+
+static void nullb_device_release(struct config_item *item)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ badblocks_exit(&dev->badblocks);
+ null_free_device_storage(dev, false);
+ null_free_dev(dev);
+}
+
+static struct configfs_item_operations nullb_device_ops = {
+ .release = nullb_device_release,
+};
+
+static struct config_item_type nullb_device_type = {
+ .ct_item_ops = &nullb_device_ops,
+ .ct_attrs = nullb_device_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct
+config_item *nullb_group_make_item(struct config_group *group, const char *name)
+{
+ struct nullb_device *dev;
+
+ dev = null_alloc_dev();
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&dev->item, name, &nullb_device_type);
+
+ return &dev->item;
+}
+
+static void
+nullb_group_drop_item(struct config_group *group, struct config_item *item)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
+ mutex_lock(&lock);
+ dev->power = false;
+ null_del_dev(dev->nullb);
+ mutex_unlock(&lock);
+ }
+
+ config_item_put(item);
+}
+
+static ssize_t memb_group_features_show(struct config_item *item, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n");
+}
+
+CONFIGFS_ATTR_RO(memb_group_, features);
+
+static struct configfs_attribute *nullb_group_attrs[] = {
+ &memb_group_attr_features,
+ NULL,
+};
+
+static struct configfs_group_operations nullb_group_ops = {
+ .make_item = nullb_group_make_item,
+ .drop_item = nullb_group_drop_item,
+};
+
+static struct config_item_type nullb_group_type = {
+ .ct_group_ops = &nullb_group_ops,
+ .ct_attrs = nullb_group_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem nullb_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "nullb",
+ .ci_type = &nullb_group_type,
+ },
+ },
+};
+
+static inline int null_cache_active(struct nullb *nullb)
+{
+ return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+}
+
+static struct nullb_device *null_alloc_dev(void)
+{
+ struct nullb_device *dev;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
+ INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
+ if (badblocks_init(&dev->badblocks, 0)) {
+ kfree(dev);
+ return NULL;
+ }
+
+ dev->size = g_gb * 1024;
+ dev->completion_nsec = g_completion_nsec;
+ dev->submit_queues = g_submit_queues;
+ dev->home_node = g_home_node;
+ dev->queue_mode = g_queue_mode;
+ dev->blocksize = g_bs;
+ dev->irqmode = g_irqmode;
+ dev->hw_queue_depth = g_hw_queue_depth;
+ dev->use_lightnvm = g_use_lightnvm;
+ dev->blocking = g_blocking;
+ dev->use_per_node_hctx = g_use_per_node_hctx;
+ return dev;
+}
+
+static void null_free_dev(struct nullb_device *dev)
+{
+ kfree(dev);
+}
+
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
clear_bit_unlock(tag, nq->tag_map);
@@ -193,7 +619,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
cmd = &nq->cmds[tag];
cmd->tag = tag;
cmd->nq = nq;
- if (irqmode == NULL_IRQ_TIMER) {
+ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
@@ -229,19 +655,21 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd)
{
struct request_queue *q = NULL;
+ int queue_mode = cmd->nq->dev->queue_mode;
if (cmd->rq)
q = cmd->rq->q;
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, BLK_STS_OK);
+ blk_mq_end_request(cmd->rq, cmd->error);
return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, BLK_STS_OK);
+ blk_end_request_all(cmd->rq, cmd->error);
break;
case NULL_Q_BIO:
+ cmd->bio->bi_status = cmd->error;
bio_endio(cmd->bio);
break;
}
@@ -267,25 +695,582 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
- ktime_t kt = completion_nsec;
+ ktime_t kt = cmd->nq->dev->completion_nsec;
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
static void null_softirq_done_fn(struct request *rq)
{
- if (queue_mode == NULL_Q_MQ)
+ struct nullb *nullb = rq->q->queuedata;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
end_cmd(blk_mq_rq_to_pdu(rq));
else
end_cmd(rq->special);
}
-static inline void null_handle_cmd(struct nullb_cmd *cmd)
+static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
+{
+ struct nullb_page *t_page;
+
+ t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+ if (!t_page)
+ goto out;
+
+ t_page->page = alloc_pages(gfp_flags, 0);
+ if (!t_page->page)
+ goto out_freepage;
+
+ t_page->bitmap = 0;
+ return t_page;
+out_freepage:
+ kfree(t_page);
+out:
+ return NULL;
+}
+
+static void null_free_page(struct nullb_page *t_page)
+{
+ __set_bit(NULLB_PAGE_FREE, &t_page->bitmap);
+ if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap))
+ return;
+ __free_page(t_page->page);
+ kfree(t_page);
+}
+
+static void null_free_sector(struct nullb *nullb, sector_t sector,
+ bool is_cache)
+{
+ unsigned int sector_bit;
+ u64 idx;
+ struct nullb_page *t_page, *ret;
+ struct radix_tree_root *root;
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ sector_bit = (sector & SECTOR_MASK);
+
+ t_page = radix_tree_lookup(root, idx);
+ if (t_page) {
+ __clear_bit(sector_bit, &t_page->bitmap);
+
+ if (!t_page->bitmap) {
+ ret = radix_tree_delete_item(root, idx, t_page);
+ WARN_ON(ret != t_page);
+ null_free_page(ret);
+ if (is_cache)
+ nullb->dev->curr_cache -= PAGE_SIZE;
+ }
+ }
+}
+
+static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
+ struct nullb_page *t_page, bool is_cache)
+{
+ struct radix_tree_root *root;
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+
+ if (radix_tree_insert(root, idx, t_page)) {
+ null_free_page(t_page);
+ t_page = radix_tree_lookup(root, idx);
+ WARN_ON(!t_page || t_page->page->index != idx);
+ } else if (is_cache)
+ nullb->dev->curr_cache += PAGE_SIZE;
+
+ return t_page;
+}
+
+static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
+{
+ unsigned long pos = 0;
+ int nr_pages;
+ struct nullb_page *ret, *t_pages[FREE_BATCH];
+ struct radix_tree_root *root;
+
+ root = is_cache ? &dev->cache : &dev->data;
+
+ do {
+ int i;
+
+ nr_pages = radix_tree_gang_lookup(root,
+ (void **)t_pages, pos, FREE_BATCH);
+
+ for (i = 0; i < nr_pages; i++) {
+ pos = t_pages[i]->page->index;
+ ret = radix_tree_delete_item(root, pos, t_pages[i]);
+ WARN_ON(ret != t_pages[i]);
+ null_free_page(ret);
+ }
+
+ pos++;
+ } while (nr_pages == FREE_BATCH);
+
+ if (is_cache)
+ dev->curr_cache = 0;
+}
+
+static struct nullb_page *__null_lookup_page(struct nullb *nullb,
+ sector_t sector, bool for_write, bool is_cache)
+{
+ unsigned int sector_bit;
+ u64 idx;
+ struct nullb_page *t_page;
+ struct radix_tree_root *root;
+
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ sector_bit = (sector & SECTOR_MASK);
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+ t_page = radix_tree_lookup(root, idx);
+ WARN_ON(t_page && t_page->page->index != idx);
+
+ if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
+ return t_page;
+
+ return NULL;
+}
+
+static struct nullb_page *null_lookup_page(struct nullb *nullb,
+ sector_t sector, bool for_write, bool ignore_cache)
+{
+ struct nullb_page *page = NULL;
+
+ if (!ignore_cache)
+ page = __null_lookup_page(nullb, sector, for_write, true);
+ if (page)
+ return page;
+ return __null_lookup_page(nullb, sector, for_write, false);
+}
+
+static struct nullb_page *null_insert_page(struct nullb *nullb,
+ sector_t sector, bool ignore_cache)
+{
+ u64 idx;
+ struct nullb_page *t_page;
+
+ t_page = null_lookup_page(nullb, sector, true, ignore_cache);
+ if (t_page)
+ return t_page;
+
+ spin_unlock_irq(&nullb->lock);
+
+ t_page = null_alloc_page(GFP_NOIO);
+ if (!t_page)
+ goto out_lock;
+
+ if (radix_tree_preload(GFP_NOIO))
+ goto out_freepage;
+
+ spin_lock_irq(&nullb->lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ t_page->page->index = idx;
+ t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
+ radix_tree_preload_end();
+
+ return t_page;
+out_freepage:
+ null_free_page(t_page);
+out_lock:
+ spin_lock_irq(&nullb->lock);
+ return null_lookup_page(nullb, sector, true, ignore_cache);
+}
+
+static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
+{
+ int i;
+ unsigned int offset;
+ u64 idx;
+ struct nullb_page *t_page, *ret;
+ void *dst, *src;
+
+ idx = c_page->page->index;
+
+ t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
+
+ __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap);
+ if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) {
+ null_free_page(c_page);
+ if (t_page && t_page->bitmap == 0) {
+ ret = radix_tree_delete_item(&nullb->dev->data,
+ idx, t_page);
+ null_free_page(t_page);
+ }
+ return 0;
+ }
+
+ if (!t_page)
+ return -ENOMEM;
+
+ src = kmap_atomic(c_page->page);
+ dst = kmap_atomic(t_page->page);
+
+ for (i = 0; i < PAGE_SECTORS;
+ i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
+ if (test_bit(i, &c_page->bitmap)) {
+ offset = (i << SECTOR_SHIFT);
+ memcpy(dst + offset, src + offset,
+ nullb->dev->blocksize);
+ __set_bit(i, &t_page->bitmap);
+ }
+ }
+
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
+ null_free_page(ret);
+ nullb->dev->curr_cache -= PAGE_SIZE;
+
+ return 0;
+}
+
+static int null_make_cache_space(struct nullb *nullb, unsigned long n)
{
+ int i, err, nr_pages;
+ struct nullb_page *c_pages[FREE_BATCH];
+ unsigned long flushed = 0, one_round;
+
+again:
+ if ((nullb->dev->cache_size * 1024 * 1024) >
+ nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
+ return 0;
+
+ nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
+ (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
+ /*
+ * nullb_flush_cache_page could unlock before using the c_pages. To
+ * avoid race, we don't allow page free
+ */
+ for (i = 0; i < nr_pages; i++) {
+ nullb->cache_flush_pos = c_pages[i]->page->index;
+ /*
+ * We found the page which is being flushed to disk by other
+ * threads
+ */
+ if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap))
+ c_pages[i] = NULL;
+ else
+ __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap);
+ }
+
+ one_round = 0;
+ for (i = 0; i < nr_pages; i++) {
+ if (c_pages[i] == NULL)
+ continue;
+ err = null_flush_cache_page(nullb, c_pages[i]);
+ if (err)
+ return err;
+ one_round++;
+ }
+ flushed += one_round << PAGE_SHIFT;
+
+ if (n > flushed) {
+ if (nr_pages == 0)
+ nullb->cache_flush_pos = 0;
+ if (one_round == 0) {
+ /* give other threads a chance */
+ spin_unlock_irq(&nullb->lock);
+ spin_lock_irq(&nullb->lock);
+ }
+ goto again;
+ }
+ return 0;
+}
+
+static int copy_to_nullb(struct nullb *nullb, struct page *source,
+ unsigned int off, sector_t sector, size_t n, bool is_fua)
+{
+ size_t temp, count = 0;
+ unsigned int offset;
+ struct nullb_page *t_page;
+ void *dst, *src;
+
+ while (count < n) {
+ temp = min_t(size_t, nullb->dev->blocksize, n - count);
+
+ if (null_cache_active(nullb) && !is_fua)
+ null_make_cache_space(nullb, PAGE_SIZE);
+
+ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+ t_page = null_insert_page(nullb, sector,
+ !null_cache_active(nullb) || is_fua);
+ if (!t_page)
+ return -ENOSPC;
+
+ src = kmap_atomic(source);
+ dst = kmap_atomic(t_page->page);
+ memcpy(dst + offset, src + off + count, temp);
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ __set_bit(sector & SECTOR_MASK, &t_page->bitmap);
+
+ if (is_fua)
+ null_free_sector(nullb, sector, true);
+
+ count += temp;
+ sector += temp >> SECTOR_SHIFT;
+ }
+ return 0;
+}
+
+static int copy_from_nullb(struct nullb *nullb, struct page *dest,
+ unsigned int off, sector_t sector, size_t n)
+{
+ size_t temp, count = 0;
+ unsigned int offset;
+ struct nullb_page *t_page;
+ void *dst, *src;
+
+ while (count < n) {
+ temp = min_t(size_t, nullb->dev->blocksize, n - count);
+
+ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+ t_page = null_lookup_page(nullb, sector, false,
+ !null_cache_active(nullb));
+
+ dst = kmap_atomic(dest);
+ if (!t_page) {
+ memset(dst + off + count, 0, temp);
+ goto next;
+ }
+ src = kmap_atomic(t_page->page);
+ memcpy(dst + off + count, src + offset, temp);
+ kunmap_atomic(src);
+next:
+ kunmap_atomic(dst);
+
+ count += temp;
+ sector += temp >> SECTOR_SHIFT;
+ }
+ return 0;
+}
+
+static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
+{
+ size_t temp;
+
+ spin_lock_irq(&nullb->lock);
+ while (n > 0) {
+ temp = min_t(size_t, n, nullb->dev->blocksize);
+ null_free_sector(nullb, sector, false);
+ if (null_cache_active(nullb))
+ null_free_sector(nullb, sector, true);
+ sector += temp >> SECTOR_SHIFT;
+ n -= temp;
+ }
+ spin_unlock_irq(&nullb->lock);
+}
+
+static int null_handle_flush(struct nullb *nullb)
+{
+ int err;
+
+ if (!null_cache_active(nullb))
+ return 0;
+
+ spin_lock_irq(&nullb->lock);
+ while (true) {
+ err = null_make_cache_space(nullb,
+ nullb->dev->cache_size * 1024 * 1024);
+ if (err || nullb->dev->curr_cache == 0)
+ break;
+ }
+
+ WARN_ON(!radix_tree_empty(&nullb->dev->cache));
+ spin_unlock_irq(&nullb->lock);
+ return err;
+}
+
+static int null_transfer(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off, bool is_write, sector_t sector,
+ bool is_fua)
+{
+ int err = 0;
+
+ if (!is_write) {
+ err = copy_from_nullb(nullb, page, off, sector, len);
+ flush_dcache_page(page);
+ } else {
+ flush_dcache_page(page);
+ err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+ }
+
+ return err;
+}
+
+static int null_handle_rq(struct nullb_cmd *cmd)
+{
+ struct request *rq = cmd->rq;
+ struct nullb *nullb = cmd->nq->dev->nullb;
+ int err;
+ unsigned int len;
+ sector_t sector;
+ struct req_iterator iter;
+ struct bio_vec bvec;
+
+ sector = blk_rq_pos(rq);
+
+ if (req_op(rq) == REQ_OP_DISCARD) {
+ null_handle_discard(nullb, sector, blk_rq_bytes(rq));
+ return 0;
+ }
+
+ spin_lock_irq(&nullb->lock);
+ rq_for_each_segment(bvec, rq, iter) {
+ len = bvec.bv_len;
+ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+ op_is_write(req_op(rq)), sector,
+ req_op(rq) & REQ_FUA);
+ if (err) {
+ spin_unlock_irq(&nullb->lock);
+ return err;
+ }
+ sector += len >> SECTOR_SHIFT;
+ }
+ spin_unlock_irq(&nullb->lock);
+
+ return 0;
+}
+
+static int null_handle_bio(struct nullb_cmd *cmd)
+{
+ struct bio *bio = cmd->bio;
+ struct nullb *nullb = cmd->nq->dev->nullb;
+ int err;
+ unsigned int len;
+ sector_t sector;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+
+ sector = bio->bi_iter.bi_sector;
+
+ if (bio_op(bio) == REQ_OP_DISCARD) {
+ null_handle_discard(nullb, sector,
+ bio_sectors(bio) << SECTOR_SHIFT);
+ return 0;
+ }
+
+ spin_lock_irq(&nullb->lock);
+ bio_for_each_segment(bvec, bio, iter) {
+ len = bvec.bv_len;
+ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+ op_is_write(bio_op(bio)), sector,
+ bio_op(bio) & REQ_FUA);
+ if (err) {
+ spin_unlock_irq(&nullb->lock);
+ return err;
+ }
+ sector += len >> SECTOR_SHIFT;
+ }
+ spin_unlock_irq(&nullb->lock);
+ return 0;
+}
+
+static void null_stop_queue(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
+ blk_mq_stop_hw_queues(q);
+ else {
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+ spin_unlock_irq(q->queue_lock);
+ }
+}
+
+static void null_restart_queue_async(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+ unsigned long flags;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
+ blk_mq_start_stopped_hw_queues(q, true);
+ else {
+ spin_lock_irqsave(q->queue_lock, flags);
+ blk_start_queue_async(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+}
+
+static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+ struct nullb *nullb = dev->nullb;
+ int err = 0;
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
+ struct request *rq = cmd->rq;
+
+ if (!hrtimer_active(&nullb->bw_timer))
+ hrtimer_restart(&nullb->bw_timer);
+
+ if (atomic_long_sub_return(blk_rq_bytes(rq),
+ &nullb->cur_bytes) < 0) {
+ null_stop_queue(nullb);
+ /* race with timer */
+ if (atomic_long_read(&nullb->cur_bytes) > 0)
+ null_restart_queue_async(nullb);
+ if (dev->queue_mode == NULL_Q_RQ) {
+ struct request_queue *q = nullb->q;
+
+ spin_lock_irq(q->queue_lock);
+ rq->rq_flags |= RQF_DONTPREP;
+ blk_requeue_request(q, rq);
+ spin_unlock_irq(q->queue_lock);
+ return BLK_STS_OK;
+ } else
+ /* requeue request */
+ return BLK_STS_RESOURCE;
+ }
+ }
+
+ if (nullb->dev->badblocks.shift != -1) {
+ int bad_sectors;
+ sector_t sector, size, first_bad;
+ bool is_flush = true;
+
+ if (dev->queue_mode == NULL_Q_BIO &&
+ bio_op(cmd->bio) != REQ_OP_FLUSH) {
+ is_flush = false;
+ sector = cmd->bio->bi_iter.bi_sector;
+ size = bio_sectors(cmd->bio);
+ }
+ if (dev->queue_mode != NULL_Q_BIO &&
+ req_op(cmd->rq) != REQ_OP_FLUSH) {
+ is_flush = false;
+ sector = blk_rq_pos(cmd->rq);
+ size = blk_rq_sectors(cmd->rq);
+ }
+ if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector,
+ size, &first_bad, &bad_sectors)) {
+ cmd->error = BLK_STS_IOERR;
+ goto out;
+ }
+ }
+
+ if (dev->memory_backed) {
+ if (dev->queue_mode == NULL_Q_BIO) {
+ if (bio_op(cmd->bio) == REQ_OP_FLUSH)
+ err = null_handle_flush(nullb);
+ else
+ err = null_handle_bio(cmd);
+ } else {
+ if (req_op(cmd->rq) == REQ_OP_FLUSH)
+ err = null_handle_flush(nullb);
+ else
+ err = null_handle_rq(cmd);
+ }
+ }
+ cmd->error = errno_to_blk_status(err);
+out:
/* Complete IO by inline, softirq or timer */
- switch (irqmode) {
+ switch (dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
- switch (queue_mode) {
+ switch (dev->queue_mode) {
case NULL_Q_MQ:
blk_mq_complete_request(cmd->rq);
break;
@@ -307,6 +1292,34 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
null_cmd_end_timer(cmd);
break;
}
+ return BLK_STS_OK;
+}
+
+static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
+{
+ struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
+ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+ unsigned int mbps = nullb->dev->mbps;
+
+ if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
+ return HRTIMER_NORESTART;
+
+ atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
+ null_restart_queue_async(nullb);
+
+ hrtimer_forward_now(&nullb->bw_timer, timer_interval);
+
+ return HRTIMER_RESTART;
+}
+
+static void nullb_setup_bwtimer(struct nullb *nullb)
+{
+ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+
+ hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ nullb->bw_timer.function = nullb_bwtimer_fn;
+ atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
+ hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
}
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
@@ -366,20 +1379,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct nullb_queue *nq = hctx->driver_data;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- if (irqmode == NULL_IRQ_TIMER) {
+ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
- cmd->nq = hctx->driver_data;
+ cmd->nq = nq;
blk_mq_start_request(bd->rq);
- null_handle_cmd(cmd);
- return BLK_STS_OK;
+ return null_handle_cmd(cmd);
}
static const struct blk_mq_ops null_mq_ops = {
@@ -438,7 +1451,8 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
{
- sector_t size = gb * 1024 * 1024 * 1024ULL;
+ struct nullb *nullb = dev->q->queuedata;
+ sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
sector_t blksize;
struct nvm_id_group *grp;
@@ -460,7 +1474,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8;
- sector_div(size, bs); /* convert size to pages */
+ sector_div(size, nullb->dev->blocksize); /* convert size to pages */
size >>= 8; /* concert size to pgs pr blk */
grp = &id->grp;
grp->mtype = 0;
@@ -474,8 +1488,8 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
grp->num_blk = blksize;
grp->num_pln = 1;
- grp->fpg_sz = bs;
- grp->csecs = bs;
+ grp->fpg_sz = nullb->dev->blocksize;
+ grp->csecs = nullb->dev->blocksize;
grp->trdt = 25000;
grp->trdm = 25000;
grp->tprt = 500000;
@@ -483,7 +1497,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
grp->tbet = 1500000;
grp->tbem = 1500000;
grp->mpos = 0x010101; /* single plane rwe */
- grp->cpar = hw_queue_depth;
+ grp->cpar = nullb->dev->hw_queue_depth;
return 0;
}
@@ -568,19 +1582,44 @@ static void null_nvm_unregister(struct nullb *nullb) {}
static void null_del_dev(struct nullb *nullb)
{
+ struct nullb_device *dev = nullb->dev;
+
+ ida_simple_remove(&nullb_indexes, nullb->index);
+
list_del_init(&nullb->list);
- if (use_lightnvm)
+ if (dev->use_lightnvm)
null_nvm_unregister(nullb);
else
del_gendisk(nullb->disk);
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
+ hrtimer_cancel(&nullb->bw_timer);
+ atomic_long_set(&nullb->cur_bytes, LONG_MAX);
+ null_restart_queue_async(nullb);
+ }
+
blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (dev->queue_mode == NULL_Q_MQ &&
+ nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
- if (!use_lightnvm)
+ if (!dev->use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
+ if (null_cache_active(nullb))
+ null_free_device_storage(nullb->dev, true);
kfree(nullb);
+ dev->nullb = NULL;
+}
+
+static void null_config_discard(struct nullb *nullb)
+{
+ if (nullb->dev->discard == false)
+ return;
+ nullb->q->limits.discard_granularity = nullb->dev->blocksize;
+ nullb->q->limits.discard_alignment = nullb->dev->blocksize;
+ blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q);
}
static int null_open(struct block_device *bdev, fmode_t mode)
@@ -605,6 +1644,7 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth;
+ nq->dev = nullb->dev;
}
static void null_init_queues(struct nullb *nullb)
@@ -652,13 +1692,13 @@ static int setup_commands(struct nullb_queue *nq)
static int setup_queues(struct nullb *nullb)
{
- nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
- GFP_KERNEL);
+ nullb->queues = kzalloc(nullb->dev->submit_queues *
+ sizeof(struct nullb_queue), GFP_KERNEL);
if (!nullb->queues)
return -ENOMEM;
nullb->nr_queues = 0;
- nullb->queue_depth = hw_queue_depth;
+ nullb->queue_depth = nullb->dev->hw_queue_depth;
return 0;
}
@@ -668,7 +1708,7 @@ static int init_driver_queues(struct nullb *nullb)
struct nullb_queue *nq;
int i, ret = 0;
- for (i = 0; i < submit_queues; i++) {
+ for (i = 0; i < nullb->dev->submit_queues; i++) {
nq = &nullb->queues[i];
null_init_queue(nullb, nq);
@@ -686,10 +1726,10 @@ static int null_gendisk_register(struct nullb *nullb)
struct gendisk *disk;
sector_t size;
- disk = nullb->disk = alloc_disk_node(1, home_node);
+ disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
if (!disk)
return -ENOMEM;
- size = gb * 1024 * 1024 * 1024ULL;
+ size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
set_capacity(disk, size >> 9);
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
@@ -704,49 +1744,86 @@ static int null_gendisk_register(struct nullb *nullb)
return 0;
}
-static int null_init_tag_set(struct blk_mq_tag_set *set)
+static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{
set->ops = &null_mq_ops;
- set->nr_hw_queues = submit_queues;
- set->queue_depth = hw_queue_depth;
- set->numa_node = home_node;
+ set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
+ g_submit_queues;
+ set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
+ g_hw_queue_depth;
+ set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size = sizeof(struct nullb_cmd);
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->driver_data = NULL;
- if (blocking)
+ if ((nullb && nullb->dev->blocking) || g_blocking)
set->flags |= BLK_MQ_F_BLOCKING;
return blk_mq_alloc_tag_set(set);
}
-static int null_add_dev(void)
+static void null_validate_conf(struct nullb_device *dev)
+{
+ dev->blocksize = round_down(dev->blocksize, 512);
+ dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+ if (dev->use_lightnvm && dev->blocksize != 4096)
+ dev->blocksize = 4096;
+
+ if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ)
+ dev->queue_mode = NULL_Q_MQ;
+
+ if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+ if (dev->submit_queues != nr_online_nodes)
+ dev->submit_queues = nr_online_nodes;
+ } else if (dev->submit_queues > nr_cpu_ids)
+ dev->submit_queues = nr_cpu_ids;
+ else if (dev->submit_queues == 0)
+ dev->submit_queues = 1;
+
+ dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
+ dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+
+ /* Do memory allocation, so set blocking */
+ if (dev->memory_backed)
+ dev->blocking = true;
+ else /* cache is meaningless */
+ dev->cache_size = 0;
+ dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
+ dev->cache_size);
+ dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
+ /* can not stop a queue */
+ if (dev->queue_mode == NULL_Q_BIO)
+ dev->mbps = 0;
+}
+
+static int null_add_dev(struct nullb_device *dev)
{
struct nullb *nullb;
int rv;
- nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
+ null_validate_conf(dev);
+
+ nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
if (!nullb) {
rv = -ENOMEM;
goto out;
}
+ nullb->dev = dev;
+ dev->nullb = nullb;
spin_lock_init(&nullb->lock);
- if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
- submit_queues = nr_online_nodes;
-
rv = setup_queues(nullb);
if (rv)
goto out_free_nullb;
- if (queue_mode == NULL_Q_MQ) {
+ if (dev->queue_mode == NULL_Q_MQ) {
if (shared_tags) {
nullb->tag_set = &tag_set;
rv = 0;
} else {
nullb->tag_set = &nullb->__tag_set;
- rv = null_init_tag_set(nullb->tag_set);
+ rv = null_init_tag_set(nullb, nullb->tag_set);
}
if (rv)
@@ -758,8 +1835,8 @@ static int null_add_dev(void)
goto out_cleanup_tags;
}
null_init_queues(nullb);
- } else if (queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
+ } else if (dev->queue_mode == NULL_Q_BIO) {
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -769,7 +1846,8 @@ static int null_add_dev(void)
if (rv)
goto out_cleanup_blk_queue;
} else {
- nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
+ nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock,
+ dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -781,20 +1859,34 @@ static int null_add_dev(void)
goto out_cleanup_blk_queue;
}
+ if (dev->mbps) {
+ set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
+ nullb_setup_bwtimer(nullb);
+ }
+
+ if (dev->cache_size > 0) {
+ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+ blk_queue_write_cache(nullb->q, true, true);
+ blk_queue_flush_queueable(nullb->q, true);
+ }
+
nullb->q->queuedata = nullb;
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
- nullb->index = nullb_indexes++;
+ nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ dev->index = nullb->index;
mutex_unlock(&lock);
- blk_queue_logical_block_size(nullb->q, bs);
- blk_queue_physical_block_size(nullb->q, bs);
+ blk_queue_logical_block_size(nullb->q, dev->blocksize);
+ blk_queue_physical_block_size(nullb->q, dev->blocksize);
+
+ null_config_discard(nullb);
sprintf(nullb->disk_name, "nullb%d", nullb->index);
- if (use_lightnvm)
+ if (dev->use_lightnvm)
rv = null_nvm_register(nullb);
else
rv = null_gendisk_register(nullb);
@@ -810,7 +1902,7 @@ static int null_add_dev(void)
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
- if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
@@ -825,51 +1917,63 @@ static int __init null_init(void)
int ret = 0;
unsigned int i;
struct nullb *nullb;
+ struct nullb_device *dev;
+
+ /* check for nullb_page.bitmap */
+ if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
+ return -EINVAL;
- if (bs > PAGE_SIZE) {
+ if (g_bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
- bs = PAGE_SIZE;
+ g_bs = PAGE_SIZE;
}
- if (use_lightnvm && bs != 4096) {
+ if (g_use_lightnvm && g_bs != 4096) {
pr_warn("null_blk: LightNVM only supports 4k block size\n");
pr_warn("null_blk: defaults block size to 4k\n");
- bs = 4096;
+ g_bs = 4096;
}
- if (use_lightnvm && queue_mode != NULL_Q_MQ) {
+ if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) {
pr_warn("null_blk: LightNVM only supported for blk-mq\n");
pr_warn("null_blk: defaults queue mode to blk-mq\n");
- queue_mode = NULL_Q_MQ;
+ g_queue_mode = NULL_Q_MQ;
}
- if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
- if (submit_queues < nr_online_nodes) {
- pr_warn("null_blk: submit_queues param is set to %u.",
+ if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+ if (g_submit_queues != nr_online_nodes) {
+ pr_warn("null_blk: submit_queues param is set to %u.\n",
nr_online_nodes);
- submit_queues = nr_online_nodes;
+ g_submit_queues = nr_online_nodes;
}
- } else if (submit_queues > nr_cpu_ids)
- submit_queues = nr_cpu_ids;
- else if (!submit_queues)
- submit_queues = 1;
+ } else if (g_submit_queues > nr_cpu_ids)
+ g_submit_queues = nr_cpu_ids;
+ else if (g_submit_queues <= 0)
+ g_submit_queues = 1;
- if (queue_mode == NULL_Q_MQ && shared_tags) {
- ret = null_init_tag_set(&tag_set);
+ if (g_queue_mode == NULL_Q_MQ && shared_tags) {
+ ret = null_init_tag_set(NULL, &tag_set);
if (ret)
return ret;
}
+ config_group_init(&nullb_subsys.su_group);
+ mutex_init(&nullb_subsys.su_mutex);
+
+ ret = configfs_register_subsystem(&nullb_subsys);
+ if (ret)
+ goto err_tagset;
+
mutex_init(&lock);
null_major = register_blkdev(0, "nullb");
if (null_major < 0) {
ret = null_major;
- goto err_tagset;
+ goto err_conf;
}
- if (use_lightnvm) {
+ if (g_use_lightnvm) {
ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
0, 0, NULL);
if (!ppa_cache) {
@@ -880,9 +1984,14 @@ static int __init null_init(void)
}
for (i = 0; i < nr_devices; i++) {
- ret = null_add_dev();
- if (ret)
+ dev = null_alloc_dev();
+ if (!dev)
+ goto err_dev;
+ ret = null_add_dev(dev);
+ if (ret) {
+ null_free_dev(dev);
goto err_dev;
+ }
}
pr_info("null: module loaded\n");
@@ -891,13 +2000,17 @@ static int __init null_init(void)
err_dev:
while (!list_empty(&nullb_list)) {
nullb = list_entry(nullb_list.next, struct nullb, list);
+ dev = nullb->dev;
null_del_dev(nullb);
+ null_free_dev(dev);
}
kmem_cache_destroy(ppa_cache);
err_ppa:
unregister_blkdev(null_major, "nullb");
+err_conf:
+ configfs_unregister_subsystem(&nullb_subsys);
err_tagset:
- if (queue_mode == NULL_Q_MQ && shared_tags)
+ if (g_queue_mode == NULL_Q_MQ && shared_tags)
blk_mq_free_tag_set(&tag_set);
return ret;
}
@@ -906,16 +2019,22 @@ static void __exit null_exit(void)
{
struct nullb *nullb;
+ configfs_unregister_subsystem(&nullb_subsys);
+
unregister_blkdev(null_major, "nullb");
mutex_lock(&lock);
while (!list_empty(&nullb_list)) {
+ struct nullb_device *dev;
+
nullb = list_entry(nullb_list.next, struct nullb, list);
+ dev = nullb->dev;
null_del_dev(nullb);
+ null_free_dev(dev);
}
mutex_unlock(&lock);
- if (queue_mode == NULL_Q_MQ && shared_tags)
+ if (g_queue_mode == NULL_Q_MQ && shared_tags)
blk_mq_free_tag_set(&tag_set);
kmem_cache_destroy(ppa_cache);
@@ -924,5 +2043,5 @@ static void __exit null_exit(void)
module_init(null_init);
module_exit(null_exit);
-MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
+MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 6b8b097abbb9..67974796c350 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1028,7 +1028,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
bio = pkt->r_bios[f];
bio_reset(bio);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
- bio->bi_bdev = pd->bdev;
+ bio_set_dev(bio, pd->bdev);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
pkt->sector = new_sector;
bio_reset(pkt->bio);
- pkt->bio->bi_bdev = pd->bdev;
+ bio_set_set(pkt->bio, pd->bdev);
bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0);
pkt->bio->bi_iter.bi_sector = new_sector;
pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
@@ -1267,7 +1267,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
bio_reset(pkt->w_bio);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
- pkt->w_bio->bi_bdev = pd->bdev;
+ bio_set_dev(pkt->w_bio, pd->bdev);
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -2314,7 +2314,7 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
psd->pd = pd;
psd->bio = bio;
- cloned_bio->bi_bdev = pd->bdev;
+ bio_set_dev(cloned_bio, pd->bdev);
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
@@ -2415,8 +2415,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
pd = q->queuedata;
if (!pd) {
- pr_err("%s incorrect request queue\n",
- bdevname(bio->bi_bdev, b));
+ pr_err("%s incorrect request queue\n", bio_devname(bio, b));
goto end_io;
}
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index e0e81cacd781..6a55959cbf78 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -409,10 +409,8 @@ static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
priv->cache.page_size = CACHE_PAGE_SIZE;
priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
CACHE_PAGE_COUNT, GFP_KERNEL);
- if (priv->cache.tags == NULL) {
- dev_err(&dev->core, "Could not allocate cache tags\n");
+ if (!priv->cache.tags)
return -ENOMEM;
- }
dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
@@ -743,7 +741,11 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
goto out_unmap_reports;
}
- ps3vram_cache_init(dev);
+ error = ps3vram_cache_init(dev);
+ if (error < 0) {
+ goto out_unmap_reports;
+ }
+
ps3vram_proc_init(dev);
queue = blk_alloc_queue(GFP_KERNEL);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b008b6a98098..b640ad8a6d20 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3435,7 +3435,7 @@ static void rbd_acquire_lock(struct work_struct *work)
struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
struct rbd_device, lock_dwork);
enum rbd_lock_state lock_state;
- int ret;
+ int ret = 0;
dout("%s rbd_dev %p\n", __func__, rbd_dev);
again:
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 7f4acebf4657..e397d3ee7308 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -112,7 +112,7 @@ static const struct block_device_operations rsxx_fops = {
static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
{
- generic_start_io_acct(bio_data_dir(bio), bio_sectors(bio),
+ generic_start_io_acct(card->queue, bio_data_dir(bio), bio_sectors(bio),
&card->gendisk->part0);
}
@@ -120,8 +120,8 @@ static void disk_stats_complete(struct rsxx_cardinfo *card,
struct bio *bio,
unsigned long start_time)
{
- generic_end_io_acct(bio_data_dir(bio), &card->gendisk->part0,
- start_time);
+ generic_end_io_acct(card->queue, bio_data_dir(bio),
+ &card->gendisk->part0, start_time);
}
static void bio_dma_done_cb(struct rsxx_cardinfo *card,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index d0368682bd43..7cedb4295e9d 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -1,19 +1,12 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Driver for sTec s1120 PCIe SSDs. sTec was acquired in 2013 by HGST and HGST
+ * was acquired by Western Digital in 2012.
+ *
+ * Copyright 2012 sTec, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
*
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
- * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
- * Initial Driver Design!
- * Thomas Swann <tswann@stec-inc.com>
- * Interrupt handling.
- * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
- * biomode implementation.
- * Akhil Bhansali <abhansali@stec-inc.com>
- * Added support for DISCARD / FLUSH and FUA.
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
*/
#include <linux/kernel.h>
@@ -23,11 +16,11 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
-#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/hdreg.h>
@@ -37,9 +30,9 @@
#include <linux/version.h>
#include <linux/err.h>
#include <linux/aer.h>
-#include <linux/ctype.h>
#include <linux/wait.h>
-#include <linux/uio.h>
+#include <linux/stringify.h>
+#include <linux/slab_def.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>
#include <linux/io.h>
@@ -51,19 +44,6 @@
static int skd_dbg_level;
static int skd_isr_comp_limit = 4;
-enum {
- STEC_LINK_2_5GTS = 0,
- STEC_LINK_5GTS = 1,
- STEC_LINK_8GTS = 2,
- STEC_LINK_UNKNOWN = 0xFF
-};
-
-enum {
- SKD_FLUSH_INITIALIZER,
- SKD_FLUSH_ZERO_SIZE_FIRST,
- SKD_FLUSH_DATA_SECOND,
-};
-
#define SKD_ASSERT(expr) \
do { \
if (unlikely(!(expr))) { \
@@ -73,17 +53,11 @@ enum {
} while (0)
#define DRV_NAME "skd"
-#define DRV_VERSION "2.2.1"
-#define DRV_BUILD_ID "0260"
#define PFX DRV_NAME ": "
-#define DRV_BIN_VERSION 0x100
-#define DRV_VER_COMPL "2.2.1." DRV_BUILD_ID
-MODULE_AUTHOR("bug-reports: support@stec-inc.com");
-MODULE_LICENSE("Dual BSD/GPL");
+MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver (b" DRV_BUILD_ID ")");
-MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
+MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver");
#define PCI_VENDOR_ID_STEC 0x1B39
#define PCI_DEVICE_ID_S1120 0x0001
@@ -96,34 +70,32 @@ MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
#define SKD_PAUSE_TIMEOUT (5 * 1000)
#define SKD_N_FITMSG_BYTES (512u)
+#define SKD_MAX_REQ_PER_MSG 14
-#define SKD_N_SPECIAL_CONTEXT 32u
#define SKD_N_SPECIAL_FITMSG_BYTES (128u)
/* SG elements are 32 bytes, so we can make this 4096 and still be under the
* 128KB limit. That allows 4096*4K = 16M xfer size
*/
#define SKD_N_SG_PER_REQ_DEFAULT 256u
-#define SKD_N_SG_PER_SPECIAL 256u
#define SKD_N_COMPLETION_ENTRY 256u
#define SKD_N_READ_CAP_BYTES (8u)
#define SKD_N_INTERNAL_BYTES (512u)
+#define SKD_SKCOMP_SIZE \
+ ((sizeof(struct fit_completion_entry_v1) + \
+ sizeof(struct fit_comp_error_info)) * SKD_N_COMPLETION_ENTRY)
+
/* 5 bits of uniqifier, 0xF800 */
-#define SKD_ID_INCR (0x400)
#define SKD_ID_TABLE_MASK (3u << 8u)
#define SKD_ID_RW_REQUEST (0u << 8u)
#define SKD_ID_INTERNAL (1u << 8u)
-#define SKD_ID_SPECIAL_REQUEST (2u << 8u)
#define SKD_ID_FIT_MSG (3u << 8u)
#define SKD_ID_SLOT_MASK 0x00FFu
#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
-#define SKD_N_TIMEOUT_SLOT 4u
-#define SKD_TIMEOUT_SLOT_MASK 3u
-
#define SKD_N_MAX_SECTORS 2048u
#define SKD_MAX_RETRIES 2u
@@ -141,7 +113,6 @@ enum skd_drvr_state {
SKD_DRVR_STATE_ONLINE,
SKD_DRVR_STATE_PAUSING,
SKD_DRVR_STATE_PAUSED,
- SKD_DRVR_STATE_DRAINING_TIMEOUT,
SKD_DRVR_STATE_RESTARTING,
SKD_DRVR_STATE_RESUMING,
SKD_DRVR_STATE_STOPPING,
@@ -158,7 +129,6 @@ enum skd_drvr_state {
#define SKD_WAIT_BOOT_TIMO SKD_TIMER_SECONDS(90u)
#define SKD_STARTING_TIMO SKD_TIMER_SECONDS(8u)
#define SKD_RESTARTING_TIMO SKD_TIMER_MINUTES(4u)
-#define SKD_DRAINING_TIMO SKD_TIMER_SECONDS(6u)
#define SKD_BUSY_TIMO SKD_TIMER_MINUTES(20u)
#define SKD_STARTED_BUSY_TIMO SKD_TIMER_SECONDS(60u)
#define SKD_START_WAIT_SECONDS 90u
@@ -169,12 +139,6 @@ enum skd_req_state {
SKD_REQ_STATE_BUSY,
SKD_REQ_STATE_COMPLETED,
SKD_REQ_STATE_TIMEOUT,
- SKD_REQ_STATE_ABORTED,
-};
-
-enum skd_fit_msg_state {
- SKD_MSG_STATE_IDLE,
- SKD_MSG_STATE_BUSY,
};
enum skd_check_status_action {
@@ -185,34 +149,29 @@ enum skd_check_status_action {
SKD_CHECK_STATUS_BUSY_IMMINENT,
};
-struct skd_fitmsg_context {
- enum skd_fit_msg_state state;
-
- struct skd_fitmsg_context *next;
+struct skd_msg_buf {
+ struct fit_msg_hdr fmh;
+ struct skd_scsi_request scsi[SKD_MAX_REQ_PER_MSG];
+};
+struct skd_fitmsg_context {
u32 id;
- u16 outstanding;
u32 length;
- u32 offset;
- u8 *msg_buf;
+ struct skd_msg_buf *msg_buf;
dma_addr_t mb_dma_address;
};
struct skd_request_context {
enum skd_req_state state;
- struct skd_request_context *next;
-
u16 id;
u32 fitmsg_id;
- struct request *req;
u8 flush_cmd;
- u32 timeout_stamp;
- u8 sg_data_dir;
+ enum dma_data_direction data_dir;
struct scatterlist *sg;
u32 n_sg;
u32 sg_byte_count;
@@ -224,38 +183,19 @@ struct skd_request_context {
struct fit_comp_error_info err_info;
+ blk_status_t status;
};
-#define SKD_DATA_DIR_HOST_TO_CARD 1
-#define SKD_DATA_DIR_CARD_TO_HOST 2
struct skd_special_context {
struct skd_request_context req;
- u8 orphaned;
-
void *data_buf;
dma_addr_t db_dma_address;
- u8 *msg_buf;
+ struct skd_msg_buf *msg_buf;
dma_addr_t mb_dma_address;
};
-struct skd_sg_io {
- fmode_t mode;
- void __user *argp;
-
- struct sg_io_hdr sg;
-
- u8 cdb[16];
-
- u32 dxfer_len;
- u32 iovcnt;
- struct sg_iovec *iov;
- struct sg_iovec no_iov_iov;
-
- struct skd_special_context *skspcl;
-};
-
typedef enum skd_irq_type {
SKD_IRQ_LEGACY,
SKD_IRQ_MSI,
@@ -265,7 +205,7 @@ typedef enum skd_irq_type {
#define SKD_MAX_BARS 2
struct skd_device {
- volatile void __iomem *mem_map[SKD_MAX_BARS];
+ void __iomem *mem_map[SKD_MAX_BARS];
resource_size_t mem_phys[SKD_MAX_BARS];
u32 mem_size[SKD_MAX_BARS];
@@ -276,21 +216,20 @@ struct skd_device {
spinlock_t lock;
struct gendisk *disk;
+ struct blk_mq_tag_set tag_set;
struct request_queue *queue;
+ struct skd_fitmsg_context *skmsg;
struct device *class_dev;
int gendisk_on;
int sync_done;
- atomic_t device_count;
u32 devno;
u32 major;
- char name[32];
char isr_name[30];
enum skd_drvr_state state;
u32 drive_state;
- u32 in_flight;
u32 cur_max_queue_depth;
u32 queue_low_water_mark;
u32 dev_max_queue_depth;
@@ -298,27 +237,20 @@ struct skd_device {
u32 num_fitmsg_context;
u32 num_req_context;
- u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
- u32 timeout_stamp;
- struct skd_fitmsg_context *skmsg_free_list;
struct skd_fitmsg_context *skmsg_table;
- struct skd_request_context *skreq_free_list;
- struct skd_request_context *skreq_table;
-
- struct skd_special_context *skspcl_free_list;
- struct skd_special_context *skspcl_table;
-
struct skd_special_context internal_skspcl;
u32 read_cap_blocksize;
u32 read_cap_last_lba;
int read_cap_is_valid;
int inquiry_is_valid;
u8 inq_serial_num[13]; /*12 chars plus null term */
- u8 id_str[80]; /* holds a composite name (pci + sernum) */
u8 skcomp_cycle;
u32 skcomp_ix;
+ struct kmem_cache *msgbuf_cache;
+ struct kmem_cache *sglist_cache;
+ struct kmem_cache *databuf_cache;
struct fit_completion_entry_v1 *skcomp_table;
struct fit_comp_error_info *skerr_table;
dma_addr_t cq_dma_address;
@@ -329,7 +261,6 @@ struct skd_device {
u32 timer_countdown;
u32 timer_substate;
- int n_special;
int sgs_per_request;
u32 last_mtd;
@@ -343,7 +274,7 @@ struct skd_device {
u32 timo_slot;
-
+ struct work_struct start_queue;
struct work_struct completion_worker;
};
@@ -353,53 +284,32 @@ struct skd_device {
static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
{
- u32 val;
-
- if (likely(skdev->dbg_level < 2))
- return readl(skdev->mem_map[1] + offset);
- else {
- barrier();
- val = readl(skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %x\n",
- skdev->name, __func__, __LINE__, offset, val);
- return val;
- }
+ u32 val = readl(skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
+ return val;
}
static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
u32 offset)
{
- if (likely(skdev->dbg_level < 2)) {
- writel(val, skdev->mem_map[1] + offset);
- barrier();
- } else {
- barrier();
- writel(val, skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %x\n",
- skdev->name, __func__, __LINE__, offset, val);
- }
+ writel(val, skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
}
static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
u32 offset)
{
- if (likely(skdev->dbg_level < 2)) {
- writeq(val, skdev->mem_map[1] + offset);
- barrier();
- } else {
- barrier();
- writeq(val, skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %016llx\n",
- skdev->name, __func__, __LINE__, offset, val);
- }
+ writeq(val, skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %016llx\n", offset,
+ val);
}
-#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
+#define SKD_IRQ_DEFAULT SKD_IRQ_MSIX
static int skd_isr_type = SKD_IRQ_DEFAULT;
module_param(skd_isr_type, int, 0444);
@@ -412,7 +322,7 @@ static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
module_param(skd_max_req_per_msg, int, 0444);
MODULE_PARM_DESC(skd_max_req_per_msg,
"Maximum SCSI requests packed in a single message."
- " (1-14, default==1)");
+ " (1-" __stringify(SKD_MAX_REQ_PER_MSG) ", default==1)");
#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
@@ -429,10 +339,10 @@ MODULE_PARM_DESC(skd_sgs_per_request,
"Maximum SG elements per block request."
" (1-4096, default==256)");
-static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
+static int skd_max_pass_thru = 1;
module_param(skd_max_pass_thru, int, 0444);
MODULE_PARM_DESC(skd_max_pass_thru,
- "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
+ "Maximum SCSI pass-thru at a time. IGNORED");
module_param(skd_dbg_level, int, 0444);
MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
@@ -449,9 +359,6 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg);
static void skd_send_special_fitmsg(struct skd_device *skdev,
struct skd_special_context *skspcl);
-static void skd_request_fn(struct request_queue *rq);
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t status);
static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
@@ -460,19 +367,14 @@ static void skd_postop_sg_list(struct skd_device *skdev,
static void skd_restart_device(struct skd_device *skdev);
static int skd_quiesce_dev(struct skd_device *skdev);
static int skd_unquiesce_dev(struct skd_device *skdev);
-static void skd_release_special(struct skd_device *skdev,
- struct skd_special_context *skspcl);
static void skd_disable_interrupts(struct skd_device *skdev);
static void skd_isr_fwstate(struct skd_device *skdev);
-static void skd_recover_requests(struct skd_device *skdev, int requeue);
+static void skd_recover_requests(struct skd_device *skdev);
static void skd_soft_reset(struct skd_device *skdev);
-static const char *skd_name(struct skd_device *skdev);
const char *skd_drive_state_to_str(int state);
const char *skd_skdev_state_to_str(enum skd_drvr_state state);
static void skd_log_skdev(struct skd_device *skdev, const char *event);
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event);
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event);
@@ -481,18 +383,20 @@ static void skd_log_skreq(struct skd_device *skdev,
* READ/WRITE REQUESTS
*****************************************************************************
*/
-static void skd_fail_all_pending(struct skd_device *skdev)
+static void skd_inc_in_flight(struct request *rq, void *data, bool reserved)
{
- struct request_queue *q = skdev->queue;
- struct request *req;
+ int *count = data;
- for (;; ) {
- req = blk_peek_request(q);
- if (req == NULL)
- break;
- blk_start_request(req);
- __blk_end_request_all(req, BLK_STS_IOERR);
- }
+ count++;
+}
+
+static int skd_in_flight(struct skd_device *skdev)
+{
+ int count = 0;
+
+ blk_mq_tagset_busy_iter(&skdev->tag_set, skd_inc_in_flight, &count);
+
+ return count;
}
static void
@@ -501,9 +405,9 @@ skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
unsigned count)
{
if (data_dir == READ)
- scsi_req->cdb[0] = 0x28;
+ scsi_req->cdb[0] = READ_10;
else
- scsi_req->cdb[0] = 0x2a;
+ scsi_req->cdb[0] = WRITE_10;
scsi_req->cdb[1] = 0;
scsi_req->cdb[2] = (lba & 0xff000000) >> 24;
@@ -522,7 +426,7 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
{
skreq->flush_cmd = 1;
- scsi_req->cdb[0] = 0x35;
+ scsi_req->cdb[0] = SYNCHRONIZE_CACHE;
scsi_req->cdb[1] = 0;
scsi_req->cdb[2] = 0;
scsi_req->cdb[3] = 0;
@@ -534,307 +438,194 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
scsi_req->cdb[9] = 0;
}
-static void skd_request_fn_not_online(struct request_queue *q);
-
-static void skd_request_fn(struct request_queue *q)
+/*
+ * Return true if and only if all pending requests should be failed.
+ */
+static bool skd_fail_all(struct request_queue *q)
{
struct skd_device *skdev = q->queuedata;
- struct skd_fitmsg_context *skmsg = NULL;
- struct fit_msg_hdr *fmh = NULL;
- struct skd_request_context *skreq;
- struct request *req = NULL;
- struct skd_scsi_request *scsi_req;
- unsigned long io_flags;
- u32 lba;
- u32 count;
- int data_dir;
- u32 be_lba;
- u32 be_count;
- u64 be_dmaa;
- u64 cmdctxt;
- u32 timo_slot;
- void *cmd_ptr;
- int flush, fua;
-
- if (skdev->state != SKD_DRVR_STATE_ONLINE) {
- skd_request_fn_not_online(q);
- return;
- }
- if (blk_queue_stopped(skdev->queue)) {
- if (skdev->skmsg_free_list == NULL ||
- skdev->skreq_free_list == NULL ||
- skdev->in_flight >= skdev->queue_low_water_mark)
- /* There is still some kind of shortage */
- return;
-
- queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
- }
+ SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
- /*
- * Stop conditions:
- * - There are no more native requests
- * - There are already the maximum number of requests in progress
- * - There are no more skd_request_context entries
- * - There are no more FIT msg buffers
+ skd_log_skdev(skdev, "req_not_online");
+ switch (skdev->state) {
+ case SKD_DRVR_STATE_PAUSING:
+ case SKD_DRVR_STATE_PAUSED:
+ case SKD_DRVR_STATE_STARTING:
+ case SKD_DRVR_STATE_RESTARTING:
+ case SKD_DRVR_STATE_WAIT_BOOT:
+ /* In case of starting, we haven't started the queue,
+ * so we can't get here... but requests are
+ * possibly hanging out waiting for us because we
+ * reported the dev/skd0 already. They'll wait
+ * forever if connect doesn't complete.
+ * What to do??? delay dev/skd0 ??
*/
- for (;; ) {
-
- flush = fua = 0;
-
- req = blk_peek_request(q);
-
- /* Are there any native requests to start? */
- if (req == NULL)
- break;
-
- lba = (u32)blk_rq_pos(req);
- count = blk_rq_sectors(req);
- data_dir = rq_data_dir(req);
- io_flags = req->cmd_flags;
-
- if (req_op(req) == REQ_OP_FLUSH)
- flush++;
-
- if (io_flags & REQ_FUA)
- fua++;
-
- pr_debug("%s:%s:%d new req=%p lba=%u(0x%x) "
- "count=%u(0x%x) dir=%d\n",
- skdev->name, __func__, __LINE__,
- req, lba, lba, count, count, data_dir);
-
- /* At this point we know there is a request */
+ case SKD_DRVR_STATE_BUSY:
+ case SKD_DRVR_STATE_BUSY_IMMINENT:
+ case SKD_DRVR_STATE_BUSY_ERASE:
+ return false;
- /* Are too many requets already in progress? */
- if (skdev->in_flight >= skdev->cur_max_queue_depth) {
- pr_debug("%s:%s:%d qdepth %d, limit %d\n",
- skdev->name, __func__, __LINE__,
- skdev->in_flight, skdev->cur_max_queue_depth);
- break;
- }
+ case SKD_DRVR_STATE_BUSY_SANITIZE:
+ case SKD_DRVR_STATE_STOPPING:
+ case SKD_DRVR_STATE_SYNCING:
+ case SKD_DRVR_STATE_FAULT:
+ case SKD_DRVR_STATE_DISAPPEARED:
+ default:
+ return true;
+ }
+}
- /* Is a skd_request_context available? */
- skreq = skdev->skreq_free_list;
- if (skreq == NULL) {
- pr_debug("%s:%s:%d Out of req=%p\n",
- skdev->name, __func__, __LINE__, q);
- break;
- }
- SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
- SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
-
- /* Now we check to see if we can get a fit msg */
- if (skmsg == NULL) {
- if (skdev->skmsg_free_list == NULL) {
- pr_debug("%s:%s:%d Out of msg\n",
- skdev->name, __func__, __LINE__);
- break;
- }
- }
+static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *mqd)
+{
+ struct request *const req = mqd->rq;
+ struct request_queue *const q = req->q;
+ struct skd_device *skdev = q->queuedata;
+ struct skd_fitmsg_context *skmsg;
+ struct fit_msg_hdr *fmh;
+ const u32 tag = blk_mq_unique_tag(req);
+ struct skd_request_context *const skreq = blk_mq_rq_to_pdu(req);
+ struct skd_scsi_request *scsi_req;
+ unsigned long flags = 0;
+ const u32 lba = blk_rq_pos(req);
+ const u32 count = blk_rq_sectors(req);
+ const int data_dir = rq_data_dir(req);
- skreq->flush_cmd = 0;
- skreq->n_sg = 0;
- skreq->sg_byte_count = 0;
+ if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE))
+ return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE;
- /*
- * OK to now dequeue request from q.
- *
- * At this point we are comitted to either start or reject
- * the native request. Note that skd_request_context is
- * available but is still at the head of the free list.
- */
- blk_start_request(req);
- skreq->req = req;
- skreq->fitmsg_id = 0;
-
- /* Either a FIT msg is in progress or we have to start one. */
- if (skmsg == NULL) {
- /* Are there any FIT msg buffers available? */
- skmsg = skdev->skmsg_free_list;
- if (skmsg == NULL) {
- pr_debug("%s:%s:%d Out of msg skdev=%p\n",
- skdev->name, __func__, __LINE__,
- skdev);
- break;
- }
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
+ blk_mq_start_request(req);
- skdev->skmsg_free_list = skmsg->next;
+ WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n",
+ tag, skd_max_queue_depth, q->nr_requests);
- skmsg->state = SKD_MSG_STATE_BUSY;
- skmsg->id += SKD_ID_INCR;
+ SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
- /* Initialize the FIT msg header */
- fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
- memset(fmh, 0, sizeof(*fmh));
- fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
- skmsg->length = sizeof(*fmh);
- }
+ dev_dbg(&skdev->pdev->dev,
+ "new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba,
+ lba, count, count, data_dir);
- skreq->fitmsg_id = skmsg->id;
+ skreq->id = tag + SKD_ID_RW_REQUEST;
+ skreq->flush_cmd = 0;
+ skreq->n_sg = 0;
+ skreq->sg_byte_count = 0;
- /*
- * Note that a FIT msg may have just been started
- * but contains no SoFIT requests yet.
- */
+ skreq->fitmsg_id = 0;
- /*
- * Transcode the request, checking as we go. The outcome of
- * the transcoding is represented by the error variable.
- */
- cmd_ptr = &skmsg->msg_buf[skmsg->length];
- memset(cmd_ptr, 0, 32);
+ skreq->data_dir = data_dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
- be_lba = cpu_to_be32(lba);
- be_count = cpu_to_be32(count);
- be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
- cmdctxt = skreq->id + SKD_ID_INCR;
+ if (req->bio && !skd_preop_sg_list(skdev, skreq)) {
+ dev_dbg(&skdev->pdev->dev, "error Out\n");
+ skreq->status = BLK_STS_RESOURCE;
+ blk_mq_complete_request(req);
+ return BLK_STS_OK;
+ }
- scsi_req = cmd_ptr;
- scsi_req->hdr.tag = cmdctxt;
- scsi_req->hdr.sg_list_dma_address = be_dmaa;
+ dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
+ skreq->n_sg *
+ sizeof(struct fit_sg_descriptor),
+ DMA_TO_DEVICE);
- if (data_dir == READ)
- skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
- else
- skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
+ /* Either a FIT msg is in progress or we have to start one. */
+ if (skd_max_req_per_msg == 1) {
+ skmsg = NULL;
+ } else {
+ spin_lock_irqsave(&skdev->lock, flags);
+ skmsg = skdev->skmsg;
+ }
+ if (!skmsg) {
+ skmsg = &skdev->skmsg_table[tag];
+ skdev->skmsg = skmsg;
- if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
- skd_prep_zerosize_flush_cdb(scsi_req, skreq);
- SKD_ASSERT(skreq->flush_cmd == 1);
+ /* Initialize the FIT msg header */
+ fmh = &skmsg->msg_buf->fmh;
+ memset(fmh, 0, sizeof(*fmh));
+ fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
+ skmsg->length = sizeof(*fmh);
+ } else {
+ fmh = &skmsg->msg_buf->fmh;
+ }
- } else {
- skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
- }
+ skreq->fitmsg_id = skmsg->id;
- if (fua)
- scsi_req->cdb[1] |= SKD_FUA_NV;
+ scsi_req = &skmsg->msg_buf->scsi[fmh->num_protocol_cmds_coalesced];
+ memset(scsi_req, 0, sizeof(*scsi_req));
- if (!req->bio)
- goto skip_sg;
+ scsi_req->hdr.tag = skreq->id;
+ scsi_req->hdr.sg_list_dma_address =
+ cpu_to_be64(skreq->sksg_dma_address);
- if (!skd_preop_sg_list(skdev, skreq)) {
- /*
- * Complete the native request with error.
- * Note that the request context is still at the
- * head of the free list, and that the SoFIT request
- * was encoded into the FIT msg buffer but the FIT
- * msg length has not been updated. In short, the
- * only resource that has been allocated but might
- * not be used is that the FIT msg could be empty.
- */
- pr_debug("%s:%s:%d error Out\n",
- skdev->name, __func__, __LINE__);
- skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
- continue;
- }
+ if (req_op(req) == REQ_OP_FLUSH) {
+ skd_prep_zerosize_flush_cdb(scsi_req, skreq);
+ SKD_ASSERT(skreq->flush_cmd == 1);
+ } else {
+ skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
+ }
-skip_sg:
- scsi_req->hdr.sg_list_len_bytes =
- cpu_to_be32(skreq->sg_byte_count);
+ if (req->cmd_flags & REQ_FUA)
+ scsi_req->cdb[1] |= SKD_FUA_NV;
- /* Complete resource allocations. */
- skdev->skreq_free_list = skreq->next;
- skreq->state = SKD_REQ_STATE_BUSY;
- skreq->id += SKD_ID_INCR;
+ scsi_req->hdr.sg_list_len_bytes = cpu_to_be32(skreq->sg_byte_count);
- skmsg->length += sizeof(struct skd_scsi_request);
- fmh->num_protocol_cmds_coalesced++;
+ /* Complete resource allocations. */
+ skreq->state = SKD_REQ_STATE_BUSY;
- /*
- * Update the active request counts.
- * Capture the timeout timestamp.
- */
- skreq->timeout_stamp = skdev->timeout_stamp;
- timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- skdev->timeout_slot[timo_slot]++;
- skdev->in_flight++;
- pr_debug("%s:%s:%d req=0x%x busy=%d\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skdev->in_flight);
+ skmsg->length += sizeof(struct skd_scsi_request);
+ fmh->num_protocol_cmds_coalesced++;
- /*
- * If the FIT msg buffer is full send it.
- */
- if (skmsg->length >= SKD_N_FITMSG_BYTES ||
- fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
- skd_send_fitmsg(skdev, skmsg);
- skmsg = NULL;
- fmh = NULL;
- }
- }
+ dev_dbg(&skdev->pdev->dev, "req=0x%x busy=%d\n", skreq->id,
+ skd_in_flight(skdev));
/*
- * Is a FIT msg in progress? If it is empty put the buffer back
- * on the free list. If it is non-empty send what we got.
- * This minimizes latency when there are fewer requests than
- * what fits in a FIT msg.
+ * If the FIT msg buffer is full send it.
*/
- if (skmsg != NULL) {
- /* Bigger than just a FIT msg header? */
- if (skmsg->length > sizeof(struct fit_msg_hdr)) {
- pr_debug("%s:%s:%d sending msg=%p, len %d\n",
- skdev->name, __func__, __LINE__,
- skmsg, skmsg->length);
+ if (skd_max_req_per_msg == 1) {
+ skd_send_fitmsg(skdev, skmsg);
+ } else {
+ if (mqd->last ||
+ fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
skd_send_fitmsg(skdev, skmsg);
- } else {
- /*
- * The FIT msg is empty. It means we got started
- * on the msg, but the requests were rejected.
- */
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
+ skdev->skmsg = NULL;
}
- skmsg = NULL;
- fmh = NULL;
+ spin_unlock_irqrestore(&skdev->lock, flags);
}
- /*
- * If req is non-NULL it means there is something to do but
- * we are out of a resource.
- */
- if (req)
- blk_stop_queue(skdev->queue);
+ return BLK_STS_OK;
}
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t error)
+static enum blk_eh_timer_return skd_timed_out(struct request *req,
+ bool reserved)
{
- if (unlikely(error)) {
- struct request *req = skreq->req;
- char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
- u32 lba = (u32)blk_rq_pos(req);
- u32 count = blk_rq_sectors(req);
-
- pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
- skd_name(skdev), cmd, lba, count, skreq->id);
- } else
- pr_debug("%s:%s:%d id=0x%x error=%d\n",
- skdev->name, __func__, __LINE__, skreq->id, error);
+ struct skd_device *skdev = req->q->queuedata;
+
+ dev_err(&skdev->pdev->dev, "request with tag %#x timed out\n",
+ blk_mq_unique_tag(req));
- __blk_end_request_all(skreq->req, error);
+ return BLK_EH_RESET_TIMER;
+}
+
+static void skd_complete_rq(struct request *req)
+{
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
+
+ blk_mq_end_request(req, skreq->status);
}
static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- struct request *req = skreq->req;
- int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
- int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
- struct scatterlist *sg = &skreq->sg[0];
+ struct request *req = blk_mq_rq_from_pdu(skreq);
+ struct scatterlist *sgl = &skreq->sg[0], *sg;
int n_sg;
int i;
skreq->sg_byte_count = 0;
- /* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD ||
- skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */
+ WARN_ON_ONCE(skreq->data_dir != DMA_TO_DEVICE &&
+ skreq->data_dir != DMA_FROM_DEVICE);
- n_sg = blk_rq_map_sg(skdev->queue, req, sg);
+ n_sg = blk_rq_map_sg(skdev->queue, req, sgl);
if (n_sg <= 0)
return false;
@@ -842,7 +633,7 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
* Map scatterlist to PCI bus addresses.
* Note PCI might change the number of entries.
*/
- n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
+ n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir);
if (n_sg <= 0)
return false;
@@ -850,10 +641,10 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
skreq->n_sg = n_sg;
- for (i = 0; i < n_sg; i++) {
+ for_each_sg(sgl, sg, n_sg, i) {
struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
- u32 cnt = sg_dma_len(&sg[i]);
- uint64_t dma_addr = sg_dma_address(&sg[i]);
+ u32 cnt = sg_dma_len(sg);
+ uint64_t dma_addr = sg_dma_address(sg);
sgd->control = FIT_SGD_CONTROL_NOT_LAST;
sgd->byte_count = cnt;
@@ -866,16 +657,16 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
if (unlikely(skdev->dbg_level > 1)) {
- pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
+ dev_dbg(&skdev->pdev->dev,
+ "skreq=%x sksg_list=%p sksg_dma=%llx\n",
+ skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
for (i = 0; i < n_sg; i++) {
struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
+
+ dev_dbg(&skdev->pdev->dev,
+ " sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
}
}
@@ -885,9 +676,6 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
static void skd_postop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
- int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
-
/*
* restore the next ptr for next IO request so we
* don't have to set it every time.
@@ -895,51 +683,7 @@ static void skd_postop_sg_list(struct skd_device *skdev,
skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
skreq->sksg_dma_address +
((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
- pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
-}
-
-static void skd_request_fn_not_online(struct request_queue *q)
-{
- struct skd_device *skdev = q->queuedata;
- int error;
-
- SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
-
- skd_log_skdev(skdev, "req_not_online");
- switch (skdev->state) {
- case SKD_DRVR_STATE_PAUSING:
- case SKD_DRVR_STATE_PAUSED:
- case SKD_DRVR_STATE_STARTING:
- case SKD_DRVR_STATE_RESTARTING:
- case SKD_DRVR_STATE_WAIT_BOOT:
- /* In case of starting, we haven't started the queue,
- * so we can't get here... but requests are
- * possibly hanging out waiting for us because we
- * reported the dev/skd0 already. They'll wait
- * forever if connect doesn't complete.
- * What to do??? delay dev/skd0 ??
- */
- case SKD_DRVR_STATE_BUSY:
- case SKD_DRVR_STATE_BUSY_IMMINENT:
- case SKD_DRVR_STATE_BUSY_ERASE:
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- return;
-
- case SKD_DRVR_STATE_BUSY_SANITIZE:
- case SKD_DRVR_STATE_STOPPING:
- case SKD_DRVR_STATE_SYNCING:
- case SKD_DRVR_STATE_FAULT:
- case SKD_DRVR_STATE_DISAPPEARED:
- default:
- error = -EIO;
- break;
- }
-
- /* If we get here, terminate all pending block requeusts
- * with EIO and any scsi pass thru with appropriate sense
- */
-
- skd_fail_all_pending(skdev);
+ pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir);
}
/*
@@ -950,12 +694,22 @@ static void skd_request_fn_not_online(struct request_queue *q)
static void skd_timer_tick_not_online(struct skd_device *skdev);
+static void skd_start_queue(struct work_struct *work)
+{
+ struct skd_device *skdev = container_of(work, typeof(*skdev),
+ start_queue);
+
+ /*
+ * Although it is safe to call blk_start_queue() from interrupt
+ * context, blk_mq_start_hw_queues() must not be called from
+ * interrupt context.
+ */
+ blk_mq_start_hw_queues(skdev->queue);
+}
+
static void skd_timer_tick(ulong arg)
{
struct skd_device *skdev = (struct skd_device *)arg;
-
- u32 timo_slot;
- u32 overdue_timestamp;
unsigned long reqflags;
u32 state;
@@ -972,37 +726,9 @@ static void skd_timer_tick(ulong arg)
if (state != skdev->drive_state)
skd_isr_fwstate(skdev);
- if (skdev->state != SKD_DRVR_STATE_ONLINE) {
+ if (skdev->state != SKD_DRVR_STATE_ONLINE)
skd_timer_tick_not_online(skdev);
- goto timer_func_out;
- }
- skdev->timeout_stamp++;
- timo_slot = skdev->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
-
- /*
- * All requests that happened during the previous use of
- * this slot should be done by now. The previous use was
- * over 7 seconds ago.
- */
- if (skdev->timeout_slot[timo_slot] == 0)
- goto timer_func_out;
-
- /* Something is overdue */
- overdue_timestamp = skdev->timeout_stamp - SKD_N_TIMEOUT_SLOT;
-
- pr_debug("%s:%s:%d found %d timeouts, draining busy=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->timeout_slot[timo_slot], skdev->in_flight);
- pr_err("(%s): Overdue IOs (%d), busy %d\n",
- skd_name(skdev), skdev->timeout_slot[timo_slot],
- skdev->in_flight);
- skdev->timer_countdown = SKD_DRAINING_TIMO;
- skdev->state = SKD_DRVR_STATE_DRAINING_TIMEOUT;
- skdev->timo_slot = timo_slot;
- blk_stop_queue(skdev->queue);
-
-timer_func_out:
mod_timer(&skdev->timer, (jiffies + HZ));
spin_unlock_irqrestore(&skdev->lock, reqflags);
@@ -1015,9 +741,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
case SKD_DRVR_STATE_LOAD:
break;
case SKD_DRVR_STATE_BUSY_SANITIZE:
- pr_debug("%s:%s:%d drive busy sanitize[%x], driver[%x]\n",
- skdev->name, __func__, __LINE__,
- skdev->drive_state, skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "drive busy sanitize[%x], driver[%x]\n",
+ skdev->drive_state, skdev->state);
/* If we've been in sanitize for 3 seconds, we figure we're not
* going to get anymore completions, so recover requests now
*/
@@ -1025,22 +751,21 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
skdev->timer_countdown--;
return;
}
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
break;
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
case SKD_DRVR_STATE_BUSY_ERASE:
- pr_debug("%s:%s:%d busy[%x], countdown=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->state, skdev->timer_countdown);
+ dev_dbg(&skdev->pdev->dev, "busy[%x], countdown=%d\n",
+ skdev->state, skdev->timer_countdown);
if (skdev->timer_countdown > 0) {
skdev->timer_countdown--;
return;
}
- pr_debug("%s:%s:%d busy[%x], timedout=%d, restarting device.",
- skdev->name, __func__, __LINE__,
- skdev->state, skdev->timer_countdown);
+ dev_dbg(&skdev->pdev->dev,
+ "busy[%x], timedout=%d, restarting device.",
+ skdev->state, skdev->timer_countdown);
skd_restart_device(skdev);
break;
@@ -1054,12 +779,12 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
* revcover at some point. */
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): DriveFault Connect Timeout (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "DriveFault Connect Timeout (%x)\n",
+ skdev->drive_state);
/*start the queue so we can respond with error to requests */
/* wakeup anyone waiting for startup complete */
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -1072,29 +797,6 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
case SKD_DRVR_STATE_PAUSED:
break;
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- pr_debug("%s:%s:%d "
- "draining busy [%d] tick[%d] qdb[%d] tmls[%d]\n",
- skdev->name, __func__, __LINE__,
- skdev->timo_slot,
- skdev->timer_countdown,
- skdev->in_flight,
- skdev->timeout_slot[skdev->timo_slot]);
- /* if the slot has cleared we can let the I/O continue */
- if (skdev->timeout_slot[skdev->timo_slot] == 0) {
- pr_debug("%s:%s:%d Slot drained, starting queue.\n",
- skdev->name, __func__, __LINE__);
- skdev->state = SKD_DRVR_STATE_ONLINE;
- blk_start_queue(skdev->queue);
- return;
- }
- if (skdev->timer_countdown > 0) {
- skdev->timer_countdown--;
- return;
- }
- skd_restart_device(skdev);
- break;
-
case SKD_DRVR_STATE_RESTARTING:
if (skdev->timer_countdown > 0) {
skdev->timer_countdown--;
@@ -1103,8 +805,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
/* For now, we fault the drive. Could attempt resets to
* revcover at some point. */
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): DriveFault Reconnect Timeout (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev,
+ "DriveFault Reconnect Timeout (%x)\n",
+ skdev->drive_state);
/*
* Recovering does two things:
@@ -1124,18 +827,18 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
/* It never came out of soft reset. Try to
* recover the requests and then let them
* fail. This is to mitigate hung processes. */
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
else {
- pr_err("(%s): Disable BusMaster (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "Disable BusMaster (%x)\n",
+ skdev->drive_state);
pci_disable_device(skdev->pdev);
skd_disable_interrupts(skdev);
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
}
/*start the queue so we can respond with error to requests */
/* wakeup anyone waiting for startup complete */
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -1154,13 +857,11 @@ static int skd_start_timer(struct skd_device *skdev)
{
int rc;
- init_timer(&skdev->timer);
setup_timer(&skdev->timer, skd_timer_tick, (ulong)skdev);
rc = mod_timer(&skdev->timer, (jiffies + HZ));
if (rc)
- pr_err("%s: failed to start timer %d\n",
- __func__, rc);
+ dev_err(&skdev->pdev->dev, "failed to start timer %d\n", rc);
return rc;
}
@@ -1171,634 +872,6 @@ static void skd_kill_timer(struct skd_device *skdev)
/*
*****************************************************************************
- * IOCTL
- *****************************************************************************
- */
-static int skd_ioctl_sg_io(struct skd_device *skdev,
- fmode_t mode, void __user *argp);
-static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_prep_buffering(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_copy_buffer(struct skd_device *skdev,
- struct skd_sg_io *sksgio, int dxfer_dir);
-static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio);
-static int skd_sg_io_release_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_put_status(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-
-static void skd_complete_special(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl);
-
-static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode,
- uint cmd_in, ulong arg)
-{
- static const int sg_version_num = 30527;
- int rc = 0, timeout;
- struct gendisk *disk = bdev->bd_disk;
- struct skd_device *skdev = disk->private_data;
- int __user *p = (int __user *)arg;
-
- pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n",
- skdev->name, __func__, __LINE__,
- disk->disk_name, current->comm, mode, cmd_in, arg);
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch (cmd_in) {
- case SG_SET_TIMEOUT:
- rc = get_user(timeout, p);
- if (!rc)
- disk->queue->sg_timeout = clock_t_to_jiffies(timeout);
- break;
- case SG_GET_TIMEOUT:
- rc = jiffies_to_clock_t(disk->queue->sg_timeout);
- break;
- case SG_GET_VERSION_NUM:
- rc = put_user(sg_version_num, p);
- break;
- case SG_IO:
- rc = skd_ioctl_sg_io(skdev, mode, (void __user *)arg);
- break;
-
- default:
- rc = -ENOTTY;
- break;
- }
-
- pr_debug("%s:%s:%d %s: completion rc %d\n",
- skdev->name, __func__, __LINE__, disk->disk_name, rc);
- return rc;
-}
-
-static int skd_ioctl_sg_io(struct skd_device *skdev, fmode_t mode,
- void __user *argp)
-{
- int rc;
- struct skd_sg_io sksgio;
-
- memset(&sksgio, 0, sizeof(sksgio));
- sksgio.mode = mode;
- sksgio.argp = argp;
- sksgio.iov = &sksgio.no_iov_iov;
-
- switch (skdev->state) {
- case SKD_DRVR_STATE_ONLINE:
- case SKD_DRVR_STATE_BUSY_IMMINENT:
- break;
-
- default:
- pr_debug("%s:%s:%d drive not online\n",
- skdev->name, __func__, __LINE__);
- rc = -ENXIO;
- goto out;
- }
-
- rc = skd_sg_io_get_and_check_args(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_obtain_skspcl(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_prep_buffering(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_TO_DEV);
- if (rc)
- goto out;
-
- rc = skd_sg_io_send_fitmsg(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_await(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_FROM_DEV);
- if (rc)
- goto out;
-
- rc = skd_sg_io_put_status(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = 0;
-
-out:
- skd_sg_io_release_skspcl(skdev, &sksgio);
-
- if (sksgio.iov != NULL && sksgio.iov != &sksgio.no_iov_iov)
- kfree(sksgio.iov);
- return rc;
-}
-
-static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct sg_io_hdr *sgp = &sksgio->sg;
- int i, acc;
-
- if (!access_ok(VERIFY_WRITE, sksgio->argp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d access sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- if (__copy_from_user(sgp, sksgio->argp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d copy_from_user sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- if (sgp->interface_id != SG_INTERFACE_ID_ORIG) {
- pr_debug("%s:%s:%d interface_id invalid 0x%x\n",
- skdev->name, __func__, __LINE__, sgp->interface_id);
- return -EINVAL;
- }
-
- if (sgp->cmd_len > sizeof(sksgio->cdb)) {
- pr_debug("%s:%s:%d cmd_len invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->cmd_len);
- return -EINVAL;
- }
-
- if (sgp->iovec_count > 256) {
- pr_debug("%s:%s:%d iovec_count invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->iovec_count);
- return -EINVAL;
- }
-
- if (sgp->dxfer_len > (PAGE_SIZE * SKD_N_SG_PER_SPECIAL)) {
- pr_debug("%s:%s:%d dxfer_len invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->dxfer_len);
- return -EINVAL;
- }
-
- switch (sgp->dxfer_direction) {
- case SG_DXFER_NONE:
- acc = -1;
- break;
-
- case SG_DXFER_TO_DEV:
- acc = VERIFY_READ;
- break;
-
- case SG_DXFER_FROM_DEV:
- case SG_DXFER_TO_FROM_DEV:
- acc = VERIFY_WRITE;
- break;
-
- default:
- pr_debug("%s:%s:%d dxfer_dir invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->dxfer_direction);
- return -EINVAL;
- }
-
- if (copy_from_user(sksgio->cdb, sgp->cmdp, sgp->cmd_len)) {
- pr_debug("%s:%s:%d copy_from_user cmdp failed %p\n",
- skdev->name, __func__, __LINE__, sgp->cmdp);
- return -EFAULT;
- }
-
- if (sgp->mx_sb_len != 0) {
- if (!access_ok(VERIFY_WRITE, sgp->sbp, sgp->mx_sb_len)) {
- pr_debug("%s:%s:%d access sbp failed %p\n",
- skdev->name, __func__, __LINE__, sgp->sbp);
- return -EFAULT;
- }
- }
-
- if (sgp->iovec_count == 0) {
- sksgio->iov[0].iov_base = sgp->dxferp;
- sksgio->iov[0].iov_len = sgp->dxfer_len;
- sksgio->iovcnt = 1;
- sksgio->dxfer_len = sgp->dxfer_len;
- } else {
- struct sg_iovec *iov;
- uint nbytes = sizeof(*iov) * sgp->iovec_count;
- size_t iov_data_len;
-
- iov = kmalloc(nbytes, GFP_KERNEL);
- if (iov == NULL) {
- pr_debug("%s:%s:%d alloc iovec failed %d\n",
- skdev->name, __func__, __LINE__,
- sgp->iovec_count);
- return -ENOMEM;
- }
- sksgio->iov = iov;
- sksgio->iovcnt = sgp->iovec_count;
-
- if (copy_from_user(iov, sgp->dxferp, nbytes)) {
- pr_debug("%s:%s:%d copy_from_user iovec failed %p\n",
- skdev->name, __func__, __LINE__, sgp->dxferp);
- return -EFAULT;
- }
-
- /*
- * Sum up the vecs, making sure they don't overflow
- */
- iov_data_len = 0;
- for (i = 0; i < sgp->iovec_count; i++) {
- if (iov_data_len + iov[i].iov_len < iov_data_len)
- return -EINVAL;
- iov_data_len += iov[i].iov_len;
- }
-
- /* SG_IO howto says that the shorter of the two wins */
- if (sgp->dxfer_len < iov_data_len) {
- sksgio->iovcnt = iov_shorten((struct iovec *)iov,
- sgp->iovec_count,
- sgp->dxfer_len);
- sksgio->dxfer_len = sgp->dxfer_len;
- } else
- sksgio->dxfer_len = iov_data_len;
- }
-
- if (sgp->dxfer_direction != SG_DXFER_NONE) {
- struct sg_iovec *iov = sksgio->iov;
- for (i = 0; i < sksgio->iovcnt; i++, iov++) {
- if (!access_ok(acc, iov->iov_base, iov->iov_len)) {
- pr_debug("%s:%s:%d access data failed %p/%d\n",
- skdev->name, __func__, __LINE__,
- iov->iov_base, (int)iov->iov_len);
- return -EFAULT;
- }
- }
- }
-
- return 0;
-}
-
-static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = NULL;
- int rc;
-
- for (;;) {
- ulong flags;
-
- spin_lock_irqsave(&skdev->lock, flags);
- skspcl = skdev->skspcl_free_list;
- if (skspcl != NULL) {
- skdev->skspcl_free_list =
- (struct skd_special_context *)skspcl->req.next;
- skspcl->req.id += SKD_ID_INCR;
- skspcl->req.state = SKD_REQ_STATE_SETUP;
- skspcl->orphaned = 0;
- skspcl->req.n_sg = 0;
- }
- spin_unlock_irqrestore(&skdev->lock, flags);
-
- if (skspcl != NULL) {
- rc = 0;
- break;
- }
-
- pr_debug("%s:%s:%d blocking\n",
- skdev->name, __func__, __LINE__);
-
- rc = wait_event_interruptible_timeout(
- skdev->waitq,
- (skdev->skspcl_free_list != NULL),
- msecs_to_jiffies(sksgio->sg.timeout));
-
- pr_debug("%s:%s:%d unblocking, rc=%d\n",
- skdev->name, __func__, __LINE__, rc);
-
- if (rc <= 0) {
- if (rc == 0)
- rc = -ETIMEDOUT;
- else
- rc = -EINTR;
- break;
- }
- /*
- * If we get here rc > 0 meaning the timeout to
- * wait_event_interruptible_timeout() had time left, hence the
- * sought event -- non-empty free list -- happened.
- * Retry the allocation.
- */
- }
- sksgio->skspcl = skspcl;
-
- return rc;
-}
-
-static int skd_skreq_prep_buffering(struct skd_device *skdev,
- struct skd_request_context *skreq,
- u32 dxfer_len)
-{
- u32 resid = dxfer_len;
-
- /*
- * The DMA engine must have aligned addresses and byte counts.
- */
- resid += (-resid) & 3;
- skreq->sg_byte_count = resid;
-
- skreq->n_sg = 0;
-
- while (resid > 0) {
- u32 nbytes = PAGE_SIZE;
- u32 ix = skreq->n_sg;
- struct scatterlist *sg = &skreq->sg[ix];
- struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
- struct page *page;
-
- if (nbytes > resid)
- nbytes = resid;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL)
- return -ENOMEM;
-
- sg_set_page(sg, page, nbytes, 0);
-
- /* TODO: This should be going through a pci_???()
- * routine to do proper mapping. */
- sksg->control = FIT_SGD_CONTROL_NOT_LAST;
- sksg->byte_count = nbytes;
-
- sksg->host_side_addr = sg_phys(sg);
-
- sksg->dev_side_addr = 0;
- sksg->next_desc_ptr = skreq->sksg_dma_address +
- (ix + 1) * sizeof(*sksg);
-
- skreq->n_sg++;
- resid -= nbytes;
- }
-
- if (skreq->n_sg > 0) {
- u32 ix = skreq->n_sg - 1;
- struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
-
- sksg->control = FIT_SGD_CONTROL_LAST;
- sksg->next_desc_ptr = 0;
- }
-
- if (unlikely(skdev->dbg_level > 1)) {
- u32 i;
-
- pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
- for (i = 0; i < skreq->n_sg; i++) {
- struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
-
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
- }
- }
-
- return 0;
-}
-
-static int skd_sg_io_prep_buffering(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- struct skd_request_context *skreq = &skspcl->req;
- u32 dxfer_len = sksgio->dxfer_len;
- int rc;
-
- rc = skd_skreq_prep_buffering(skdev, skreq, dxfer_len);
- /*
- * Eventually, errors or not, skd_release_special() is called
- * to recover allocations including partial allocations.
- */
- return rc;
-}
-
-static int skd_sg_io_copy_buffer(struct skd_device *skdev,
- struct skd_sg_io *sksgio, int dxfer_dir)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- u32 iov_ix = 0;
- struct sg_iovec curiov;
- u32 sksg_ix = 0;
- u8 *bufp = NULL;
- u32 buf_len = 0;
- u32 resid = sksgio->dxfer_len;
- int rc;
-
- curiov.iov_len = 0;
- curiov.iov_base = NULL;
-
- if (dxfer_dir != sksgio->sg.dxfer_direction) {
- if (dxfer_dir != SG_DXFER_TO_DEV ||
- sksgio->sg.dxfer_direction != SG_DXFER_TO_FROM_DEV)
- return 0;
- }
-
- while (resid > 0) {
- u32 nbytes = PAGE_SIZE;
-
- if (curiov.iov_len == 0) {
- curiov = sksgio->iov[iov_ix++];
- continue;
- }
-
- if (buf_len == 0) {
- struct page *page;
- page = sg_page(&skspcl->req.sg[sksg_ix++]);
- bufp = page_address(page);
- buf_len = PAGE_SIZE;
- }
-
- nbytes = min_t(u32, nbytes, resid);
- nbytes = min_t(u32, nbytes, curiov.iov_len);
- nbytes = min_t(u32, nbytes, buf_len);
-
- if (dxfer_dir == SG_DXFER_TO_DEV)
- rc = __copy_from_user(bufp, curiov.iov_base, nbytes);
- else
- rc = __copy_to_user(curiov.iov_base, bufp, nbytes);
-
- if (rc)
- return -EFAULT;
-
- resid -= nbytes;
- curiov.iov_len -= nbytes;
- curiov.iov_base += nbytes;
- buf_len -= nbytes;
- }
-
- return 0;
-}
-
-static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
- struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
-
- memset(skspcl->msg_buf, 0, SKD_N_SPECIAL_FITMSG_BYTES);
-
- /* Initialize the FIT msg header */
- fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
- fmh->num_protocol_cmds_coalesced = 1;
-
- /* Initialize the SCSI request */
- if (sksgio->sg.dxfer_direction != SG_DXFER_NONE)
- scsi_req->hdr.sg_list_dma_address =
- cpu_to_be64(skspcl->req.sksg_dma_address);
- scsi_req->hdr.tag = skspcl->req.id;
- scsi_req->hdr.sg_list_len_bytes =
- cpu_to_be32(skspcl->req.sg_byte_count);
- memcpy(scsi_req->cdb, sksgio->cdb, sizeof(scsi_req->cdb));
-
- skspcl->req.state = SKD_REQ_STATE_BUSY;
- skd_send_special_fitmsg(skdev, skspcl);
-
- return 0;
-}
-
-static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio)
-{
- unsigned long flags;
- int rc;
-
- rc = wait_event_interruptible_timeout(skdev->waitq,
- (sksgio->skspcl->req.state !=
- SKD_REQ_STATE_BUSY),
- msecs_to_jiffies(sksgio->sg.
- timeout));
-
- spin_lock_irqsave(&skdev->lock, flags);
-
- if (sksgio->skspcl->req.state == SKD_REQ_STATE_ABORTED) {
- pr_debug("%s:%s:%d skspcl %p aborted\n",
- skdev->name, __func__, __LINE__, sksgio->skspcl);
-
- /* Build check cond, sense and let command finish. */
- /* For a timeout, we must fabricate completion and sense
- * data to complete the command */
- sksgio->skspcl->req.completion.status =
- SAM_STAT_CHECK_CONDITION;
-
- memset(&sksgio->skspcl->req.err_info, 0,
- sizeof(sksgio->skspcl->req.err_info));
- sksgio->skspcl->req.err_info.type = 0x70;
- sksgio->skspcl->req.err_info.key = ABORTED_COMMAND;
- sksgio->skspcl->req.err_info.code = 0x44;
- sksgio->skspcl->req.err_info.qual = 0;
- rc = 0;
- } else if (sksgio->skspcl->req.state != SKD_REQ_STATE_BUSY)
- /* No longer on the adapter. We finish. */
- rc = 0;
- else {
- /* Something's gone wrong. Still busy. Timeout or
- * user interrupted (control-C). Mark as an orphan
- * so it will be disposed when completed. */
- sksgio->skspcl->orphaned = 1;
- sksgio->skspcl = NULL;
- if (rc == 0) {
- pr_debug("%s:%s:%d timed out %p (%u ms)\n",
- skdev->name, __func__, __LINE__,
- sksgio, sksgio->sg.timeout);
- rc = -ETIMEDOUT;
- } else {
- pr_debug("%s:%s:%d cntlc %p\n",
- skdev->name, __func__, __LINE__, sksgio);
- rc = -EINTR;
- }
- }
-
- spin_unlock_irqrestore(&skdev->lock, flags);
-
- return rc;
-}
-
-static int skd_sg_io_put_status(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct sg_io_hdr *sgp = &sksgio->sg;
- struct skd_special_context *skspcl = sksgio->skspcl;
- int resid = 0;
-
- u32 nb = be32_to_cpu(skspcl->req.completion.num_returned_bytes);
-
- sgp->status = skspcl->req.completion.status;
- resid = sksgio->dxfer_len - nb;
-
- sgp->masked_status = sgp->status & STATUS_MASK;
- sgp->msg_status = 0;
- sgp->host_status = 0;
- sgp->driver_status = 0;
- sgp->resid = resid;
- if (sgp->masked_status || sgp->host_status || sgp->driver_status)
- sgp->info |= SG_INFO_CHECK;
-
- pr_debug("%s:%s:%d status %x masked %x resid 0x%x\n",
- skdev->name, __func__, __LINE__,
- sgp->status, sgp->masked_status, sgp->resid);
-
- if (sgp->masked_status == SAM_STAT_CHECK_CONDITION) {
- if (sgp->mx_sb_len > 0) {
- struct fit_comp_error_info *ei = &skspcl->req.err_info;
- u32 nbytes = sizeof(*ei);
-
- nbytes = min_t(u32, nbytes, sgp->mx_sb_len);
-
- sgp->sb_len_wr = nbytes;
-
- if (__copy_to_user(sgp->sbp, ei, nbytes)) {
- pr_debug("%s:%s:%d copy_to_user sense failed %p\n",
- skdev->name, __func__, __LINE__,
- sgp->sbp);
- return -EFAULT;
- }
- }
- }
-
- if (__copy_to_user(sksgio->argp, sgp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d copy_to_user sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int skd_sg_io_release_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
-
- if (skspcl != NULL) {
- ulong flags;
-
- sksgio->skspcl = NULL;
-
- spin_lock_irqsave(&skdev->lock, flags);
- skd_release_special(skdev, skspcl);
- spin_unlock_irqrestore(&skdev->lock, flags);
- }
-
- return 0;
-}
-
-/*
- *****************************************************************************
* INTERNAL REQUESTS -- generated by driver itself
*****************************************************************************
*/
@@ -1811,14 +884,15 @@ static int skd_format_internal_skspcl(struct skd_device *skdev)
uint64_t dma_address;
struct skd_scsi_request *scsi;
- fmh = (struct fit_msg_hdr *)&skspcl->msg_buf[0];
+ fmh = &skspcl->msg_buf->fmh;
fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
fmh->num_protocol_cmds_coalesced = 1;
- scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ scsi = &skspcl->msg_buf->scsi[0];
memset(scsi, 0, sizeof(*scsi));
dma_address = skspcl->req.sksg_dma_address;
scsi->hdr.sg_list_dma_address = cpu_to_be64(dma_address);
+ skspcl->req.n_sg = 1;
sgd->control = FIT_SGD_CONTROL_LAST;
sgd->byte_count = 0;
sgd->host_side_addr = skspcl->db_dma_address;
@@ -1846,11 +920,9 @@ static void skd_send_internal_skspcl(struct skd_device *skdev,
*/
return;
- SKD_ASSERT((skspcl->req.id & SKD_ID_INCR) == 0);
skspcl->req.state = SKD_REQ_STATE_BUSY;
- skspcl->req.id += SKD_ID_INCR;
- scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ scsi = &skspcl->msg_buf->scsi[0];
scsi->hdr.tag = skspcl->req.id;
memset(scsi->cdb, 0, sizeof(scsi->cdb));
@@ -1940,32 +1012,35 @@ static void skd_log_check_status(struct skd_device *skdev, u8 status, u8 key,
/* If the check condition is of special interest, log a message */
if ((status == SAM_STAT_CHECK_CONDITION) && (key == 0x02)
&& (code == 0x04) && (qual == 0x06)) {
- pr_err("(%s): *** LOST_WRITE_DATA ERROR *** key/asc/"
- "ascq/fruc %02x/%02x/%02x/%02x\n",
- skd_name(skdev), key, code, qual, fruc);
+ dev_err(&skdev->pdev->dev,
+ "*** LOST_WRITE_DATA ERROR *** key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
+ key, code, qual, fruc);
}
}
static void skd_complete_internal(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr,
struct skd_special_context *skspcl)
{
u8 *buf = skspcl->data_buf;
u8 status;
int i;
- struct skd_scsi_request *scsi =
- (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ struct skd_scsi_request *scsi = &skspcl->msg_buf->scsi[0];
+
+ lockdep_assert_held(&skdev->lock);
SKD_ASSERT(skspcl == &skdev->internal_skspcl);
- pr_debug("%s:%s:%d complete internal %x\n",
- skdev->name, __func__, __LINE__, scsi->cdb[0]);
+ dev_dbg(&skdev->pdev->dev, "complete internal %x\n", scsi->cdb[0]);
+
+ dma_sync_single_for_cpu(&skdev->pdev->dev,
+ skspcl->db_dma_address,
+ skspcl->req.sksg_list[0].byte_count,
+ DMA_BIDIRECTIONAL);
skspcl->req.completion = *skcomp;
skspcl->req.state = SKD_REQ_STATE_IDLE;
- skspcl->req.id += SKD_ID_INCR;
status = skspcl->req.completion.status;
@@ -1981,14 +1056,15 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl, WRITE_BUFFER);
else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d TUR failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "TUR failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d **** TUR failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** TUR failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -1997,14 +1073,15 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl, READ_BUFFER);
else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d write buffer failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "write buffer failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d **** write buffer failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** write buffer failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -2014,33 +1091,31 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl,
READ_CAPACITY);
else {
- pr_err(
- "(%s):*** W/R Buffer mismatch %d ***\n",
- skd_name(skdev), skdev->connect_retries);
+ dev_err(&skdev->pdev->dev,
+ "*** W/R Buffer mismatch %d ***\n",
+ skdev->connect_retries);
if (skdev->connect_retries <
SKD_MAX_CONNECT_RETRIES) {
skdev->connect_retries++;
skd_soft_reset(skdev);
} else {
- pr_err(
- "(%s): W/R Buffer Connect Error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev,
+ "W/R Buffer Connect Error\n");
return;
}
}
} else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d "
- "read buffer failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "read buffer failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d "
- "**** read buffer failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** read buffer failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -2054,10 +1129,9 @@ static void skd_complete_internal(struct skd_device *skdev,
(buf[4] << 24) | (buf[5] << 16) |
(buf[6] << 8) | buf[7];
- pr_debug("%s:%s:%d last lba %d, bs %d\n",
- skdev->name, __func__, __LINE__,
- skdev->read_cap_last_lba,
- skdev->read_cap_blocksize);
+ dev_dbg(&skdev->pdev->dev, "last lba %d, bs %d\n",
+ skdev->read_cap_last_lba,
+ skdev->read_cap_blocksize);
set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
@@ -2068,13 +1142,10 @@ static void skd_complete_internal(struct skd_device *skdev,
(skerr->key == MEDIUM_ERROR)) {
skdev->read_cap_last_lba = ~0;
set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
- pr_debug("%s:%s:%d "
- "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n");
skd_send_internal_skspcl(skdev, skspcl, INQUIRY);
} else {
- pr_debug("%s:%s:%d **** READCAP failed, retry TUR\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** READCAP failed, retry TUR\n");
skd_send_internal_skspcl(skdev, skspcl,
TEST_UNIT_READY);
}
@@ -2091,8 +1162,7 @@ static void skd_complete_internal(struct skd_device *skdev,
}
if (skd_unquiesce_dev(skdev) < 0)
- pr_debug("%s:%s:%d **** failed, to ONLINE device\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** failed, to ONLINE device\n");
/* connection is complete */
skdev->connect_retries = 0;
break;
@@ -2120,27 +1190,20 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg)
{
u64 qcmd;
- struct fit_msg_hdr *fmh;
- pr_debug("%s:%s:%d dma address 0x%llx, busy=%d\n",
- skdev->name, __func__, __LINE__,
- skmsg->mb_dma_address, skdev->in_flight);
- pr_debug("%s:%s:%d msg_buf 0x%p, offset %x\n",
- skdev->name, __func__, __LINE__,
- skmsg->msg_buf, skmsg->offset);
+ dev_dbg(&skdev->pdev->dev, "dma address 0x%llx, busy=%d\n",
+ skmsg->mb_dma_address, skd_in_flight(skdev));
+ dev_dbg(&skdev->pdev->dev, "msg_buf %p\n", skmsg->msg_buf);
qcmd = skmsg->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL;
- fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
- skmsg->outstanding = fmh->num_protocol_cmds_coalesced;
-
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skmsg->msg_buf;
int i;
for (i = 0; i < skmsg->length; i += 8) {
- pr_debug("%s:%s:%d msg[%2d] %8ph\n",
- skdev->name, __func__, __LINE__, i, &bp[i]);
+ dev_dbg(&skdev->pdev->dev, "msg[%2d] %8ph\n", i,
+ &bp[i]);
if (i == 0)
i = 64 - 8;
}
@@ -2160,6 +1223,12 @@ static void skd_send_fitmsg(struct skd_device *skdev,
*/
qcmd |= FIT_QCMD_MSGSIZE_64;
+ dma_sync_single_for_device(&skdev->pdev->dev, skmsg->mb_dma_address,
+ skmsg->length, DMA_TO_DEVICE);
+
+ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
+ smp_wmb();
+
SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
}
@@ -2168,30 +1237,31 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
{
u64 qcmd;
+ WARN_ON_ONCE(skspcl->req.n_sg != 1);
+
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skspcl->msg_buf;
int i;
for (i = 0; i < SKD_N_SPECIAL_FITMSG_BYTES; i += 8) {
- pr_debug("%s:%s:%d spcl[%2d] %8ph\n",
- skdev->name, __func__, __LINE__, i, &bp[i]);
+ dev_dbg(&skdev->pdev->dev, " spcl[%2d] %8ph\n", i,
+ &bp[i]);
if (i == 0)
i = 64 - 8;
}
- pr_debug("%s:%s:%d skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skspcl, skspcl->req.id, skspcl->req.sksg_list,
- skspcl->req.sksg_dma_address);
+ dev_dbg(&skdev->pdev->dev,
+ "skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
+ skspcl, skspcl->req.id, skspcl->req.sksg_list,
+ skspcl->req.sksg_dma_address);
for (i = 0; i < skspcl->req.n_sg; i++) {
struct fit_sg_descriptor *sgd =
&skspcl->req.sksg_list[i];
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
+ dev_dbg(&skdev->pdev->dev,
+ " sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
}
}
@@ -2202,6 +1272,20 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
qcmd = skspcl->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
+ dma_sync_single_for_device(&skdev->pdev->dev, skspcl->mb_dma_address,
+ SKD_N_SPECIAL_FITMSG_BYTES, DMA_TO_DEVICE);
+ dma_sync_single_for_device(&skdev->pdev->dev,
+ skspcl->req.sksg_dma_address,
+ 1 * sizeof(struct fit_sg_descriptor),
+ DMA_TO_DEVICE);
+ dma_sync_single_for_device(&skdev->pdev->dev,
+ skspcl->db_dma_address,
+ skspcl->req.sksg_list[0].byte_count,
+ DMA_BIDIRECTIONAL);
+
+ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
+ smp_wmb();
+
SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
}
@@ -2212,8 +1296,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
*/
static void skd_complete_other(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr);
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr);
struct sns_info {
u8 type;
@@ -2262,21 +1346,20 @@ static struct sns_info skd_chkstat_table[] = {
static enum skd_check_status_action
skd_check_status(struct skd_device *skdev,
- u8 cmp_status, volatile struct fit_comp_error_info *skerr)
+ u8 cmp_status, struct fit_comp_error_info *skerr)
{
- int i, n;
+ int i;
- pr_err("(%s): key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
- skd_name(skdev), skerr->key, skerr->code, skerr->qual,
- skerr->fruc);
+ dev_err(&skdev->pdev->dev, "key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
+ skerr->key, skerr->code, skerr->qual, skerr->fruc);
- pr_debug("%s:%s:%d stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
- skdev->name, __func__, __LINE__, skerr->type, cmp_status,
- skerr->key, skerr->code, skerr->qual, skerr->fruc);
+ dev_dbg(&skdev->pdev->dev,
+ "stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
+ skerr->type, cmp_status, skerr->key, skerr->code, skerr->qual,
+ skerr->fruc);
/* Does the info match an entry in the good category? */
- n = sizeof(skd_chkstat_table) / sizeof(skd_chkstat_table[0]);
- for (i = 0; i < n; i++) {
+ for (i = 0; i < ARRAY_SIZE(skd_chkstat_table); i++) {
struct sns_info *sns = &skd_chkstat_table[i];
if (sns->mask & 0x10)
@@ -2300,10 +1383,9 @@ skd_check_status(struct skd_device *skdev,
continue;
if (sns->action == SKD_CHECK_STATUS_REPORT_SMART_ALERT) {
- pr_err("(%s): SMART Alert: sense key/asc/ascq "
- "%02x/%02x/%02x\n",
- skd_name(skdev), skerr->key,
- skerr->code, skerr->qual);
+ dev_err(&skdev->pdev->dev,
+ "SMART Alert: sense key/asc/ascq %02x/%02x/%02x\n",
+ skerr->key, skerr->code, skerr->qual);
}
return sns->action;
}
@@ -2312,335 +1394,80 @@ skd_check_status(struct skd_device *skdev,
* zero status means good
*/
if (cmp_status) {
- pr_debug("%s:%s:%d status check: error\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "status check: error\n");
return SKD_CHECK_STATUS_REPORT_ERROR;
}
- pr_debug("%s:%s:%d status check good default\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "status check good default\n");
return SKD_CHECK_STATUS_REPORT_GOOD;
}
static void skd_resolve_req_exception(struct skd_device *skdev,
- struct skd_request_context *skreq)
+ struct skd_request_context *skreq,
+ struct request *req)
{
u8 cmp_status = skreq->completion.status;
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, BLK_STS_OK);
+ skreq->status = BLK_STS_OK;
+ blk_mq_complete_request(req);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
skd_log_skreq(skdev, skreq, "retry(busy)");
- blk_requeue_request(skdev->queue, skreq->req);
- pr_info("(%s) drive BUSY imminent\n", skd_name(skdev));
+ blk_requeue_request(skdev->queue, req);
+ dev_info(&skdev->pdev->dev, "drive BUSY imminent\n");
skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT;
skdev->timer_countdown = SKD_TIMER_MINUTES(20);
skd_quiesce_dev(skdev);
break;
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
- if ((unsigned long) ++skreq->req->special < SKD_MAX_RETRIES) {
+ if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
skd_log_skreq(skdev, skreq, "retry");
- blk_requeue_request(skdev->queue, skreq->req);
+ blk_requeue_request(skdev->queue, req);
break;
}
- /* fall through to report error */
+ /* fall through */
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
+ skreq->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
break;
}
}
-/* assume spinlock is already held */
static void skd_release_skreq(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- u32 msg_slot;
- struct skd_fitmsg_context *skmsg;
-
- u32 timo_slot;
-
- /*
- * Reclaim the FIT msg buffer if this is
- * the first of the requests it carried to
- * be completed. The FIT msg buffer used to
- * send this request cannot be reused until
- * we are sure the s1120 card has copied
- * it to its memory. The FIT msg might have
- * contained several requests. As soon as
- * any of them are completed we know that
- * the entire FIT msg was transferred.
- * Only the first completed request will
- * match the FIT msg buffer id. The FIT
- * msg buffer id is immediately updated.
- * When subsequent requests complete the FIT
- * msg buffer id won't match, so we know
- * quite cheaply that it is already done.
- */
- msg_slot = skreq->fitmsg_id & SKD_ID_SLOT_MASK;
- SKD_ASSERT(msg_slot < skdev->num_fitmsg_context);
-
- skmsg = &skdev->skmsg_table[msg_slot];
- if (skmsg->id == skreq->fitmsg_id) {
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_BUSY);
- SKD_ASSERT(skmsg->outstanding > 0);
- skmsg->outstanding--;
- if (skmsg->outstanding == 0) {
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
- }
- }
-
- /*
- * Decrease the number of active requests.
- * Also decrements the count in the timeout slot.
- */
- SKD_ASSERT(skdev->in_flight > 0);
- skdev->in_flight -= 1;
-
- timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- SKD_ASSERT(skdev->timeout_slot[timo_slot] > 0);
- skdev->timeout_slot[timo_slot] -= 1;
-
- /*
- * Reset backpointer
- */
- skreq->req = NULL;
-
/*
* Reclaim the skd_request_context
*/
skreq->state = SKD_REQ_STATE_IDLE;
- skreq->id += SKD_ID_INCR;
- skreq->next = skdev->skreq_free_list;
- skdev->skreq_free_list = skreq;
}
-#define DRIVER_INQ_EVPD_PAGE_CODE 0xDA
-
-static void skd_do_inq_page_00(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- uint16_t insert_pt, max_bytes, drive_pages, drive_bytes, new_size;
-
- /* Caller requested "supported pages". The driver needs to insert
- * its page.
- */
- pr_debug("%s:%s:%d skd_do_driver_inquiry: modify supported pages.\n",
- skdev->name, __func__, __LINE__);
-
- /* If the device rejected the request because the CDB was
- * improperly formed, then just leave.
- */
- if (skcomp->status == SAM_STAT_CHECK_CONDITION &&
- skerr->key == ILLEGAL_REQUEST && skerr->code == 0x24)
- return;
-
- /* Get the amount of space the caller allocated */
- max_bytes = (cdb[3] << 8) | cdb[4];
-
- /* Get the number of pages actually returned by the device */
- drive_pages = (buf[2] << 8) | buf[3];
- drive_bytes = drive_pages + 4;
- new_size = drive_pages + 1;
-
- /* Supported pages must be in numerical order, so find where
- * the driver page needs to be inserted into the list of
- * pages returned by the device.
- */
- for (insert_pt = 4; insert_pt < drive_bytes; insert_pt++) {
- if (buf[insert_pt] == DRIVER_INQ_EVPD_PAGE_CODE)
- return; /* Device using this page code. abort */
- else if (buf[insert_pt] > DRIVER_INQ_EVPD_PAGE_CODE)
- break;
- }
-
- if (insert_pt < max_bytes) {
- uint16_t u;
-
- /* Shift everything up one byte to make room. */
- for (u = new_size + 3; u > insert_pt; u--)
- buf[u] = buf[u - 1];
- buf[insert_pt] = DRIVER_INQ_EVPD_PAGE_CODE;
-
- /* SCSI byte order increment of num_returned_bytes by 1 */
- skcomp->num_returned_bytes =
- be32_to_cpu(skcomp->num_returned_bytes) + 1;
- skcomp->num_returned_bytes =
- be32_to_cpu(skcomp->num_returned_bytes);
- }
-
- /* update page length field to reflect the driver's page too */
- buf[2] = (uint8_t)((new_size >> 8) & 0xFF);
- buf[3] = (uint8_t)((new_size >> 0) & 0xFF);
-}
-
-static void skd_get_link_info(struct pci_dev *pdev, u8 *speed, u8 *width)
-{
- int pcie_reg;
- u16 pci_bus_speed;
- u8 pci_lanes;
-
- pcie_reg = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_reg) {
- u16 linksta;
- pci_read_config_word(pdev, pcie_reg + PCI_EXP_LNKSTA, &linksta);
-
- pci_bus_speed = linksta & 0xF;
- pci_lanes = (linksta & 0x3F0) >> 4;
- } else {
- *speed = STEC_LINK_UNKNOWN;
- *width = 0xFF;
- return;
- }
-
- switch (pci_bus_speed) {
- case 1:
- *speed = STEC_LINK_2_5GTS;
- break;
- case 2:
- *speed = STEC_LINK_5GTS;
- break;
- case 3:
- *speed = STEC_LINK_8GTS;
- break;
- default:
- *speed = STEC_LINK_UNKNOWN;
- break;
- }
-
- if (pci_lanes <= 0x20)
- *width = pci_lanes;
- else
- *width = 0xFF;
-}
-
-static void skd_do_inq_page_da(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- struct pci_dev *pdev = skdev->pdev;
- unsigned max_bytes;
- struct driver_inquiry_data inq;
- u16 val;
-
- pr_debug("%s:%s:%d skd_do_driver_inquiry: return driver page\n",
- skdev->name, __func__, __LINE__);
-
- memset(&inq, 0, sizeof(inq));
-
- inq.page_code = DRIVER_INQ_EVPD_PAGE_CODE;
-
- skd_get_link_info(pdev, &inq.pcie_link_speed, &inq.pcie_link_lanes);
- inq.pcie_bus_number = cpu_to_be16(pdev->bus->number);
- inq.pcie_device_number = PCI_SLOT(pdev->devfn);
- inq.pcie_function_number = PCI_FUNC(pdev->devfn);
-
- pci_read_config_word(pdev, PCI_VENDOR_ID, &val);
- inq.pcie_vendor_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_DEVICE_ID, &val);
- inq.pcie_device_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_SUBSYSTEM_VENDOR_ID, &val);
- inq.pcie_subsystem_vendor_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &val);
- inq.pcie_subsystem_device_id = cpu_to_be16(val);
-
- /* Driver version, fixed lenth, padded with spaces on the right */
- inq.driver_version_length = sizeof(inq.driver_version);
- memset(&inq.driver_version, ' ', sizeof(inq.driver_version));
- memcpy(inq.driver_version, DRV_VER_COMPL,
- min(sizeof(inq.driver_version), strlen(DRV_VER_COMPL)));
-
- inq.page_length = cpu_to_be16((sizeof(inq) - 4));
-
- /* Clear the error set by the device */
- skcomp->status = SAM_STAT_GOOD;
- memset((void *)skerr, 0, sizeof(*skerr));
-
- /* copy response into output buffer */
- max_bytes = (cdb[3] << 8) | cdb[4];
- memcpy(buf, &inq, min_t(unsigned, max_bytes, sizeof(inq)));
-
- skcomp->num_returned_bytes =
- be32_to_cpu(min_t(uint16_t, max_bytes, sizeof(inq)));
-}
-
-static void skd_do_driver_inq(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- if (!buf)
- return;
- else if (cdb[0] != INQUIRY)
- return; /* Not an INQUIRY */
- else if ((cdb[1] & 1) == 0)
- return; /* EVPD not set */
- else if (cdb[2] == 0)
- /* Need to add driver's page to supported pages list */
- skd_do_inq_page_00(skdev, skcomp, skerr, cdb, buf);
- else if (cdb[2] == DRIVER_INQ_EVPD_PAGE_CODE)
- /* Caller requested driver's page */
- skd_do_inq_page_da(skdev, skcomp, skerr, cdb, buf);
-}
-
-static unsigned char *skd_sg_1st_page_ptr(struct scatterlist *sg)
-{
- if (!sg)
- return NULL;
- if (!sg_page(sg))
- return NULL;
- return sg_virt(sg);
-}
-
-static void skd_process_scsi_inq(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl)
-{
- uint8_t *buf;
- struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
- struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
-
- dma_sync_sg_for_cpu(skdev->class_dev, skspcl->req.sg, skspcl->req.n_sg,
- skspcl->req.sg_data_dir);
- buf = skd_sg_1st_page_ptr(skspcl->req.sg);
-
- if (buf)
- skd_do_driver_inq(skdev, skcomp, skerr, scsi_req->cdb, buf);
-}
-
-
static int skd_isr_completion_posted(struct skd_device *skdev,
int limit, int *enqueued)
{
- volatile struct fit_completion_entry_v1 *skcmp = NULL;
- volatile struct fit_comp_error_info *skerr;
+ struct fit_completion_entry_v1 *skcmp;
+ struct fit_comp_error_info *skerr;
u16 req_id;
- u32 req_slot;
+ u32 tag;
+ u16 hwq = 0;
+ struct request *rq;
struct skd_request_context *skreq;
- u16 cmp_cntxt = 0;
- u8 cmp_status = 0;
- u8 cmp_cycle = 0;
- u32 cmp_bytes = 0;
+ u16 cmp_cntxt;
+ u8 cmp_status;
+ u8 cmp_cycle;
+ u32 cmp_bytes;
int rc = 0;
int processed = 0;
+ lockdep_assert_held(&skdev->lock);
+
for (;; ) {
SKD_ASSERT(skdev->skcomp_ix < SKD_N_COMPLETION_ENTRY);
@@ -2652,16 +1479,14 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
skerr = &skdev->skerr_table[skdev->skcomp_ix];
- pr_debug("%s:%s:%d "
- "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d "
- "busy=%d rbytes=0x%x proto=%d\n",
- skdev->name, __func__, __LINE__, skdev->skcomp_cycle,
- skdev->skcomp_ix, cmp_cycle, cmp_cntxt, cmp_status,
- skdev->in_flight, cmp_bytes, skdev->proto_ver);
+ dev_dbg(&skdev->pdev->dev,
+ "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d busy=%d rbytes=0x%x proto=%d\n",
+ skdev->skcomp_cycle, skdev->skcomp_ix, cmp_cycle,
+ cmp_cntxt, cmp_status, skd_in_flight(skdev),
+ cmp_bytes, skdev->proto_ver);
if (cmp_cycle != skdev->skcomp_cycle) {
- pr_debug("%s:%s:%d end of completions\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "end of completions\n");
break;
}
/*
@@ -2680,49 +1505,38 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
* r/w request (see skd_start() above) or a special request.
*/
req_id = cmp_cntxt;
- req_slot = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
+ tag = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
/* Is this other than a r/w request? */
- if (req_slot >= skdev->num_req_context) {
+ if (tag >= skdev->num_req_context) {
/*
* This is not a completion for a r/w request.
*/
+ WARN_ON_ONCE(blk_mq_tag_to_rq(skdev->tag_set.tags[hwq],
+ tag));
skd_complete_other(skdev, skcmp, skerr);
continue;
}
- skreq = &skdev->skreq_table[req_slot];
+ rq = blk_mq_tag_to_rq(skdev->tag_set.tags[hwq], tag);
+ if (WARN(!rq, "No request for tag %#x -> %#x\n", cmp_cntxt,
+ tag))
+ continue;
+ skreq = blk_mq_rq_to_pdu(rq);
/*
* Make sure the request ID for the slot matches.
*/
if (skreq->id != req_id) {
- pr_debug("%s:%s:%d mismatch comp_id=0x%x req_id=0x%x\n",
- skdev->name, __func__, __LINE__,
- req_id, skreq->id);
- {
- u16 new_id = cmp_cntxt;
- pr_err("(%s): Completion mismatch "
- "comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
- skd_name(skdev), req_id,
- skreq->id, new_id);
+ dev_err(&skdev->pdev->dev,
+ "Completion mismatch comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
+ req_id, skreq->id, cmp_cntxt);
- continue;
- }
+ continue;
}
SKD_ASSERT(skreq->state == SKD_REQ_STATE_BUSY);
- if (skreq->state == SKD_REQ_STATE_ABORTED) {
- pr_debug("%s:%s:%d reclaim req %p id=%04x\n",
- skdev->name, __func__, __LINE__,
- skreq, skreq->id);
- /* a previously timed out command can
- * now be cleaned up */
- skd_release_skreq(skdev, skreq);
- continue;
- }
-
skreq->completion = *skcmp;
if (unlikely(cmp_status == SAM_STAT_CHECK_CONDITION)) {
skreq->err_info = *skerr;
@@ -2734,27 +1548,17 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
if (skreq->n_sg > 0)
skd_postop_sg_list(skdev, skreq);
- if (!skreq->req) {
- pr_debug("%s:%s:%d NULL backptr skdreq %p, "
- "req=0x%x req_id=0x%x\n",
- skdev->name, __func__, __LINE__,
- skreq, skreq->id, req_id);
- } else {
- /*
- * Capture the outcome and post it back to the
- * native request.
- */
- if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, BLK_STS_OK);
- else
- skd_resolve_req_exception(skdev, skreq);
- }
+ skd_release_skreq(skdev, skreq);
/*
- * Release the skreq, its FIT msg (if one), timeout slot,
- * and queue depth.
+ * Capture the outcome and post it back to the native request.
*/
- skd_release_skreq(skdev, skreq);
+ if (likely(cmp_status == SAM_STAT_GOOD)) {
+ skreq->status = BLK_STS_OK;
+ blk_mq_complete_request(rq);
+ } else {
+ skd_resolve_req_exception(skdev, skreq, rq);
+ }
/* skd_isr_comp_limit equal zero means no limit */
if (limit) {
@@ -2765,8 +1569,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
}
}
- if ((skdev->state == SKD_DRVR_STATE_PAUSING)
- && (skdev->in_flight) == 0) {
+ if (skdev->state == SKD_DRVR_STATE_PAUSING &&
+ skd_in_flight(skdev) == 0) {
skdev->state = SKD_DRVR_STATE_PAUSED;
wake_up_interruptible(&skdev->waitq);
}
@@ -2775,21 +1579,22 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
}
static void skd_complete_other(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr)
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr)
{
u32 req_id = 0;
u32 req_table;
u32 req_slot;
struct skd_special_context *skspcl;
+ lockdep_assert_held(&skdev->lock);
+
req_id = skcomp->tag;
req_table = req_id & SKD_ID_TABLE_MASK;
req_slot = req_id & SKD_ID_SLOT_MASK;
- pr_debug("%s:%s:%d table=0x%x id=0x%x slot=%d\n",
- skdev->name, __func__, __LINE__,
- req_table, req_id, req_slot);
+ dev_dbg(&skdev->pdev->dev, "table=0x%x id=0x%x slot=%d\n", req_table,
+ req_id, req_slot);
/*
* Based on the request id, determine how to dispatch this completion.
@@ -2799,28 +1604,12 @@ static void skd_complete_other(struct skd_device *skdev,
switch (req_table) {
case SKD_ID_RW_REQUEST:
/*
- * The caller, skd_completion_posted_isr() above,
+ * The caller, skd_isr_completion_posted() above,
* handles r/w requests. The only way we get here
* is if the req_slot is out of bounds.
*/
break;
- case SKD_ID_SPECIAL_REQUEST:
- /*
- * Make sure the req_slot is in bounds and that the id
- * matches.
- */
- if (req_slot < skdev->n_special) {
- skspcl = &skdev->skspcl_table[req_slot];
- if (skspcl->req.id == req_id &&
- skspcl->req.state == SKD_REQ_STATE_BUSY) {
- skd_complete_special(skdev,
- skcomp, skerr, skspcl);
- return;
- }
- }
- break;
-
case SKD_ID_INTERNAL:
if (req_slot == 0) {
skspcl = &skdev->internal_skspcl;
@@ -2851,72 +1640,9 @@ static void skd_complete_other(struct skd_device *skdev,
*/
}
-static void skd_complete_special(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl)
-{
- pr_debug("%s:%s:%d completing special request %p\n",
- skdev->name, __func__, __LINE__, skspcl);
- if (skspcl->orphaned) {
- /* Discard orphaned request */
- /* ?: Can this release directly or does it need
- * to use a worker? */
- pr_debug("%s:%s:%d release orphaned %p\n",
- skdev->name, __func__, __LINE__, skspcl);
- skd_release_special(skdev, skspcl);
- return;
- }
-
- skd_process_scsi_inq(skdev, skcomp, skerr, skspcl);
-
- skspcl->req.state = SKD_REQ_STATE_COMPLETED;
- skspcl->req.completion = *skcomp;
- skspcl->req.err_info = *skerr;
-
- skd_log_check_status(skdev, skspcl->req.completion.status, skerr->key,
- skerr->code, skerr->qual, skerr->fruc);
-
- wake_up_interruptible(&skdev->waitq);
-}
-
-/* assume spinlock is already held */
-static void skd_release_special(struct skd_device *skdev,
- struct skd_special_context *skspcl)
-{
- int i, was_depleted;
-
- for (i = 0; i < skspcl->req.n_sg; i++) {
- struct page *page = sg_page(&skspcl->req.sg[i]);
- __free_page(page);
- }
-
- was_depleted = (skdev->skspcl_free_list == NULL);
-
- skspcl->req.state = SKD_REQ_STATE_IDLE;
- skspcl->req.id += SKD_ID_INCR;
- skspcl->req.next =
- (struct skd_request_context *)skdev->skspcl_free_list;
- skdev->skspcl_free_list = (struct skd_special_context *)skspcl;
-
- if (was_depleted) {
- pr_debug("%s:%s:%d skspcl was depleted\n",
- skdev->name, __func__, __LINE__);
- /* Free list was depleted. Their might be waiters. */
- wake_up_interruptible(&skdev->waitq);
- }
-}
-
static void skd_reset_skcomp(struct skd_device *skdev)
{
- u32 nbytes;
- struct fit_completion_entry_v1 *skcomp;
-
- nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
- nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
-
- memset(skdev->skcomp_table, 0, nbytes);
+ memset(skdev->skcomp_table, 0, SKD_SKCOMP_SIZE);
skdev->skcomp_ix = 0;
skdev->skcomp_cycle = 1;
@@ -2941,7 +1667,7 @@ static void skd_completion_worker(struct work_struct *work)
* process everything in compq
*/
skd_isr_completion_posted(skdev, 0, &flush_enqueued);
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock_irqrestore(&skdev->lock, flags);
}
@@ -2951,14 +1677,13 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev);
static irqreturn_t
skd_isr(int irq, void *ptr)
{
- struct skd_device *skdev;
+ struct skd_device *skdev = ptr;
u32 intstat;
u32 ack;
int rc = 0;
int deferred = 0;
int flush_enqueued = 0;
- skdev = (struct skd_device *)ptr;
spin_lock(&skdev->lock);
for (;; ) {
@@ -2967,8 +1692,8 @@ skd_isr(int irq, void *ptr)
ack = FIT_INT_DEF_MASK;
ack &= intstat;
- pr_debug("%s:%s:%d intstat=0x%x ack=0x%x\n",
- skdev->name, __func__, __LINE__, intstat, ack);
+ dev_dbg(&skdev->pdev->dev, "intstat=0x%x ack=0x%x\n", intstat,
+ ack);
/* As long as there is an int pending on device, keep
* running loop. When none, get out, but if we've never
@@ -3018,12 +1743,12 @@ skd_isr(int irq, void *ptr)
}
if (unlikely(flush_enqueued))
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
if (deferred)
schedule_work(&skdev->completion_worker);
else if (!flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock(&skdev->lock);
@@ -3033,13 +1758,13 @@ skd_isr(int irq, void *ptr)
static void skd_drive_fault(struct skd_device *skdev)
{
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): Drive FAULT\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive FAULT\n");
}
static void skd_drive_disappeared(struct skd_device *skdev)
{
skdev->state = SKD_DRVR_STATE_DISAPPEARED;
- pr_err("(%s): Drive DISAPPEARED\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive DISAPPEARED\n");
}
static void skd_isr_fwstate(struct skd_device *skdev)
@@ -3052,10 +1777,9 @@ static void skd_isr_fwstate(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_STATUS);
state = sense & FIT_SR_DRIVE_STATE_MASK;
- pr_err("(%s): s1120 state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
- skd_drive_state_to_str(state), state);
+ dev_err(&skdev->pdev->dev, "s1120 state %s(%d)=>%s(%d)\n",
+ skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
+ skd_drive_state_to_str(state), state);
skdev->drive_state = state;
@@ -3066,7 +1790,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
break;
}
if (skdev->state == SKD_DRVR_STATE_RESTARTING)
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
if (skdev->state == SKD_DRVR_STATE_WAIT_BOOT) {
skdev->timer_countdown = SKD_STARTING_TIMO;
skdev->state = SKD_DRVR_STATE_STARTING;
@@ -3087,11 +1811,11 @@ static void skd_isr_fwstate(struct skd_device *skdev)
skdev->cur_max_queue_depth * 2 / 3 + 1;
if (skdev->queue_low_water_mark < 1)
skdev->queue_low_water_mark = 1;
- pr_info(
- "(%s): Queue depth limit=%d dev=%d lowat=%d\n",
- skd_name(skdev),
- skdev->cur_max_queue_depth,
- skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
+ dev_info(&skdev->pdev->dev,
+ "Queue depth limit=%d dev=%d lowat=%d\n",
+ skdev->cur_max_queue_depth,
+ skdev->dev_max_queue_depth,
+ skdev->queue_low_water_mark);
skd_refresh_device_data(skdev);
break;
@@ -3107,7 +1831,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
*/
skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
skdev->timer_countdown = SKD_TIMER_SECONDS(3);
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
break;
case FIT_SR_DRIVE_BUSY_ERASE:
skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
@@ -3128,8 +1852,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
}
break;
case FIT_SR_DRIVE_FW_BOOTING:
- pr_debug("%s:%s:%d ISR FIT_SR_DRIVE_FW_BOOTING %s\n",
- skdev->name, __func__, __LINE__, skdev->name);
+ dev_dbg(&skdev->pdev->dev, "ISR FIT_SR_DRIVE_FW_BOOTING\n");
skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
break;
@@ -3141,17 +1864,17 @@ static void skd_isr_fwstate(struct skd_device *skdev)
case FIT_SR_DRIVE_FAULT:
skd_drive_fault(skdev);
- skd_recover_requests(skdev, 0);
- blk_start_queue(skdev->queue);
+ skd_recover_requests(skdev);
+ schedule_work(&skdev->start_queue);
break;
/* PCIe bus returned all Fs? */
case 0xFF:
- pr_info("(%s): state=0x%x sense=0x%x\n",
- skd_name(skdev), state, sense);
+ dev_info(&skdev->pdev->dev, "state=0x%x sense=0x%x\n", state,
+ sense);
skd_drive_disappeared(skdev);
- skd_recover_requests(skdev, 0);
- blk_start_queue(skdev->queue);
+ skd_recover_requests(skdev);
+ schedule_work(&skdev->start_queue);
break;
default:
/*
@@ -3159,92 +1882,33 @@ static void skd_isr_fwstate(struct skd_device *skdev)
*/
break;
}
- pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
- skd_skdev_state_to_str(skdev->state), skdev->state);
+ dev_err(&skdev->pdev->dev, "Driver state %s(%d)=>%s(%d)\n",
+ skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
+ skd_skdev_state_to_str(skdev->state), skdev->state);
}
-static void skd_recover_requests(struct skd_device *skdev, int requeue)
+static void skd_recover_request(struct request *req, void *data, bool reserved)
{
- int i;
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq = &skdev->skreq_table[i];
-
- if (skreq->state == SKD_REQ_STATE_BUSY) {
- skd_log_skreq(skdev, skreq, "recover");
-
- SKD_ASSERT((skreq->id & SKD_ID_INCR) != 0);
- SKD_ASSERT(skreq->req != NULL);
-
- /* Release DMA resources for the request. */
- if (skreq->n_sg > 0)
- skd_postop_sg_list(skdev, skreq);
-
- if (requeue &&
- (unsigned long) ++skreq->req->special <
- SKD_MAX_RETRIES)
- blk_requeue_request(skdev->queue, skreq->req);
- else
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
-
- skreq->req = NULL;
-
- skreq->state = SKD_REQ_STATE_IDLE;
- skreq->id += SKD_ID_INCR;
- }
- if (i > 0)
- skreq[-1].next = skreq;
- skreq->next = NULL;
- }
- skdev->skreq_free_list = skdev->skreq_table;
+ struct skd_device *const skdev = data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
- for (i = 0; i < skdev->num_fitmsg_context; i++) {
- struct skd_fitmsg_context *skmsg = &skdev->skmsg_table[i];
+ if (skreq->state != SKD_REQ_STATE_BUSY)
+ return;
- if (skmsg->state == SKD_MSG_STATE_BUSY) {
- skd_log_skmsg(skdev, skmsg, "salvaged");
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) != 0);
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- }
- if (i > 0)
- skmsg[-1].next = skmsg;
- skmsg->next = NULL;
- }
- skdev->skmsg_free_list = skdev->skmsg_table;
+ skd_log_skreq(skdev, skreq, "recover");
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl = &skdev->skspcl_table[i];
+ /* Release DMA resources for the request. */
+ if (skreq->n_sg > 0)
+ skd_postop_sg_list(skdev, skreq);
- /* If orphaned, reclaim it because it has already been reported
- * to the process as an error (it was just waiting for
- * a completion that didn't come, and now it will never come)
- * If busy, change to a state that will cause it to error
- * out in the wait routine and let it do the normal
- * reporting and reclaiming
- */
- if (skspcl->req.state == SKD_REQ_STATE_BUSY) {
- if (skspcl->orphaned) {
- pr_debug("%s:%s:%d orphaned %p\n",
- skdev->name, __func__, __LINE__,
- skspcl);
- skd_release_special(skdev, skspcl);
- } else {
- pr_debug("%s:%s:%d not orphaned %p\n",
- skdev->name, __func__, __LINE__,
- skspcl);
- skspcl->req.state = SKD_REQ_STATE_ABORTED;
- }
- }
- }
- skdev->skspcl_free_list = skdev->skspcl_table;
-
- for (i = 0; i < SKD_N_TIMEOUT_SLOT; i++)
- skdev->timeout_slot[i] = 0;
+ skreq->state = SKD_REQ_STATE_IDLE;
+ skreq->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
+}
- skdev->in_flight = 0;
+static void skd_recover_requests(struct skd_device *skdev)
+{
+ blk_mq_tagset_busy_iter(&skdev->tag_set, skd_recover_request, skdev);
}
static void skd_isr_msg_from_dev(struct skd_device *skdev)
@@ -3255,8 +1919,8 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
mfd = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
- pr_debug("%s:%s:%d mfd=0x%x last_mtd=0x%x\n",
- skdev->name, __func__, __LINE__, mfd, skdev->last_mtd);
+ dev_dbg(&skdev->pdev->dev, "mfd=0x%x last_mtd=0x%x\n", mfd,
+ skdev->last_mtd);
/* ignore any mtd that is an ack for something we didn't send */
if (FIT_MXD_TYPE(mfd) != FIT_MXD_TYPE(skdev->last_mtd))
@@ -3267,13 +1931,10 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
skdev->proto_ver = FIT_PROTOCOL_MAJOR_VER(mfd);
if (skdev->proto_ver != FIT_PROTOCOL_VERSION_1) {
- pr_err("(%s): protocol mismatch\n",
- skdev->name);
- pr_err("(%s): got=%d support=%d\n",
- skdev->name, skdev->proto_ver,
- FIT_PROTOCOL_VERSION_1);
- pr_err("(%s): please upgrade driver\n",
- skdev->name);
+ dev_err(&skdev->pdev->dev, "protocol mismatch\n");
+ dev_err(&skdev->pdev->dev, " got=%d support=%d\n",
+ skdev->proto_ver, FIT_PROTOCOL_VERSION_1);
+ dev_err(&skdev->pdev->dev, " please upgrade driver\n");
skdev->state = SKD_DRVR_STATE_PROTOCOL_MISMATCH;
skd_soft_reset(skdev);
break;
@@ -3327,9 +1988,8 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
skdev->last_mtd = mtd;
- pr_err("(%s): Time sync driver=0x%x device=0x%x\n",
- skd_name(skdev),
- skdev->connect_time_stamp, skdev->drive_jiffies);
+ dev_err(&skdev->pdev->dev, "Time sync driver=0x%x device=0x%x\n",
+ skdev->connect_time_stamp, skdev->drive_jiffies);
break;
case FIT_MTD_ARM_QUEUE:
@@ -3351,8 +2011,7 @@ static void skd_disable_interrupts(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_CONTROL);
sense &= ~FIT_CR_ENABLE_INTERRUPTS;
SKD_WRITEL(skdev, sense, FIT_CONTROL);
- pr_debug("%s:%s:%d sense 0x%x\n",
- skdev->name, __func__, __LINE__, sense);
+ dev_dbg(&skdev->pdev->dev, "sense 0x%x\n", sense);
/* Note that the 1s is written. A 1-bit means
* disable, a 0 means enable.
@@ -3371,13 +2030,11 @@ static void skd_enable_interrupts(struct skd_device *skdev)
/* Note that the compliment of mask is written. A 1-bit means
* disable, a 0 means enable. */
SKD_WRITEL(skdev, ~val, FIT_INT_MASK_HOST);
- pr_debug("%s:%s:%d interrupt mask=0x%x\n",
- skdev->name, __func__, __LINE__, ~val);
+ dev_dbg(&skdev->pdev->dev, "interrupt mask=0x%x\n", ~val);
val = SKD_READL(skdev, FIT_CONTROL);
val |= FIT_CR_ENABLE_INTERRUPTS;
- pr_debug("%s:%s:%d control=0x%x\n",
- skdev->name, __func__, __LINE__, val);
+ dev_dbg(&skdev->pdev->dev, "control=0x%x\n", val);
SKD_WRITEL(skdev, val, FIT_CONTROL);
}
@@ -3393,8 +2050,7 @@ static void skd_soft_reset(struct skd_device *skdev)
val = SKD_READL(skdev, FIT_CONTROL);
val |= (FIT_CR_SOFT_RESET);
- pr_debug("%s:%s:%d control=0x%x\n",
- skdev->name, __func__, __LINE__, val);
+ dev_dbg(&skdev->pdev->dev, "control=0x%x\n", val);
SKD_WRITEL(skdev, val, FIT_CONTROL);
}
@@ -3411,8 +2067,7 @@ static void skd_start_device(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_STATUS);
- pr_debug("%s:%s:%d initial status=0x%x\n",
- skdev->name, __func__, __LINE__, sense);
+ dev_dbg(&skdev->pdev->dev, "initial status=0x%x\n", sense);
state = sense & FIT_SR_DRIVE_STATE_MASK;
skdev->drive_state = state;
@@ -3425,25 +2080,23 @@ static void skd_start_device(struct skd_device *skdev)
switch (skdev->drive_state) {
case FIT_SR_DRIVE_OFFLINE:
- pr_err("(%s): Drive offline...\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive offline...\n");
break;
case FIT_SR_DRIVE_FW_BOOTING:
- pr_debug("%s:%s:%d FIT_SR_DRIVE_FW_BOOTING %s\n",
- skdev->name, __func__, __LINE__, skdev->name);
+ dev_dbg(&skdev->pdev->dev, "FIT_SR_DRIVE_FW_BOOTING\n");
skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
break;
case FIT_SR_DRIVE_BUSY_SANITIZE:
- pr_info("(%s): Start: BUSY_SANITIZE\n",
- skd_name(skdev));
+ dev_info(&skdev->pdev->dev, "Start: BUSY_SANITIZE\n");
skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
case FIT_SR_DRIVE_BUSY_ERASE:
- pr_info("(%s): Start: BUSY_ERASE\n", skd_name(skdev));
+ dev_info(&skdev->pdev->dev, "Start: BUSY_ERASE\n");
skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
@@ -3454,14 +2107,13 @@ static void skd_start_device(struct skd_device *skdev)
break;
case FIT_SR_DRIVE_BUSY:
- pr_err("(%s): Drive Busy...\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive Busy...\n");
skdev->state = SKD_DRVR_STATE_BUSY;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
case FIT_SR_DRIVE_SOFT_RESET:
- pr_err("(%s) drive soft reset in prog\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "drive soft reset in prog\n");
break;
case FIT_SR_DRIVE_FAULT:
@@ -3471,9 +2123,8 @@ static void skd_start_device(struct skd_device *skdev)
*/
skd_drive_fault(skdev);
/*start the queue so we can respond with error to requests */
- pr_debug("%s:%s:%d starting %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_start_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "starting queue\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -3483,38 +2134,33 @@ static void skd_start_device(struct skd_device *skdev)
* to the BAR1 addresses. */
skd_drive_disappeared(skdev);
/*start the queue so we can respond with error to requests */
- pr_debug("%s:%s:%d starting %s queue to error-out reqs\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_start_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev,
+ "starting queue to error-out reqs\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
default:
- pr_err("(%s) Start: unknown state %x\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "Start: unknown state %x\n",
+ skdev->drive_state);
break;
}
state = SKD_READL(skdev, FIT_CONTROL);
- pr_debug("%s:%s:%d FIT Control Status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "FIT Control Status=0x%x\n", state);
state = SKD_READL(skdev, FIT_INT_STATUS_HOST);
- pr_debug("%s:%s:%d Intr Status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Intr Status=0x%x\n", state);
state = SKD_READL(skdev, FIT_INT_MASK_HOST);
- pr_debug("%s:%s:%d Intr Mask=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Intr Mask=0x%x\n", state);
state = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
- pr_debug("%s:%s:%d Msg from Dev=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Msg from Dev=0x%x\n", state);
state = SKD_READL(skdev, FIT_HW_VERSION);
- pr_debug("%s:%s:%d HW version=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "HW version=0x%x\n", state);
spin_unlock_irqrestore(&skdev->lock, flags);
}
@@ -3529,14 +2175,12 @@ static void skd_stop_device(struct skd_device *skdev)
spin_lock_irqsave(&skdev->lock, flags);
if (skdev->state != SKD_DRVR_STATE_ONLINE) {
- pr_err("(%s): skd_stop_device not online no sync\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s not online no sync\n", __func__);
goto stop_out;
}
if (skspcl->req.state != SKD_REQ_STATE_IDLE) {
- pr_err("(%s): skd_stop_device no special\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s no special\n", __func__);
goto stop_out;
}
@@ -3554,16 +2198,13 @@ static void skd_stop_device(struct skd_device *skdev)
switch (skdev->sync_done) {
case 0:
- pr_err("(%s): skd_stop_device no sync\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s no sync\n", __func__);
break;
case 1:
- pr_err("(%s): skd_stop_device sync done\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s sync done\n", __func__);
break;
default:
- pr_err("(%s): skd_stop_device sync error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s sync error\n", __func__);
}
stop_out:
@@ -3593,8 +2234,8 @@ stop_out:
}
if (dev_state != FIT_SR_DRIVE_INIT)
- pr_err("(%s): skd_stop_device state error 0x%02x\n",
- skd_name(skdev), dev_state);
+ dev_err(&skdev->pdev->dev, "%s state error 0x%02x\n", __func__,
+ dev_state);
}
/* assume spinlock is held */
@@ -3607,8 +2248,7 @@ static void skd_restart_device(struct skd_device *skdev)
state = SKD_READL(skdev, FIT_STATUS);
- pr_debug("%s:%s:%d drive status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "drive status=0x%x\n", state);
state &= FIT_SR_DRIVE_STATE_MASK;
skdev->drive_state = state;
@@ -3628,9 +2268,8 @@ static int skd_quiesce_dev(struct skd_device *skdev)
switch (skdev->state) {
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
- pr_debug("%s:%s:%d stopping %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_stop_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "stopping queue\n");
+ blk_mq_stop_hw_queues(skdev->queue);
break;
case SKD_DRVR_STATE_ONLINE:
case SKD_DRVR_STATE_STOPPING:
@@ -3642,8 +2281,8 @@ static int skd_quiesce_dev(struct skd_device *skdev)
case SKD_DRVR_STATE_RESUMING:
default:
rc = -EINVAL;
- pr_debug("%s:%s:%d state [%d] not implemented\n",
- skdev->name, __func__, __LINE__, skdev->state);
+ dev_dbg(&skdev->pdev->dev, "state [%d] not implemented\n",
+ skdev->state);
}
return rc;
}
@@ -3655,8 +2294,7 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
skd_log_skdev(skdev, "unquiesce");
if (skdev->state == SKD_DRVR_STATE_ONLINE) {
- pr_debug("%s:%s:%d **** device already ONLINE\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** device already ONLINE\n");
return 0;
}
if (skdev->drive_state != FIT_SR_DRIVE_ONLINE) {
@@ -3669,8 +2307,7 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
* to become available.
*/
skdev->state = SKD_DRVR_STATE_BUSY;
- pr_debug("%s:%s:%d drive BUSY state\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "drive BUSY state\n");
return 0;
}
@@ -3689,26 +2326,24 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
case SKD_DRVR_STATE_IDLE:
case SKD_DRVR_STATE_LOAD:
skdev->state = SKD_DRVR_STATE_ONLINE;
- pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_skdev_state_to_str(prev_driver_state),
- prev_driver_state, skd_skdev_state_to_str(skdev->state),
- skdev->state);
- pr_debug("%s:%s:%d **** device ONLINE...starting block queue\n",
- skdev->name, __func__, __LINE__);
- pr_debug("%s:%s:%d starting %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- pr_info("(%s): STEC s1120 ONLINE\n", skd_name(skdev));
- blk_start_queue(skdev->queue);
+ dev_err(&skdev->pdev->dev, "Driver state %s(%d)=>%s(%d)\n",
+ skd_skdev_state_to_str(prev_driver_state),
+ prev_driver_state, skd_skdev_state_to_str(skdev->state),
+ skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "**** device ONLINE...starting block queue\n");
+ dev_dbg(&skdev->pdev->dev, "starting queue\n");
+ dev_info(&skdev->pdev->dev, "STEC s1120 ONLINE\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = 1;
wake_up_interruptible(&skdev->waitq);
break;
case SKD_DRVR_STATE_DISAPPEARED:
default:
- pr_debug("%s:%s:%d **** driver state %d, not implemented \n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "**** driver state %d, not implemented\n",
+ skdev->state);
return -EBUSY;
}
return 0;
@@ -3726,11 +2361,10 @@ static irqreturn_t skd_reserved_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
- pr_err("(%s): MSIX reserved irq %d = 0x%x\n", skd_name(skdev),
- irq, SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_err(&skdev->pdev->dev, "MSIX reserved irq %d = 0x%x\n", irq,
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_INT_RESERVED_MASK, FIT_INT_STATUS_HOST);
spin_unlock_irqrestore(&skdev->lock, flags);
return IRQ_HANDLED;
@@ -3742,9 +2376,8 @@ static irqreturn_t skd_statec_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_FW_STATE_CHANGE, FIT_INT_STATUS_HOST);
skd_isr_fwstate(skdev);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3759,19 +2392,18 @@ static irqreturn_t skd_comp_q(int irq, void *skd_host_data)
int deferred;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_COMPLETION_POSTED, FIT_INT_STATUS_HOST);
deferred = skd_isr_completion_posted(skdev, skd_isr_comp_limit,
&flush_enqueued);
if (flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
if (deferred)
schedule_work(&skdev->completion_worker);
else if (!flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3784,9 +2416,8 @@ static irqreturn_t skd_msg_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_MSG_FROM_DEV, FIT_INT_STATUS_HOST);
skd_isr_msg_from_dev(skdev);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3799,9 +2430,8 @@ static irqreturn_t skd_qfull_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_INT_QUEUE_FULL, FIT_INT_STATUS_HOST);
spin_unlock_irqrestore(&skdev->lock, flags);
return IRQ_HANDLED;
@@ -3850,8 +2480,7 @@ static int skd_acquire_msix(struct skd_device *skdev)
rc = pci_alloc_irq_vectors(pdev, SKD_MAX_MSIX_COUNT, SKD_MAX_MSIX_COUNT,
PCI_IRQ_MSIX);
if (rc < 0) {
- pr_err("(%s): failed to enable MSI-X %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev, "failed to enable MSI-X %d\n", rc);
goto out;
}
@@ -3859,8 +2488,7 @@ static int skd_acquire_msix(struct skd_device *skdev)
sizeof(struct skd_msix_entry), GFP_KERNEL);
if (!skdev->msix_entries) {
rc = -ENOMEM;
- pr_err("(%s): msix table allocation error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "msix table allocation error\n");
goto out;
}
@@ -3877,16 +2505,15 @@ static int skd_acquire_msix(struct skd_device *skdev)
msix_entries[i].handler, 0,
qentry->isr_name, skdev);
if (rc) {
- pr_err("(%s): Unable to register(%d) MSI-X "
- "handler %d: %s\n",
- skd_name(skdev), rc, i, qentry->isr_name);
+ dev_err(&skdev->pdev->dev,
+ "Unable to register(%d) MSI-X handler %d: %s\n",
+ rc, i, qentry->isr_name);
goto msix_out;
}
}
- pr_debug("%s:%s:%d %s: <%s> msix %d irq(s) enabled\n",
- skdev->name, __func__, __LINE__,
- pci_name(pdev), skdev->name, SKD_MAX_MSIX_COUNT);
+ dev_dbg(&skdev->pdev->dev, "%d msix irq(s) enabled\n",
+ SKD_MAX_MSIX_COUNT);
return 0;
msix_out:
@@ -3909,8 +2536,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
if (!rc)
return 0;
- pr_err("(%s): failed to enable MSI-X, re-trying with MSI %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev,
+ "failed to enable MSI-X, re-trying with MSI %d\n", rc);
}
snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d", DRV_NAME,
@@ -3920,8 +2547,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
irq_flag |= PCI_IRQ_MSI;
rc = pci_alloc_irq_vectors(pdev, 1, 1, irq_flag);
if (rc < 0) {
- pr_err("(%s): failed to allocate the MSI interrupt %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev,
+ "failed to allocate the MSI interrupt %d\n", rc);
return rc;
}
@@ -3930,8 +2557,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
skdev->isr_name, skdev);
if (rc) {
pci_free_irq_vectors(pdev);
- pr_err("(%s): failed to allocate interrupt %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev, "failed to allocate interrupt %d\n",
+ rc);
return rc;
}
@@ -3965,20 +2592,45 @@ static void skd_release_irq(struct skd_device *skdev)
*****************************************************************************
*/
+static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ enum dma_data_direction dir)
+{
+ struct device *dev = &skdev->pdev->dev;
+ void *buf;
+
+ buf = kmem_cache_alloc(s, gfp);
+ if (!buf)
+ return NULL;
+ *dma_handle = dma_map_single(dev, buf, s->size, dir);
+ if (dma_mapping_error(dev, *dma_handle)) {
+ kfree(buf);
+ buf = NULL;
+ }
+ return buf;
+}
+
+static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
+ void *vaddr, dma_addr_t dma_handle,
+ enum dma_data_direction dir)
+{
+ if (!vaddr)
+ return;
+
+ dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir);
+ kmem_cache_free(s, vaddr);
+}
+
static int skd_cons_skcomp(struct skd_device *skdev)
{
int rc = 0;
struct fit_completion_entry_v1 *skcomp;
- u32 nbytes;
- nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
- nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
+ dev_dbg(&skdev->pdev->dev,
+ "comp pci_alloc, total bytes %zd entries %d\n",
+ SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY);
- pr_debug("%s:%s:%d comp pci_alloc, total bytes %d entries %d\n",
- skdev->name, __func__, __LINE__,
- nbytes, SKD_N_COMPLETION_ENTRY);
-
- skcomp = pci_zalloc_consistent(skdev->pdev, nbytes,
+ skcomp = pci_zalloc_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
&skdev->cq_dma_address);
if (skcomp == NULL) {
@@ -4000,14 +2652,14 @@ static int skd_cons_skmsg(struct skd_device *skdev)
int rc = 0;
u32 i;
- pr_debug("%s:%s:%d skmsg_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_fitmsg_context),
- skdev->num_fitmsg_context,
- sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
+ dev_dbg(&skdev->pdev->dev,
+ "skmsg_table kcalloc, struct %lu, count %u total %lu\n",
+ sizeof(struct skd_fitmsg_context), skdev->num_fitmsg_context,
+ sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
- skdev->skmsg_table = kzalloc(sizeof(struct skd_fitmsg_context)
- *skdev->num_fitmsg_context, GFP_KERNEL);
+ skdev->skmsg_table = kcalloc(skdev->num_fitmsg_context,
+ sizeof(struct skd_fitmsg_context),
+ GFP_KERNEL);
if (skdev->skmsg_table == NULL) {
rc = -ENOMEM;
goto err_out;
@@ -4020,9 +2672,8 @@ static int skd_cons_skmsg(struct skd_device *skdev)
skmsg->id = i + SKD_ID_FIT_MSG;
- skmsg->state = SKD_MSG_STATE_IDLE;
skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
- SKD_N_FITMSG_BYTES + 64,
+ SKD_N_FITMSG_BYTES,
&skmsg->mb_dma_address);
if (skmsg->msg_buf == NULL) {
@@ -4030,22 +2681,13 @@ static int skd_cons_skmsg(struct skd_device *skdev)
goto err_out;
}
- skmsg->offset = (u32)((u64)skmsg->msg_buf &
- (~FIT_QCMD_BASE_ADDRESS_MASK));
- skmsg->msg_buf += ~FIT_QCMD_BASE_ADDRESS_MASK;
- skmsg->msg_buf = (u8 *)((u64)skmsg->msg_buf &
- FIT_QCMD_BASE_ADDRESS_MASK);
- skmsg->mb_dma_address += ~FIT_QCMD_BASE_ADDRESS_MASK;
- skmsg->mb_dma_address &= FIT_QCMD_BASE_ADDRESS_MASK;
+ WARN(((uintptr_t)skmsg->msg_buf | skmsg->mb_dma_address) &
+ (FIT_QCMD_ALIGN - 1),
+ "not aligned: msg_buf %p mb_dma_address %#llx\n",
+ skmsg->msg_buf, skmsg->mb_dma_address);
memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES);
-
- skmsg->next = &skmsg[1];
}
- /* Free list is in order starting with the 0th entry. */
- skdev->skmsg_table[i - 1].next = NULL;
- skdev->skmsg_free_list = skdev->skmsg_table;
-
err_out:
return rc;
}
@@ -4055,18 +2697,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
dma_addr_t *ret_dma_addr)
{
struct fit_sg_descriptor *sg_list;
- u32 nbytes;
-
- nbytes = sizeof(*sg_list) * n_sg;
- sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
+ sg_list = skd_alloc_dma(skdev, skdev->sglist_cache, ret_dma_addr,
+ GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
if (sg_list != NULL) {
uint64_t dma_address = *ret_dma_addr;
u32 i;
- memset(sg_list, 0, nbytes);
-
for (i = 0; i < n_sg - 1; i++) {
uint64_t ndp_off;
ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
@@ -4079,153 +2717,63 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
return sg_list;
}
-static int skd_cons_skreq(struct skd_device *skdev)
+static void skd_free_sg_list(struct skd_device *skdev,
+ struct fit_sg_descriptor *sg_list,
+ dma_addr_t dma_addr)
{
- int rc = 0;
- u32 i;
-
- pr_debug("%s:%s:%d skreq_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_request_context),
- skdev->num_req_context,
- sizeof(struct skd_request_context) * skdev->num_req_context);
-
- skdev->skreq_table = kzalloc(sizeof(struct skd_request_context)
- * skdev->num_req_context, GFP_KERNEL);
- if (skdev->skreq_table == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- pr_debug("%s:%s:%d alloc sg_table sg_per_req %u scatlist %lu total %lu\n",
- skdev->name, __func__, __LINE__,
- skdev->sgs_per_request, sizeof(struct scatterlist),
- skdev->sgs_per_request * sizeof(struct scatterlist));
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq;
-
- skreq = &skdev->skreq_table[i];
-
- skreq->id = i + SKD_ID_RW_REQUEST;
- skreq->state = SKD_REQ_STATE_IDLE;
-
- skreq->sg = kzalloc(sizeof(struct scatterlist) *
- skdev->sgs_per_request, GFP_KERNEL);
- if (skreq->sg == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
- sg_init_table(skreq->sg, skdev->sgs_per_request);
-
- skreq->sksg_list = skd_cons_sg_list(skdev,
- skdev->sgs_per_request,
- &skreq->sksg_dma_address);
-
- if (skreq->sksg_list == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skreq->next = &skreq[1];
- }
-
- /* Free list is in order starting with the 0th entry. */
- skdev->skreq_table[i - 1].next = NULL;
- skdev->skreq_free_list = skdev->skreq_table;
+ if (WARN_ON_ONCE(!sg_list))
+ return;
-err_out:
- return rc;
+ skd_free_dma(skdev, skdev->sglist_cache, sg_list, dma_addr,
+ DMA_TO_DEVICE);
}
-static int skd_cons_skspcl(struct skd_device *skdev)
+static int skd_init_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx, unsigned int numa_node)
{
- int rc = 0;
- u32 i, nbytes;
-
- pr_debug("%s:%s:%d skspcl_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_special_context),
- skdev->n_special,
- sizeof(struct skd_special_context) * skdev->n_special);
-
- skdev->skspcl_table = kzalloc(sizeof(struct skd_special_context)
- * skdev->n_special, GFP_KERNEL);
- if (skdev->skspcl_table == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
+ struct skd_device *skdev = set->driver_data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl;
-
- skspcl = &skdev->skspcl_table[i];
-
- skspcl->req.id = i + SKD_ID_SPECIAL_REQUEST;
- skspcl->req.state = SKD_REQ_STATE_IDLE;
-
- skspcl->req.next = &skspcl[1].req;
-
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-
- skspcl->msg_buf =
- pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
- if (skspcl->msg_buf == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
- SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
- if (skspcl->req.sg == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skspcl->req.sksg_list = skd_cons_sg_list(skdev,
- SKD_N_SG_PER_SPECIAL,
- &skspcl->req.
- sksg_dma_address);
- if (skspcl->req.sksg_list == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
- }
+ skreq->state = SKD_REQ_STATE_IDLE;
+ skreq->sg = (void *)(skreq + 1);
+ sg_init_table(skreq->sg, skd_sgs_per_request);
+ skreq->sksg_list = skd_cons_sg_list(skdev, skd_sgs_per_request,
+ &skreq->sksg_dma_address);
- /* Free list is in order starting with the 0th entry. */
- skdev->skspcl_table[i - 1].req.next = NULL;
- skdev->skspcl_free_list = skdev->skspcl_table;
+ return skreq->sksg_list ? 0 : -ENOMEM;
+}
- return rc;
+static void skd_exit_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx)
+{
+ struct skd_device *skdev = set->driver_data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
-err_out:
- return rc;
+ skd_free_sg_list(skdev, skreq->sksg_list, skreq->sksg_dma_address);
}
static int skd_cons_sksb(struct skd_device *skdev)
{
int rc = 0;
struct skd_special_context *skspcl;
- u32 nbytes;
skspcl = &skdev->internal_skspcl;
skspcl->req.id = 0 + SKD_ID_INTERNAL;
skspcl->req.state = SKD_REQ_STATE_IDLE;
- nbytes = SKD_N_INTERNAL_BYTES;
-
- skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->db_dma_address);
+ skspcl->data_buf = skd_alloc_dma(skdev, skdev->databuf_cache,
+ &skspcl->db_dma_address,
+ GFP_DMA | __GFP_ZERO,
+ DMA_BIDIRECTIONAL);
if (skspcl->data_buf == NULL) {
rc = -ENOMEM;
goto err_out;
}
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
+ skspcl->msg_buf = skd_alloc_dma(skdev, skdev->msgbuf_cache,
+ &skspcl->mb_dma_address,
+ GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
if (skspcl->msg_buf == NULL) {
rc = -ENOMEM;
goto err_out;
@@ -4247,6 +2795,14 @@ err_out:
return rc;
}
+static const struct blk_mq_ops skd_mq_ops = {
+ .queue_rq = skd_mq_queue_rq,
+ .complete = skd_complete_rq,
+ .timeout = skd_timed_out,
+ .init_request = skd_init_request,
+ .exit_request = skd_exit_request,
+};
+
static int skd_cons_disk(struct skd_device *skdev)
{
int rc = 0;
@@ -4268,31 +2824,46 @@ static int skd_cons_disk(struct skd_device *skdev)
disk->fops = &skd_blockdev_ops;
disk->private_data = skdev;
- q = blk_init_queue(skd_request_fn, &skdev->lock);
- if (!q) {
- rc = -ENOMEM;
+ memset(&skdev->tag_set, 0, sizeof(skdev->tag_set));
+ skdev->tag_set.ops = &skd_mq_ops;
+ skdev->tag_set.nr_hw_queues = 1;
+ skdev->tag_set.queue_depth = skd_max_queue_depth;
+ skdev->tag_set.cmd_size = sizeof(struct skd_request_context) +
+ skdev->sgs_per_request * sizeof(struct scatterlist);
+ skdev->tag_set.numa_node = NUMA_NO_NODE;
+ skdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+ BLK_MQ_F_SG_MERGE |
+ BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_FIFO);
+ skdev->tag_set.driver_data = skdev;
+ rc = blk_mq_alloc_tag_set(&skdev->tag_set);
+ if (rc)
+ goto err_out;
+ q = blk_mq_init_queue(&skdev->tag_set);
+ if (IS_ERR(q)) {
+ blk_mq_free_tag_set(&skdev->tag_set);
+ rc = PTR_ERR(q);
goto err_out;
}
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+ q->queuedata = skdev;
skdev->queue = q;
disk->queue = q;
- q->queuedata = skdev;
blk_queue_write_cache(q, true, true);
blk_queue_max_segments(q, skdev->sgs_per_request);
blk_queue_max_hw_sectors(q, SKD_N_MAX_SECTORS);
- /* set sysfs ptimal_io_size to 8K */
+ /* set optimal I/O size to 8KB */
blk_queue_io_opt(q, 8192);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_rq_timeout(q, 8 * HZ);
+
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d stopping %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_stop_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "stopping queue\n");
+ blk_mq_stop_hw_queues(skdev->queue);
spin_unlock_irqrestore(&skdev->lock, flags);
err_out:
@@ -4306,13 +2877,13 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
{
struct skd_device *skdev;
int blk_major = skd_major;
+ size_t size;
int rc;
skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
if (!skdev) {
- pr_err(PFX "(%s): memory alloc failure\n",
- pci_name(pdev));
+ dev_err(&pdev->dev, "memory alloc failure\n");
return NULL;
}
@@ -4320,60 +2891,71 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
skdev->pdev = pdev;
skdev->devno = skd_next_devno++;
skdev->major = blk_major;
- sprintf(skdev->name, DRV_NAME "%d", skdev->devno);
skdev->dev_max_queue_depth = 0;
skdev->num_req_context = skd_max_queue_depth;
skdev->num_fitmsg_context = skd_max_queue_depth;
- skdev->n_special = skd_max_pass_thru;
skdev->cur_max_queue_depth = 1;
skdev->queue_low_water_mark = 1;
skdev->proto_ver = 99;
skdev->sgs_per_request = skd_sgs_per_request;
skdev->dbg_level = skd_dbg_level;
- atomic_set(&skdev->device_count, 0);
-
spin_lock_init(&skdev->lock);
+ INIT_WORK(&skdev->start_queue, skd_start_queue);
INIT_WORK(&skdev->completion_worker, skd_completion_worker);
- pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skcomp(skdev);
- if (rc < 0)
+ size = max(SKD_N_FITMSG_BYTES, SKD_N_SPECIAL_FITMSG_BYTES);
+ skdev->msgbuf_cache = kmem_cache_create("skd-msgbuf", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->msgbuf_cache)
goto err_out;
-
- pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skmsg(skdev);
- if (rc < 0)
+ WARN_ONCE(kmem_cache_size(skdev->msgbuf_cache) < size,
+ "skd-msgbuf: %d < %zd\n",
+ kmem_cache_size(skdev->msgbuf_cache), size);
+ size = skd_sgs_per_request * sizeof(struct fit_sg_descriptor);
+ skdev->sglist_cache = kmem_cache_create("skd-sglist", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->sglist_cache)
+ goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->sglist_cache) < size,
+ "skd-sglist: %d < %zd\n",
+ kmem_cache_size(skdev->sglist_cache), size);
+ size = SKD_N_INTERNAL_BYTES;
+ skdev->databuf_cache = kmem_cache_create("skd-databuf", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->databuf_cache)
goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->databuf_cache) < size,
+ "skd-databuf: %d < %zd\n",
+ kmem_cache_size(skdev->databuf_cache), size);
- pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skreq(skdev);
+ dev_dbg(&skdev->pdev->dev, "skcomp\n");
+ rc = skd_cons_skcomp(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skspcl(skdev);
+ dev_dbg(&skdev->pdev->dev, "skmsg\n");
+ rc = skd_cons_skmsg(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "sksb\n");
rc = skd_cons_sksb(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "disk\n");
rc = skd_cons_disk(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d VICTORY\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "VICTORY\n");
return skdev;
err_out:
- pr_debug("%s:%s:%d construct failed\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "construct failed\n");
skd_destruct(skdev);
return NULL;
}
@@ -4386,14 +2968,9 @@ err_out:
static void skd_free_skcomp(struct skd_device *skdev)
{
- if (skdev->skcomp_table != NULL) {
- u32 nbytes;
-
- nbytes = sizeof(skdev->skcomp_table[0]) *
- SKD_N_COMPLETION_ENTRY;
- pci_free_consistent(skdev->pdev, nbytes,
+ if (skdev->skcomp_table)
+ pci_free_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
skdev->skcomp_table, skdev->cq_dma_address);
- }
skdev->skcomp_table = NULL;
skdev->cq_dma_address = 0;
@@ -4412,8 +2989,6 @@ static void skd_free_skmsg(struct skd_device *skdev)
skmsg = &skdev->skmsg_table[i];
if (skmsg->msg_buf != NULL) {
- skmsg->msg_buf += skmsg->offset;
- skmsg->mb_dma_address += skmsg->offset;
pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES,
skmsg->msg_buf,
skmsg->mb_dma_address);
@@ -4426,109 +3001,23 @@ static void skd_free_skmsg(struct skd_device *skdev)
skdev->skmsg_table = NULL;
}
-static void skd_free_sg_list(struct skd_device *skdev,
- struct fit_sg_descriptor *sg_list,
- u32 n_sg, dma_addr_t dma_addr)
-{
- if (sg_list != NULL) {
- u32 nbytes;
-
- nbytes = sizeof(*sg_list) * n_sg;
-
- pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
- }
-}
-
-static void skd_free_skreq(struct skd_device *skdev)
-{
- u32 i;
-
- if (skdev->skreq_table == NULL)
- return;
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq;
-
- skreq = &skdev->skreq_table[i];
-
- skd_free_sg_list(skdev, skreq->sksg_list,
- skdev->sgs_per_request,
- skreq->sksg_dma_address);
-
- skreq->sksg_list = NULL;
- skreq->sksg_dma_address = 0;
-
- kfree(skreq->sg);
- }
-
- kfree(skdev->skreq_table);
- skdev->skreq_table = NULL;
-}
-
-static void skd_free_skspcl(struct skd_device *skdev)
-{
- u32 i;
- u32 nbytes;
-
- if (skdev->skspcl_table == NULL)
- return;
-
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl;
-
- skspcl = &skdev->skspcl_table[i];
-
- if (skspcl->msg_buf != NULL) {
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->msg_buf,
- skspcl->mb_dma_address);
- }
-
- skspcl->msg_buf = NULL;
- skspcl->mb_dma_address = 0;
-
- skd_free_sg_list(skdev, skspcl->req.sksg_list,
- SKD_N_SG_PER_SPECIAL,
- skspcl->req.sksg_dma_address);
-
- skspcl->req.sksg_list = NULL;
- skspcl->req.sksg_dma_address = 0;
-
- kfree(skspcl->req.sg);
- }
-
- kfree(skdev->skspcl_table);
- skdev->skspcl_table = NULL;
-}
-
static void skd_free_sksb(struct skd_device *skdev)
{
- struct skd_special_context *skspcl;
- u32 nbytes;
-
- skspcl = &skdev->internal_skspcl;
-
- if (skspcl->data_buf != NULL) {
- nbytes = SKD_N_INTERNAL_BYTES;
+ struct skd_special_context *skspcl = &skdev->internal_skspcl;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->data_buf, skspcl->db_dma_address);
- }
+ skd_free_dma(skdev, skdev->databuf_cache, skspcl->data_buf,
+ skspcl->db_dma_address, DMA_BIDIRECTIONAL);
skspcl->data_buf = NULL;
skspcl->db_dma_address = 0;
- if (skspcl->msg_buf != NULL) {
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->msg_buf, skspcl->mb_dma_address);
- }
+ skd_free_dma(skdev, skdev->msgbuf_cache, skspcl->msg_buf,
+ skspcl->mb_dma_address, DMA_TO_DEVICE);
skspcl->msg_buf = NULL;
skspcl->mb_dma_address = 0;
- skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
+ skd_free_sg_list(skdev, skspcl->req.sksg_list,
skspcl->req.sksg_dma_address);
skspcl->req.sksg_list = NULL;
@@ -4539,15 +3028,20 @@ static void skd_free_disk(struct skd_device *skdev)
{
struct gendisk *disk = skdev->disk;
- if (disk != NULL) {
- struct request_queue *q = disk->queue;
+ if (disk && (disk->flags & GENHD_FL_UP))
+ del_gendisk(disk);
- if (disk->flags & GENHD_FL_UP)
- del_gendisk(disk);
- if (q)
- blk_cleanup_queue(q);
- put_disk(disk);
+ if (skdev->queue) {
+ blk_cleanup_queue(skdev->queue);
+ skdev->queue = NULL;
+ if (disk)
+ disk->queue = NULL;
}
+
+ if (skdev->tag_set.tags)
+ blk_mq_free_tag_set(&skdev->tag_set);
+
+ put_disk(disk);
skdev->disk = NULL;
}
@@ -4556,26 +3050,25 @@ static void skd_destruct(struct skd_device *skdev)
if (skdev == NULL)
return;
+ cancel_work_sync(&skdev->start_queue);
- pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "disk\n");
skd_free_disk(skdev);
- pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "sksb\n");
skd_free_sksb(skdev);
- pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
- skd_free_skspcl(skdev);
-
- pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
- skd_free_skreq(skdev);
-
- pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skmsg\n");
skd_free_skmsg(skdev);
- pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skcomp\n");
skd_free_skcomp(skdev);
- pr_debug("%s:%s:%d skdev\n", skdev->name, __func__, __LINE__);
+ kmem_cache_destroy(skdev->databuf_cache);
+ kmem_cache_destroy(skdev->sglist_cache);
+ kmem_cache_destroy(skdev->msgbuf_cache);
+
+ dev_dbg(&skdev->pdev->dev, "skdev\n");
kfree(skdev);
}
@@ -4592,9 +3085,8 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
skdev = bdev->bd_disk->private_data;
- pr_debug("%s:%s:%d %s: CMD[%s] getgeo device\n",
- skdev->name, __func__, __LINE__,
- bdev->bd_disk->disk_name, current->comm);
+ dev_dbg(&skdev->pdev->dev, "%s: CMD[%s] getgeo device\n",
+ bdev->bd_disk->disk_name, current->comm);
if (skdev->read_cap_is_valid) {
capacity = get_capacity(skdev->disk);
@@ -4609,18 +3101,16 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int skd_bdev_attach(struct device *parent, struct skd_device *skdev)
{
- pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "add_disk\n");
device_add_disk(parent, skdev->disk);
return 0;
}
static const struct block_device_operations skd_blockdev_ops = {
.owner = THIS_MODULE,
- .ioctl = skd_bdev_ioctl,
.getgeo = skd_bdev_getgeo,
};
-
/*
*****************************************************************************
* PCIe DRIVER GLUE
@@ -4671,10 +3161,8 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
char pci_str[32];
struct skd_device *skdev;
- pr_info("STEC s1120 Driver(%s) version %s-b%s\n",
- DRV_NAME, DRV_VERSION, DRV_BUILD_ID);
- pr_info("(skd?:??:[%s]): vendor=%04X device=%04x\n",
- pci_name(pdev), pdev->vendor, pdev->device);
+ dev_dbg(&pdev->dev, "vendor=%04X device=%04x\n", pdev->vendor,
+ pdev->device);
rc = pci_enable_device(pdev);
if (rc)
@@ -4685,16 +3173,13 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (!rc) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
-
- pr_err("(%s): consistent DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "consistent DMA mask error %d\n",
+ rc);
}
} else {
- (rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)));
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
-
- pr_err("(%s): DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
}
}
@@ -4714,19 +3199,17 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
skd_pci_info(skdev, pci_str);
- pr_info("(%s): %s 64bit\n", skd_name(skdev), pci_str);
+ dev_info(&pdev->dev, "%s 64bit\n", pci_str);
pci_set_master(pdev);
rc = pci_enable_pcie_error_reporting(pdev);
if (rc) {
- pr_err(
- "(%s): bad enable of PCIe error reporting rc=%d\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev,
+ "bad enable of PCIe error reporting rc=%d\n", rc);
skdev->pcie_error_reporting_is_enabled = 0;
} else
skdev->pcie_error_reporting_is_enabled = 1;
-
pci_set_drvdata(pdev, skdev);
for (i = 0; i < SKD_MAX_BARS; i++) {
@@ -4735,21 +3218,19 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
skdev->mem_size[i]);
if (!skdev->mem_map[i]) {
- pr_err("(%s): Unable to map adapter memory!\n",
- skd_name(skdev));
+ dev_err(&pdev->dev,
+ "Unable to map adapter memory!\n");
rc = -ENODEV;
goto err_out_iounmap;
}
- pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->mem_map[i],
- (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
+ dev_dbg(&pdev->dev, "mem_map=%p, phyd=%016llx, size=%d\n",
+ skdev->mem_map[i], (uint64_t)skdev->mem_phys[i],
+ skdev->mem_size[i]);
}
rc = skd_acquire_irq(skdev);
if (rc) {
- pr_err("(%s): interrupt resource error %d\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev, "interrupt resource error %d\n", rc);
goto err_out_iounmap;
}
@@ -4771,29 +3252,14 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
} else {
/* we timed out, something is wrong with the device,
don't add the disk structure */
- pr_err(
- "(%s): error: waiting for s1120 timed out %d!\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev, "error: waiting for s1120 timed out %d!\n",
+ rc);
/* in case of no error; we timeout with ENXIO */
if (!rc)
rc = -ENXIO;
goto err_out_timer;
}
-
-#ifdef SKD_VMK_POLL_HANDLER
- if (skdev->irq_type == SKD_IRQ_MSIX) {
- /* MSIX completion handler is being used for coredump */
- vmklnx_scsi_register_poll_handler(skdev->scsi_host,
- skdev->msix_entries[5].vector,
- skd_comp_q, skdev);
- } else {
- vmklnx_scsi_register_poll_handler(skdev->scsi_host,
- skdev->pdev->irq, skd_isr,
- skdev);
- }
-#endif /* SKD_VMK_POLL_HANDLER */
-
return rc;
err_out_timer:
@@ -4826,7 +3292,7 @@ static void skd_pci_remove(struct pci_dev *pdev)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return;
}
skd_stop_device(skdev);
@@ -4834,7 +3300,7 @@ static void skd_pci_remove(struct pci_dev *pdev)
for (i = 0; i < SKD_MAX_BARS; i++)
if (skdev->mem_map[i])
- iounmap((u32 *)skdev->mem_map[i]);
+ iounmap(skdev->mem_map[i]);
if (skdev->pcie_error_reporting_is_enabled)
pci_disable_pcie_error_reporting(pdev);
@@ -4855,7 +3321,7 @@ static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return -EIO;
}
@@ -4865,7 +3331,7 @@ static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
for (i = 0; i < SKD_MAX_BARS; i++)
if (skdev->mem_map[i])
- iounmap((u32 *)skdev->mem_map[i]);
+ iounmap(skdev->mem_map[i]);
if (skdev->pcie_error_reporting_is_enabled)
pci_disable_pcie_error_reporting(pdev);
@@ -4885,7 +3351,7 @@ static int skd_pci_resume(struct pci_dev *pdev)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return -1;
}
@@ -4903,15 +3369,14 @@ static int skd_pci_resume(struct pci_dev *pdev)
if (!rc) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
- pr_err("(%s): consistent DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "consistent DMA mask error %d\n",
+ rc);
}
} else {
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
- pr_err("(%s): DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
}
}
@@ -4919,8 +3384,8 @@ static int skd_pci_resume(struct pci_dev *pdev)
pci_set_master(pdev);
rc = pci_enable_pcie_error_reporting(pdev);
if (rc) {
- pr_err("(%s): bad enable of PCIe error reporting rc=%d\n",
- skdev->name, rc);
+ dev_err(&pdev->dev,
+ "bad enable of PCIe error reporting rc=%d\n", rc);
skdev->pcie_error_reporting_is_enabled = 0;
} else
skdev->pcie_error_reporting_is_enabled = 1;
@@ -4932,21 +3397,17 @@ static int skd_pci_resume(struct pci_dev *pdev)
skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
skdev->mem_size[i]);
if (!skdev->mem_map[i]) {
- pr_err("(%s): Unable to map adapter memory!\n",
- skd_name(skdev));
+ dev_err(&pdev->dev, "Unable to map adapter memory!\n");
rc = -ENODEV;
goto err_out_iounmap;
}
- pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->mem_map[i],
- (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
+ dev_dbg(&pdev->dev, "mem_map=%p, phyd=%016llx, size=%d\n",
+ skdev->mem_map[i], (uint64_t)skdev->mem_phys[i],
+ skdev->mem_size[i]);
}
rc = skd_acquire_irq(skdev);
if (rc) {
-
- pr_err("(%s): interrupt resource error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "interrupt resource error %d\n", rc);
goto err_out_iounmap;
}
@@ -4984,15 +3445,15 @@ static void skd_pci_shutdown(struct pci_dev *pdev)
{
struct skd_device *skdev;
- pr_err("skd_pci_shutdown called\n");
+ dev_err(&pdev->dev, "%s called\n", __func__);
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return;
}
- pr_err("%s: calling stop\n", skd_name(skdev));
+ dev_err(&pdev->dev, "calling stop\n");
skd_stop_device(skdev);
}
@@ -5012,21 +3473,6 @@ static struct pci_driver skd_driver = {
*****************************************************************************
*/
-static const char *skd_name(struct skd_device *skdev)
-{
- memset(skdev->id_str, 0, sizeof(skdev->id_str));
-
- if (skdev->inquiry_is_valid)
- snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:%s:[%s]",
- skdev->name, skdev->inq_serial_num,
- pci_name(skdev->pdev));
- else
- snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:??:[%s]",
- skdev->name, pci_name(skdev->pdev));
-
- return skdev->id_str;
-}
-
const char *skd_drive_state_to_str(int state)
{
switch (state) {
@@ -5078,8 +3524,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
return "PAUSING";
case SKD_DRVR_STATE_PAUSED:
return "PAUSED";
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- return "DRAINING_TIMEOUT";
case SKD_DRVR_STATE_RESTARTING:
return "RESTARTING";
case SKD_DRVR_STATE_RESUMING:
@@ -5106,18 +3550,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
}
}
-static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
-{
- switch (state) {
- case SKD_MSG_STATE_IDLE:
- return "IDLE";
- case SKD_MSG_STATE_BUSY:
- return "BUSY";
- default:
- return "???";
- }
-}
-
static const char *skd_skreq_state_to_str(enum skd_req_state state)
{
switch (state) {
@@ -5131,8 +3563,6 @@ static const char *skd_skreq_state_to_str(enum skd_req_state state)
return "COMPLETED";
case SKD_REQ_STATE_TIMEOUT:
return "TIMEOUT";
- case SKD_REQ_STATE_ABORTED:
- return "ABORTED";
default:
return "???";
}
@@ -5140,58 +3570,34 @@ static const char *skd_skreq_state_to_str(enum skd_req_state state)
static void skd_log_skdev(struct skd_device *skdev, const char *event)
{
- pr_debug("%s:%s:%d (%s) skdev=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skdev, event);
- pr_debug("%s:%s:%d drive_state=%s(%d) driver_state=%s(%d)\n",
- skdev->name, __func__, __LINE__,
- skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
- skd_skdev_state_to_str(skdev->state), skdev->state);
- pr_debug("%s:%s:%d busy=%d limit=%d dev=%d lowat=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->in_flight, skdev->cur_max_queue_depth,
- skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
- pr_debug("%s:%s:%d timestamp=0x%x cycle=%d cycle_ix=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->timeout_stamp, skdev->skcomp_cycle, skdev->skcomp_ix);
-}
-
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event)
-{
- pr_debug("%s:%s:%d (%s) skmsg=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skmsg, event);
- pr_debug("%s:%s:%d state=%s(%d) id=0x%04x length=%d\n",
- skdev->name, __func__, __LINE__,
- skd_skmsg_state_to_str(skmsg->state), skmsg->state,
- skmsg->id, skmsg->length);
+ dev_dbg(&skdev->pdev->dev, "skdev=%p event='%s'\n", skdev, event);
+ dev_dbg(&skdev->pdev->dev, " drive_state=%s(%d) driver_state=%s(%d)\n",
+ skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
+ skd_skdev_state_to_str(skdev->state), skdev->state);
+ dev_dbg(&skdev->pdev->dev, " busy=%d limit=%d dev=%d lowat=%d\n",
+ skd_in_flight(skdev), skdev->cur_max_queue_depth,
+ skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
+ dev_dbg(&skdev->pdev->dev, " cycle=%d cycle_ix=%d\n",
+ skdev->skcomp_cycle, skdev->skcomp_ix);
}
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event)
{
- pr_debug("%s:%s:%d (%s) skreq=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skreq, event);
- pr_debug("%s:%s:%d state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
- skdev->name, __func__, __LINE__,
- skd_skreq_state_to_str(skreq->state), skreq->state,
- skreq->id, skreq->fitmsg_id);
- pr_debug("%s:%s:%d timo=0x%x sg_dir=%d n_sg=%d\n",
- skdev->name, __func__, __LINE__,
- skreq->timeout_stamp, skreq->sg_data_dir, skreq->n_sg);
-
- if (skreq->req != NULL) {
- struct request *req = skreq->req;
- u32 lba = (u32)blk_rq_pos(req);
- u32 count = blk_rq_sectors(req);
-
- pr_debug("%s:%s:%d "
- "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
- skdev->name, __func__, __LINE__,
- req, lba, lba, count, count,
- (int)rq_data_dir(req));
- } else
- pr_debug("%s:%s:%d req=NULL\n",
- skdev->name, __func__, __LINE__);
+ struct request *req = blk_mq_rq_from_pdu(skreq);
+ u32 lba = blk_rq_pos(req);
+ u32 count = blk_rq_sectors(req);
+
+ dev_dbg(&skdev->pdev->dev, "skreq=%p event='%s'\n", skreq, event);
+ dev_dbg(&skdev->pdev->dev, " state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
+ skd_skreq_state_to_str(skreq->state), skreq->state, skreq->id,
+ skreq->fitmsg_id);
+ dev_dbg(&skdev->pdev->dev, " sg_dir=%d n_sg=%d\n",
+ skreq->data_dir, skreq->n_sg);
+
+ dev_dbg(&skdev->pdev->dev,
+ "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba, lba,
+ count, count, (int)rq_data_dir(req));
}
/*
@@ -5202,7 +3608,14 @@ static void skd_log_skreq(struct skd_device *skdev,
static int __init skd_init(void)
{
- pr_info(PFX " v%s-b%s loaded\n", DRV_VERSION, DRV_BUILD_ID);
+ BUILD_BUG_ON(sizeof(struct fit_completion_entry_v1) != 8);
+ BUILD_BUG_ON(sizeof(struct fit_comp_error_info) != 32);
+ BUILD_BUG_ON(sizeof(struct skd_command_header) != 16);
+ BUILD_BUG_ON(sizeof(struct skd_scsi_request) != 32);
+ BUILD_BUG_ON(sizeof(struct driver_inquiry_data) != 44);
+ BUILD_BUG_ON(offsetof(struct skd_msg_buf, fmh) != 0);
+ BUILD_BUG_ON(offsetof(struct skd_msg_buf, scsi) != 64);
+ BUILD_BUG_ON(sizeof(struct skd_msg_buf) != SKD_N_FITMSG_BYTES);
switch (skd_isr_type) {
case SKD_IRQ_LEGACY:
@@ -5222,7 +3635,8 @@ static int __init skd_init(void)
skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
}
- if (skd_max_req_per_msg < 1 || skd_max_req_per_msg > 14) {
+ if (skd_max_req_per_msg < 1 ||
+ skd_max_req_per_msg > SKD_MAX_REQ_PER_MSG) {
pr_err(PFX "skd_max_req_per_msg %d invalid, re-set to %d\n",
skd_max_req_per_msg, SKD_MAX_REQ_PER_MSG_DEFAULT);
skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
@@ -5246,19 +3660,11 @@ static int __init skd_init(void)
skd_isr_comp_limit = 0;
}
- if (skd_max_pass_thru < 1 || skd_max_pass_thru > 50) {
- pr_err(PFX "skd_max_pass_thru %d invalid, re-set to %d\n",
- skd_max_pass_thru, SKD_N_SPECIAL_CONTEXT);
- skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
- }
-
return pci_register_driver(&skd_driver);
}
static void __exit skd_exit(void)
{
- pr_info(PFX " v%s-b%s unloading\n", DRV_VERSION, DRV_BUILD_ID);
-
pci_unregister_driver(&skd_driver);
if (skd_major)
diff --git a/drivers/block/skd_s1120.h b/drivers/block/skd_s1120.h
index 61c757ff0161..de35f47e953c 100644
--- a/drivers/block/skd_s1120.h
+++ b/drivers/block/skd_s1120.h
@@ -1,19 +1,15 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Copyright 2012 STEC, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
*
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
*/
#ifndef SKD_S1120_H
#define SKD_S1120_H
-#pragma pack(push, s1120_h, 1)
-
/*
* Q-channel, 64-bit r/w
*/
@@ -30,7 +26,7 @@
#define FIT_QCMD_MSGSIZE_128 (0x1 << 4)
#define FIT_QCMD_MSGSIZE_256 (0x2 << 4)
#define FIT_QCMD_MSGSIZE_512 (0x3 << 4)
-#define FIT_QCMD_BASE_ADDRESS_MASK (0xFFFFFFFFFFFFFFC0ull)
+#define FIT_QCMD_ALIGN L1_CACHE_BYTES
/*
* Control, 32-bit r/w
@@ -250,7 +246,7 @@ struct fit_msg_hdr {
* 20-23 of the FIT_MTD_FITFW_INIT response.
*/
struct fit_completion_entry_v1 {
- uint32_t num_returned_bytes;
+ __be32 num_returned_bytes;
uint16_t tag;
uint8_t status; /* SCSI status */
uint8_t cycle;
@@ -278,7 +274,7 @@ struct fit_comp_error_info {
uint16_t sks_low; /* 10: Sense Key Specific (LSW) */
uint16_t reserved3; /* 12: Part of additional sense bytes (unused) */
uint16_t uec; /* 14: Additional Sense Bytes */
- uint64_t per; /* 16: Additional Sense Bytes */
+ uint64_t per __packed; /* 16: Additional Sense Bytes */
uint8_t reserved4[2]; /* 1E: Additional Sense Bytes (unused) */
};
@@ -292,11 +288,11 @@ struct fit_comp_error_info {
* Version one has the last 32 bits sg_list_len_bytes;
*/
struct skd_command_header {
- uint64_t sg_list_dma_address;
+ __be64 sg_list_dma_address;
uint16_t tag;
uint8_t attribute;
uint8_t add_cdb_len; /* In 32 bit words */
- uint32_t sg_list_len_bytes;
+ __be32 sg_list_len_bytes;
};
struct skd_scsi_request {
@@ -309,22 +305,20 @@ struct driver_inquiry_data {
uint8_t peripheral_device_type:5;
uint8_t qualifier:3;
uint8_t page_code;
- uint16_t page_length;
- uint16_t pcie_bus_number;
+ __be16 page_length;
+ __be16 pcie_bus_number;
uint8_t pcie_device_number;
uint8_t pcie_function_number;
uint8_t pcie_link_speed;
uint8_t pcie_link_lanes;
- uint16_t pcie_vendor_id;
- uint16_t pcie_device_id;
- uint16_t pcie_subsystem_vendor_id;
- uint16_t pcie_subsystem_device_id;
+ __be16 pcie_vendor_id;
+ __be16 pcie_device_id;
+ __be16 pcie_subsystem_vendor_id;
+ __be16 pcie_subsystem_device_id;
uint8_t reserved1[2];
uint8_t reserved2[3];
uint8_t driver_version_length;
uint8_t driver_version[0x14];
};
-#pragma pack(pop, s1120_h)
-
#endif /* SKD_S1120_H */
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1498b899a593..34e17ee799be 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -265,7 +265,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
- if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT)
+ if (blk_rq_is_scsi(req))
err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num);
else
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
@@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
struct request_queue *q = vblk->disk->queue;
char cap_str_2[10], cap_str_10[10];
char *envp[] = { "RESIZE=1", NULL };
+ unsigned long long nblocks;
u64 capacity;
/* Host must always specify the capacity. */
@@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work)
capacity = (sector_t)-1;
}
- string_get_size(capacity, queue_logical_block_size(q),
+ nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);
+
+ string_get_size(nblocks, queue_logical_block_size(q),
STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
- string_get_size(capacity, queue_logical_block_size(q),
+ string_get_size(nblocks, queue_logical_block_size(q),
STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
dev_notice(&vdev->dev,
- "new size: %llu %d-byte logical blocks (%s/%s)\n",
- (unsigned long long)capacity,
- queue_logical_block_size(q),
- cap_str_10, cap_str_2);
+ "new size: %llu %d-byte logical blocks (%s/%s)\n",
+ nblocks,
+ queue_logical_block_size(q),
+ cap_str_10,
+ cap_str_2);
set_capacity(vblk->disk, capacity);
revalidate_disk(vblk->disk);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index fe7cd58c43d0..987d665e82de 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -705,9 +705,9 @@ static unsigned int xen_blkbk_unmap_prepare(
GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE;
invcount++;
- }
+ }
- return invcount;
+ return invcount;
}
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
@@ -1251,6 +1251,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
+ /* fall through */
case BLKIF_OP_FLUSH_DISKCACHE:
ring->st_f_req++;
operation = REQ_OP_WRITE;
@@ -1362,7 +1363,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
goto fail_put_bio;
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
+ bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
@@ -1381,7 +1382,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
goto fail_put_bio;
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
+ bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio_set_op_attrs(bio, operation, operation_flags);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 792da683e70d..21c1be1eb226 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -244,6 +244,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
struct pending_req *req, *n;
unsigned int j, r;
+ bool busy = false;
for (r = 0; r < blkif->nr_rings; r++) {
struct xen_blkif_ring *ring = &blkif->rings[r];
@@ -261,8 +262,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
* don't have any discard_io or other_io requests. So, checking
* for inflight IO is enough.
*/
- if (atomic_read(&ring->inflight) > 0)
- return -EBUSY;
+ if (atomic_read(&ring->inflight) > 0) {
+ busy = true;
+ continue;
+ }
if (ring->irq) {
unbind_from_irqhandler(ring->irq, ring);
@@ -300,6 +303,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
ring->active = false;
}
+ if (busy)
+ return -EBUSY;
+
blkif->nr_ring_pages = 0;
/*
* blkif->rings was allocated in connect_ring, so we should free it in
@@ -810,7 +816,8 @@ static void frontend_changed(struct xenbus_device *dev,
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
- /* fall through if not online */
+ /* fall through */
+ /* if not online */
case XenbusStateUnknown:
/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
device_unregister(&dev->dev);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 98e34e4c62b8..891265acb10e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev)
/*
* Get the bios in the request so we can re-queue them.
*/
- if (req_op(shadow[i].request) == REQ_OP_FLUSH ||
- req_op(shadow[i].request) == REQ_OP_DISCARD ||
- req_op(shadow[i].request) == REQ_OP_SECURE_ERASE ||
+ if (req_op(shadow[j].request) == REQ_OP_FLUSH ||
+ req_op(shadow[j].request) == REQ_OP_DISCARD ||
+ req_op(shadow[j].request) == REQ_OP_SECURE_ERASE ||
shadow[j].request->cmd_flags & REQ_FUA) {
/*
* Flush operations don't contain bios, so
@@ -2456,7 +2456,7 @@ static void blkback_changed(struct xenbus_device *dev,
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's Closing state -- fallthrough */
+ /* fall through */
case XenbusStateClosing:
if (info)
blkfront_closing(info);
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index b8ecba6dcd3b..7cd4a8ec3c8f 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -13,3 +13,15 @@ config ZRAM
disks and maybe many more.
See zram.txt for more information.
+
+config ZRAM_WRITEBACK
+ bool "Write back incompressible page to backing device"
+ depends on ZRAM
+ default n
+ help
+ With incompressible page, there is no memory saving to keep it
+ in memory. Instead, write it out to backing device.
+ For this feature, admin should set up backing device via
+ /sys/block/zramX/backing_dev.
+
+ See zram.txt for more infomration.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 3b1b6340ba13..2981c27d3aae 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -175,20 +175,11 @@ static inline void update_used_max(struct zram *zram,
} while (old_max != cur_max);
}
-static inline void zram_fill_page(char *ptr, unsigned long len,
+static inline void zram_fill_page(void *ptr, unsigned long len,
unsigned long value)
{
- int i;
- unsigned long *page = (unsigned long *)ptr;
-
WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
-
- if (likely(value == 0)) {
- memset(ptr, 0, len);
- } else {
- for (i = 0; i < len / sizeof(*page); i++)
- page[i] = value;
- }
+ memset_l(ptr, value, len / sizeof(unsigned long));
}
static bool page_same_filled(void *ptr, unsigned long *element)
@@ -270,6 +261,349 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+#ifdef CONFIG_ZRAM_WRITEBACK
+static bool zram_wb_enabled(struct zram *zram)
+{
+ return zram->backing_dev;
+}
+
+static void reset_bdev(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ if (!zram_wb_enabled(zram))
+ return;
+
+ bdev = zram->bdev;
+ if (zram->old_block_size)
+ set_blocksize(bdev, zram->old_block_size);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ /* hope filp_close flush all of IO */
+ filp_close(zram->backing_dev, NULL);
+ zram->backing_dev = NULL;
+ zram->old_block_size = 0;
+ zram->bdev = NULL;
+
+ kvfree(zram->bitmap);
+ zram->bitmap = NULL;
+}
+
+static ssize_t backing_dev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ struct file *file = zram->backing_dev;
+ char *p;
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ if (!zram_wb_enabled(zram)) {
+ memcpy(buf, "none\n", 5);
+ up_read(&zram->init_lock);
+ return 5;
+ }
+
+ p = file_path(file, buf, PAGE_SIZE - 1);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ goto out;
+ }
+
+ ret = strlen(p);
+ memmove(buf, p, ret);
+ buf[ret++] = '\n';
+out:
+ up_read(&zram->init_lock);
+ return ret;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ char *file_name;
+ struct file *backing_dev = NULL;
+ struct inode *inode;
+ struct address_space *mapping;
+ unsigned int bitmap_sz, old_block_size = 0;
+ unsigned long nr_pages, *bitmap = NULL;
+ struct block_device *bdev = NULL;
+ int err;
+ struct zram *zram = dev_to_zram(dev);
+
+ file_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!file_name)
+ return -ENOMEM;
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Can't setup backing device for initialized device\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ strlcpy(file_name, buf, len);
+
+ backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+ if (IS_ERR(backing_dev)) {
+ err = PTR_ERR(backing_dev);
+ backing_dev = NULL;
+ goto out;
+ }
+
+ mapping = backing_dev->f_mapping;
+ inode = mapping->host;
+
+ /* Support only block device in this moment */
+ if (!S_ISBLK(inode->i_mode)) {
+ err = -ENOTBLK;
+ goto out;
+ }
+
+ bdev = bdgrab(I_BDEV(inode));
+ err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+ if (err < 0)
+ goto out;
+
+ nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+ bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
+ bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
+ if (!bitmap) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ old_block_size = block_size(bdev);
+ err = set_blocksize(bdev, PAGE_SIZE);
+ if (err)
+ goto out;
+
+ reset_bdev(zram);
+ spin_lock_init(&zram->bitmap_lock);
+
+ zram->old_block_size = old_block_size;
+ zram->bdev = bdev;
+ zram->backing_dev = backing_dev;
+ zram->bitmap = bitmap;
+ zram->nr_pages = nr_pages;
+ up_write(&zram->init_lock);
+
+ pr_info("setup backing device %s\n", file_name);
+ kfree(file_name);
+
+ return len;
+out:
+ if (bitmap)
+ kvfree(bitmap);
+
+ if (bdev)
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+ if (backing_dev)
+ filp_close(backing_dev, NULL);
+
+ up_write(&zram->init_lock);
+
+ kfree(file_name);
+
+ return err;
+}
+
+static unsigned long get_entry_bdev(struct zram *zram)
+{
+ unsigned long entry;
+
+ spin_lock(&zram->bitmap_lock);
+ /* skip 0 bit to confuse zram.handle = 0 */
+ entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+ if (entry == zram->nr_pages) {
+ spin_unlock(&zram->bitmap_lock);
+ return 0;
+ }
+
+ set_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+
+ return entry;
+}
+
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
+{
+ int was_set;
+
+ spin_lock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+ WARN_ON_ONCE(!was_set);
+}
+
+void zram_page_end_io(struct bio *bio)
+{
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
+ bio_put(bio);
+}
+
+/*
+ * Returns 1 if the submission is successful.
+ */
+static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio_set_dev(bio, zram->bdev);
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+ bio_put(bio);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_READ;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ return 1;
+}
+
+struct zram_work {
+ struct work_struct work;
+ struct zram *zram;
+ unsigned long entry;
+ struct bio *bio;
+};
+
+#if PAGE_SIZE != 4096
+static void zram_sync_read(struct work_struct *work)
+{
+ struct bio_vec bvec;
+ struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram *zram = zw->zram;
+ unsigned long entry = zw->entry;
+ struct bio *bio = zw->bio;
+
+ read_from_bdev_async(zram, &bvec, entry, bio);
+}
+
+/*
+ * Block layer want one ->make_request_fn to be active at a time
+ * so if we use chained IO with parent IO in same context,
+ * it's a deadlock. To avoid, it, it uses worker thread context.
+ */
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ struct zram_work work;
+
+ work.zram = zram;
+ work.entry = entry;
+ work.bio = bio;
+
+ INIT_WORK_ONSTACK(&work.work, zram_sync_read);
+ queue_work(system_unbound_wq, &work.work);
+ flush_work(&work.work);
+ destroy_work_on_stack(&work.work);
+
+ return 1;
+}
+#else
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ WARN_ON(1);
+ return -EIO;
+}
+#endif
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ if (sync)
+ return read_from_bdev_sync(zram, bvec, entry, parent);
+ else
+ return read_from_bdev_async(zram, bvec, entry, parent);
+}
+
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+{
+ struct bio *bio;
+ unsigned long entry;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ entry = get_entry_bdev(zram);
+ if (!entry) {
+ bio_put(bio);
+ return -ENOSPC;
+ }
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio_set_dev(bio, zram->bdev);
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset)) {
+ bio_put(bio);
+ put_entry_bdev(zram, entry);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ *pentry = entry;
+
+ return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+ unsigned long entry;
+
+ zram_clear_flag(zram, index, ZRAM_WB);
+ entry = zram_get_element(zram, index);
+ zram_set_element(zram, index, 0);
+ put_entry_bdev(zram, entry);
+}
+
+#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
+static inline void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+
+{
+ return -EIO;
+}
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ return -EIO;
+}
+static void zram_wb_clear(struct zram *zram, u32 index) {}
+#endif
+
+
/*
* We switched to per-cpu streams and this attr is not needed anymore.
* However, we will keep it around for some time, because:
@@ -453,30 +787,6 @@ static bool zram_same_page_read(struct zram *zram, u32 index,
return false;
}
-static bool zram_same_page_write(struct zram *zram, u32 index,
- struct page *page)
-{
- unsigned long element;
- void *mem = kmap_atomic(page);
-
- if (page_same_filled(mem, &element)) {
- kunmap_atomic(mem);
- /* Free memory associated with this sector now. */
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_set_flag(zram, index, ZRAM_SAME);
- zram_set_element(zram, index, element);
- zram_slot_unlock(zram, index);
-
- atomic64_inc(&zram->stats.same_pages);
- atomic64_inc(&zram->stats.pages_stored);
- return true;
- }
- kunmap_atomic(mem);
-
- return false;
-}
-
static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
@@ -515,7 +825,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- unsigned long handle = zram_get_handle(zram, index);
+ unsigned long handle;
+
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
+ }
/*
* No memory is allocated for same element filled pages.
@@ -529,6 +845,7 @@ static void zram_free_page(struct zram *zram, size_t index)
return;
}
+ handle = zram_get_handle(zram, index);
if (!handle)
return;
@@ -542,13 +859,31 @@ static void zram_free_page(struct zram *zram, size_t index)
zram_set_obj_size(zram, index, 0);
}
-static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
{
int ret;
unsigned long handle;
unsigned int size;
void *src, *dst;
+ if (zram_wb_enabled(zram)) {
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
+
+ zram_slot_unlock(zram, index);
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
+ }
+ zram_slot_unlock(zram, index);
+ }
+
if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
return 0;
@@ -581,7 +916,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
@@ -594,7 +929,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return -ENOMEM;
}
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
if (unlikely(ret))
goto out;
@@ -613,30 +948,57 @@ out:
return ret;
}
-static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
- struct page *page,
- unsigned long *out_handle, unsigned int *out_comp_len)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
- int ret;
- unsigned int comp_len;
- void *src;
+ int ret = 0;
unsigned long alloced_pages;
unsigned long handle = 0;
+ unsigned int comp_len = 0;
+ void *src, *dst, *mem;
+ struct zcomp_strm *zstrm;
+ struct page *page = bvec->bv_page;
+ unsigned long element = 0;
+ enum zram_pageflags flags = 0;
+ bool allow_wb = true;
+
+ mem = kmap_atomic(page);
+ if (page_same_filled(mem, &element)) {
+ kunmap_atomic(mem);
+ /* Free memory associated with this sector now. */
+ flags = ZRAM_SAME;
+ atomic64_inc(&zram->stats.same_pages);
+ goto out;
+ }
+ kunmap_atomic(mem);
compress_again:
+ zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
- ret = zcomp_compress(*zstrm, src, &comp_len);
+ ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
+ zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
- if (handle)
- zs_free(zram->mem_pool, handle);
+ zs_free(zram->mem_pool, handle);
return ret;
}
- if (unlikely(comp_len > max_zpage_size))
+ if (unlikely(comp_len > max_zpage_size)) {
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
comp_len = PAGE_SIZE;
+ }
/*
* handle allocation has 2 paths:
@@ -663,7 +1025,6 @@ compress_again:
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- *zstrm = zcomp_stream_get(zram->comp);
if (handle)
goto compress_again;
return -ENOMEM;
@@ -673,34 +1034,11 @@ compress_again:
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
+ zcomp_stream_put(zram->comp);
zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
- *out_handle = handle;
- *out_comp_len = comp_len;
- return 0;
-}
-
-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
-{
- int ret;
- unsigned long handle;
- unsigned int comp_len;
- void *src, *dst;
- struct zcomp_strm *zstrm;
- struct page *page = bvec->bv_page;
-
- if (zram_same_page_write(zram, index, page))
- return 0;
-
- zstrm = zcomp_stream_get(zram->comp);
- ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
- if (ret) {
- zcomp_stream_put(zram->comp);
- return ret;
- }
-
dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
src = zstrm->buffer;
@@ -712,25 +1050,31 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
zcomp_stream_put(zram->comp);
zs_unmap_object(zram->mem_pool, handle);
-
+ atomic64_add(comp_len, &zram->stats.compr_data_size);
+out:
/*
* Free memory associated with this sector
* before overwriting unused sectors.
*/
zram_slot_lock(zram, index);
zram_free_page(zram, index);
- zram_set_handle(zram, index, handle);
- zram_set_obj_size(zram, index, comp_len);
+
+ if (flags) {
+ zram_set_flag(zram, index, flags);
+ zram_set_element(zram, index, element);
+ } else {
+ zram_set_handle(zram, index, handle);
+ zram_set_obj_size(zram, index, comp_len);
+ }
zram_slot_unlock(zram, index);
/* Update stats */
- atomic64_add(comp_len, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
- return 0;
+ return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page = NULL;
@@ -748,7 +1092,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (!page)
return -ENOMEM;
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, true);
if (ret)
goto out;
@@ -763,7 +1107,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
vec.bv_offset = 0;
}
- ret = __zram_bvec_write(zram, &vec, index);
+ ret = __zram_bvec_write(zram, &vec, index, bio);
out:
if (is_partial_io(bvec))
__free_page(page);
@@ -808,28 +1152,34 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}
+/*
+ * Returns errno if it has some problem. Otherwise return 0 or 1.
+ * Returns 0 if IO request was done synchronously
+ * Returns 1 if IO request was successfully submitted.
+ */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, bool is_write)
+ int offset, bool is_write, struct bio *bio)
{
unsigned long start_time = jiffies;
int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
+ struct request_queue *q = zram->disk->queue;
int ret;
- generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
+ generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
&zram->disk->part0);
if (!is_write) {
atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
+ ret = zram_bvec_read(zram, bvec, index, offset, bio);
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
+ ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
- generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
+ generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
- if (unlikely(ret)) {
+ if (unlikely(ret < 0)) {
if (!is_write)
atomic64_inc(&zram->stats.failed_reads);
else
@@ -868,7 +1218,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
- op_is_write(bio_op(bio))) < 0)
+ op_is_write(bio_op(bio)), bio) < 0)
goto out;
bv.bv_offset += bv.bv_len;
@@ -922,16 +1272,18 @@ static void zram_slot_free_notify(struct block_device *bdev,
static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
- int offset, err = -EIO;
+ int offset, ret;
u32 index;
struct zram *zram;
struct bio_vec bv;
+ if (PageTransHuge(page))
+ return -ENOTSUPP;
zram = bdev->bd_disk->private_data;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -942,7 +1294,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- err = zram_bvec_rw(zram, &bv, index, offset, is_write);
+ ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -952,9 +1304,20 @@ out:
* bio->bi_end_io does things to handle the error
* (e.g., SetPageError, set_page_dirty and extra works).
*/
- if (err == 0)
+ if (unlikely(ret < 0))
+ return ret;
+
+ switch (ret) {
+ case 0:
page_endio(page, is_write, 0);
- return err;
+ break;
+ case 1:
+ ret = 0;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
}
static void zram_reset_device(struct zram *zram)
@@ -983,6 +1346,7 @@ static void zram_reset_device(struct zram *zram)
zram_meta_free(zram, disksize);
memset(&zram->stats, 0, sizeof(zram->stats));
zcomp_destroy(comp);
+ reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
@@ -1108,6 +1472,9 @@ static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RW(backing_dev);
+#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1118,6 +1485,9 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_backing_dev.attr,
+#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
&dev_attr_debug_stat.attr,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index e34e44d02e3e..31762db861e3 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -60,9 +60,10 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
- /* Page consists entirely of zeros */
+ /* Page consists the same element */
ZRAM_SAME = ZRAM_FLAG_SHIFT,
ZRAM_ACCESS, /* page is now accessed */
+ ZRAM_WB, /* page is stored on backing_device */
__NR_ZRAM_PAGEFLAGS,
};
@@ -115,5 +116,13 @@ struct zram {
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
+#ifdef CONFIG_ZRAM_WRITEBACK
+ struct file *backing_dev;
+ struct block_device *bdev;
+ unsigned int old_block_size;
+ unsigned long *bitmap;
+ unsigned long nr_pages;
+ spinlock_t bitmap_lock;
+#endif
};
#endif