Diffstat (limited to 'drivers/nvdimm/pmem.c')
-rw-r--r--  drivers/nvdimm/pmem.c  496
1 file changed, 327 insertions(+), 169 deletions(-)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4eae441f86c9..96e6e9a5f235 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -7,8 +7,8 @@
  * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
  */
 
-#include <asm/cacheflush.h>
 #include <linux/blkdev.h>
+#include <linux/pagemap.h>
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
@@ -24,8 +24,10 @@
 #include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/nd.h>
-#include <linux/backing-dev.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
 #include "pmem.h"
+#include "btt.h"
 #include "pfn.h"
 #include "nd.h"
 
@@ -43,9 +45,25 @@ static struct nd_region *to_region(struct pmem_device *pmem)
 	return to_nd_region(to_dev(pmem)->parent);
 }
 
-static void hwpoison_clear(struct pmem_device *pmem,
-		phys_addr_t phys, unsigned int len)
+static phys_addr_t pmem_to_phys(struct pmem_device *pmem, phys_addr_t offset)
+{
+	return pmem->phys_addr + offset;
+}
+
+static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset)
+{
+	return (offset - pmem->data_offset) >> SECTOR_SHIFT;
+}
+
+static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector)
+{
+	return (sector << SECTOR_SHIFT) + pmem->data_offset;
+}
+
+static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset,
+		unsigned int len)
 {
+	phys_addr_t phys = pmem_to_phys(pmem, offset);
 	unsigned long pfn_start, pfn_end, pfn;
 
 	/* only pmem in the linear map supports HWPoison */
@@ -67,33 +85,40 @@ static void hwpoison_clear(struct pmem_device *pmem,
 	}
 }
 
-static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
-		phys_addr_t offset, unsigned int len)
+static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks)
 {
-	struct device *dev = to_dev(pmem);
-	sector_t sector;
-	long cleared;
-	blk_status_t rc = BLK_STS_OK;
+	if (blks == 0)
+		return;
+	badblocks_clear(&pmem->bb, sector, blks);
+	if (pmem->bb_state)
+		sysfs_notify_dirent(pmem->bb_state);
+}
 
-	sector = (offset - pmem->data_offset) / 512;
+static long __pmem_clear_poison(struct pmem_device *pmem,
+		phys_addr_t offset, unsigned int len)
+{
+	phys_addr_t phys = pmem_to_phys(pmem, offset);
+	long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len);
 
-	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
-	if (cleared < len)
-		rc = BLK_STS_IOERR;
-	if (cleared > 0 && cleared / 512) {
-		hwpoison_clear(pmem, pmem->phys_addr + offset, cleared);
-		cleared /= 512;
-		dev_dbg(dev, "%#llx clear %ld sector%s\n",
-				(unsigned long long) sector, cleared,
-				cleared > 1 ? "s" : "");
-		badblocks_clear(&pmem->bb, sector, cleared);
-		if (pmem->bb_state)
-			sysfs_notify_dirent(pmem->bb_state);
+	if (cleared > 0) {
+		pmem_mkpage_present(pmem, offset, cleared);
+		arch_invalidate_pmem(pmem->virt_addr + offset, len);
 	}
+	return cleared;
+}
 
-	arch_invalidate_pmem(pmem->virt_addr + offset, len);
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+		phys_addr_t offset, unsigned int len)
+{
+	long cleared = __pmem_clear_poison(pmem, offset, len);
 
-	return rc;
+	if (cleared < 0)
+		return BLK_STS_IOERR;
+
+	pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT);
+	if (cleared < len)
+		return BLK_STS_IOERR;
+	return BLK_STS_OK;
 }
 
 static void write_pmem(void *pmem_addr, struct page *page,
@@ -124,7 +149,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 	while (len) {
 		mem = kmap_atomic(page);
 		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
-		rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+		rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
 		kunmap_atomic(mem);
 		if (rem)
 			return BLK_STS_IOERR;
@@ -136,52 +161,43 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 	return BLK_STS_OK;
 }
 
-static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-			unsigned int len, unsigned int off, unsigned int op,
-			sector_t sector)
+static blk_status_t pmem_do_read(struct pmem_device *pmem,
+			struct page *page, unsigned int page_off,
+			sector_t sector, unsigned int len)
 {
-	blk_status_t rc = BLK_STS_OK;
-	bool bad_pmem = false;
-	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+	blk_status_t rc;
+	phys_addr_t pmem_off = to_offset(pmem, sector);
 	void *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
-		bad_pmem = true;
-
-	if (!op_is_write(op)) {
-		if (unlikely(bad_pmem))
-			rc = BLK_STS_IOERR;
-		else {
-			rc = read_pmem(page, off, pmem_addr, len);
-			flush_dcache_page(page);
-		}
-	} else {
-		/*
-		 * Note that we write the data both before and after
-		 * clearing poison.  The write before clear poison
-		 * handles situations where the latest written data is
-		 * preserved and the clear poison operation simply marks
-		 * the address range as valid without changing the data.
-		 * In this case application software can assume that an
-		 * interrupted write will either return the new good
-		 * data or an error.
-		 *
-		 * However, if pmem_clear_poison() leaves the data in an
-		 * indeterminate state we need to perform the write
-		 * after clear poison.
-		 */
-		flush_dcache_page(page);
-		write_pmem(pmem_addr, page, off, len);
-		if (unlikely(bad_pmem)) {
-			rc = pmem_clear_poison(pmem, pmem_off, len);
-			write_pmem(pmem_addr, page, off, len);
-		}
-	}
+		return BLK_STS_IOERR;
 
+	rc = read_pmem(page, page_off, pmem_addr, len);
+	flush_dcache_page(page);
 	return rc;
 }
 
-static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
+static blk_status_t pmem_do_write(struct pmem_device *pmem,
+			struct page *page, unsigned int page_off,
+			sector_t sector, unsigned int len)
+{
+	phys_addr_t pmem_off = to_offset(pmem, sector);
+	void *pmem_addr = pmem->virt_addr + pmem_off;
+
+	if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
+		blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);
+
+		if (rc != BLK_STS_OK)
+			return rc;
+	}
+
+	flush_dcache_page(page);
+	write_pmem(pmem_addr, page, page_off, len);
+
+	return BLK_STS_OK;
+}
+
+static void pmem_submit_bio(struct bio *bio)
 {
 	int ret = 0;
 	blk_status_t rc = 0;
@@ -189,23 +205,29 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long start;
 	struct bio_vec bvec;
 	struct bvec_iter iter;
-	struct pmem_device *pmem = q->queuedata;
+	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
 	struct nd_region *nd_region = to_region(pmem);
 
 	if (bio->bi_opf & REQ_PREFLUSH)
 		ret = nvdimm_flush(nd_region, bio);
 
-	do_acct = nd_iostat_start(bio, &start);
+	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
+	if (do_acct)
+		start = bio_start_io_acct(bio);
 	bio_for_each_segment(bvec, bio, iter) {
-		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-				bvec.bv_offset, bio_op(bio), iter.bi_sector);
+		if (op_is_write(bio_op(bio)))
+			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
+				iter.bi_sector, bvec.bv_len);
+		else
+			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
+				iter.bi_sector, bvec.bv_len);
 		if (rc) {
 			bio->bi_status = rc;
 			break;
 		}
 	}
 	if (do_acct)
-		nd_iostat_end(bio, start);
+		bio_end_io_acct(bio, start);
 
 	if (bio->bi_opf & REQ_FUA)
 		ret = nvdimm_flush(nd_region, bio);
@@ -214,18 +236,18 @@
 		bio->bi_status = errno_to_blk_status(ret);
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, unsigned int op)
+		       struct page *page, enum req_op op)
 {
-	struct pmem_device *pmem = bdev->bd_queue->queuedata;
+	struct pmem_device *pmem = bdev->bd_disk->private_data;
 	blk_status_t rc;
 
-	rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-			  0, op, sector);
-
+	if (op_is_write(op))
+		rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
+	else
+		rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
 	 * retries on any error, so we can only invoke page_endio() in
@@ -240,106 +262,210 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
 __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
-
-	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
-					PFN_PHYS(nr_pages))))
-		return -EIO;
+	sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
+	unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
+	struct badblocks *bb = &pmem->bb;
+	sector_t first_bad;
+	int num_bad;
 
 	if (kaddr)
 		*kaddr = pmem->virt_addr + offset;
 	if (pfn)
 		*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
+	if (bb->count &&
+	    badblocks_check(bb, sector, num, &first_bad, &num_bad)) {
+		long actual_nr;
+
+		if (mode != DAX_RECOVERY_WRITE)
+			return -EIO;
+
+		/*
+		 * Set the recovery stride is set to kernel page size because
+		 * the underlying driver and firmware clear poison functions
+		 * don't appear to handle large chunk(such as 2MiB) reliably.
+		 */
+		actual_nr = PHYS_PFN(
+			PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT));
+		dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n",
+				sector, nr_pages, first_bad, actual_nr);
+		if (actual_nr)
+			return actual_nr;
+		return 1;
+	}
+
 	/*
-	 * If badblocks are present, limit known good range to the
-	 * requested range.
+	 * If badblocks are present but not in the range, limit known good range
+	 * to the requested range.
 	 */
-	if (unlikely(pmem->bb.count))
+	if (bb->count)
 		return nr_pages;
 	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
 }
 
 static const struct block_device_operations pmem_fops = {
 	.owner =		THIS_MODULE,
+	.submit_bio =		pmem_submit_bio,
 	.rw_page =		pmem_rw_page,
-	.revalidate_disk =	nvdimm_revalidate_disk,
 };
 
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+				    size_t nr_pages)
+{
+	struct pmem_device *pmem = dax_get_private(dax_dev);
+
+	return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
+				   PFN_PHYS(pgoff) >> SECTOR_SHIFT,
+				   PAGE_SIZE));
+}
+
 static long pmem_dax_direct_access(struct dax_device *dax_dev,
-		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+		pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
+		void **kaddr, pfn_t *pfn)
 {
 	struct pmem_device *pmem = dax_get_private(dax_dev);
 
-	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+	return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
 }
 
 /*
- * Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
- * checking, both file offset and device offset, is handled by
- * dax_iomap_actor()
+ * The recovery write thread started out as a normal pwrite thread and
+ * when the filesystem was told about potential media error in the
+ * range, filesystem turns the normal pwrite to a dax_recovery_write.
+ *
+ * The recovery write consists of clearing media poison, clearing page
+ * HWPoison bit, reenable page-wide read-write permission, flush the
+ * caches and finally write.  A competing pread thread will be held
+ * off during the recovery process since data read back might not be
+ * valid, and this is achieved by clearing the badblock records after
+ * the recovery write is complete.  Competing recovery write threads
+ * are already serialized by writer lock held by dax_iomap_rw().
  */
-static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
 		void *addr, size_t bytes, struct iov_iter *i)
 {
-	return _copy_from_iter_flushcache(addr, bytes, i);
-}
+	struct pmem_device *pmem = dax_get_private(dax_dev);
+	size_t olen, len, off;
+	phys_addr_t pmem_off;
+	struct device *dev = pmem->bb.dev;
+	long cleared;
 
-static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
-		void *addr, size_t bytes, struct iov_iter *i)
-{
-	return _copy_to_iter_mcsafe(addr, bytes, i);
+	off = offset_in_page(addr);
+	len = PFN_PHYS(PFN_UP(off + bytes));
+	if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, len))
+		return _copy_from_iter_flushcache(addr, bytes, i);
+
+	/*
+	 * Not page-aligned range cannot be recovered. This should not
+	 * happen unless something else went wrong.
	 */
+	if (off || !PAGE_ALIGNED(bytes)) {
+		dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n",
+				addr, bytes);
+		return 0;
+	}
+
+	pmem_off = PFN_PHYS(pgoff) + pmem->data_offset;
+	cleared = __pmem_clear_poison(pmem, pmem_off, len);
+	if (cleared > 0 && cleared < len) {
+		dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n",
+				cleared, len);
+		return 0;
+	}
+	if (cleared < 0) {
+		dev_dbg(dev, "poison clear failed: %ld\n", cleared);
+		return 0;
+	}
+
+	olen = _copy_from_iter_flushcache(addr, bytes, i);
+	pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT);
+
+	return olen;
 }
 
 static const struct dax_operations pmem_dax_ops = {
 	.direct_access = pmem_dax_direct_access,
-	.dax_supported = generic_fsdax_supported,
-	.copy_from_iter = pmem_copy_from_iter,
-	.copy_to_iter = pmem_copy_to_iter,
-};
-
-static const struct attribute_group *pmem_attribute_groups[] = {
-	&dax_attribute_group,
-	NULL,
+	.zero_page_range = pmem_dax_zero_page_range,
+	.recovery_write = pmem_recovery_write,
 };
 
-static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
+static ssize_t write_cache_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
 {
-	struct request_queue *q =
-		container_of(pgmap->ref, struct request_queue, q_usage_counter);
+	struct pmem_device *pmem = dev_to_disk(dev)->private_data;
 
-	blk_cleanup_queue(q);
+	return sprintf(buf, "%d\n", !!dax_write_cache_enabled(pmem->dax_dev));
 }
 
-static void pmem_release_queue(void *pgmap)
+static ssize_t write_cache_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
 {
-	pmem_pagemap_cleanup(pgmap);
+	struct pmem_device *pmem = dev_to_disk(dev)->private_data;
+	bool write_cache;
+	int rc;
+
+	rc = strtobool(buf, &write_cache);
+	if (rc)
+		return rc;
+	dax_write_cache(pmem->dax_dev, write_cache);
+	return len;
 }
+static DEVICE_ATTR_RW(write_cache);
 
-static void pmem_pagemap_kill(struct dev_pagemap *pgmap)
+static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
 {
-	struct request_queue *q =
-		container_of(pgmap->ref, struct request_queue, q_usage_counter);
-
-	blk_freeze_queue_start(q);
+#ifndef CONFIG_ARCH_HAS_PMEM_API
+	if (a == &dev_attr_write_cache.attr)
+		return 0;
+#endif
+	return a->mode;
 }
 
+static struct attribute *dax_attributes[] = {
+	&dev_attr_write_cache.attr,
+	NULL,
+};
+
+static const struct attribute_group dax_attribute_group = {
+	.name = "dax",
+	.attrs = dax_attributes,
+	.is_visible = dax_visible,
+};
+
+static const struct attribute_group *pmem_attribute_groups[] = {
+	&dax_attribute_group,
+	NULL,
+};
+
 static void pmem_release_disk(void *__pmem)
 {
 	struct pmem_device *pmem = __pmem;
 
+	dax_remove_host(pmem->disk);
 	kill_dax(pmem->dax_dev);
 	put_dax(pmem->dax_dev);
 	del_gendisk(pmem->disk);
+	put_disk(pmem->disk);
 }
 
+static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap,
+		unsigned long pfn, unsigned long nr_pages, int mf_flags)
+{
+	struct pmem_device *pmem =
+			container_of(pgmap, struct pmem_device, pgmap);
+	u64 offset = PFN_PHYS(pfn) - pmem->phys_addr - pmem->data_offset;
+	u64 len = nr_pages << PAGE_SHIFT;
+
+	return dax_holder_notify_failure(pmem->dax_dev, offset, len, mf_flags);
+}
+
 static const struct dev_pagemap_ops fsdax_pagemap_ops = {
-	.kill = pmem_pagemap_kill,
-	.cleanup = pmem_pagemap_cleanup,
+	.memory_failure = pmem_pagemap_memory_failure,
 };
 
 static int pmem_attach_disk(struct device *dev,
@@ -349,17 +475,15 @@
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	int nid = dev_to_node(dev), fua;
 	struct resource *res = &nsio->res;
-	struct resource bb_res;
+	struct range bb_range;
 	struct nd_pfn *nd_pfn = NULL;
 	struct dax_device *dax_dev;
 	struct nd_pfn_sb *pfn_sb;
 	struct pmem_device *pmem;
 	struct request_queue *q;
-	struct device *gendev;
 	struct gendisk *disk;
 	void *addr;
 	int rc;
-	unsigned long flags = 0UL;
 
 	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
 	if (!pmem)
@@ -395,12 +519,14 @@ static int pmem_attach_disk(struct device *dev,
 		return -EBUSY;
 	}
 
-	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
-	if (!q)
+	disk = blk_alloc_disk(nid);
+	if (!disk)
 		return -ENOMEM;
+	q = disk->queue;
 
+	pmem->disk = disk;
+	pmem->pgmap.owner = pmem;
 	pmem->pfn_flags = PFN_DEV;
-	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {
 		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
 		pmem->pgmap.ops = &fsdax_pagemap_ops;
@@ -408,81 +534,87 @@ static int pmem_attach_disk(struct device *dev,
 		pfn_sb = nd_pfn->pfn_sb;
 		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
 		pmem->pfn_pad = resource_size(res) -
-			resource_size(&pmem->pgmap.res);
+			range_len(&pmem->pgmap.range);
 		pmem->pfn_flags |= PFN_MAP;
-		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
-		bb_res.start += pmem->data_offset;
+		bb_range = pmem->pgmap.range;
+		bb_range.start += pmem->data_offset;
 	} else if (pmem_should_map_pages(dev)) {
-		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+		pmem->pgmap.range.start = res->start;
+		pmem->pgmap.range.end = res->end;
+		pmem->pgmap.nr_range = 1;
 		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
 		pmem->pgmap.ops = &fsdax_pagemap_ops;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
-		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+		bb_range = pmem->pgmap.range;
 	} else {
-		if (devm_add_action_or_reset(dev, pmem_release_queue,
-					&pmem->pgmap))
-			return -ENOMEM;
 		addr = devm_memremap(dev, pmem->phys_addr,
 				pmem->size, ARCH_MEMREMAP_PMEM);
-		memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+		bb_range.start = res->start;
+		bb_range.end = res->end;
 	}
 
-	if (IS_ERR(addr))
-		return PTR_ERR(addr);
+	if (IS_ERR(addr)) {
+		rc = PTR_ERR(addr);
+		goto out;
+	}
 	pmem->virt_addr = addr;
 
 	blk_queue_write_cache(q, true, fua);
-	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
 	blk_queue_logical_block_size(q, pmem_sector_size(ndns));
 	blk_queue_max_hw_sectors(q, UINT_MAX);
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 	if (pmem->pfn_flags & PFN_MAP)
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
-	q->queuedata = pmem;
-
-	disk = alloc_disk_node(0, nid);
-	if (!disk)
-		return -ENOMEM;
-	pmem->disk = disk;
 
 	disk->fops = &pmem_fops;
-	disk->queue = q;
-	disk->flags = GENHD_FL_EXT_DEVT;
-	disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+	disk->private_data = pmem;
 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
 	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
 			/ 512);
 	if (devm_init_badblocks(dev, &pmem->bb))
 		return -ENOMEM;
-	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
+	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
 	disk->bb = &pmem->bb;
 
-	if (is_nvdimm_sync(nd_region))
-		flags = DAXDEV_F_SYNC;
-	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
-	if (!dax_dev) {
-		put_disk(disk);
-		return -ENOMEM;
+	dax_dev = alloc_dax(pmem, &pmem_dax_ops);
+	if (IS_ERR(dax_dev)) {
+		rc = PTR_ERR(dax_dev);
+		goto out;
	}
+	set_dax_nocache(dax_dev);
+	set_dax_nomc(dax_dev);
+	if (is_nvdimm_sync(nd_region))
+		set_dax_synchronous(dax_dev);
+	rc = dax_add_host(dax_dev, disk);
+	if (rc)
+		goto out_cleanup_dax;
 	dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
 	pmem->dax_dev = dax_dev;
-	gendev = disk_to_dev(disk);
-	gendev->groups = pmem_attribute_groups;
 
-	device_add_disk(dev, disk, NULL);
+	rc = device_add_disk(dev, disk, pmem_attribute_groups);
+	if (rc)
+		goto out_remove_host;
 	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;
 
-	revalidate_disk(disk);
+	nvdimm_check_and_set_ro(disk);
 
 	pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
 					  "badblocks");
 	if (!pmem->bb_state)
 		dev_warn(dev, "'badblocks' notification disabled\n");
-
 	return 0;
+
+out_remove_host:
+	dax_remove_host(pmem->disk);
+out_cleanup_dax:
+	kill_dax(pmem->dax_dev);
+	put_dax(pmem->dax_dev);
+out:
+	put_disk(pmem->disk);
+	return rc;
 }
 
 static int nd_pmem_probe(struct device *dev)
@@ -537,7 +669,7 @@ static int nd_pmem_probe(struct device *dev)
 	return pmem_attach_disk(dev, ndns);
 }
 
-static int nd_pmem_remove(struct device *dev)
+static void nd_pmem_remove(struct device *dev)
 {
 	struct pmem_device *pmem = dev_get_drvdata(dev);
 
@@ -545,15 +677,13 @@
 		nvdimm_namespace_detach_btt(to_nd_btt(dev));
 	else {
 		/*
-		 * Note, this assumes nd_device_lock() context to not
+		 * Note, this assumes device_lock() context to not
 		 * race nd_pmem_notify()
 		 */
 		sysfs_put(pmem->bb_state);
 		pmem->bb_state = NULL;
 	}
 	nvdimm_flush(to_nd_region(dev->parent), NULL);
-
-	return 0;
 }
 
 static void nd_pmem_shutdown(struct device *dev)
@@ -561,19 +691,16 @@
 	nvdimm_flush(to_nd_region(dev->parent), NULL);
 }
 
-static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
+static void pmem_revalidate_poison(struct device *dev)
 {
 	struct nd_region *nd_region;
 	resource_size_t offset = 0, end_trunc = 0;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
-	struct resource res;
 	struct badblocks *bb;
+	struct range range;
 	struct kernfs_node *bb_state;
 
-	if (event != NVDIMM_REVALIDATE_POISON)
-		return;
-
 	if (is_nd_btt(dev)) {
 		struct nd_btt *nd_btt = to_nd_btt(dev);
 
@@ -604,13 +731,44 @@
 		nsio = to_nd_namespace_io(&ndns->dev);
 	}
 
-	res.start = nsio->res.start + offset;
-	res.end = nsio->res.end - end_trunc;
-	nvdimm_badblocks_populate(nd_region, bb, &res);
+	range.start = nsio->res.start + offset;
+	range.end = nsio->res.end - end_trunc;
+	nvdimm_badblocks_populate(nd_region, bb, &range);
 	if (bb_state)
 		sysfs_notify_dirent(bb_state);
 }
 
+static void pmem_revalidate_region(struct device *dev)
+{
+	struct pmem_device *pmem;
+
+	if (is_nd_btt(dev)) {
+		struct nd_btt *nd_btt = to_nd_btt(dev);
+		struct btt *btt = nd_btt->btt;
+
+		nvdimm_check_and_set_ro(btt->btt_disk);
+		return;
+	}
+
+	pmem = dev_get_drvdata(dev);
+	nvdimm_check_and_set_ro(pmem->disk);
+}
+
+static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
+{
+	switch (event) {
+	case NVDIMM_REVALIDATE_POISON:
+		pmem_revalidate_poison(dev);
+		break;
+	case NVDIMM_REVALIDATE_REGION:
+		pmem_revalidate_region(dev);
+		break;
+	default:
+		dev_WARN_ONCE(dev, 1, "notify: unknown event: %d\n", event);
+		break;
+	}
+}
+
 MODULE_ALIAS("pmem");
 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
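
For readers skimming the patch, the helpers it introduces and the recovery-write path it documents boil down to two byte/sector conversions plus a strict ordering rule: clear the poison, copy the new data, and only then drop the badblock record, so competing readers keep failing until the range holds valid data. The following is a minimal standalone userspace sketch of that arithmetic and ordering; every name in it (pmem_model, recovery_write, and so on) is an illustrative stand-in, not a kernel API.

/*
 * Toy model of the ordering described above pmem_recovery_write() and
 * of the to_sect()/to_offset() arithmetic.  Nothing here touches real
 * hardware; the printf calls stand in for the kernel operations.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SECTOR_SHIFT 9

struct pmem_model {
	uint64_t data_offset;	/* byte offset of the data area */
	bool bad;		/* stand-in for the badblocks list */
};

/* device byte offset -> 512-byte sector, mirroring to_sect() in the patch */
static uint64_t to_sect(struct pmem_model *p, uint64_t offset)
{
	return (offset - p->data_offset) >> SECTOR_SHIFT;
}

/* sector -> device byte offset, mirroring to_offset() in the patch */
static uint64_t to_offset(struct pmem_model *p, uint64_t sector)
{
	return (sector << SECTOR_SHIFT) + p->data_offset;
}

static void recovery_write(struct pmem_model *p, uint64_t offset)
{
	/* 1. ask the platform/firmware to clear the media poison */
	printf("clear poison at offset %#llx\n", (unsigned long long)offset);
	/* 2. copy the new data and flush; readers still see an error here */
	printf("copy new data, flush caches\n");
	/* 3. clear the badblock record last, unblocking readers */
	p->bad = false;
	printf("sector %llu readable again\n",
	       (unsigned long long)to_sect(p, offset));
}

int main(void)
{
	struct pmem_model p = { .data_offset = 0x2000, .bad = true };

	recovery_write(&p, to_offset(&p, 8));	/* recover sector 8 */
	return 0;
}

Run under these assumptions, the output walks through the three steps in order; reordering steps 2 and 3 would model the bug the patch's comment warns about, where a concurrent read could return stale or invalid data before the recovery write lands.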