aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/nfit/core.c4
-rw-r--r--drivers/block/rbd.c2188
-rw-r--r--drivers/block/rbd_types.h10
-rw-r--r--drivers/dax/bus.c21
-rw-r--r--drivers/dax/super.c19
-rw-r--r--drivers/md/dm-kcopyd.c34
-rw-r--r--drivers/md/dm-snap.c10
-rw-r--r--drivers/md/dm-table.c24
-rw-r--r--drivers/md/dm-zoned-metadata.c24
-rw-r--r--drivers/md/dm-zoned.h28
-rw-r--r--drivers/md/dm.c5
-rw-r--r--drivers/md/dm.h5
-rw-r--r--drivers/nvdimm/Makefile1
-rw-r--r--drivers/nvdimm/claim.c6
-rw-r--r--drivers/nvdimm/namespace_devs.c8
-rw-r--r--drivers/nvdimm/nd.h1
-rw-r--r--drivers/nvdimm/nd_virtio.c125
-rw-r--r--drivers/nvdimm/pmem.c18
-rw-r--r--drivers/nvdimm/region_devs.c33
-rw-r--r--drivers/nvdimm/virtio_pmem.c122
-rw-r--r--drivers/nvdimm/virtio_pmem.h55
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/virtio/Kconfig11
-rw-r--r--drivers/watchdog/Kconfig10
-rw-r--r--drivers/watchdog/acquirewdt.c6
-rw-r--r--drivers/watchdog/advantechwdt.c6
-rw-r--r--drivers/watchdog/aspeed_wdt.c8
-rw-r--r--drivers/watchdog/bcm2835_wdt.c5
-rw-r--r--drivers/watchdog/bcm7038_wdt.c4
-rw-r--r--drivers/watchdog/bcm_kona_wdt.c4
-rw-r--r--drivers/watchdog/cadence_wdt.c4
-rw-r--r--drivers/watchdog/da9052_wdt.c9
-rw-r--r--drivers/watchdog/da9062_wdt.c5
-rw-r--r--drivers/watchdog/davinci_wdt.c14
-rw-r--r--drivers/watchdog/digicolor_wdt.c9
-rw-r--r--drivers/watchdog/ebc-c384_wdt.c9
-rw-r--r--drivers/watchdog/eurotechwdt.c6
-rw-r--r--drivers/watchdog/ftwdt010_wdt.c4
-rw-r--r--drivers/watchdog/gpio_wdt.c7
-rw-r--r--drivers/watchdog/hpwdt.c59
-rw-r--r--drivers/watchdog/i6300esb.c5
-rw-r--r--drivers/watchdog/iTCO_vendor_support.c7
-rw-r--r--drivers/watchdog/iTCO_wdt.c6
-rw-r--r--drivers/watchdog/ib700wdt.c6
-rw-r--r--drivers/watchdog/ie6xx_wdt.c8
-rw-r--r--drivers/watchdog/imx2_wdt.c4
-rw-r--r--drivers/watchdog/imx_sc_wdt.c123
-rw-r--r--drivers/watchdog/intel-mid_wdt.c4
-rw-r--r--drivers/watchdog/jz4740_wdt.c57
-rw-r--r--drivers/watchdog/loongson1_wdt.c4
-rw-r--r--drivers/watchdog/max77620_wdt.c8
-rw-r--r--drivers/watchdog/mei_wdt.c4
-rw-r--r--drivers/watchdog/mena21_wdt.c4
-rw-r--r--drivers/watchdog/menf21bmc_wdt.c4
-rw-r--r--drivers/watchdog/mpc8xxx_wdt.c5
-rw-r--r--drivers/watchdog/mv64x60_wdt.c6
-rw-r--r--drivers/watchdog/ni903x_wdt.c4
-rw-r--r--drivers/watchdog/nic7018_wdt.c1
-rw-r--r--drivers/watchdog/npcm_wdt.c4
-rw-r--r--drivers/watchdog/nv_tco.h6
-rw-r--r--drivers/watchdog/octeon-wdt-main.c11
-rw-r--r--drivers/watchdog/of_xilinx_wdt.c4
-rw-r--r--drivers/watchdog/omap_wdt.c6
-rw-r--r--drivers/watchdog/omap_wdt.h21
-rw-r--r--drivers/watchdog/pc87413_wdt.c6
-rw-r--r--drivers/watchdog/pcwd_pci.c6
-rw-r--r--drivers/watchdog/pcwd_usb.c6
-rw-r--r--drivers/watchdog/pic32-dmt.c4
-rw-r--r--drivers/watchdog/pic32-wdt.c4
-rw-r--r--drivers/watchdog/pnx4008_wdt.c9
-rw-r--r--drivers/watchdog/qcom-wdt.c4
-rw-r--r--drivers/watchdog/rave-sp-wdt.c1
-rw-r--r--drivers/watchdog/renesas_wdt.c35
-rw-r--r--drivers/watchdog/retu_wdt.c10
-rw-r--r--drivers/watchdog/s3c2410_wdt.c4
-rw-r--r--drivers/watchdog/sa1100_wdt.c6
-rw-r--r--drivers/watchdog/sama5d4_wdt.c29
-rw-r--r--drivers/watchdog/sbc7240_wdt.c11
-rw-r--r--drivers/watchdog/sbc8360.c6
-rw-r--r--drivers/watchdog/sch311x_wdt.c6
-rw-r--r--drivers/watchdog/softdog.c6
-rw-r--r--drivers/watchdog/sp5100_tco.c4
-rw-r--r--drivers/watchdog/sp805_wdt.c5
-rw-r--r--drivers/watchdog/sprd_wdt.c1
-rw-r--r--drivers/watchdog/st_lpc_wdt.c4
-rw-r--r--drivers/watchdog/stm32_iwdg.c4
-rw-r--r--drivers/watchdog/stmp3xxx_rtc_wdt.c4
-rw-r--r--drivers/watchdog/tegra_wdt.c4
-rw-r--r--drivers/watchdog/ts4800_wdt.c4
-rw-r--r--drivers/watchdog/w83627hf_wdt.c6
-rw-r--r--drivers/watchdog/wafer5823wdt.c6
-rw-r--r--drivers/watchdog/watchdog_core.c22
-rw-r--r--drivers/watchdog/watchdog_core.h6
-rw-r--r--drivers/watchdog/watchdog_dev.c54
-rw-r--r--drivers/watchdog/wd501p.h6
-rw-r--r--drivers/watchdog/wdt.c6
-rw-r--r--drivers/watchdog/wdt_pci.c6
-rw-r--r--drivers/watchdog/wm831x_wdt.c9
-rw-r--r--drivers/watchdog/xen_wdt.c4
-rw-r--r--drivers/xen/swiotlb-xen.c2
100 files changed, 2465 insertions, 1080 deletions
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 23022cf20d26..c02fa27dd3f3 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2426,7 +2426,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
offset = to_interleave_offset(offset, mmio);
writeq(cmd, mmio->addr.base + offset);
- nvdimm_flush(nfit_blk->nd_region);
+ nvdimm_flush(nfit_blk->nd_region, NULL);
if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
readq(mmio->addr.base + offset);
@@ -2475,7 +2475,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
}
if (rw)
- nvdimm_flush(nfit_blk->nd_region);
+ nvdimm_flush(nfit_blk->nd_region, NULL);
rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
return rc;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index e5009a34f9c2..3327192bb71f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -115,6 +115,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_FEATURE_LAYERING (1ULL<<0)
#define RBD_FEATURE_STRIPINGV2 (1ULL<<1)
#define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2)
+#define RBD_FEATURE_OBJECT_MAP (1ULL<<3)
+#define RBD_FEATURE_FAST_DIFF (1ULL<<4)
#define RBD_FEATURE_DEEP_FLATTEN (1ULL<<5)
#define RBD_FEATURE_DATA_POOL (1ULL<<7)
#define RBD_FEATURE_OPERATIONS (1ULL<<8)
@@ -122,6 +124,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \
RBD_FEATURE_STRIPINGV2 | \
RBD_FEATURE_EXCLUSIVE_LOCK | \
+ RBD_FEATURE_OBJECT_MAP | \
+ RBD_FEATURE_FAST_DIFF | \
RBD_FEATURE_DEEP_FLATTEN | \
RBD_FEATURE_DATA_POOL | \
RBD_FEATURE_OPERATIONS)
@@ -203,6 +207,11 @@ struct rbd_client {
struct list_head node;
};
+struct pending_result {
+ int result; /* first nonzero result */
+ int num_pending;
+};
+
struct rbd_img_request;
enum obj_request_type {
@@ -219,6 +228,18 @@ enum obj_operation_type {
OBJ_OP_ZEROOUT,
};
+#define RBD_OBJ_FLAG_DELETION (1U << 0)
+#define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1)
+#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2)
+#define RBD_OBJ_FLAG_MAY_EXIST (1U << 3)
+#define RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT (1U << 4)
+
+enum rbd_obj_read_state {
+ RBD_OBJ_READ_START = 1,
+ RBD_OBJ_READ_OBJECT,
+ RBD_OBJ_READ_PARENT,
+};
+
/*
* Writes go through the following state machine to deal with
* layering:
@@ -245,17 +266,28 @@ enum obj_operation_type {
* even if there is a parent).
*/
enum rbd_obj_write_state {
- RBD_OBJ_WRITE_FLAT = 1,
- RBD_OBJ_WRITE_GUARD,
- RBD_OBJ_WRITE_READ_FROM_PARENT,
- RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC,
- RBD_OBJ_WRITE_COPYUP_OPS,
+ RBD_OBJ_WRITE_START = 1,
+ RBD_OBJ_WRITE_PRE_OBJECT_MAP,
+ RBD_OBJ_WRITE_OBJECT,
+ __RBD_OBJ_WRITE_COPYUP,
+ RBD_OBJ_WRITE_COPYUP,
+ RBD_OBJ_WRITE_POST_OBJECT_MAP,
+};
+
+enum rbd_obj_copyup_state {
+ RBD_OBJ_COPYUP_START = 1,
+ RBD_OBJ_COPYUP_READ_PARENT,
+ __RBD_OBJ_COPYUP_OBJECT_MAPS,
+ RBD_OBJ_COPYUP_OBJECT_MAPS,
+ __RBD_OBJ_COPYUP_WRITE_OBJECT,
+ RBD_OBJ_COPYUP_WRITE_OBJECT,
};
struct rbd_obj_request {
struct ceph_object_extent ex;
+ unsigned int flags; /* RBD_OBJ_FLAG_* */
union {
- bool tried_parent; /* for reads */
+ enum rbd_obj_read_state read_state; /* for reads */
enum rbd_obj_write_state write_state; /* for writes */
};
@@ -271,14 +303,15 @@ struct rbd_obj_request {
u32 bvec_idx;
};
};
+
+ enum rbd_obj_copyup_state copyup_state;
struct bio_vec *copyup_bvecs;
u32 copyup_bvec_count;
- struct ceph_osd_request *osd_req;
-
- u64 xferred; /* bytes transferred */
- int result;
+ struct list_head osd_reqs; /* w/ r_private_item */
+ struct mutex state_mutex;
+ struct pending_result pending;
struct kref kref;
};
@@ -287,11 +320,19 @@ enum img_req_flags {
IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */
};
+enum rbd_img_state {
+ RBD_IMG_START = 1,
+ RBD_IMG_EXCLUSIVE_LOCK,
+ __RBD_IMG_OBJECT_REQUESTS,
+ RBD_IMG_OBJECT_REQUESTS,
+};
+
struct rbd_img_request {
struct rbd_device *rbd_dev;
enum obj_operation_type op_type;
enum obj_request_type data_type;
unsigned long flags;
+ enum rbd_img_state state;
union {
u64 snap_id; /* for reads */
struct ceph_snap_context *snapc; /* for writes */
@@ -300,13 +341,14 @@ struct rbd_img_request {
struct request *rq; /* block request */
struct rbd_obj_request *obj_request; /* obj req initiator */
};
- spinlock_t completion_lock;
- u64 xferred;/* aggregate bytes transferred */
- int result; /* first nonzero obj_request result */
+ struct list_head lock_item;
struct list_head object_extents; /* obj_req.ex structs */
- u32 pending_count;
+ struct mutex state_mutex;
+ struct pending_result pending;
+ struct work_struct work;
+ int work_result;
struct kref kref;
};
@@ -380,7 +422,17 @@ struct rbd_device {
struct work_struct released_lock_work;
struct delayed_work lock_dwork;
struct work_struct unlock_work;
- wait_queue_head_t lock_waitq;
+ spinlock_t lock_lists_lock;
+ struct list_head acquiring_list;
+ struct list_head running_list;
+ struct completion acquire_wait;
+ int acquire_err;
+ struct completion releasing_wait;
+
+ spinlock_t object_map_lock;
+ u8 *object_map;
+ u64 object_map_size; /* in objects */
+ u64 object_map_flags;
struct workqueue_struct *task_wq;
@@ -408,12 +460,10 @@ struct rbd_device {
* Flag bits for rbd_dev->flags:
* - REMOVING (which is coupled with rbd_dev->open_count) is protected
* by rbd_dev->lock
- * - BLACKLISTED is protected by rbd_dev->lock_rwsem
*/
enum rbd_dev_flags {
RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */
RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */
- RBD_DEV_FLAG_BLACKLISTED, /* our ceph_client is blacklisted */
};
static DEFINE_MUTEX(client_mutex); /* Serialize client creation */
@@ -466,6 +516,8 @@ static int minor_to_rbd_dev_id(int minor)
static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
{
+ lockdep_assert_held(&rbd_dev->lock_rwsem);
+
return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING;
}
@@ -583,6 +635,26 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
u8 *order, u64 *snap_size);
static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
u64 *snap_features);
+static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev);
+
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
+static void rbd_img_handle_request(struct rbd_img_request *img_req, int result);
+
+/*
+ * Return true if nothing else is pending.
+ */
+static bool pending_result_dec(struct pending_result *pending, int *result)
+{
+ rbd_assert(pending->num_pending > 0);
+
+ if (*result && !pending->result)
+ pending->result = *result;
+ if (--pending->num_pending)
+ return false;
+
+ *result = pending->result;
+ return true;
+}
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
@@ -1317,6 +1389,8 @@ static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
u32 bytes)
{
+ dout("%s %p data buf %u~%u\n", __func__, obj_req, off, bytes);
+
switch (obj_req->img_request->data_type) {
case OBJ_REQUEST_BIO:
zero_bios(&obj_req->bio_pos, off, bytes);
@@ -1339,13 +1413,6 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
kref_put(&obj_request->kref, rbd_obj_request_destroy);
}
-static void rbd_img_request_get(struct rbd_img_request *img_request)
-{
- dout("%s: img %p (was %d)\n", __func__, img_request,
- kref_read(&img_request->kref));
- kref_get(&img_request->kref);
-}
-
static void rbd_img_request_destroy(struct kref *kref);
static void rbd_img_request_put(struct rbd_img_request *img_request)
{
@@ -1362,7 +1429,6 @@ static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
/* Image request now owns object's original reference */
obj_request->img_request = img_request;
- img_request->pending_count++;
dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
}
@@ -1375,13 +1441,13 @@ static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
rbd_obj_request_put(obj_request);
}
-static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
+static void rbd_osd_submit(struct ceph_osd_request *osd_req)
{
- struct ceph_osd_request *osd_req = obj_request->osd_req;
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
- dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__,
- obj_request, obj_request->ex.oe_objno, obj_request->ex.oe_off,
- obj_request->ex.oe_len, osd_req);
+ dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n",
+ __func__, osd_req, obj_req, obj_req->ex.oe_objno,
+ obj_req->ex.oe_off, obj_req->ex.oe_len);
ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
}
@@ -1457,41 +1523,38 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req)
}
}
-static void rbd_obj_handle_request(struct rbd_obj_request *obj_req);
-
static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
{
struct rbd_obj_request *obj_req = osd_req->r_priv;
+ int result;
dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
osd_req->r_result, obj_req);
- rbd_assert(osd_req == obj_req->osd_req);
- obj_req->result = osd_req->r_result < 0 ? osd_req->r_result : 0;
- if (!obj_req->result && !rbd_img_is_write(obj_req->img_request))
- obj_req->xferred = osd_req->r_result;
+ /*
+ * Writes aren't allowed to return a data payload. In some
+ * guarded write cases (e.g. stat + zero on an empty object)
+ * a stat response makes it through, but we don't care.
+ */
+ if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request))
+ result = 0;
else
- /*
- * Writes aren't allowed to return a data payload. In some
- * guarded write cases (e.g. stat + zero on an empty object)
- * a stat response makes it through, but we don't care.
- */
- obj_req->xferred = 0;
+ result = osd_req->r_result;
- rbd_obj_handle_request(obj_req);
+ rbd_obj_handle_request(obj_req, result);
}
-static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
+static void rbd_osd_format_read(struct ceph_osd_request *osd_req)
{
- struct ceph_osd_request *osd_req = obj_request->osd_req;
+ struct rbd_obj_request *obj_request = osd_req->r_priv;
osd_req->r_flags = CEPH_OSD_FLAG_READ;
osd_req->r_snapid = obj_request->img_request->snap_id;
}
-static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
+static void rbd_osd_format_write(struct ceph_osd_request *osd_req)
{
- struct ceph_osd_request *osd_req = obj_request->osd_req;
+ struct rbd_obj_request *obj_request = osd_req->r_priv;
osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&osd_req->r_mtime);
@@ -1499,19 +1562,21 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
}
static struct ceph_osd_request *
-__rbd_osd_req_create(struct rbd_obj_request *obj_req,
- struct ceph_snap_context *snapc, unsigned int num_ops)
+__rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
+ struct ceph_snap_context *snapc, int num_ops)
{
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct ceph_osd_request *req;
const char *name_format = rbd_dev->image_format == 1 ?
RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
+ int ret;
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
if (!req)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+ list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
req->r_callback = rbd_osd_req_callback;
req->r_priv = obj_req;
@@ -1522,27 +1587,20 @@ __rbd_osd_req_create(struct rbd_obj_request *obj_req,
ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
req->r_base_oloc.pool = rbd_dev->layout.pool_id;
- if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
- rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
- goto err_req;
+ ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
+ rbd_dev->header.object_prefix,
+ obj_req->ex.oe_objno);
+ if (ret)
+ return ERR_PTR(ret);
return req;
-
-err_req:
- ceph_osdc_put_request(req);
- return NULL;
}
static struct ceph_osd_request *
-rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
+rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
{
- return __rbd_osd_req_create(obj_req, obj_req->img_request->snapc,
- num_ops);
-}
-
-static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
-{
- ceph_osdc_put_request(osd_req);
+ return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
+ num_ops);
}
static struct rbd_obj_request *rbd_obj_request_create(void)
@@ -1554,6 +1612,8 @@ static struct rbd_obj_request *rbd_obj_request_create(void)
return NULL;
ceph_object_extent_init(&obj_request->ex);
+ INIT_LIST_HEAD(&obj_request->osd_reqs);
+ mutex_init(&obj_request->state_mutex);
kref_init(&obj_request->kref);
dout("%s %p\n", __func__, obj_request);
@@ -1563,14 +1623,19 @@ static struct rbd_obj_request *rbd_obj_request_create(void)
static void rbd_obj_request_destroy(struct kref *kref)
{
struct rbd_obj_request *obj_request;
+ struct ceph_osd_request *osd_req;
u32 i;
obj_request = container_of(kref, struct rbd_obj_request, kref);
dout("%s: obj %p\n", __func__, obj_request);
- if (obj_request->osd_req)
- rbd_osd_req_destroy(obj_request->osd_req);
+ while (!list_empty(&obj_request->osd_reqs)) {
+ osd_req = list_first_entry(&obj_request->osd_reqs,
+ struct ceph_osd_request, r_private_item);
+ list_del_init(&osd_req->r_private_item);
+ ceph_osdc_put_request(osd_req);
+ }
switch (obj_request->img_request->data_type) {
case OBJ_REQUEST_NODATA:
@@ -1684,8 +1749,9 @@ static struct rbd_img_request *rbd_img_request_create(
if (rbd_dev_parent_get(rbd_dev))
img_request_layered_set(img_request);
- spin_lock_init(&img_request->completion_lock);
+ INIT_LIST_HEAD(&img_request->lock_item);
INIT_LIST_HEAD(&img_request->object_extents);
+ mutex_init(&img_request->state_mutex);
kref_init(&img_request->kref);
dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
@@ -1703,6 +1769,7 @@ static void rbd_img_request_destroy(struct kref *kref)
dout("%s: img %p\n", __func__, img_request);
+ WARN_ON(!list_empty(&img_request->lock_item));
for_each_obj_request_safe(img_request, obj_request, next_obj_request)
rbd_img_obj_request_del(img_request, obj_request);
@@ -1717,6 +1784,466 @@ static void rbd_img_request_destroy(struct kref *kref)
kmem_cache_free(rbd_img_request_cache, img_request);
}
+#define BITS_PER_OBJ 2
+#define OBJS_PER_BYTE (BITS_PER_BYTE / BITS_PER_OBJ)
+#define OBJ_MASK ((1 << BITS_PER_OBJ) - 1)
+
+static void __rbd_object_map_index(struct rbd_device *rbd_dev, u64 objno,
+ u64 *index, u8 *shift)
+{
+ u32 off;
+
+ rbd_assert(objno < rbd_dev->object_map_size);
+ *index = div_u64_rem(objno, OBJS_PER_BYTE, &off);
+ *shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ;
+}
+
+static u8 __rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
+{
+ u64 index;
+ u8 shift;
+
+ lockdep_assert_held(&rbd_dev->object_map_lock);
+ __rbd_object_map_index(rbd_dev, objno, &index, &shift);
+ return (rbd_dev->object_map[index] >> shift) & OBJ_MASK;
+}
+
+static void __rbd_object_map_set(struct rbd_device *rbd_dev, u64 objno, u8 val)
+{
+ u64 index;
+ u8 shift;
+ u8 *p;
+
+ lockdep_assert_held(&rbd_dev->object_map_lock);
+ rbd_assert(!(val & ~OBJ_MASK));
+
+ __rbd_object_map_index(rbd_dev, objno, &index, &shift);
+ p = &rbd_dev->object_map[index];
+ *p = (*p & ~(OBJ_MASK << shift)) | (val << shift);
+}
+
+static u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
+{
+ u8 state;
+
+ spin_lock(&rbd_dev->object_map_lock);
+ state = __rbd_object_map_get(rbd_dev, objno);
+ spin_unlock(&rbd_dev->object_map_lock);
+ return state;
+}
+
+static bool use_object_map(struct rbd_device *rbd_dev)
+{
+ return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
+ !(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
+}
+
+static bool rbd_object_map_may_exist(struct rbd_device *rbd_dev, u64 objno)
+{
+ u8 state;
+
+ /* fall back to default logic if object map is disabled or invalid */
+ if (!use_object_map(rbd_dev))
+ return true;
+
+ state = rbd_object_map_get(rbd_dev, objno);
+ return state != OBJECT_NONEXISTENT;
+}
+
+static void rbd_object_map_name(struct rbd_device *rbd_dev, u64 snap_id,
+ struct ceph_object_id *oid)
+{
+ if (snap_id == CEPH_NOSNAP)
+ ceph_oid_printf(oid, "%s%s", RBD_OBJECT_MAP_PREFIX,
+ rbd_dev->spec->image_id);
+ else
+ ceph_oid_printf(oid, "%s%s.%016llx", RBD_OBJECT_MAP_PREFIX,
+ rbd_dev->spec->image_id, snap_id);
+}
+
+static int rbd_object_map_lock(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ CEPH_DEFINE_OID_ONSTACK(oid);
+ u8 lock_type;
+ char *lock_tag;
+ struct ceph_locker *lockers;
+ u32 num_lockers;
+ bool broke_lock = false;
+ int ret;
+
+ rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid);
+
+again:
+ ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
+ CEPH_CLS_LOCK_EXCLUSIVE, "", "", "", 0);
+ if (ret != -EBUSY || broke_lock) {
+ if (ret == -EEXIST)
+ ret = 0; /* already locked by myself */
+ if (ret)
+ rbd_warn(rbd_dev, "failed to lock object map: %d", ret);
+ return ret;
+ }
+
+ ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc,
+ RBD_LOCK_NAME, &lock_type, &lock_tag,
+ &lockers, &num_lockers);
+ if (ret) {
+ if (ret == -ENOENT)
+ goto again;
+
+ rbd_warn(rbd_dev, "failed to get object map lockers: %d", ret);
+ return ret;
+ }
+
+ kfree(lock_tag);
+ if (num_lockers == 0)
+ goto again;
+
+ rbd_warn(rbd_dev, "breaking object map lock owned by %s%llu",
+ ENTITY_NAME(lockers[0].id.name));
+
+ ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc,
+ RBD_LOCK_NAME, lockers[0].id.cookie,
+ &lockers[0].id.name);
+ ceph_free_lockers(lockers, num_lockers);
+ if (ret) {
+ if (ret == -ENOENT)
+ goto again;
+
+ rbd_warn(rbd_dev, "failed to break object map lock: %d", ret);
+ return ret;
+ }
+
+ broke_lock = true;
+ goto again;
+}
+
+static void rbd_object_map_unlock(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ CEPH_DEFINE_OID_ONSTACK(oid);
+ int ret;
+
+ rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid);
+
+ ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
+ "");
+ if (ret && ret != -ENOENT)
+ rbd_warn(rbd_dev, "failed to unlock object map: %d", ret);
+}
+
+static int decode_object_map_header(void **p, void *end, u64 *object_map_size)
+{
+ u8 struct_v;
+ u32 struct_len;
+ u32 header_len;
+ void *header_end;
+ int ret;
+
+ ceph_decode_32_safe(p, end, header_len, e_inval);
+ header_end = *p + header_len;
+
+ ret = ceph_start_decoding(p, end, 1, "BitVector header", &struct_v,
+ &struct_len);
+ if (ret)
+ return ret;
+
+ ceph_decode_64_safe(p, end, *object_map_size, e_inval);
+
+ *p = header_end;
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+static int __rbd_object_map_load(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ CEPH_DEFINE_OID_ONSTACK(oid);
+ struct page **pages;
+ void *p, *end;
+ size_t reply_len;
+ u64 num_objects;
+ u64 object_map_bytes;
+ u64 object_map_size;
+ int num_pages;
+ int ret;
+
+ rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size);
+
+ num_objects = ceph_get_num_objects(&rbd_dev->layout,
+ rbd_dev->mapping.size);
+ object_map_bytes = DIV_ROUND_UP_ULL(num_objects * BITS_PER_OBJ,
+ BITS_PER_BYTE);
+ num_pages = calc_pages_for(0, object_map_bytes) + 1;
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ reply_len = num_pages * PAGE_SIZE;
+ rbd_object_map_name(rbd_dev, rbd_dev->spec->snap_id, &oid);
+ ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc,
+ "rbd", "object_map_load", CEPH_OSD_FLAG_READ,
+ NULL, 0, pages, &reply_len);
+ if (ret)
+ goto out;
+
+ p = page_address(pages[0]);
+ end = p + min(reply_len, (size_t)PAGE_SIZE);
+ ret = decode_object_map_header(&p, end, &object_map_size);
+ if (ret)
+ goto out;
+
+ if (object_map_size != num_objects) {
+ rbd_warn(rbd_dev, "object map size mismatch: %llu vs %llu",
+ object_map_size, num_objects);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (offset_in_page(p) + object_map_bytes > reply_len) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL);
+ if (!rbd_dev->object_map) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rbd_dev->object_map_size = object_map_size;
+ ceph_copy_from_page_vector(pages, rbd_dev->object_map,
+ offset_in_page(p), object_map_bytes);
+
+out:
+ ceph_release_page_vector(pages, num_pages);
+ return ret;
+}
+
+static void rbd_object_map_free(struct rbd_device *rbd_dev)
+{
+ kvfree(rbd_dev->object_map);
+ rbd_dev->object_map = NULL;
+ rbd_dev->object_map_size = 0;
+}
+
+static int rbd_object_map_load(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ ret = __rbd_object_map_load(rbd_dev);
+ if (ret)
+ return ret;
+
+ ret = rbd_dev_v2_get_flags(rbd_dev);
+ if (ret) {
+ rbd_object_map_free(rbd_dev);
+ return ret;
+ }
+
+ if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)
+ rbd_warn(rbd_dev, "object map is invalid");
+
+ return 0;
+}
+
+static int rbd_object_map_open(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ ret = rbd_object_map_lock(rbd_dev);
+ if (ret)
+ return ret;
+
+ ret = rbd_object_map_load(rbd_dev);
+ if (ret) {
+ rbd_object_map_unlock(rbd_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void rbd_object_map_close(struct rbd_device *rbd_dev)
+{
+ rbd_object_map_free(rbd_dev);
+ rbd_object_map_unlock(rbd_dev);
+}
+
+/*
+ * This function needs snap_id (or more precisely just something to
+ * distinguish between HEAD and snapshot object maps), new_state and
+ * current_state that were passed to rbd_object_map_update().
+ *
+ * To avoid allocating and stashing a context we piggyback on the OSD
+ * request. A HEAD update has two ops (assert_locked). For new_state
+ * and current_state we decode our own object_map_update op, encoded in
+ * rbd_cls_object_map_update().
+ */
+static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
+ struct ceph_osd_request *osd_req)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ struct ceph_osd_data *osd_data;
+ u64 objno;
+ u8 state, new_state, current_state;
+ bool has_current_state;
+ void *p;
+
+ if (osd_req->r_result)
+ return osd_req->r_result;
+
+ /*
+ * Nothing to do for a snapshot object map.
+ */
+ if (osd_req->r_num_ops == 1)
+ return 0;
+
+ /*
+ * Update in-memory HEAD object map.
+ */
+ rbd_assert(osd_req->r_num_ops == 2);
+ osd_data = osd_req_op_data(osd_req, 1, cls, request_data);
+ rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES);
+
+ p = page_address(osd_data->pages[0]);
+ objno = ceph_decode_64(&p);
+ rbd_assert(objno == obj_req->ex.oe_objno);
+ rbd_assert(ceph_decode_64(&p) == objno + 1);
+ new_state = ceph_decode_8(&p);
+ has_current_state = ceph_decode_8(&p);
+ if (has_current_state)
+ current_state = ceph_decode_8(&p);
+
+ spin_lock(&rbd_dev->object_map_lock);
+ state = __rbd_object_map_get(rbd_dev, objno);
+ if (!has_current_state || current_state == state ||
+ (current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN))
+ __rbd_object_map_set(rbd_dev, objno, new_state);
+ spin_unlock(&rbd_dev->object_map_lock);
+
+ return 0;
+}
+
+static void rbd_object_map_callback(struct ceph_osd_request *osd_req)
+{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
+ int result;
+
+ dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
+ osd_req->r_result, obj_req);
+
+ result = rbd_object_map_update_finish(obj_req, osd_req);
+ rbd_obj_handle_request(obj_req, result);
+}
+
+static bool update_needed(struct rbd_device *rbd_dev, u64 objno, u8 new_state)
+{
+ u8 state = rbd_object_map_get(rbd_dev, objno);
+
+ if (state == new_state ||
+ (new_state == OBJECT_PENDING && state == OBJECT_NONEXISTENT) ||
+ (new_state == OBJECT_NONEXISTENT && state != OBJECT_PENDING))
+ return false;
+
+ return true;
+}
+
+static int rbd_cls_object_map_update(struct ceph_osd_request *req,
+ int which, u64 objno, u8 new_state,
+ const u8 *current_state)
+{
+ struct page **pages;
+ void *p, *start;
+ int ret;
+
+ ret = osd_req_op_cls_init(req, which, "rbd", "object_map_update");
+ if (ret)
+ return ret;
+
+ pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ p = start = page_address(pages[0]);
+ ceph_encode_64(&p, objno);
+ ceph_encode_64(&p, objno + 1);
+ ceph_encode_8(&p, new_state);
+ if (current_state) {
+ ceph_encode_8(&p, 1);
+ ceph_encode_8(&p, *current_state);
+ } else {
+ ceph_encode_8(&p, 0);
+ }
+
+ osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0,
+ false, true);
+ return 0;
+}
+
+/*
+ * Return:
+ * 0 - object map update sent
+ * 1 - object map update isn't needed
+ * <0 - error
+ */
+static int rbd_object_map_update(struct rbd_obj_request *obj_req, u64 snap_id,
+ u8 new_state, const u8 *current_state)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_osd_request *req;
+ int num_ops = 1;
+ int which = 0;
+ int ret;
+
+ if (snap_id == CEPH_NOSNAP) {
+ if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state))
+ return 1;
+
+ num_ops++; /* assert_locked */
+ }
+
+ req = ceph_osdc_alloc_request(osdc, NULL, num_ops, false, GFP_NOIO);
+ if (!req)
+ return -ENOMEM;
+
+ list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
+ req->r_callback = rbd_object_map_callback;
+ req->r_priv = obj_req;
+
+ rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid);
+ ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
+ req->r_flags = CEPH_OSD_FLAG_WRITE;
+ ktime_get_real_ts64(&req->r_mtime);
+
+ if (snap_id == CEPH_NOSNAP) {
+ /*
+ * Protect against possible race conditions during lock
+ * ownership transitions.
+ */
+ ret = ceph_cls_assert_locked(req, which++, RBD_LOCK_NAME,
+ CEPH_CLS_LOCK_EXCLUSIVE, "", "");
+ if (ret)
+ return ret;
+ }
+
+ ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno,
+ new_state, current_state);
+ if (ret)
+ return ret;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ return ret;
+
+ ceph_osdc_start_request(osdc, req, false);
+ return 0;
+}
+
static void prune_extents(struct ceph_file_extent *img_extents,
u32 *num_img_extents, u64 overlap)
{
@@ -1764,11 +2291,13 @@ static int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req,
return 0;
}
-static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
+static void rbd_osd_setup_data(struct ceph_osd_request *osd_req, int which)
{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
+
switch (obj_req->img_request->data_type) {
case OBJ_REQUEST_BIO:
- osd_req_op_extent_osd_data_bio(obj_req->osd_req, which,
+ osd_req_op_extent_osd_data_bio(osd_req, which,
&obj_req->bio_pos,
obj_req->ex.oe_len);
break;
@@ -1777,7 +2306,7 @@ static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
rbd_assert(obj_req->bvec_pos.iter.bi_size ==
obj_req->ex.oe_len);
rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
- osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which,
+ osd_req_op_extent_osd_data_bvec_pos(osd_req, which,
&obj_req->bvec_pos);
break;
default:
@@ -1785,22 +2314,7 @@ static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
}
}
-static int rbd_obj_setup_read(struct rbd_obj_request *obj_req)
-{
- obj_req->osd_req = __rbd_osd_req_create(obj_req, NULL, 1);
- if (!obj_req->osd_req)
- return -ENOMEM;
-
- osd_req_op_extent_init(obj_req->osd_req, 0, CEPH_OSD_OP_READ,
- obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
- rbd_osd_req_setup_data(obj_req, 0);
-
- rbd_osd_req_format_read(obj_req);
- return 0;
-}
-
-static int __rbd_obj_setup_stat(struct rbd_obj_request *obj_req,
- unsigned int which)
+static int rbd_osd_setup_stat(struct ceph_osd_request *osd_req, int which)
{
struct page **pages;
@@ -1816,45 +2330,60 @@ static int __rbd_obj_setup_stat(struct rbd_obj_request *obj_req,
if (IS_ERR(pages))
return PTR_ERR(pages);
- osd_req_op_init(obj_req->osd_req, which, CEPH_OSD_OP_STAT, 0);
- osd_req_op_raw_data_in_pages(obj_req->osd_req, which, pages,
+ osd_req_op_init(osd_req, which, CEPH_OSD_OP_STAT, 0);
+ osd_req_op_raw_data_in_pages(osd_req, which, pages,
8 + sizeof(struct ceph_timespec),
0, false, true);
return 0;
}
-static int count_write_ops(struct rbd_obj_request *obj_req)
+static int rbd_osd_setup_copyup(struct ceph_osd_request *osd_req, int which,
+ u32 bytes)
+{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
+ int ret;
+
+ ret = osd_req_op_cls_init(osd_req, which, "rbd", "copyup");
+ if (ret)
+ return ret;
+
+ osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs,
+ obj_req->copyup_bvec_count, bytes);
+ return 0;
+}
+
+static int rbd_obj_init_read(struct rbd_obj_request *obj_req)
{
- return 2; /* setallochint + write/writefull */
+ obj_req->read_state = RBD_OBJ_READ_START;
+ return 0;
}
-static void __rbd_obj_setup_write(struct rbd_obj_request *obj_req,
- unsigned int which)
+static void __rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
+ int which)
{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
u16 opcode;
- osd_req_op_alloc_hint_init(obj_req->osd_req, which++,
- rbd_dev->layout.object_size,
- rbd_dev->layout.object_size);
+ if (!use_object_map(rbd_dev) ||
+ !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) {
+ osd_req_op_alloc_hint_init(osd_req, which++,
+ rbd_dev->layout.object_size,
+ rbd_dev->layout.object_size);
+ }
if (rbd_obj_is_entire(obj_req))
opcode = CEPH_OSD_OP_WRITEFULL;
else
opcode = CEPH_OSD_OP_WRITE;
- osd_req_op_extent_init(obj_req->osd_req, which, opcode,
+ osd_req_op_extent_init(osd_req, which, opcode,
obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
- rbd_osd_req_setup_data(obj_req, which++);
-
- rbd_assert(which == obj_req->osd_req->r_num_ops);
- rbd_osd_req_format_write(obj_req);
+ rbd_osd_setup_data(osd_req, which);
}
-static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
+static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
{
- unsigned int num_osd_ops, which = 0;
- bool need_guard;
int ret;
/* reverse map the entire object onto the parent */
@@ -1862,24 +2391,10 @@ static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- need_guard = rbd_obj_copyup_enabled(obj_req);
- num_osd_ops = need_guard + count_write_ops(obj_req);
-
- obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
- if (!obj_req->osd_req)
- return -ENOMEM;
-
- if (need_guard) {
- ret = __rbd_obj_setup_stat(obj_req, which++);
- if (ret)
- return ret;
+ if (rbd_obj_copyup_enabled(obj_req))
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
- obj_req->write_state = RBD_OBJ_WRITE_GUARD;
- } else {
- obj_req->write_state = RBD_OBJ_WRITE_FLAT;
- }
-
- __rbd_obj_setup_write(obj_req, which);
+ obj_req->write_state = RBD_OBJ_WRITE_START;
return 0;
}
@@ -1889,11 +2404,26 @@ static u16 truncate_or_zero_opcode(struct rbd_obj_request *obj_req)
CEPH_OSD_OP_ZERO;
}
-static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
+static void __rbd_osd_setup_discard_ops(struct ceph_osd_request *osd_req,
+ int which)
+{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
+
+ if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) {
+ rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
+ osd_req_op_init(osd_req, which, CEPH_OSD_OP_DELETE, 0);
+ } else {
+ osd_req_op_extent_init(osd_req, which,
+ truncate_or_zero_opcode(obj_req),
+ obj_req->ex.oe_off, obj_req->ex.oe_len,
+ 0, 0);
+ }
+}
+
+static int rbd_obj_init_discard(struct rbd_obj_request *obj_req)
{
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
- u64 off = obj_req->ex.oe_off;
- u64 next_off = obj_req->ex.oe_off + obj_req->ex.oe_len;
+ u64 off, next_off;
int ret;
/*
@@ -1906,10 +2436,17 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
*/
if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size ||
!rbd_obj_is_tail(obj_req)) {
- off = round_up(off, rbd_dev->opts->alloc_size);
- next_off = round_down(next_off, rbd_dev->opts->alloc_size);
+ off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size);
+ next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len,
+ rbd_dev->opts->alloc_size);
if (off >= next_off)
return 1;
+
+ dout("%s %p %llu~%llu -> %llu~%llu\n", __func__,
+ obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len,
+ off, next_off - off);
+ obj_req->ex.oe_off = off;
+ obj_req->ex.oe_len = next_off - off;
}
/* reverse map the entire object onto the parent */
@@ -1917,52 +2454,29 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- obj_req->osd_req = rbd_osd_req_create(obj_req, 1);
- if (!obj_req->osd_req)
- return -ENOMEM;
-
- if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) {
- osd_req_op_init(obj_req->osd_req, 0, CEPH_OSD_OP_DELETE, 0);
- } else {
- dout("%s %p %llu~%llu -> %llu~%llu\n", __func__,
- obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len,
- off, next_off - off);
- osd_req_op_extent_init(obj_req->osd_req, 0,
- truncate_or_zero_opcode(obj_req),
- off, next_off - off, 0, 0);
- }
+ obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
+ if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents)
+ obj_req->flags |= RBD_OBJ_FLAG_DELETION;
- obj_req->write_state = RBD_OBJ_WRITE_FLAT;
- rbd_osd_req_format_write(obj_req);
+ obj_req->write_state = RBD_OBJ_WRITE_START;
return 0;
}
-static int count_zeroout_ops(struct rbd_obj_request *obj_req)
-{
- int num_osd_ops;
-
- if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents &&
- !rbd_obj_copyup_enabled(obj_req))
- num_osd_ops = 2; /* create + truncate */
- else
- num_osd_ops = 1; /* delete/truncate/zero */
-
- return num_osd_ops;
-}
-
-static void __rbd_obj_setup_zeroout(struct rbd_obj_request *obj_req,
- unsigned int which)
+static void __rbd_osd_setup_zeroout_ops(struct ceph_osd_request *osd_req,
+ int which)
{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
u16 opcode;
if (rbd_obj_is_entire(obj_req)) {
if (obj_req->num_img_extents) {
- if (!rbd_obj_copyup_enabled(obj_req))
- osd_req_op_init(obj_req->osd_req, which++,
+ if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
+ osd_req_op_init(osd_req, which++,
CEPH_OSD_OP_CREATE, 0);
opcode = CEPH_OSD_OP_TRUNCATE;
} else {
- osd_req_op_init(obj_req->osd_req, which++,
+ rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
+ osd_req_op_init(osd_req, which++,
CEPH_OSD_OP_DELETE, 0);
opcode = 0;
}
@@ -1971,18 +2485,13 @@ static void __rbd_obj_setup_zeroout(struct rbd_obj_request *obj_req,
}
if (opcode)
- osd_req_op_extent_init(obj_req->osd_req, which++, opcode,
+ osd_req_op_extent_init(osd_req, which, opcode,
obj_req->ex.oe_off, obj_req->ex.oe_len,
0, 0);
-
- rbd_assert(which == obj_req->osd_req->r_num_ops);
- rbd_osd_req_format_write(obj_req);
}
-static int rbd_obj_setup_zeroout(struct rbd_obj_request *obj_req)
+static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
{
- unsigned int num_osd_ops, which = 0;
- bool need_guard;
int ret;
/* reverse map the entire object onto the parent */
@@ -1990,31 +2499,66 @@ static int rbd_obj_setup_zeroout(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- need_guard = rbd_obj_copyup_enabled(obj_req);
- num_osd_ops = need_guard + count_zeroout_ops(obj_req);
+ if (rbd_obj_copyup_enabled(obj_req))
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
+ if (!obj_req->num_img_extents) {
+ obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
+ if (rbd_obj_is_entire(obj_req))
+ obj_req->flags |= RBD_OBJ_FLAG_DELETION;
+ }
- obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
- if (!obj_req->osd_req)
- return -ENOMEM;
+ obj_req->write_state = RBD_OBJ_WRITE_START;
+ return 0;
+}
- if (need_guard) {
- ret = __rbd_obj_setup_stat(obj_req, which++);
- if (ret)
- return ret;
+static int count_write_ops(struct rbd_obj_request *obj_req)
+{
+ struct rbd_img_request *img_req = obj_req->img_request;
- obj_req->write_state = RBD_OBJ_WRITE_GUARD;
- } else {
- obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+ switch (img_req->op_type) {
+ case OBJ_OP_WRITE:
+ if (!use_object_map(img_req->rbd_dev) ||
+ !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST))
+ return 2; /* setallochint + write/writefull */
+
+ return 1; /* write/writefull */
+ case OBJ_OP_DISCARD:
+ return 1; /* delete/truncate/zero */
+ case OBJ_OP_ZEROOUT:
+ if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents &&
+ !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
+ return 2; /* create + truncate */
+
+ return 1; /* delete/truncate/zero */
+ default:
+ BUG();
}
+}
- __rbd_obj_setup_zeroout(obj_req, which);
- return 0;
+static void rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
+ int which)
+{
+ struct rbd_obj_request *obj_req = osd_req->r_priv;
+
+ switch (obj_req->img_request->op_type) {
+ case OBJ_OP_WRITE:
+ __rbd_osd_setup_write_ops(osd_req, which);
+ break;
+ case OBJ_OP_DISCARD:
+ __rbd_osd_setup_discard_ops(osd_req, which);
+ break;
+ case OBJ_OP_ZEROOUT:
+ __rbd_osd_setup_zeroout_ops(osd_req, which);
+ break;
+ default:
+ BUG();
+ }
}
/*
- * For each object request in @img_req, allocate an OSD request, add
- * individual OSD ops and prepare them for submission. The number of
- * OSD ops depends on op_type and the overlap point (if any).
+ * Prune the list of object requests (adjust offset and/or length, drop
+ * redundant requests). Prepare object request state machines and image
+ * request state machine for execution.
*/
static int __rbd_img_fill_request(struct rbd_img_request *img_req)
{
@@ -2024,16 +2568,16 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
switch (img_req->op_type) {
case OBJ_OP_READ:
- ret = rbd_obj_setup_read(obj_req);
+ ret = rbd_obj_init_read(obj_req);
break;
case OBJ_OP_WRITE:
- ret = rbd_obj_setup_write(obj_req);
+ ret = rbd_obj_init_write(obj_req);
break;
case OBJ_OP_DISCARD:
- ret = rbd_obj_setup_discard(obj_req);
+ ret = rbd_obj_init_discard(obj_req);
break;
case OBJ_OP_ZEROOUT:
- ret = rbd_obj_setup_zeroout(obj_req);
+ ret = rbd_obj_init_zeroout(obj_req);
break;
default:
BUG();
@@ -2041,17 +2585,12 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
if (ret < 0)
return ret;
if (ret > 0) {
- img_req->xferred += obj_req->ex.oe_len;
- img_req->pending_count--;
rbd_img_obj_request_del(img_req, obj_req);
continue;
}
-
- ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
- if (ret)
- return ret;
}
+ img_req->state = RBD_IMG_START;
return 0;
}
@@ -2340,17 +2879,55 @@ static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
&it);
}
-static void rbd_img_request_submit(struct rbd_img_request *img_request)
+static void rbd_img_handle_request_work(struct work_struct *work)
{
- struct rbd_obj_request *obj_request;
+ struct rbd_img_request *img_req =
+ container_of(work, struct rbd_img_request, work);
- dout("%s: img %p\n", __func__, img_request);
+ rbd_img_handle_request(img_req, img_req->work_result);
+}
- rbd_img_request_get(img_request);
- for_each_obj_request(img_request, obj_request)
- rbd_obj_request_submit(obj_request);
+static void rbd_img_schedule(struct rbd_img_request *img_req, int result)
+{
+ INIT_WORK(&img_req->work, rbd_img_handle_request_work);
+ img_req->work_result = result;
+ queue_work(rbd_wq, &img_req->work);
+}
- rbd_img_request_put(img_request);
+static bool rbd_obj_may_exist(struct rbd_obj_request *obj_req)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+
+ if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) {
+ obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
+ return true;
+ }
+
+ dout("%s %p objno %llu assuming dne\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return false;
+}
+
+static int rbd_obj_read_object(struct rbd_obj_request *obj_req)
+{
+ struct ceph_osd_request *osd_req;
+ int ret;
+
+ osd_req = __rbd_obj_add_osd_request(obj_req, NULL, 1);
+ if (IS_ERR(osd_req))
+ return PTR_ERR(osd_req);
+
+ osd_req_op_extent_init(osd_req, 0, CEPH_OSD_OP_READ,
+ obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+ rbd_osd_setup_data(osd_req, 0);
+ rbd_osd_format_read(osd_req);
+
+ ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
+ if (ret)
+ return ret;
+
+ rbd_osd_submit(osd_req);
+ return 0;
}
static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
@@ -2396,51 +2973,144 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
return ret;
}
- rbd_img_request_submit(child_img_req);
+ /* avoid parent chain recursion */
+ rbd_img_schedule(child_img_req, 0);
return 0;
}
-static bool rbd_obj_handle_read(struct rbd_obj_request *obj_req)
+static bool rbd_obj_advance_read(struct rbd_obj_request *obj_req, int *result)
{
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
int ret;
- if (obj_req->result == -ENOENT &&
- rbd_dev->parent_overlap && !obj_req->tried_parent) {
- /* reverse map this object extent onto the parent */
- ret = rbd_obj_calc_img_extents(obj_req, false);
+again:
+ switch (obj_req->read_state) {
+ case RBD_OBJ_READ_START:
+ rbd_assert(!*result);
+
+ if (!rbd_obj_may_exist(obj_req)) {
+ *result = -ENOENT;
+ obj_req->read_state = RBD_OBJ_READ_OBJECT;
+ goto again;
+ }
+
+ ret = rbd_obj_read_object(obj_req);
if (ret) {
- obj_req->result = ret;
+ *result = ret;
return true;
}
-
- if (obj_req->num_img_extents) {
- obj_req->tried_parent = true;
- ret = rbd_obj_read_from_parent(obj_req);
+ obj_req->read_state = RBD_OBJ_READ_OBJECT;
+ return false;
+ case RBD_OBJ_READ_OBJECT:
+ if (*result == -ENOENT && rbd_dev->parent_overlap) {
+ /* reverse map this object extent onto the parent */
+ ret = rbd_obj_calc_img_extents(obj_req, false);
if (ret) {
- obj_req->result = ret;
+ *result = ret;
return true;
}
- return false;
+ if (obj_req->num_img_extents) {
+ ret = rbd_obj_read_from_parent(obj_req);
+ if (ret) {
+ *result = ret;
+ return true;
+ }
+ obj_req->read_state = RBD_OBJ_READ_PARENT;
+ return false;
+ }
+ }
+
+ /*
+ * -ENOENT means a hole in the image -- zero-fill the entire
+ * length of the request. A short read also implies zero-fill
+ * to the end of the request.
+ */
+ if (*result == -ENOENT) {
+ rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len);
+ *result = 0;
+ } else if (*result >= 0) {
+ if (*result < obj_req->ex.oe_len)
+ rbd_obj_zero_range(obj_req, *result,
+ obj_req->ex.oe_len - *result);
+ else
+ rbd_assert(*result == obj_req->ex.oe_len);
+ *result = 0;
}
+ return true;
+ case RBD_OBJ_READ_PARENT:
+ return true;
+ default:
+ BUG();
}
+}
- /*
- * -ENOENT means a hole in the image -- zero-fill the entire
- * length of the request. A short read also implies zero-fill
- * to the end of the request. In both cases we update xferred
- * count to indicate the whole request was satisfied.
- */
- if (obj_req->result == -ENOENT ||
- (!obj_req->result && obj_req->xferred < obj_req->ex.oe_len)) {
- rbd_assert(!obj_req->xferred || !obj_req->result);
- rbd_obj_zero_range(obj_req, obj_req->xferred,
- obj_req->ex.oe_len - obj_req->xferred);
- obj_req->result = 0;
- obj_req->xferred = obj_req->ex.oe_len;
+static bool rbd_obj_write_is_noop(struct rbd_obj_request *obj_req)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+
+ if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno))
+ obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
+
+ if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) &&
+ (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) {
+ dout("%s %p noop for nonexistent\n", __func__, obj_req);
+ return true;
}
- return true;
+ return false;
+}
+
+/*
+ * Return:
+ * 0 - object map update sent
+ * 1 - object map update isn't needed
+ * <0 - error
+ */
+static int rbd_obj_write_pre_object_map(struct rbd_obj_request *obj_req)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ u8 new_state;
+
+ if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
+ return 1;
+
+ if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
+ new_state = OBJECT_PENDING;
+ else
+ new_state = OBJECT_EXISTS;
+
+ return rbd_object_map_update(obj_req, CEPH_NOSNAP, new_state, NULL);
+}
+
+static int rbd_obj_write_object(struct rbd_obj_request *obj_req)
+{
+ struct ceph_osd_request *osd_req;
+ int num_ops = count_write_ops(obj_req);
+ int which = 0;
+ int ret;
+
+ if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)
+ num_ops++; /* stat */
+
+ osd_req = rbd_obj_add_osd_request(obj_req, num_ops);
+ if (IS_ERR(osd_req))
+ return PTR_ERR(osd_req);
+
+ if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
+ ret = rbd_osd_setup_stat(osd_req, which++);
+ if (ret)
+ return ret;
+ }
+
+ rbd_osd_setup_write_ops(osd_req, which);
+ rbd_osd_format_write(osd_req);
+
+ ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
+ if (ret)
+ return ret;
+
+ rbd_osd_submit(osd_req);
+ return 0;
}
/*
@@ -2463,123 +3133,67 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
#define MODS_ONLY U32_MAX
-static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req,
- u32 bytes)
+static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req,
+ u32 bytes)
{
+ struct ceph_osd_request *osd_req;
int ret;
dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
- rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
rbd_assert(bytes > 0 && bytes != MODS_ONLY);
- rbd_osd_req_destroy(obj_req->osd_req);
- obj_req->osd_req = __rbd_osd_req_create(obj_req, &rbd_empty_snapc, 1);
- if (!obj_req->osd_req)
- return -ENOMEM;
+ osd_req = __rbd_obj_add_osd_request(obj_req, &rbd_empty_snapc, 1);
+ if (IS_ERR(osd_req))
+ return PTR_ERR(osd_req);
- ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup");
+ ret = rbd_osd_setup_copyup(osd_req, 0, bytes);
if (ret)
return ret;
- osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
- obj_req->copyup_bvecs,
- obj_req->copyup_bvec_count,
- bytes);
- rbd_osd_req_format_write(obj_req);
+ rbd_osd_format_write(osd_req);
- ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
+ ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
if (ret)
return ret;
- rbd_obj_request_submit(obj_req);
+ rbd_osd_submit(osd_req);
return 0;
}
-static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
+static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req,
+ u32 bytes)
{
- struct rbd_img_request *img_req = obj_req->img_request;
- unsigned int num_osd_ops = (bytes != MODS_ONLY);
- unsigned int which = 0;
+ struct ceph_osd_request *osd_req;
+ int num_ops = count_write_ops(obj_req);
+ int which = 0;
int ret;
dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
- rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT ||
- obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_CALL);
- rbd_osd_req_destroy(obj_req->osd_req);
- switch (img_req->op_type) {
- case OBJ_OP_WRITE:
- num_osd_ops += count_write_ops(obj_req);
- break;
- case OBJ_OP_ZEROOUT:
- num_osd_ops += count_zeroout_ops(obj_req);
- break;
- default:
- BUG();
- }
+ if (bytes != MODS_ONLY)
+ num_ops++; /* copyup */
- obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
- if (!obj_req->osd_req)
- return -ENOMEM;
+ osd_req = rbd_obj_add_osd_request(obj_req, num_ops);
+ if (IS_ERR(osd_req))
+ return PTR_ERR(osd_req);
if (bytes != MODS_ONLY) {
- ret = osd_req_op_cls_init(obj_req->osd_req, which, "rbd",
- "copyup");
+ ret = rbd_osd_setup_copyup(osd_req, which++, bytes);
if (ret)
return ret;
-
- osd_req_op_cls_request_data_bvecs(obj_req->osd_req, which++,
- obj_req->copyup_bvecs,
- obj_req->copyup_bvec_count,
- bytes);
}
- switch (img_req->op_type) {
- case OBJ_OP_WRITE:
- __rbd_obj_setup_write(obj_req, which);
- break;
- case OBJ_OP_ZEROOUT:
- __rbd_obj_setup_zeroout(obj_req, which);
- break;
- default:
- BUG();
- }
+ rbd_osd_setup_write_ops(osd_req, which);
+ rbd_osd_format_write(osd_req);
- ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
+ ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
if (ret)
return ret;
- rbd_obj_request_submit(obj_req);
+ rbd_osd_submit(osd_req);
return 0;
}
-static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
-{
- /*
- * Only send non-zero copyup data to save some I/O and network
- * bandwidth -- zero copyup data is equivalent to the object not
- * existing.
- */
- if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) {
- dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
- bytes = 0;
- }
-
- if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
- /*
- * Send a copyup request with an empty snapshot context to
- * deep-copyup the object through all existing snapshots.
- * A second request with the current snapshot context will be
- * sent for the actual modification.
- */
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC;
- return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes);
- }
-
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
- return rbd_obj_issue_copyup_ops(obj_req, bytes);
-}
-
static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
{
u32 i;
@@ -2608,7 +3222,12 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
return 0;
}
-static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
+/*
+ * The target object doesn't exist. Read the data for the entire
+ * target object up to the overlap point (if any) from the parent,
+ * so we can use it for a copyup.
+ */
+static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req)
{
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
int ret;
@@ -2623,178 +3242,492 @@ static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
* request -- pass MODS_ONLY since the copyup isn't needed
* anymore.
*/
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
- return rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
+ return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY);
}
ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
if (ret)
return ret;
- obj_req->write_state = RBD_OBJ_WRITE_READ_FROM_PARENT;
return rbd_obj_read_from_parent(obj_req);
}
-static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
+static void rbd_obj_copyup_object_maps(struct rbd_obj_request *obj_req)
{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ struct ceph_snap_context *snapc = obj_req->img_request->snapc;
+ u8 new_state;
+ u32 i;
int ret;
- switch (obj_req->write_state) {
- case RBD_OBJ_WRITE_GUARD:
- rbd_assert(!obj_req->xferred);
- if (obj_req->result == -ENOENT) {
- /*
- * The target object doesn't exist. Read the data for
- * the entire target object up to the overlap point (if
- * any) from the parent, so we can use it for a copyup.
- */
- ret = rbd_obj_handle_write_guard(obj_req);
- if (ret) {
- obj_req->result = ret;
- return true;
- }
- return false;
+ rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
+
+ if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
+ return;
+
+ if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
+ return;
+
+ for (i = 0; i < snapc->num_snaps; i++) {
+ if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) &&
+ i + 1 < snapc->num_snaps)
+ new_state = OBJECT_EXISTS_CLEAN;
+ else
+ new_state = OBJECT_EXISTS;
+
+ ret = rbd_object_map_update(obj_req, snapc->snaps[i],
+ new_state, NULL);
+ if (ret < 0) {
+ obj_req->pending.result = ret;
+ return;
}
- /* fall through */
- case RBD_OBJ_WRITE_FLAT:
- case RBD_OBJ_WRITE_COPYUP_OPS:
- if (!obj_req->result)
- /*
- * There is no such thing as a successful short
- * write -- indicate the whole request was satisfied.
- */
- obj_req->xferred = obj_req->ex.oe_len;
- return true;
- case RBD_OBJ_WRITE_READ_FROM_PARENT:
- if (obj_req->result)
- return true;
- rbd_assert(obj_req->xferred);
- ret = rbd_obj_issue_copyup(obj_req, obj_req->xferred);
+ rbd_assert(!ret);
+ obj_req->pending.num_pending++;
+ }
+}
+
+static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req)
+{
+ u32 bytes = rbd_obj_img_extents_bytes(obj_req);
+ int ret;
+
+ rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
+
+ /*
+ * Only send non-zero copyup data to save some I/O and network
+ * bandwidth -- zero copyup data is equivalent to the object not
+ * existing.
+ */
+ if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
+ bytes = 0;
+
+ if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
+ /*
+ * Send a copyup request with an empty snapshot context to
+ * deep-copyup the object through all existing snapshots.
+ * A second request with the current snapshot context will be
+ * sent for the actual modification.
+ */
+ ret = rbd_obj_copyup_empty_snapc(obj_req, bytes);
+ if (ret) {
+ obj_req->pending.result = ret;
+ return;
+ }
+
+ obj_req->pending.num_pending++;
+ bytes = MODS_ONLY;
+ }
+
+ ret = rbd_obj_copyup_current_snapc(obj_req, bytes);
+ if (ret) {
+ obj_req->pending.result = ret;
+ return;
+ }
+
+ obj_req->pending.num_pending++;
+}
+
+static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ int ret;
+
+again:
+ switch (obj_req->copyup_state) {
+ case RBD_OBJ_COPYUP_START:
+ rbd_assert(!*result);
+
+ ret = rbd_obj_copyup_read_parent(obj_req);
if (ret) {
- obj_req->result = ret;
- obj_req->xferred = 0;
+ *result = ret;
return true;
}
+ if (obj_req->num_img_extents)
+ obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT;
+ else
+ obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
return false;
- case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC:
- if (obj_req->result)
+ case RBD_OBJ_COPYUP_READ_PARENT:
+ if (*result)
return true;
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
- ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
- if (ret) {
- obj_req->result = ret;
+ if (is_zero_bvecs(obj_req->copyup_bvecs,
+ rbd_obj_img_extents_bytes(obj_req))) {
+ dout("%s %p detected zeros\n", __func__, obj_req);
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS;
+ }
+
+ rbd_obj_copyup_object_maps(obj_req);
+ if (!obj_req->pending.num_pending) {
+ *result = obj_req->pending.result;
+ obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS;
+ goto again;
+ }
+ obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS;
+ return false;
+ case __RBD_OBJ_COPYUP_OBJECT_MAPS:
+ if (!pending_result_dec(&obj_req->pending, result))
+ return false;
+ /* fall through */
+ case RBD_OBJ_COPYUP_OBJECT_MAPS:
+ if (*result) {
+ rbd_warn(rbd_dev, "snap object map update failed: %d",
+ *result);
return true;
}
+
+ rbd_obj_copyup_write_object(obj_req);
+ if (!obj_req->pending.num_pending) {
+ *result = obj_req->pending.result;
+ obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
+ goto again;
+ }
+ obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT;
return false;
+ case __RBD_OBJ_COPYUP_WRITE_OBJECT:
+ if (!pending_result_dec(&obj_req->pending, result))
+ return false;
+ /* fall through */
+ case RBD_OBJ_COPYUP_WRITE_OBJECT:
+ return true;
default:
BUG();
}
}
/*
- * Returns true if @obj_req is completed, or false otherwise.
+ * Return:
+ * 0 - object map update sent
+ * 1 - object map update isn't needed
+ * <0 - error
*/
-static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+static int rbd_obj_write_post_object_map(struct rbd_obj_request *obj_req)
{
- switch (obj_req->img_request->op_type) {
- case OBJ_OP_READ:
- return rbd_obj_handle_read(obj_req);
- case OBJ_OP_WRITE:
- return rbd_obj_handle_write(obj_req);
- case OBJ_OP_DISCARD:
- case OBJ_OP_ZEROOUT:
- if (rbd_obj_handle_write(obj_req)) {
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ u8 current_state = OBJECT_PENDING;
+
+ if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
+ return 1;
+
+ if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION))
+ return 1;
+
+ return rbd_object_map_update(obj_req, CEPH_NOSNAP, OBJECT_NONEXISTENT,
+ &current_state);
+}
+
+static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
+{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+ int ret;
+
+again:
+ switch (obj_req->write_state) {
+ case RBD_OBJ_WRITE_START:
+ rbd_assert(!*result);
+
+ if (rbd_obj_write_is_noop(obj_req))
+ return true;
+
+ ret = rbd_obj_write_pre_object_map(obj_req);
+ if (ret < 0) {
+ *result = ret;
+ return true;
+ }
+ obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP;
+ if (ret > 0)
+ goto again;
+ return false;
+ case RBD_OBJ_WRITE_PRE_OBJECT_MAP:
+ if (*result) {
+ rbd_warn(rbd_dev, "pre object map update failed: %d",
+ *result);
+ return true;
+ }
+ ret = rbd_obj_write_object(obj_req);
+ if (ret) {
+ *result = ret;
+ return true;
+ }
+ obj_req->write_state = RBD_OBJ_WRITE_OBJECT;
+ return false;
+ case RBD_OBJ_WRITE_OBJECT:
+ if (*result == -ENOENT) {
+ if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
+ *result = 0;
+ obj_req->copyup_state = RBD_OBJ_COPYUP_START;
+ obj_req->write_state = __RBD_OBJ_WRITE_COPYUP;
+ goto again;
+ }
/*
- * Hide -ENOENT from delete/truncate/zero -- discarding
- * a non-existent object is not a problem.
+ * On a non-existent object:
+ * delete - -ENOENT, truncate/zero - 0
*/
- if (obj_req->result == -ENOENT) {
- obj_req->result = 0;
- obj_req->xferred = obj_req->ex.oe_len;
- }
+ if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
+ *result = 0;
+ }
+ if (*result)
+ return true;
+
+ obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
+ goto again;
+ case __RBD_OBJ_WRITE_COPYUP:
+ if (!rbd_obj_advance_copyup(obj_req, result))
+ return false;
+ /* fall through */
+ case RBD_OBJ_WRITE_COPYUP:
+ if (*result) {
+ rbd_warn(rbd_dev, "copyup failed: %d", *result);
+ return true;
+ }
+ ret = rbd_obj_write_post_object_map(obj_req);
+ if (ret < 0) {
+ *result = ret;
return true;
}
+ obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP;
+ if (ret > 0)
+ goto again;
return false;
+ case RBD_OBJ_WRITE_POST_OBJECT_MAP:
+ if (*result)
+ rbd_warn(rbd_dev, "post object map update failed: %d",
+ *result);
+ return true;
default:
BUG();
}
}
-static void rbd_obj_end_request(struct rbd_obj_request *obj_req)
+/*
+ * Return true if @obj_req is completed.
+ */
+static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req,
+ int *result)
{
struct rbd_img_request *img_req = obj_req->img_request;
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+ bool done;
- rbd_assert((!obj_req->result &&
- obj_req->xferred == obj_req->ex.oe_len) ||
- (obj_req->result < 0 && !obj_req->xferred));
- if (!obj_req->result) {
- img_req->xferred += obj_req->xferred;
- return;
- }
+ mutex_lock(&obj_req->state_mutex);
+ if (!rbd_img_is_write(img_req))
+ done = rbd_obj_advance_read(obj_req, result);
+ else
+ done = rbd_obj_advance_write(obj_req, result);
+ mutex_unlock(&obj_req->state_mutex);
- rbd_warn(img_req->rbd_dev,
- "%s at objno %llu %llu~%llu result %d xferred %llu",
- obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
- obj_req->ex.oe_off, obj_req->ex.oe_len, obj_req->result,
- obj_req->xferred);
- if (!img_req->result) {
- img_req->result = obj_req->result;
- img_req->xferred = 0;
+ if (done && *result) {
+ rbd_assert(*result < 0);
+ rbd_warn(rbd_dev, "%s at objno %llu %llu~%llu result %d",
+ obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
+ obj_req->ex.oe_off, obj_req->ex.oe_len, *result);
}
+ return done;
}
-static void rbd_img_end_child_request(struct rbd_img_request *img_req)
+/*
+ * This is open-coded in rbd_img_handle_request() to avoid parent chain
+ * recursion.
+ */
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result)
{
- struct rbd_obj_request *obj_req = img_req->obj_request;
+ if (__rbd_obj_handle_request(obj_req, &result))
+ rbd_img_handle_request(obj_req->img_request, result);
+}
- rbd_assert(test_bit(IMG_REQ_CHILD, &img_req->flags));
- rbd_assert((!img_req->result &&
- img_req->xferred == rbd_obj_img_extents_bytes(obj_req)) ||
- (img_req->result < 0 && !img_req->xferred));
+static bool need_exclusive_lock(struct rbd_img_request *img_req)
+{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
- obj_req->result = img_req->result;
- obj_req->xferred = img_req->xferred;
- rbd_img_request_put(img_req);
+ if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
+ return false;
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ return false;
+
+ rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
+ if (rbd_dev->opts->lock_on_read ||
+ (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
+ return true;
+
+ return rbd_img_is_write(img_req);
}
-static void rbd_img_end_request(struct rbd_img_request *img_req)
+static bool rbd_lock_add_request(struct rbd_img_request *img_req)
{
- rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
- rbd_assert((!img_req->result &&
- img_req->xferred == blk_rq_bytes(img_req->rq)) ||
- (img_req->result < 0 && !img_req->xferred));
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+ bool locked;
+
+ lockdep_assert_held(&rbd_dev->lock_rwsem);
+ locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED;
+ spin_lock(&rbd_dev->lock_lists_lock);
+ rbd_assert(list_empty(&img_req->lock_item));
+ if (!locked)
+ list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list);
+ else
+ list_add_tail(&img_req->lock_item, &rbd_dev->running_list);
+ spin_unlock(&rbd_dev->lock_lists_lock);
+ return locked;
+}
+
+static void rbd_lock_del_request(struct rbd_img_request *img_req)
+{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+ bool need_wakeup;
- blk_mq_end_request(img_req->rq,
- errno_to_blk_status(img_req->result));
- rbd_img_request_put(img_req);
+ lockdep_assert_held(&rbd_dev->lock_rwsem);
+ spin_lock(&rbd_dev->lock_lists_lock);
+ rbd_assert(!list_empty(&img_req->lock_item));
+ list_del_init(&img_req->lock_item);
+ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+ list_empty(&rbd_dev->running_list));
+ spin_unlock(&rbd_dev->lock_lists_lock);
+ if (need_wakeup)
+ complete(&rbd_dev->releasing_wait);
}
-static void rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
{
- struct rbd_img_request *img_req;
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+
+ if (!need_exclusive_lock(img_req))
+ return 1;
+
+ if (rbd_lock_add_request(img_req))
+ return 1;
+
+ if (rbd_dev->opts->exclusive) {
+ WARN_ON(1); /* lock got released? */
+ return -EROFS;
+ }
+
+ /*
+ * Note the use of mod_delayed_work() in rbd_acquire_lock()
+ * and cancel_delayed_work() in wake_lock_waiters().
+ */
+ dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev);
+ queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
+ return 0;
+}
+
+static void rbd_img_object_requests(struct rbd_img_request *img_req)
+{
+ struct rbd_obj_request *obj_req;
+
+ rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
+
+ for_each_obj_request(img_req, obj_req) {
+ int result = 0;
+
+ if (__rbd_obj_handle_request(obj_req, &result)) {
+ if (result) {
+ img_req->pending.result = result;
+ return;
+ }
+ } else {
+ img_req->pending.num_pending++;
+ }
+ }
+}
+
+static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
+{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+ int ret;
again:
- if (!__rbd_obj_handle_request(obj_req))
- return;
+ switch (img_req->state) {
+ case RBD_IMG_START:
+ rbd_assert(!*result);
- img_req = obj_req->img_request;
- spin_lock(&img_req->completion_lock);
- rbd_obj_end_request(obj_req);
- rbd_assert(img_req->pending_count);
- if (--img_req->pending_count) {
- spin_unlock(&img_req->completion_lock);
- return;
+ ret = rbd_img_exclusive_lock(img_req);
+ if (ret < 0) {
+ *result = ret;
+ return true;
+ }
+ img_req->state = RBD_IMG_EXCLUSIVE_LOCK;
+ if (ret > 0)
+ goto again;
+ return false;
+ case RBD_IMG_EXCLUSIVE_LOCK:
+ if (*result)
+ return true;
+
+ rbd_assert(!need_exclusive_lock(img_req) ||
+ __rbd_is_lock_owner(rbd_dev));
+
+ rbd_img_object_requests(img_req);
+ if (!img_req->pending.num_pending) {
+ *result = img_req->pending.result;
+ img_req->state = RBD_IMG_OBJECT_REQUESTS;
+ goto again;
+ }
+ img_req->state = __RBD_IMG_OBJECT_REQUESTS;
+ return false;
+ case __RBD_IMG_OBJECT_REQUESTS:
+ if (!pending_result_dec(&img_req->pending, result))
+ return false;
+ /* fall through */
+ case RBD_IMG_OBJECT_REQUESTS:
+ return true;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Return true if @img_req is completed.
+ */
+static bool __rbd_img_handle_request(struct rbd_img_request *img_req,
+ int *result)
+{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+ bool done;
+
+ if (need_exclusive_lock(img_req)) {
+ down_read(&rbd_dev->lock_rwsem);
+ mutex_lock(&img_req->state_mutex);
+ done = rbd_img_advance(img_req, result);
+ if (done)
+ rbd_lock_del_request(img_req);
+ mutex_unlock(&img_req->state_mutex);
+ up_read(&rbd_dev->lock_rwsem);
+ } else {
+ mutex_lock(&img_req->state_mutex);
+ done = rbd_img_advance(img_req, result);
+ mutex_unlock(&img_req->state_mutex);
+ }
+
+ if (done && *result) {
+ rbd_assert(*result < 0);
+ rbd_warn(rbd_dev, "%s%s result %d",
+ test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "",
+ obj_op_name(img_req->op_type), *result);
}
+ return done;
+}
+
+static void rbd_img_handle_request(struct rbd_img_request *img_req, int result)
+{
+again:
+ if (!__rbd_img_handle_request(img_req, &result))
+ return;
- spin_unlock(&img_req->completion_lock);
if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
- obj_req = img_req->obj_request;
- rbd_img_end_child_request(img_req);
- goto again;
+ struct rbd_obj_request *obj_req = img_req->obj_request;
+
+ rbd_img_request_put(img_req);
+ if (__rbd_obj_handle_request(obj_req, &result)) {
+ img_req = obj_req->img_request;
+ goto again;
+ }
+ } else {
+ struct request *rq = img_req->rq;
+
+ rbd_img_request_put(img_req);
+ blk_mq_end_request(rq, errno_to_blk_status(result));
}
- rbd_img_end_request(img_req);
}
static const struct rbd_client_id rbd_empty_cid;
@@ -2839,6 +3772,7 @@ static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
{
struct rbd_client_id cid = rbd_get_cid(rbd_dev);
+ rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
strcpy(rbd_dev->lock_cookie, cookie);
rbd_set_owner_cid(rbd_dev, &cid);
queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
@@ -2863,7 +3797,6 @@ static int rbd_lock(struct rbd_device *rbd_dev)
if (ret)
return ret;
- rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
__rbd_lock(rbd_dev, cookie);
return 0;
}
@@ -2882,7 +3815,7 @@ static void rbd_unlock(struct rbd_device *rbd_dev)
ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
RBD_LOCK_NAME, rbd_dev->lock_cookie);
if (ret && ret != -ENOENT)
- rbd_warn(rbd_dev, "failed to unlock: %d", ret);
+ rbd_warn(rbd_dev, "failed to unlock header: %d", ret);
/* treat errors as the image is unlocked */
rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
@@ -3009,15 +3942,34 @@ e_inval:
goto out;
}
-static void wake_requests(struct rbd_device *rbd_dev, bool wake_all)
+/*
+ * Either image request state machine(s) or rbd_add_acquire_lock()
+ * (i.e. "rbd map").
+ */
+static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
{
- dout("%s rbd_dev %p wake_all %d\n", __func__, rbd_dev, wake_all);
+ struct rbd_img_request *img_req;
+
+ dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
+ lockdep_assert_held_write(&rbd_dev->lock_rwsem);
cancel_delayed_work(&rbd_dev->lock_dwork);
- if (wake_all)
- wake_up_all(&rbd_dev->lock_waitq);
- else
- wake_up(&rbd_dev->lock_waitq);
+ if (!completion_done(&rbd_dev->acquire_wait)) {
+ rbd_assert(list_empty(&rbd_dev->acquiring_list) &&
+ list_empty(&rbd_dev->running_list));
+ rbd_dev->acquire_err = result;
+ complete_all(&rbd_dev->acquire_wait);
+ return;
+ }
+
+ list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
+ mutex_lock(&img_req->state_mutex);
+ rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
+ rbd_img_schedule(img_req, result);
+ mutex_unlock(&img_req->state_mutex);
+ }
+
+ list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
}
static int get_lock_owner_info(struct rbd_device *rbd_dev,
@@ -3132,13 +4084,10 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
goto again;
ret = find_watcher(rbd_dev, lockers);
- if (ret) {
- if (ret > 0)
- ret = 0; /* have to request lock */
- goto out;
- }
+ if (ret)
+ goto out; /* request lock or error */
- rbd_warn(rbd_dev, "%s%llu seems dead, breaking lock",
+ rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
ENTITY_NAME(lockers[0].id.name));
ret = ceph_monc_blacklist_add(&client->monc,
@@ -3165,53 +4114,90 @@ out:
return ret;
}
+static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
+ ret = rbd_object_map_open(rbd_dev);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/*
- * ret is set only if lock_state is RBD_LOCK_STATE_UNLOCKED
+ * Return:
+ * 0 - lock acquired
+ * 1 - caller should call rbd_request_lock()
+ * <0 - error
*/
-static enum rbd_lock_state rbd_try_acquire_lock(struct rbd_device *rbd_dev,
- int *pret)
+static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
{
- enum rbd_lock_state lock_state;
+ int ret;
down_read(&rbd_dev->lock_rwsem);
dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
rbd_dev->lock_state);
if (__rbd_is_lock_owner(rbd_dev)) {
- lock_state = rbd_dev->lock_state;
up_read(&rbd_dev->lock_rwsem);
- return lock_state;
+ return 0;
}
up_read(&rbd_dev->lock_rwsem);
down_write(&rbd_dev->lock_rwsem);
dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
rbd_dev->lock_state);
- if (!__rbd_is_lock_owner(rbd_dev)) {
- *pret = rbd_try_lock(rbd_dev);
- if (*pret)
- rbd_warn(rbd_dev, "failed to acquire lock: %d", *pret);
+ if (__rbd_is_lock_owner(rbd_dev)) {
+ up_write(&rbd_dev->lock_rwsem);
+ return 0;
+ }
+
+ ret = rbd_try_lock(rbd_dev);
+ if (ret < 0) {
+ rbd_warn(rbd_dev, "failed to lock header: %d", ret);
+ if (ret == -EBLACKLISTED)
+ goto out;
+
+ ret = 1; /* request lock anyway */
+ }
+ if (ret > 0) {
+ up_write(&rbd_dev->lock_rwsem);
+ return ret;
+ }
+
+ rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED);
+ rbd_assert(list_empty(&rbd_dev->running_list));
+
+ ret = rbd_post_acquire_action(rbd_dev);
+ if (ret) {
+ rbd_warn(rbd_dev, "post-acquire action failed: %d", ret);
+ /*
+ * Can't stay in RBD_LOCK_STATE_LOCKED because
+ * rbd_lock_add_request() would let the request through,
+ * assuming that e.g. object map is locked and loaded.
+ */
+ rbd_unlock(rbd_dev);
}
- lock_state = rbd_dev->lock_state;
+out:
+ wake_lock_waiters(rbd_dev, ret);
up_write(&rbd_dev->lock_rwsem);
- return lock_state;
+ return ret;
}
static void rbd_acquire_lock(struct work_struct *work)
{
struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
struct rbd_device, lock_dwork);
- enum rbd_lock_state lock_state;
- int ret = 0;
+ int ret;
dout("%s rbd_dev %p\n", __func__, rbd_dev);
again:
- lock_state = rbd_try_acquire_lock(rbd_dev, &ret);
- if (lock_state != RBD_LOCK_STATE_UNLOCKED || ret == -EBLACKLISTED) {
- if (lock_state == RBD_LOCK_STATE_LOCKED)
- wake_requests(rbd_dev, true);
- dout("%s rbd_dev %p lock_state %d ret %d - done\n", __func__,
- rbd_dev, lock_state, ret);
+ ret = rbd_try_acquire_lock(rbd_dev);
+ if (ret <= 0) {
+ dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret);
return;
}
@@ -3220,16 +4206,9 @@ again:
goto again; /* treat this as a dead client */
} else if (ret == -EROFS) {
rbd_warn(rbd_dev, "peer will not release lock");
- /*
- * If this is rbd_add_acquire_lock(), we want to fail
- * immediately -- reuse BLACKLISTED flag. Otherwise we
- * want to block.
- */
- if (!(rbd_dev->disk->flags & GENHD_FL_UP)) {
- set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
- /* wake "rbd map --exclusive" process */
- wake_requests(rbd_dev, false);
- }
+ down_write(&rbd_dev->lock_rwsem);
+ wake_lock_waiters(rbd_dev, ret);
+ up_write(&rbd_dev->lock_rwsem);
} else if (ret < 0) {
rbd_warn(rbd_dev, "error requesting lock: %d", ret);
mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
@@ -3246,43 +4225,67 @@ again:
}
}
-/*
- * lock_rwsem must be held for write
- */
-static bool rbd_release_lock(struct rbd_device *rbd_dev)
+static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
{
- dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
- rbd_dev->lock_state);
+ bool need_wait;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+ lockdep_assert_held_write(&rbd_dev->lock_rwsem);
+
if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
return false;
- rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
- downgrade_write(&rbd_dev->lock_rwsem);
/*
* Ensure that all in-flight IO is flushed.
- *
- * FIXME: ceph_osdc_sync() flushes the entire OSD client, which
- * may be shared with other devices.
*/
- ceph_osdc_sync(&rbd_dev->rbd_client->client->osdc);
+ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
+ rbd_assert(!completion_done(&rbd_dev->releasing_wait));
+ need_wait = !list_empty(&rbd_dev->running_list);
+ downgrade_write(&rbd_dev->lock_rwsem);
+ if (need_wait)
+ wait_for_completion(&rbd_dev->releasing_wait);
up_read(&rbd_dev->lock_rwsem);
down_write(&rbd_dev->lock_rwsem);
- dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
- rbd_dev->lock_state);
if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
return false;
+ rbd_assert(list_empty(&rbd_dev->running_list));
+ return true;
+}
+
+static void rbd_pre_release_action(struct rbd_device *rbd_dev)
+{
+ if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)
+ rbd_object_map_close(rbd_dev);
+}
+
+static void __rbd_release_lock(struct rbd_device *rbd_dev)
+{
+ rbd_assert(list_empty(&rbd_dev->running_list));
+
+ rbd_pre_release_action(rbd_dev);
rbd_unlock(rbd_dev);
+}
+
+/*
+ * lock_rwsem must be held for write
+ */
+static void rbd_release_lock(struct rbd_device *rbd_dev)
+{
+ if (!rbd_quiesce_lock(rbd_dev))
+ return;
+
+ __rbd_release_lock(rbd_dev);
+
/*
* Give others a chance to grab the lock - we would re-acquire
- * almost immediately if we got new IO during ceph_osdc_sync()
- * otherwise. We need to ack our own notifications, so this
- * lock_dwork will be requeued from rbd_wait_state_locked()
- * after wake_requests() in rbd_handle_released_lock().
+ * almost immediately if we got new IO while draining the running
+ * list otherwise. We need to ack our own notifications, so this
+ * lock_dwork will be requeued from rbd_handle_released_lock() by
+ * way of maybe_kick_acquire().
*/
cancel_delayed_work(&rbd_dev->lock_dwork);
- return true;
}
static void rbd_release_lock_work(struct work_struct *work)
@@ -3295,6 +4298,23 @@ static void rbd_release_lock_work(struct work_struct *work)
up_write(&rbd_dev->lock_rwsem);
}
+static void maybe_kick_acquire(struct rbd_device *rbd_dev)
+{
+ bool have_requests;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+ if (__rbd_is_lock_owner(rbd_dev))
+ return;
+
+ spin_lock(&rbd_dev->lock_lists_lock);
+ have_requests = !list_empty(&rbd_dev->acquiring_list);
+ spin_unlock(&rbd_dev->lock_lists_lock);
+ if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) {
+ dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev);
+ mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
+ }
+}
+
static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
void **p)
{
@@ -3324,8 +4344,7 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
down_read(&rbd_dev->lock_rwsem);
}
- if (!__rbd_is_lock_owner(rbd_dev))
- wake_requests(rbd_dev, false);
+ maybe_kick_acquire(rbd_dev);
up_read(&rbd_dev->lock_rwsem);
}
@@ -3357,8 +4376,7 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
down_read(&rbd_dev->lock_rwsem);
}
- if (!__rbd_is_lock_owner(rbd_dev))
- wake_requests(rbd_dev, false);
+ maybe_kick_acquire(rbd_dev);
up_read(&rbd_dev->lock_rwsem);
}
@@ -3608,7 +4626,6 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
{
- WARN_ON(waitqueue_active(&rbd_dev->lock_waitq));
cancel_tasks_sync(rbd_dev);
mutex_lock(&rbd_dev->watch_mutex);
@@ -3630,7 +4647,8 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
char cookie[32];
int ret;
- WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
+ if (!rbd_quiesce_lock(rbd_dev))
+ return;
format_lock_cookie(rbd_dev, cookie);
ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
@@ -3646,11 +4664,11 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
*/
- if (rbd_release_lock(rbd_dev))
- queue_delayed_work(rbd_dev->task_wq,
- &rbd_dev->lock_dwork, 0);
+ __rbd_release_lock(rbd_dev);
+ queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
} else {
__rbd_lock(rbd_dev, cookie);
+ wake_lock_waiters(rbd_dev, 0);
}
}
@@ -3671,15 +4689,18 @@ static void rbd_reregister_watch(struct work_struct *work)
ret = __rbd_register_watch(rbd_dev);
if (ret) {
rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
- if (ret == -EBLACKLISTED || ret == -ENOENT) {
- set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
- wake_requests(rbd_dev, true);
- } else {
+ if (ret != -EBLACKLISTED && ret != -ENOENT) {
queue_delayed_work(rbd_dev->task_wq,
&rbd_dev->watch_dwork,
RBD_RETRY_DELAY);
+ mutex_unlock(&rbd_dev->watch_mutex);
+ return;
}
+
mutex_unlock(&rbd_dev->watch_mutex);
+ down_write(&rbd_dev->lock_rwsem);
+ wake_lock_waiters(rbd_dev, ret);
+ up_write(&rbd_dev->lock_rwsem);
return;
}
@@ -3742,7 +4763,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name,
CEPH_OSD_FLAG_READ, req_page, outbound_size,
- reply_page, &inbound_size);
+ &reply_page, &inbound_size);
if (!ret) {
memcpy(inbound, page_address(reply_page), inbound_size);
ret = inbound_size;
@@ -3754,54 +4775,6 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
return ret;
}
-/*
- * lock_rwsem must be held for read
- */
-static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire)
-{
- DEFINE_WAIT(wait);
- unsigned long timeout;
- int ret = 0;
-
- if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
- return -EBLACKLISTED;
-
- if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
- return 0;
-
- if (!may_acquire) {
- rbd_warn(rbd_dev, "exclusive lock required");
- return -EROFS;
- }
-
- do {
- /*
- * Note the use of mod_delayed_work() in rbd_acquire_lock()
- * and cancel_delayed_work() in wake_requests().
- */
- dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev);
- queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
- prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait,
- TASK_UNINTERRUPTIBLE);
- up_read(&rbd_dev->lock_rwsem);
- timeout = schedule_timeout(ceph_timeout_jiffies(
- rbd_dev->opts->lock_timeout));
- down_read(&rbd_dev->lock_rwsem);
- if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
- ret = -EBLACKLISTED;
- break;
- }
- if (!timeout) {
- rbd_warn(rbd_dev, "timed out waiting for lock");
- ret = -ETIMEDOUT;
- break;
- }
- } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
-
- finish_wait(&rbd_dev->lock_waitq, &wait);
- return ret;
-}
-
static void rbd_queue_workfn(struct work_struct *work)
{
struct request *rq = blk_mq_rq_from_pdu(work);
@@ -3812,7 +4785,6 @@ static void rbd_queue_workfn(struct work_struct *work)
u64 length = blk_rq_bytes(rq);
enum obj_operation_type op_type;
u64 mapping_size;
- bool must_be_locked;
int result;
switch (req_op(rq)) {
@@ -3886,21 +4858,10 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
- must_be_locked =
- (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) &&
- (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
- if (must_be_locked) {
- down_read(&rbd_dev->lock_rwsem);
- result = rbd_wait_state_locked(rbd_dev,
- !rbd_dev->opts->exclusive);
- if (result)
- goto err_unlock;
- }
-
img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
if (!img_request) {
result = -ENOMEM;
- goto err_unlock;
+ goto err_rq;
}
img_request->rq = rq;
snapc = NULL; /* img_request consumes a ref */
@@ -3910,19 +4871,14 @@ static void rbd_queue_workfn(struct work_struct *work)
else
result = rbd_img_fill_from_bio(img_request, offset, length,
rq->bio);
- if (result || !img_request->pending_count)
+ if (result)
goto err_img_request;
- rbd_img_request_submit(img_request);
- if (must_be_locked)
- up_read(&rbd_dev->lock_rwsem);
+ rbd_img_handle_request(img_request, 0);
return;
err_img_request:
rbd_img_request_put(img_request);
-err_unlock:
- if (must_be_locked)
- up_read(&rbd_dev->lock_rwsem);
err_rq:
if (result)
rbd_warn(rbd_dev, "%s %llx at %llx result %d",
@@ -4589,7 +5545,13 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock);
INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock);
INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work);
- init_waitqueue_head(&rbd_dev->lock_waitq);
+ spin_lock_init(&rbd_dev->lock_lists_lock);
+ INIT_LIST_HEAD(&rbd_dev->acquiring_list);
+ INIT_LIST_HEAD(&rbd_dev->running_list);
+ init_completion(&rbd_dev->acquire_wait);
+ init_completion(&rbd_dev->releasing_wait);
+
+ spin_lock_init(&rbd_dev->object_map_lock);
rbd_dev->dev.bus = &rbd_bus_type;
rbd_dev->dev.type = &rbd_device_type;
@@ -4772,6 +5734,32 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
&rbd_dev->header.features);
}
+/*
+ * These are generic image flags, but since they are used only for
+ * object map, store them in rbd_dev->object_map_flags.
+ *
+ * For the same reason, this function is called only on object map
+ * (re)load and not on header refresh.
+ */
+static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev)
+{
+ __le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id);
+ __le64 flags;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, "get_flags",
+ &snapid, sizeof(snapid),
+ &flags, sizeof(flags));
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof(flags))
+ return -EBADMSG;
+
+ rbd_dev->object_map_flags = le64_to_cpu(flags);
+ return 0;
+}
+
struct parent_image_info {
u64 pool_id;
const char *pool_ns;
@@ -4829,7 +5817,7 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
"rbd", "parent_get", CEPH_OSD_FLAG_READ,
- req_page, sizeof(u64), reply_page, &reply_len);
+ req_page, sizeof(u64), &reply_page, &reply_len);
if (ret)
return ret == -EOPNOTSUPP ? 1 : ret;
@@ -4841,7 +5829,7 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
"rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
- req_page, sizeof(u64), reply_page, &reply_len);
+ req_page, sizeof(u64), &reply_page, &reply_len);
if (ret)
return ret;
@@ -4872,7 +5860,7 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
"rbd", "get_parent", CEPH_OSD_FLAG_READ,
- req_page, sizeof(u64), reply_page, &reply_len);
+ req_page, sizeof(u64), &reply_page, &reply_len);
if (ret)
return ret;
@@ -5605,28 +6593,49 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
{
down_write(&rbd_dev->lock_rwsem);
if (__rbd_is_lock_owner(rbd_dev))
- rbd_unlock(rbd_dev);
+ __rbd_release_lock(rbd_dev);
up_write(&rbd_dev->lock_rwsem);
}
+/*
+ * If the wait is interrupted, an error is returned even if the lock
+ * was successfully acquired. rbd_dev_image_unlock() will release it
+ * if needed.
+ */
static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
{
- int ret;
+ long ret;
if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
+ if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read)
+ return 0;
+
rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
return -EINVAL;
}
- /* FIXME: "rbd map --exclusive" should be in interruptible */
- down_read(&rbd_dev->lock_rwsem);
- ret = rbd_wait_state_locked(rbd_dev, true);
- up_read(&rbd_dev->lock_rwsem);
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ return 0;
+
+ rbd_assert(!rbd_is_lock_owner(rbd_dev));
+ queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
+ ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
+ ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
+ if (ret > 0)
+ ret = rbd_dev->acquire_err;
+ else if (!ret)
+ ret = -ETIMEDOUT;
+
if (ret) {
- rbd_warn(rbd_dev, "failed to acquire exclusive lock");
- return -EROFS;
+ rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
+ return ret;
}
+ /*
+ * The lock may have been released by now, unless automatic lock
+ * transitions are disabled.
+ */
+ rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev));
return 0;
}
@@ -5724,6 +6733,8 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
struct rbd_image_header *header;
rbd_dev_parent_put(rbd_dev);
+ rbd_object_map_free(rbd_dev);
+ rbd_dev_mapping_clear(rbd_dev);
/* Free dynamic fields from the header, then zero it out */
@@ -5824,7 +6835,6 @@ out_err:
static void rbd_dev_device_release(struct rbd_device *rbd_dev)
{
clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
- rbd_dev_mapping_clear(rbd_dev);
rbd_free_disk(rbd_dev);
if (!single_major)
unregister_blkdev(rbd_dev->major, rbd_dev->name);
@@ -5858,23 +6868,17 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
if (ret)
goto err_out_blkdev;
- ret = rbd_dev_mapping_set(rbd_dev);
- if (ret)
- goto err_out_disk;
-
set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only);
ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
if (ret)
- goto err_out_mapping;
+ goto err_out_disk;
set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
up_write(&rbd_dev->header_rwsem);
return 0;
-err_out_mapping:
- rbd_dev_mapping_clear(rbd_dev);
err_out_disk:
rbd_free_disk(rbd_dev);
err_out_blkdev:
@@ -5975,6 +6979,17 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
goto err_out_probe;
}
+ ret = rbd_dev_mapping_set(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP &&
+ (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
+ ret = rbd_object_map_load(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+ }
+
if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
ret = rbd_dev_v2_parent_info(rbd_dev);
if (ret)
@@ -6071,11 +7086,9 @@ static ssize_t do_rbd_add(struct bus_type *bus,
if (rc)
goto err_out_image_probe;
- if (rbd_dev->opts->exclusive) {
- rc = rbd_add_acquire_lock(rbd_dev);
- if (rc)
- goto err_out_device_setup;
- }
+ rc = rbd_add_acquire_lock(rbd_dev);
+ if (rc)
+ goto err_out_image_lock;
/* Everything's ready. Announce the disk to the world. */
@@ -6101,7 +7114,6 @@ out:
err_out_image_lock:
rbd_dev_image_unlock(rbd_dev);
-err_out_device_setup:
rbd_dev_device_release(rbd_dev);
err_out_image_probe:
rbd_dev_image_release(rbd_dev);
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index 62ff50d3e7a6..ac98ab6ccd3b 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -18,6 +18,7 @@
/* For format version 2, rbd image 'foo' consists of objects
* rbd_id.foo - id of image
* rbd_header.<id> - image metadata
+ * rbd_object_map.<id> - optional image object map
* rbd_data.<id>.0000000000000000
* rbd_data.<id>.0000000000000001
* ... - data
@@ -25,6 +26,7 @@
*/
#define RBD_HEADER_PREFIX "rbd_header."
+#define RBD_OBJECT_MAP_PREFIX "rbd_object_map."
#define RBD_ID_PREFIX "rbd_id."
#define RBD_V2_DATA_FORMAT "%s.%016llx"
@@ -39,6 +41,14 @@ enum rbd_notify_op {
RBD_NOTIFY_OP_HEADER_UPDATE = 3,
};
+#define OBJECT_NONEXISTENT 0
+#define OBJECT_EXISTS 1
+#define OBJECT_PENDING 2
+#define OBJECT_EXISTS_CLEAN 3
+
+#define RBD_FLAG_OBJECT_MAP_INVALID (1ULL << 0)
+#define RBD_FLAG_FAST_DIFF_INVALID (1ULL << 1)
+
/*
* For format version 1, rbd image 'foo' consists of objects
* foo.rbd - image metadata
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 2109cfe80219..8fafbeab510a 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -295,6 +295,22 @@ static ssize_t target_node_show(struct device *dev,
}
static DEVICE_ATTR_RO(target_node);
+static unsigned long long dev_dax_resource(struct dev_dax *dev_dax)
+{
+ struct dax_region *dax_region = dev_dax->region;
+
+ return dax_region->res.start;
+}
+
+static ssize_t resource_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ return sprintf(buf, "%#llx\n", dev_dax_resource(dev_dax));
+}
+static DEVICE_ATTR_RO(resource);
+
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -313,6 +329,8 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
return 0;
+ if (a == &dev_attr_resource.attr)
+ return 0400;
return a->mode;
}
@@ -320,6 +338,7 @@ static struct attribute *dev_dax_attributes[] = {
&dev_attr_modalias.attr,
&dev_attr_size.attr,
&dev_attr_target_node.attr,
+ &dev_attr_resource.attr,
NULL,
};
@@ -388,7 +407,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
* No 'host' or dax_operations since there is no access to this
* device outside of mmap of the resulting character device.
*/
- dax_dev = alloc_dax(dev_dax, NULL, NULL);
+ dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
if (!dax_dev)
goto err;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 4e5ae7e8b557..8ab12068eea3 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -195,6 +195,8 @@ enum dax_device_flags {
DAXDEV_ALIVE,
/* gate whether dax_flush() calls the low level flush routine */
DAXDEV_WRITE_CACHE,
+ /* flag to check if device supports synchronous flush */
+ DAXDEV_SYNC,
};
/**
@@ -372,6 +374,18 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
+bool __dax_synchronous(struct dax_device *dax_dev)
+{
+ return test_bit(DAXDEV_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(__dax_synchronous);
+
+void __set_dax_synchronous(struct dax_device *dax_dev)
+{
+ set_bit(DAXDEV_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(__set_dax_synchronous);
+
bool dax_alive(struct dax_device *dax_dev)
{
lockdep_assert_held(&dax_srcu);
@@ -526,7 +540,7 @@ static void dax_add_host(struct dax_device *dax_dev, const char *host)
}
struct dax_device *alloc_dax(void *private, const char *__host,
- const struct dax_operations *ops)
+ const struct dax_operations *ops, unsigned long flags)
{
struct dax_device *dax_dev;
const char *host;
@@ -549,6 +563,9 @@ struct dax_device *alloc_dax(void *private, const char *__host,
dax_add_host(dax_dev, host);
dax_dev->ops = ops;
dax_dev->private = private;
+ if (flags & DAXDEV_F_SYNC)
+ set_dax_synchronous(dax_dev);
+
return dax_dev;
err_dev:
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 671c24332802..df2011de7be2 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -28,10 +28,27 @@
#include "dm-core.h"
-#define SUB_JOB_SIZE 128
#define SPLIT_COUNT 8
#define MIN_JOBS 8
-#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE))
+
+#define DEFAULT_SUB_JOB_SIZE_KB 512
+#define MAX_SUB_JOB_SIZE_KB 1024
+
+static unsigned kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB;
+
+module_param(kcopyd_subjob_size_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients");
+
+static unsigned dm_get_kcopyd_subjob_size(void)
+{
+ unsigned sub_job_size_kb;
+
+ sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb,
+ DEFAULT_SUB_JOB_SIZE_KB,
+ MAX_SUB_JOB_SIZE_KB);
+
+ return sub_job_size_kb << 1; /* KB -> sectors; assumes 512-byte sectors (SECTOR_SHIFT == 9) */
+}
/*-----------------------------------------------------------------
* Each kcopyd client has its own little pool of preallocated
@@ -41,6 +58,7 @@ struct dm_kcopyd_client {
struct page_list *pages;
unsigned nr_reserved_pages;
unsigned nr_free_pages;
+ unsigned sub_job_size;
struct dm_io_client *io_client;
@@ -693,8 +711,8 @@ static void segment_complete(int read_err, unsigned long write_err,
progress = job->progress;
count = job->source.count - progress;
if (count) {
- if (count > SUB_JOB_SIZE)
- count = SUB_JOB_SIZE;
+ if (count > kc->sub_job_size)
+ count = kc->sub_job_size;
job->progress += count;
}
@@ -821,7 +839,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->master_job = job;
job->write_offset = 0;
- if (job->source.count <= SUB_JOB_SIZE)
+ if (job->source.count <= kc->sub_job_size)
dispatch_job(job);
else {
job->progress = 0;
@@ -888,6 +906,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
{
int r;
+ unsigned reserve_pages;
struct dm_kcopyd_client *kc;
kc = kzalloc(sizeof(*kc), GFP_KERNEL);
@@ -912,9 +931,12 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
goto bad_workqueue;
}
+ kc->sub_job_size = dm_get_kcopyd_subjob_size();
+ reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE);
+
kc->pages = NULL;
kc->nr_reserved_pages = kc->nr_free_pages = 0;
- r = client_reserve_pages(kc, RESERVE_PAGES);
+ r = client_reserve_pages(kc, reserve_pages);
if (r)
goto bad_client_pages;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 63916e1dc569..f150f5c5492b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2072,6 +2072,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ /* Once merging, discards no longer effect change */
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
down_write(&s->lock);
@@ -2331,6 +2337,8 @@ static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
if (snap->discard_zeroes_cow) {
struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+ down_read(&_origins_lock);
+
(void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest)
snap = snap_src;
@@ -2338,6 +2346,8 @@ static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
/* All discards are split on chunk_size boundary */
limits->discard_granularity = snap->store->chunk_size;
limits->max_discard_sectors = snap->store->chunk_size;
+
+ up_read(&_origins_lock);
}
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ec8b27e20de3..caaee8032afe 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -881,7 +881,7 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
EXPORT_SYMBOL_GPL(dm_table_set_type);
/* validate the dax capability of the target device span */
-static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
int blocksize = *(int *) data;
@@ -890,7 +890,15 @@ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
start, len);
}
-bool dm_table_supports_dax(struct dm_table *t, int blocksize)
+/* Check devices support synchronous DAX */
+static int device_synchronous(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ return dax_synchronous(dev->dax_dev);
+}
+
+bool dm_table_supports_dax(struct dm_table *t,
+ iterate_devices_callout_fn iterate_fn, int *blocksize)
{
struct dm_target *ti;
unsigned i;
@@ -903,8 +911,7 @@ bool dm_table_supports_dax(struct dm_table *t, int blocksize)
return false;
if (!ti->type->iterate_devices ||
- !ti->type->iterate_devices(ti, device_supports_dax,
- &blocksize))
+ !ti->type->iterate_devices(ti, iterate_fn, blocksize))
return false;
}
@@ -940,6 +947,7 @@ static int dm_table_determine_type(struct dm_table *t)
struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
+ int page_size = PAGE_SIZE;
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
@@ -984,7 +992,7 @@ static int dm_table_determine_type(struct dm_table *t)
verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
- if (dm_table_supports_dax(t, PAGE_SIZE) ||
+ if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
t->type = DM_TYPE_DAX_BIO_BASED;
} else {
@@ -1883,6 +1891,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
bool wc = false, fua = false;
+ int page_size = PAGE_SIZE;
/*
* Copy table's limits to the DM device's request_queue
@@ -1910,8 +1919,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
blk_queue_write_cache(q, wc, fua);
- if (dm_table_supports_dax(t, PAGE_SIZE))
+ if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+ if (dm_table_supports_dax(t, device_synchronous, NULL))
+ set_dax_synchronous(t->md->dax_dev);
+ }
else
blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 9faf3e49c7af..8545dcee9fd0 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1602,30 +1602,6 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd)
}
/*
- * Activate a zone (increment its reference count).
- */
-void dmz_activate_zone(struct dm_zone *zone)
-{
- set_bit(DMZ_ACTIVE, &zone->flags);
- atomic_inc(&zone->refcount);
-}
-
-/*
- * Deactivate a zone. This decrement the zone reference counter
- * and clears the active state of the zone once the count reaches 0,
- * indicating that all BIOs to the zone have completed. Returns
- * true if the zone was deactivated.
- */
-void dmz_deactivate_zone(struct dm_zone *zone)
-{
- if (atomic_dec_and_test(&zone->refcount)) {
- WARN_ON(!test_bit(DMZ_ACTIVE, &zone->flags));
- clear_bit_unlock(DMZ_ACTIVE, &zone->flags);
- smp_mb__after_atomic();
- }
-}
-
-/*
* Get the zone mapping a chunk, if the chunk is mapped already.
* If no mapping exist and the operation is WRITE, a zone is
* allocated and used to map the chunk.
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 12419f0bfe78..ed8de49c9a08 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -115,7 +115,6 @@ enum {
DMZ_BUF,
/* Zone internal state */
- DMZ_ACTIVE,
DMZ_RECLAIM,
DMZ_SEQ_WRITE_ERR,
};
@@ -128,7 +127,6 @@ enum {
#define dmz_is_empty(z) ((z)->wp_block == 0)
#define dmz_is_offline(z) test_bit(DMZ_OFFLINE, &(z)->flags)
#define dmz_is_readonly(z) test_bit(DMZ_READ_ONLY, &(z)->flags)
-#define dmz_is_active(z) test_bit(DMZ_ACTIVE, &(z)->flags)
#define dmz_in_reclaim(z) test_bit(DMZ_RECLAIM, &(z)->flags)
#define dmz_seq_write_err(z) test_bit(DMZ_SEQ_WRITE_ERR, &(z)->flags)
@@ -188,8 +186,30 @@ void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd);
unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd);
-void dmz_activate_zone(struct dm_zone *zone);
-void dmz_deactivate_zone(struct dm_zone *zone);
+/*
+ * Activate a zone (increment its reference count).
+ */
+static inline void dmz_activate_zone(struct dm_zone *zone)
+{
+ atomic_inc(&zone->refcount);
+}
+
+/*
+ * Deactivate a zone. This decrements the zone reference counter;
+ * a count of 0 indicates that all BIOs to the zone have completed.
+ */
+static inline void dmz_deactivate_zone(struct dm_zone *zone)
+{
+ atomic_dec(&zone->refcount);
+}
+
+/*
+ * Test if a zone is active, that is, has a refcount > 0.
+ */
+static inline bool dmz_is_active(struct dm_zone *zone)
+{
+ return atomic_read(&zone->refcount);
+}
int dmz_lock_zone_reclaim(struct dm_zone *zone);
void dmz_unlock_zone_reclaim(struct dm_zone *zone);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 61f1152b74e9..d0beef033e2f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1117,7 +1117,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd
if (!map)
return false;
- ret = dm_table_supports_dax(map, blocksize);
+ ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
dm_put_live_table(md, srcu_idx);
@@ -1989,7 +1989,8 @@ static struct mapped_device *alloc_dev(int minor)
sprintf(md->disk->disk_name, "dm-%d", minor);
if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
- md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+ md->dax_dev = alloc_dax(md, md->disk->disk_name,
+ &dm_dax_ops, 0);
if (!md->dax_dev)
goto bad;
}
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 17e3db54404c..0475673337f3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,10 @@ bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
-bool dm_table_supports_dax(struct dm_table *t, int blocksize);
+bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
+ int *blocksize);
+int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 6f2a088afad6..cefe233e0b52 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_OF_PMEM) += of_pmem.o
+obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
nd_pmem-y := pmem.o
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 26c1c7618891..2985ca949912 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -255,7 +255,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
sector_t sector = offset >> 9;
- int rc = 0;
+ int rc = 0, ret = 0;
if (unlikely(!size))
return 0;
@@ -293,7 +293,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}
memcpy_flushcache(nsio->addr + offset, buf, size);
- nvdimm_flush(to_nd_region(ndns->dev.parent));
+ ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
+ if (ret)
+ rc = ret;
return rc;
}
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index a434a5964cb9..2d8d7e554877 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1822,8 +1822,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
&& !guid_equal(&nd_set->type_guid,
&nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- nd_set->type_guid.b,
- nd_label->type_guid.b);
+ &nd_set->type_guid,
+ &nd_label->type_guid);
continue;
}
@@ -2227,8 +2227,8 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
if (namespace_label_has(ndd, type_guid)) {
if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- nd_set->type_guid.b,
- nd_label->type_guid.b);
+ &nd_set->type_guid,
+ &nd_label->type_guid);
return ERR_PTR(-EAGAIN);
}
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index d24304c0e6d7..1b9955651379 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -155,6 +155,7 @@ struct nd_region {
struct badblocks bb;
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
+ int (*flush)(struct nd_region *nd_region, struct bio *bio);
struct nd_mapping mapping[0];
};
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
new file mode 100644
index 000000000000..10351d5b49fa
--- /dev/null
+++ b/drivers/nvdimm/nd_virtio.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nd_virtio.c: Virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and provides a virtio based flushing
+ * interface.
+ */
+#include "virtio_pmem.h"
+#include "nd.h"
+
+ /* Virtqueue callback (interrupt handler): complete host-acked requests; each ack also wakes one submitter queued on req_list */
+void virtio_pmem_host_ack(struct virtqueue *vq)
+{
+ struct virtio_pmem *vpmem = vq->vdev->priv;
+ struct virtio_pmem_request *req_data, *req_buf;
+ unsigned long flags;
+ unsigned int len;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
+ req_data->done = true;
+ wake_up(&req_data->host_acked);
+
+ if (!list_empty(&vpmem->req_list)) {
+ req_buf = list_first_entry(&vpmem->req_list,
+ struct virtio_pmem_request, list);
+ req_buf->wq_buf_avail = true;
+ wake_up(&req_buf->wq_buf);
+ list_del(&req_buf->list);
+ }
+ }
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+}
+EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);
+
+ /* Submit a flush request to the host and sleep until it is acked; returns the host's status, -ENOMEM or -EIO */
+static int virtio_pmem_flush(struct nd_region *nd_region)
+{
+ struct virtio_device *vdev = nd_region->provider_data;
+ struct virtio_pmem *vpmem = vdev->priv;
+ struct virtio_pmem_request *req_data;
+ struct scatterlist *sgs[2], sg, ret;
+ unsigned long flags;
+ int err, err1;
+
+ might_sleep();
+ req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
+ if (!req_data)
+ return -ENOMEM;
+
+ req_data->done = false;
+ init_waitqueue_head(&req_data->host_acked);
+ init_waitqueue_head(&req_data->wq_buf);
+ INIT_LIST_HEAD(&req_data->list);
+ req_data->req.type = cpu_to_le32(VIRTIO_PMEM_REQ_TYPE_FLUSH);
+ sg_init_one(&sg, &req_data->req, sizeof(req_data->req));
+ sgs[0] = &sg;
+ sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp));
+ sgs[1] = &ret;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ /*
+ * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual
+ * queue does not have free descriptor. We add the request
+ * to req_list and wait for host_ack to wake us up when free
+ * slots are available.
+ */
+ while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
+ GFP_ATOMIC)) == -ENOSPC) {
+
+ dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
+ req_data->wq_buf_avail = false;
+ list_add_tail(&req_data->list, &vpmem->req_list);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+
+ /* A host response results in "host_ack" getting called */
+ wait_event(req_data->wq_buf, req_data->wq_buf_avail);
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ }
+ err1 = virtqueue_kick(vpmem->req_vq);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+ /*
+ * virtqueue_add_sgs failed with error different than -ENOSPC, we can't
+ * do anything about that.
+ */
+ if (err || !err1) {
+ dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
+ err = -EIO;
+ } else {
+ /* A host response results in "host_ack" getting called */
+ wait_event(req_data->host_acked, req_data->done);
+ err = le32_to_cpu(req_data->resp.ret);
+ }
+
+ kfree(req_data);
+ return err;
+}
+
+/* nd_region flush callback: chain a child PREFLUSH bio to @bio when possible, otherwise flush synchronously */
+int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
+{
+ /*
+ * Create child bio for asynchronous flush and chain with
+ * parent bio. Otherwise directly call nd_region flush.
+ */
+ if (bio && bio->bi_iter.bi_sector != -1) {
+ struct bio *child = bio_alloc(GFP_ATOMIC, 0);
+
+ if (!child)
+ return -ENOMEM;
+ bio_copy_dev(child, bio);
+ child->bi_opf = REQ_PREFLUSH;
+ child->bi_iter.bi_sector = -1;
+ bio_chain(child, bio);
+ submit_bio(child);
+ return 0;
+ }
+ if (virtio_pmem_flush(nd_region))
+ return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(async_pmem_flush);
+MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index e7d8cc9f41e8..2bf3acd69613 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -184,6 +184,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
+ int ret = 0;
blk_status_t rc = 0;
bool do_acct;
unsigned long start;
@@ -193,7 +194,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct nd_region *nd_region = to_region(pmem);
if (bio->bi_opf & REQ_PREFLUSH)
- nvdimm_flush(nd_region);
+ ret = nvdimm_flush(nd_region, bio);
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@@ -208,7 +209,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
nd_iostat_end(bio, start);
if (bio->bi_opf & REQ_FUA)
- nvdimm_flush(nd_region);
+ ret = nvdimm_flush(nd_region, bio);
+
+ if (ret)
+ bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -362,6 +366,7 @@ static int pmem_attach_disk(struct device *dev,
struct gendisk *disk;
void *addr;
int rc;
+ unsigned long flags = 0UL;
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem)
@@ -457,14 +462,15 @@ static int pmem_attach_disk(struct device *dev,
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
disk->bb = &pmem->bb;
- dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
+ if (is_nvdimm_sync(nd_region))
+ flags = DAXDEV_F_SYNC;
+ dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
if (!dax_dev) {
put_disk(disk);
return -ENOMEM;
}
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
-
gendev = disk_to_dev(disk);
gendev->groups = pmem_attribute_groups;
@@ -522,14 +528,14 @@ static int nd_pmem_remove(struct device *dev)
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
}
- nvdimm_flush(to_nd_region(dev->parent));
+ nvdimm_flush(to_nd_region(dev->parent), NULL);
return 0;
}
static void nd_pmem_shutdown(struct device *dev)
{
- nvdimm_flush(to_nd_region(dev->parent));
+ nvdimm_flush(to_nd_region(dev->parent), NULL);
}
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4fed9ce9c2fe..56f2227f192a 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -287,7 +287,9 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att
return rc;
if (!flush)
return -EINVAL;
- nvdimm_flush(nd_region);
+ rc = nvdimm_flush(nd_region, NULL);
+ if (rc)
+ return rc;
return len;
}
@@ -1077,6 +1079,11 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
dev->of_node = ndr_desc->of_node;
nd_region->ndr_size = resource_size(ndr_desc->res);
nd_region->ndr_start = ndr_desc->res->start;
+ if (ndr_desc->flush)
+ nd_region->flush = ndr_desc->flush;
+ else
+ nd_region->flush = NULL;
+
nd_device_register(dev);
return nd_region;
@@ -1117,11 +1124,24 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
+{
+ int rc = 0;
+
+ if (!nd_region->flush) /* no region-specific callback: use the generic flush */
+ rc = generic_nvdimm_flush(nd_region);
+ else {
+ if (nd_region->flush(nd_region, bio))
+ rc = -EIO; /* any callback failure is reported as -EIO */
+ }
+
+ return rc;
+}
/**
* nvdimm_flush - flush any posted write queues between the cpu and pmem media
* @nd_region: blk or interleaved pmem region
*/
-void nvdimm_flush(struct nd_region *nd_region)
+int generic_nvdimm_flush(struct nd_region *nd_region)
{
struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
int i, idx;
@@ -1145,6 +1165,8 @@ void nvdimm_flush(struct nd_region *nd_region)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
wmb();
+
+ return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
@@ -1189,6 +1211,13 @@ int nvdimm_has_cache(struct nd_region *nd_region)
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
+bool is_nvdimm_sync(struct nd_region *nd_region)
+{
+ return is_nd_pmem(&nd_region->dev) &&
+ !test_bit(ND_REGION_ASYNC, &nd_region->flags);
+}
+EXPORT_SYMBOL_GPL(is_nvdimm_sync);
+
struct conflict_context {
struct nd_region *nd_region;
resource_size_t start, size;
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
new file mode 100644
index 000000000000..5e3d07b47e0c
--- /dev/null
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * virtio_pmem.c: Virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and registers the virtual pmem device
+ * with libnvdimm core.
+ */
+#include "virtio_pmem.h"
+#include "nd.h"
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+ /* Set up the single "flush_queue" virtqueue, then the lock and deferred-request list */
+static int init_vq(struct virtio_pmem *vpmem)
+{
+ /* single vq */
+ vpmem->req_vq = virtio_find_single_vq(vpmem->vdev,
+ virtio_pmem_host_ack, "flush_queue");
+ if (IS_ERR(vpmem->req_vq))
+ return PTR_ERR(vpmem->req_vq);
+
+ spin_lock_init(&vpmem->pmem_lock);
+ INIT_LIST_HEAD(&vpmem->req_list);
+
+ return 0;
+}
+
+static int virtio_pmem_probe(struct virtio_device *vdev)
+{
+ struct nd_region_desc ndr_desc = {};
+ int nid = dev_to_node(&vdev->dev);
+ struct nd_region *nd_region;
+ struct virtio_pmem *vpmem;
+ struct resource res;
+ int err = 0;
+
+ if (!vdev->config->get) {
+ dev_err(&vdev->dev, "%s failure: config access disabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL);
+ if (!vpmem) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ vpmem->vdev = vdev;
+ vdev->priv = vpmem;
+ err = init_vq(vpmem);
+ if (err) {
+ dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
+ goto out_err;
+ }
+
+ virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ start, &vpmem->start);
+ virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ size, &vpmem->size);
+
+ res.start = vpmem->start;
+ res.end = vpmem->start + vpmem->size - 1;
+ vpmem->nd_desc.provider_name = "virtio-pmem";
+ vpmem->nd_desc.module = THIS_MODULE;
+
+ vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev,
+ &vpmem->nd_desc);
+ if (!vpmem->nvdimm_bus) {
+ dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n");
+ err = -ENXIO;
+ goto out_vq;
+ }
+
+ dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus);
+
+ ndr_desc.res = &res;
+ ndr_desc.numa_node = nid;
+ ndr_desc.flush = async_pmem_flush;
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+ set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+ nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
+ if (!nd_region) {
+ dev_err(&vdev->dev, "failed to create nvdimm region\n");
+ err = -ENXIO;
+ goto out_nd;
+ }
+ nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
+ return 0;
+out_nd:
+ nvdimm_bus_unregister(vpmem->nvdimm_bus);
+out_vq:
+ vdev->config->del_vqs(vdev);
+out_err:
+ return err;
+}
+
+static void virtio_pmem_remove(struct virtio_device *vdev)
+{
+ struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev);
+
+ nvdimm_bus_unregister(nvdimm_bus);
+ vdev->config->del_vqs(vdev);
+ vdev->config->reset(vdev);
+}
+
+static struct virtio_driver virtio_pmem_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtio_pmem_probe,
+ .remove = virtio_pmem_remove,
+};
+
+module_virtio_driver(virtio_pmem_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio pmem driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
new file mode 100644
index 000000000000..0dddefe594c4
--- /dev/null
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * virtio_pmem.h: virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and provides a virtio based flushing
+ * interface.
+ **/
+
+#ifndef _LINUX_VIRTIO_PMEM_H
+#define _LINUX_VIRTIO_PMEM_H
+
+#include <linux/module.h>
+#include <uapi/linux/virtio_pmem.h>
+#include <linux/libnvdimm.h>
+#include <linux/spinlock.h>
+
+struct virtio_pmem_request {
+ struct virtio_pmem_req req;
+ struct virtio_pmem_resp resp;
+
+ /* Wait queue to process deferred work after ack from host */
+ wait_queue_head_t host_acked;
+ bool done;
+
+ /* Wait queue to process deferred work after virt queue buffer avail */
+ wait_queue_head_t wq_buf;
+ bool wq_buf_avail;
+ struct list_head list;
+};
+
+struct virtio_pmem {
+ struct virtio_device *vdev;
+
+ /* Virtio pmem request queue */
+ struct virtqueue *req_vq;
+
+ /* nvdimm bus registers virtio pmem device */
+ struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_bus_descriptor nd_desc;
+
+ /* List to store deferred work if virtqueue is full */
+ struct list_head req_list;
+
+ /* Synchronize virtqueue data */
+ spinlock_t pmem_lock;
+
+ /* Memory region information */
+ __u64 start;
+ __u64 size;
+};
+
+void virtio_pmem_host_ack(struct virtqueue *vq);
+int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
+#endif
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index d04d4378ca50..63502ca537eb 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -679,7 +679,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
goto put_dev;
dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
- &dcssblk_dax_ops);
+ &dcssblk_dax_ops, DAXDEV_F_SYNC);
if (!dev_info->dax_dev) {
rc = -ENOMEM;
goto put_dev;
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 023fc3bc01c6..078615cf2afc 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -43,6 +43,17 @@ config VIRTIO_PCI_LEGACY
If unsure, say Y.
+config VIRTIO_PMEM
+ tristate "Support for virtio pmem driver"
+ depends on VIRTIO
+ depends on LIBNVDIMM
+ help
+ This driver provides access to virtio-pmem devices, storage devices
+ that are mapped into the physical address space - similar to NVDIMMs
+ - with a virtio-based flushing interface.
+
+ If unsure, say Y.
+
config VIRTIO_BALLOON
tristate "Virtio balloon driver"
depends on VIRTIO
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 6cad0b33d7ad..8188963a405b 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -58,6 +58,15 @@ config WATCHDOG_HANDLE_BOOT_ENABLED
the watchdog on its own. Thus if your userspace does not start fast
enough your device will reboot.
+config WATCHDOG_OPEN_TIMEOUT
+ int "Timeout value for opening watchdog device"
+ default 0
+ help
+ The maximum time, in seconds, for which the watchdog framework takes
+ care of pinging a hardware watchdog. A value of 0 means infinite. The
+ value set here can be overridden by the commandline parameter
+ "watchdog.open_timeout".
+
config WATCHDOG_SYSFS
bool "Read different watchdog information through sysfs"
help
@@ -717,6 +726,7 @@ config IMX2_WDT
config IMX_SC_WDT
tristate "IMX SC Watchdog"
depends on HAVE_ARM_SMCCC
+ depends on IMX_SCU
select WATCHDOG_CORE
help
This is the driver for the system controller watchdog
diff --git a/drivers/watchdog/acquirewdt.c b/drivers/watchdog/acquirewdt.c
index 957d1255d4ca..848db958411e 100644
--- a/drivers/watchdog/acquirewdt.c
+++ b/drivers/watchdog/acquirewdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Acquire Single Board Computer Watchdog Timer driver
*
@@ -6,11 +7,6 @@
* (c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/advantechwdt.c b/drivers/watchdog/advantechwdt.c
index 2766af292a71..0d02bb275b3d 100644
--- a/drivers/watchdog/advantechwdt.c
+++ b/drivers/watchdog/advantechwdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Advantech Single Board Computer WDT driver
*
@@ -9,11 +10,6 @@
* (c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
index f0148637e5dd..cc71861e033a 100644
--- a/drivers/watchdog/aspeed_wdt.c
+++ b/drivers/watchdog/aspeed_wdt.c
@@ -309,13 +309,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)
wdt->wdd.bootstatus = WDIOF_CARDRESET;
- ret = devm_watchdog_register_device(dev, &wdt->wdd);
- if (ret) {
- dev_err(dev, "failed to register\n");
- return ret;
- }
-
- return 0;
+ return devm_watchdog_register_device(dev, &wdt->wdd);
}
static struct platform_driver aspeed_watchdog_driver = {
diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c
index 560c1c54c177..dec6ca019bea 100644
--- a/drivers/watchdog/bcm2835_wdt.c
+++ b/drivers/watchdog/bcm2835_wdt.c
@@ -202,10 +202,8 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_reboot(&bcm2835_wdt_wdd);
err = devm_watchdog_register_device(dev, &bcm2835_wdt_wdd);
- if (err) {
- dev_err(dev, "Failed to register watchdog device");
+ if (err)
return err;
- }
if (pm_power_off == NULL) {
pm_power_off = bcm2835_power_off;
@@ -240,6 +238,7 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+MODULE_ALIAS("platform:bcm2835-wdt");
MODULE_AUTHOR("Lubomir Rintel <lkundrak@v3.sk>");
MODULE_DESCRIPTION("Driver for Broadcom BCM2835 watchdog timer");
MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/bcm7038_wdt.c b/drivers/watchdog/bcm7038_wdt.c
index d3d88f6703d7..979caa18d3c8 100644
--- a/drivers/watchdog/bcm7038_wdt.c
+++ b/drivers/watchdog/bcm7038_wdt.c
@@ -159,10 +159,8 @@ static int bcm7038_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_reboot(&wdt->wdd);
watchdog_stop_on_unregister(&wdt->wdd);
err = devm_watchdog_register_device(dev, &wdt->wdd);
- if (err) {
- dev_err(dev, "Failed to register watchdog device\n");
+ if (err)
return err;
- }
dev_info(dev, "Registered BCM7038 Watchdog\n");
diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c
index 921291025680..eb850a8d19df 100644
--- a/drivers/watchdog/bcm_kona_wdt.c
+++ b/drivers/watchdog/bcm_kona_wdt.c
@@ -301,10 +301,8 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_reboot(&bcm_kona_wdt_wdd);
watchdog_stop_on_unregister(&bcm_kona_wdt_wdd);
ret = devm_watchdog_register_device(dev, &bcm_kona_wdt_wdd);
- if (ret) {
- dev_err(dev, "Failed to register watchdog device");
+ if (ret)
return ret;
- }
bcm_kona_wdt_debug_init(pdev);
dev_dbg(dev, "Broadcom Kona Watchdog Timer");
diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
index a22f2d431a35..f8d4e91d0383 100644
--- a/drivers/watchdog/cadence_wdt.c
+++ b/drivers/watchdog/cadence_wdt.c
@@ -363,10 +363,8 @@ static int cdns_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_reboot(cdns_wdt_device);
watchdog_stop_on_unregister(cdns_wdt_device);
ret = devm_watchdog_register_device(dev, cdns_wdt_device);
- if (ret) {
- dev_err(dev, "Failed to register wdt device\n");
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdt);
dev_info(dev, "Xilinx Watchdog Timer at %p with timeout %ds%s\n",
diff --git a/drivers/watchdog/da9052_wdt.c b/drivers/watchdog/da9052_wdt.c
index a2feef1ff307..d708c091bf1b 100644
--- a/drivers/watchdog/da9052_wdt.c
+++ b/drivers/watchdog/da9052_wdt.c
@@ -176,14 +176,7 @@ static int da9052_wdt_probe(struct platform_device *pdev)
return ret;
}
- ret = devm_watchdog_register_device(dev, &driver_data->wdt);
- if (ret != 0) {
- dev_err(da9052->dev, "watchdog_register_device() failed: %d\n",
- ret);
- return ret;
- }
-
- return ret;
+ return devm_watchdog_register_device(dev, &driver_data->wdt);
}
static struct platform_driver da9052_wdt_driver = {
diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c
index aac749cfaccb..e149e66a6ea9 100644
--- a/drivers/watchdog/da9062_wdt.c
+++ b/drivers/watchdog/da9062_wdt.c
@@ -214,11 +214,8 @@ static int da9062_wdt_probe(struct platform_device *pdev)
watchdog_set_drvdata(&wdt->wdtdev, wdt);
ret = devm_watchdog_register_device(dev, &wdt->wdtdev);
- if (ret < 0) {
- dev_err(wdt->hw->dev,
- "watchdog registration failed (%d)\n", ret);
+ if (ret < 0)
return ret;
- }
return da9062_wdt_ping(&wdt->wdtdev);
}
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 7b2ee35b5ffd..2b3f3cd382ef 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/char/watchdog/davinci_wdt.c
*
@@ -5,10 +6,7 @@
*
* Copyright (C) 2006-2013 Texas Instruments.
*
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2007 (c) MontaVista Software, Inc.
*/
#include <linux/module.h>
@@ -247,13 +245,7 @@ static int davinci_wdt_probe(struct platform_device *pdev)
if (IS_ERR(davinci_wdt->base))
return PTR_ERR(davinci_wdt->base);
- ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "cannot register watchdog device\n");
- return ret;
- }
-
- return 0;
+ return devm_watchdog_register_device(dev, wdd);
}
static const struct of_device_id davinci_wdt_of_match[] = {
diff --git a/drivers/watchdog/digicolor_wdt.c b/drivers/watchdog/digicolor_wdt.c
index 8af6e9a67d0d..073d37867f47 100644
--- a/drivers/watchdog/digicolor_wdt.c
+++ b/drivers/watchdog/digicolor_wdt.c
@@ -118,7 +118,6 @@ static int dc_wdt_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct dc_wdt *wdt;
- int ret;
wdt = devm_kzalloc(dev, sizeof(struct dc_wdt), GFP_KERNEL);
if (!wdt)
@@ -141,13 +140,7 @@ static int dc_wdt_probe(struct platform_device *pdev)
watchdog_set_restart_priority(&dc_wdt_wdd, 128);
watchdog_init_timeout(&dc_wdt_wdd, timeout, dev);
watchdog_stop_on_reboot(&dc_wdt_wdd);
- ret = devm_watchdog_register_device(dev, &dc_wdt_wdd);
- if (ret) {
- dev_err(dev, "Failed to register watchdog device");
- return ret;
- }
-
- return 0;
+ return devm_watchdog_register_device(dev, &dc_wdt_wdd);
}
static const struct of_device_id dc_wdt_of_match[] = {
diff --git a/drivers/watchdog/ebc-c384_wdt.c b/drivers/watchdog/ebc-c384_wdt.c
index c176f59fea28..8ef4b0df3855 100644
--- a/drivers/watchdog/ebc-c384_wdt.c
+++ b/drivers/watchdog/ebc-c384_wdt.c
@@ -2,15 +2,6 @@
/*
* Watchdog timer driver for the WinSystems EBC-C384
* Copyright (C) 2016 William Breathitt Gray
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/device.h>
#include <linux/dmi.h>
diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c
index 89129e6fa9b6..3a83a48abcae 100644
--- a/drivers/watchdog/eurotechwdt.c
+++ b/drivers/watchdog/eurotechwdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Eurotech CPU-1220/1410/1420 on board WDT driver
*
@@ -11,11 +12,6 @@
* (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/ftwdt010_wdt.c b/drivers/watchdog/ftwdt010_wdt.c
index d9626ef9b9ae..21dcc7765688 100644
--- a/drivers/watchdog/ftwdt010_wdt.c
+++ b/drivers/watchdog/ftwdt010_wdt.c
@@ -165,10 +165,8 @@ static int ftwdt010_wdt_probe(struct platform_device *pdev)
}
ret = devm_watchdog_register_device(dev, &gwdt->wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog\n");
+ if (ret)
return ret;
- }
/* Set up platform driver data */
platform_set_drvdata(pdev, gwdt);
diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 777de10f2a78..0923201ce874 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c
@@ -13,6 +13,12 @@
#include <linux/platform_device.h>
#include <linux/watchdog.h>
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout,
+ "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
#define SOFT_TIMEOUT_MIN 1
#define SOFT_TIMEOUT_DEF 60
@@ -151,6 +157,7 @@ static int gpio_wdt_probe(struct platform_device *pdev)
priv->wdd.timeout = SOFT_TIMEOUT_DEF;
watchdog_init_timeout(&priv->wdd, 0, dev);
+ watchdog_set_nowayout(&priv->wdd, nowayout);
watchdog_stop_on_reboot(&priv->wdd);
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 8a90f159ffb1..7d34bcf1c45b 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -22,10 +22,11 @@
#include <linux/watchdog.h>
#include <asm/nmi.h>
-#define HPWDT_VERSION "2.0.2"
+#define HPWDT_VERSION "2.0.3"
#define SECS_TO_TICKS(secs) ((secs) * 1000 / 128)
#define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000)
-#define HPWDT_MAX_TIMER TICKS_TO_SECS(65535)
+#define HPWDT_MAX_TICKS 65535
+#define HPWDT_MAX_TIMER TICKS_TO_SECS(HPWDT_MAX_TICKS)
#define DEFAULT_MARGIN 30
#define PRETIMEOUT_SEC 9
@@ -33,6 +34,7 @@ static bool ilo5;
static unsigned int soft_margin = DEFAULT_MARGIN; /* in seconds */
static bool nowayout = WATCHDOG_NOWAYOUT;
static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
+static int kdumptimeout = -1;
static void __iomem *pci_mem_addr; /* the PCI-memory address */
static unsigned long __iomem *hpwdt_nmistat;
@@ -52,15 +54,21 @@ static const struct pci_device_id hpwdt_blacklist[] = {
{0}, /* terminate list */
};
+static struct watchdog_device hpwdt_dev;
/*
* Watchdog operations
*/
+static int hpwdt_hw_is_running(void)
+{
+ return ioread8(hpwdt_timer_con) & 0x01;
+}
+
static int hpwdt_start(struct watchdog_device *wdd)
{
int control = 0x81 | (pretimeout ? 0x4 : 0);
- int reload = SECS_TO_TICKS(wdd->timeout);
+ int reload = SECS_TO_TICKS(min(wdd->timeout, wdd->max_hw_heartbeat_ms/1000));
- dev_dbg(wdd->parent, "start watchdog 0x%08x:0x%02x\n", reload, control);
+ dev_dbg(wdd->parent, "start watchdog 0x%08x:0x%08x:0x%02x\n", wdd->timeout, reload, control);
iowrite16(reload, hpwdt_timer_reg);
iowrite8(control, hpwdt_timer_con);
@@ -85,12 +93,18 @@ static int hpwdt_stop_core(struct watchdog_device *wdd)
return 0;
}
+static void hpwdt_ping_ticks(int val)
+{
+ val = min(val, HPWDT_MAX_TICKS);
+ iowrite16(val, hpwdt_timer_reg);
+}
+
static int hpwdt_ping(struct watchdog_device *wdd)
{
- int reload = SECS_TO_TICKS(wdd->timeout);
+ int reload = SECS_TO_TICKS(min(wdd->timeout, wdd->max_hw_heartbeat_ms/1000));
- dev_dbg(wdd->parent, "ping watchdog 0x%08x\n", reload);
- iowrite16(reload, hpwdt_timer_reg);
+ dev_dbg(wdd->parent, "ping watchdog 0x%08x:0x%08x\n", wdd->timeout, reload);
+ hpwdt_ping_ticks(reload);
return 0;
}
@@ -166,7 +180,14 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
if (ilo5 && !pretimeout && !mynmi)
return NMI_DONE;
- hpwdt_stop();
+ if (kdumptimeout < 0)
+ hpwdt_stop();
+ else if (kdumptimeout == 0)
+ ;
+ else {
+ unsigned int val = max((unsigned int)kdumptimeout, hpwdt_dev.timeout);
+ hpwdt_ping_ticks(SECS_TO_TICKS(val));
+ }
hex_byte_pack(panic_msg, mynmi);
nmi_panic(regs, panic_msg);
@@ -204,9 +225,9 @@ static struct watchdog_device hpwdt_dev = {
.info = &ident,
.ops = &hpwdt_ops,
.min_timeout = 1,
- .max_timeout = HPWDT_MAX_TIMER,
.timeout = DEFAULT_MARGIN,
.pretimeout = PRETIMEOUT_SEC,
+ .max_hw_heartbeat_ms = HPWDT_MAX_TIMER * 1000,
};
@@ -298,14 +319,18 @@ static int hpwdt_init_one(struct pci_dev *dev,
hpwdt_timer_reg = pci_mem_addr + 0x70;
hpwdt_timer_con = pci_mem_addr + 0x72;
- /* Make sure that timer is disabled until /dev/watchdog is opened */
- hpwdt_stop();
+ /* Have the core update running timer until user space is ready */
+ if (hpwdt_hw_is_running()) {
+ dev_info(&dev->dev, "timer is running\n");
+ set_bit(WDOG_HW_RUNNING, &hpwdt_dev.status);
+ }
/* Initialize NMI Decoding functionality */
retval = hpwdt_init_nmi_decoding(dev);
if (retval != 0)
goto error_init_nmi_decoding;
+ watchdog_stop_on_unregister(&hpwdt_dev);
watchdog_set_nowayout(&hpwdt_dev, nowayout);
watchdog_init_timeout(&hpwdt_dev, soft_margin, NULL);
@@ -314,13 +339,12 @@ static int hpwdt_init_one(struct pci_dev *dev,
pretimeout = 0;
}
hpwdt_dev.pretimeout = pretimeout ? PRETIMEOUT_SEC : 0;
+ kdumptimeout = min(kdumptimeout, HPWDT_MAX_TIMER);
hpwdt_dev.parent = &dev->dev;
retval = watchdog_register_device(&hpwdt_dev);
- if (retval < 0) {
- dev_err(&dev->dev, "watchdog register failed: %d.\n", retval);
+ if (retval < 0)
goto error_wd_register;
- }
dev_info(&dev->dev, "HPE Watchdog Timer Driver: Version: %s\n",
HPWDT_VERSION);
@@ -328,6 +352,7 @@ static int hpwdt_init_one(struct pci_dev *dev,
hpwdt_dev.timeout, nowayout);
dev_info(&dev->dev, "pretimeout: %s.\n",
pretimeout ? "on" : "off");
+ dev_info(&dev->dev, "kdumptimeout: %d.\n", kdumptimeout);
if (dev->subsystem_vendor == PCI_VENDOR_ID_HP_3PAR)
ilo5 = true;
@@ -345,9 +370,6 @@ error_pci_iomap:
static void hpwdt_exit(struct pci_dev *dev)
{
- if (!nowayout)
- hpwdt_stop();
-
watchdog_unregister_device(&hpwdt_dev);
hpwdt_exit_nmi_decoding();
pci_iounmap(dev, pci_mem_addr);
@@ -376,6 +398,9 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+module_param(kdumptimeout, int, 0444);
+MODULE_PARM_DESC(kdumptimeout, "Timeout applied for crash kernel transition in seconds");
+
#ifdef CONFIG_HPWDT_NMI_DECODING
module_param(pretimeout, bool, 0);
MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
diff --git a/drivers/watchdog/i6300esb.c b/drivers/watchdog/i6300esb.c
index f98f35a05896..a30835f547b3 100644
--- a/drivers/watchdog/i6300esb.c
+++ b/drivers/watchdog/i6300esb.c
@@ -315,11 +315,8 @@ static int esb_probe(struct pci_dev *pdev,
/* Register the watchdog so that userspace has access to it */
ret = watchdog_register_device(&edev->wdd);
- if (ret != 0) {
- dev_err(&pdev->dev,
- "cannot register watchdog device (err=%d)\n", ret);
+ if (ret != 0)
goto err_unmap;
- }
dev_info(&pdev->dev,
"initialized. heartbeat=%d sec (nowayout=%d)\n",
edev->wdd.timeout, nowayout);
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 68a9d9cc2eb8..4f1b96f59349 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* intel TCO vendor specific watchdog driver support
*
* (c) Copyright 2006-2009 Wim Van Sebroeck <wim@iguana.be>.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor
* provide warranty for any of this software. This material is
* provided "AS-IS" and at no charge.
@@ -216,4 +212,3 @@ MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>, "
MODULE_DESCRIPTION("Intel TCO Vendor Specific WatchDog Timer Driver Support");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("GPL");
-
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 89cea6ce9a08..c559f706ae7e 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* intel TCO Watchdog Driver
*
* (c) Copyright 2006-2011 Wim Van Sebroeck <wim@iguana.be>.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor
* provide warranty for any of this software. This material is
* provided "AS-IS" and at no charge.
diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 30d6cec582af..92fd7f33bc4d 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* IB700 Single Board Computer WDT driver
*
@@ -14,11 +15,6 @@
* (c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/ie6xx_wdt.c b/drivers/watchdog/ie6xx_wdt.c
index 508fbefce9f6..8f28993fab8b 100644
--- a/drivers/watchdog/ie6xx_wdt.c
+++ b/drivers/watchdog/ie6xx_wdt.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(resetmode,
static struct {
unsigned short sch_wdtba;
- struct spinlock unlock_sequence;
+ spinlock_t unlock_sequence;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs;
#endif
@@ -254,12 +254,8 @@ static int ie6xx_wdt_probe(struct platform_device *pdev)
ie6xx_wdt_debugfs_init();
ret = watchdog_register_device(&ie6xx_wdt_dev);
- if (ret) {
- dev_err(&pdev->dev,
- "Watchdog timer: cannot register device (err =%d)\n",
- ret);
+ if (ret)
goto misc_register_error;
- }
return 0;
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index a606005dd65f..32af3974e6bb 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -316,10 +316,8 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
regmap_write(wdev->regmap, IMX2_WDT_WMCR, 0);
ret = watchdog_register_device(wdog);
- if (ret) {
- dev_err(&pdev->dev, "cannot register watchdog device\n");
+ if (ret)
goto disable_clk;
- }
dev_info(&pdev->dev, "timeout %d sec (nowayout=%d)\n",
wdog->timeout, nowayout);
diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c
index 49848b66186c..78eaaf75a263 100644
--- a/drivers/watchdog/imx_sc_wdt.c
+++ b/drivers/watchdog/imx_sc_wdt.c
@@ -4,6 +4,7 @@
*/
#include <linux/arm-smccc.h>
+#include <linux/firmware/imx/sci.h>
#include <linux/io.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -33,11 +34,19 @@
#define SC_TIMER_WDOG_ACTION_PARTITION 0
+#define SC_IRQ_WDOG 1
+#define SC_IRQ_GROUP_WDOG 1
+
static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0000);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+struct imx_sc_wdt_device {
+ struct watchdog_device wdd;
+ struct notifier_block wdt_notifier;
+};
+
static int imx_sc_wdt_ping(struct watchdog_device *wdog)
{
struct arm_smccc_res res;
@@ -85,24 +94,66 @@ static int imx_sc_wdt_set_timeout(struct watchdog_device *wdog,
return res.a0 ? -EACCES : 0;
}
+static int imx_sc_wdt_set_pretimeout(struct watchdog_device *wdog,
+ unsigned int pretimeout)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_PRETIME_WDOG,
+ pretimeout * 1000, 0, 0, 0, 0, 0, &res);
+ if (res.a0)
+ return -EACCES;
+
+ wdog->pretimeout = pretimeout;
+
+ return 0;
+}
+
+static int imx_sc_wdt_notify(struct notifier_block *nb,
+ unsigned long event, void *group)
+{
+ struct imx_sc_wdt_device *imx_sc_wdd =
+ container_of(nb,
+ struct imx_sc_wdt_device,
+ wdt_notifier);
+
+ if (event & SC_IRQ_WDOG &&
+ *(u8 *)group == SC_IRQ_GROUP_WDOG)
+ watchdog_notify_pretimeout(&imx_sc_wdd->wdd);
+
+ return 0;
+}
+
+static void imx_sc_wdt_action(void *data)
+{
+ struct notifier_block *wdt_notifier = data;
+
+ imx_scu_irq_unregister_notifier(wdt_notifier);
+ imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG,
+ SC_IRQ_WDOG,
+ false);
+}
+
static const struct watchdog_ops imx_sc_wdt_ops = {
.owner = THIS_MODULE,
.start = imx_sc_wdt_start,
.stop = imx_sc_wdt_stop,
.ping = imx_sc_wdt_ping,
.set_timeout = imx_sc_wdt_set_timeout,
+ .set_pretimeout = imx_sc_wdt_set_pretimeout,
};
-static const struct watchdog_info imx_sc_wdt_info = {
+static struct watchdog_info imx_sc_wdt_info = {
.identity = "i.MX SC watchdog timer",
.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
- WDIOF_MAGICCLOSE | WDIOF_PRETIMEOUT,
+ WDIOF_MAGICCLOSE,
};
static int imx_sc_wdt_probe(struct platform_device *pdev)
{
+ struct imx_sc_wdt_device *imx_sc_wdd;
+ struct watchdog_device *wdog;
struct device *dev = &pdev->dev;
- struct watchdog_device *imx_sc_wdd;
int ret;
imx_sc_wdd = devm_kzalloc(dev, sizeof(*imx_sc_wdd), GFP_KERNEL);
@@ -111,42 +162,70 @@ static int imx_sc_wdt_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, imx_sc_wdd);
- imx_sc_wdd->info = &imx_sc_wdt_info;
- imx_sc_wdd->ops = &imx_sc_wdt_ops;
- imx_sc_wdd->min_timeout = 1;
- imx_sc_wdd->max_timeout = MAX_TIMEOUT;
- imx_sc_wdd->parent = dev;
- imx_sc_wdd->timeout = DEFAULT_TIMEOUT;
-
- watchdog_init_timeout(imx_sc_wdd, 0, dev);
- watchdog_stop_on_reboot(imx_sc_wdd);
- watchdog_stop_on_unregister(imx_sc_wdd);
+ wdog = &imx_sc_wdd->wdd;
+ wdog->info = &imx_sc_wdt_info;
+ wdog->ops = &imx_sc_wdt_ops;
+ wdog->min_timeout = 1;
+ wdog->max_timeout = MAX_TIMEOUT;
+ wdog->parent = dev;
+ wdog->timeout = DEFAULT_TIMEOUT;
+
+ watchdog_init_timeout(wdog, 0, dev);
+ watchdog_stop_on_reboot(wdog);
+ watchdog_stop_on_unregister(wdog);
+
+ ret = devm_watchdog_register_device(dev, wdog);
+
+ if (ret) {
+ dev_err(dev, "Failed to register watchdog device\n");
+ return ret;
+ }
+
+ ret = imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG,
+ SC_IRQ_WDOG,
+ true);
+ if (ret) {
+ dev_warn(dev, "Enable irq failed, pretimeout NOT supported\n");
+ return 0;
+ }
- ret = devm_watchdog_register_device(dev, imx_sc_wdd);
+ imx_sc_wdd->wdt_notifier.notifier_call = imx_sc_wdt_notify;
+ ret = imx_scu_irq_register_notifier(&imx_sc_wdd->wdt_notifier);
if (ret) {
- dev_err(dev, "Failed to register watchdog device\n");
- return ret;
+ imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG,
+ SC_IRQ_WDOG,
+ false);
+ dev_warn(dev,
+ "Register irq notifier failed, pretimeout NOT supported\n");
+ return 0;
}
+ ret = devm_add_action_or_reset(dev, imx_sc_wdt_action,
+ &imx_sc_wdd->wdt_notifier);
+ if (!ret)
+ imx_sc_wdt_info.options |= WDIOF_PRETIMEOUT;
+ else
+ dev_warn(dev, "Add action failed, pretimeout NOT supported\n");
+
return 0;
}
static int __maybe_unused imx_sc_wdt_suspend(struct device *dev)
{
- struct watchdog_device *imx_sc_wdd = dev_get_drvdata(dev);
+ struct imx_sc_wdt_device *imx_sc_wdd = dev_get_drvdata(dev);
- if (watchdog_active(imx_sc_wdd))
- imx_sc_wdt_stop(imx_sc_wdd);
+ if (watchdog_active(&imx_sc_wdd->wdd))
+ imx_sc_wdt_stop(&imx_sc_wdd->wdd);
return 0;
}
static int __maybe_unused imx_sc_wdt_resume(struct device *dev)
{
- struct watchdog_device *imx_sc_wdd = dev_get_drvdata(dev);
+ struct imx_sc_wdt_device *imx_sc_wdd = dev_get_drvdata(dev);
- if (watchdog_active(imx_sc_wdd))
- imx_sc_wdt_start(imx_sc_wdd);
+ if (watchdog_active(&imx_sc_wdd->wdd))
+ imx_sc_wdt_start(&imx_sc_wdd->wdd);
return 0;
}
diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
index b2463f8276e6..2cdbd37c700c 100644
--- a/drivers/watchdog/intel-mid_wdt.c
+++ b/drivers/watchdog/intel-mid_wdt.c
@@ -161,10 +161,8 @@ static int mid_wdt_probe(struct platform_device *pdev)
set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
ret = devm_watchdog_register_device(dev, wdt_dev);
- if (ret) {
- dev_err(dev, "error registering watchdog device\n");
+ if (ret)
return ret;
- }
dev_info(dev, "Intel MID watchdog device probed\n");
diff --git a/drivers/watchdog/jz4740_wdt.c b/drivers/watchdog/jz4740_wdt.c
index 313358b2e0b1..d4a90916dd38 100644
--- a/drivers/watchdog/jz4740_wdt.c
+++ b/drivers/watchdog/jz4740_wdt.c
@@ -4,6 +4,7 @@
* JZ4740 Watchdog driver
*/
+#include <linux/mfd/ingenic-tcu.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
@@ -19,23 +20,16 @@
#include <asm/mach-jz4740/timer.h>
-#define JZ_REG_WDT_TIMER_DATA 0x0
-#define JZ_REG_WDT_COUNTER_ENABLE 0x4
-#define JZ_REG_WDT_TIMER_COUNTER 0x8
-#define JZ_REG_WDT_TIMER_CONTROL 0xC
-
#define JZ_WDT_CLOCK_PCLK 0x1
#define JZ_WDT_CLOCK_RTC 0x2
#define JZ_WDT_CLOCK_EXT 0x4
-#define JZ_WDT_CLOCK_DIV_SHIFT 3
-
-#define JZ_WDT_CLOCK_DIV_1 (0 << JZ_WDT_CLOCK_DIV_SHIFT)
-#define JZ_WDT_CLOCK_DIV_4 (1 << JZ_WDT_CLOCK_DIV_SHIFT)
-#define JZ_WDT_CLOCK_DIV_16 (2 << JZ_WDT_CLOCK_DIV_SHIFT)
-#define JZ_WDT_CLOCK_DIV_64 (3 << JZ_WDT_CLOCK_DIV_SHIFT)
-#define JZ_WDT_CLOCK_DIV_256 (4 << JZ_WDT_CLOCK_DIV_SHIFT)
-#define JZ_WDT_CLOCK_DIV_1024 (5 << JZ_WDT_CLOCK_DIV_SHIFT)
+#define JZ_WDT_CLOCK_DIV_1 (0 << TCU_TCSR_PRESCALE_LSB)
+#define JZ_WDT_CLOCK_DIV_4 (1 << TCU_TCSR_PRESCALE_LSB)
+#define JZ_WDT_CLOCK_DIV_16 (2 << TCU_TCSR_PRESCALE_LSB)
+#define JZ_WDT_CLOCK_DIV_64 (3 << TCU_TCSR_PRESCALE_LSB)
+#define JZ_WDT_CLOCK_DIV_256 (4 << TCU_TCSR_PRESCALE_LSB)
+#define JZ_WDT_CLOCK_DIV_1024 (5 << TCU_TCSR_PRESCALE_LSB)
#define DEFAULT_HEARTBEAT 5
#define MAX_HEARTBEAT 2048
@@ -63,7 +57,7 @@ static int jz4740_wdt_ping(struct watchdog_device *wdt_dev)
{
struct jz4740_wdt_drvdata *drvdata = watchdog_get_drvdata(wdt_dev);
- writew(0x0, drvdata->base + JZ_REG_WDT_TIMER_COUNTER);
+ writew(0x0, drvdata->base + TCU_REG_WDT_TCNT);
return 0;
}
@@ -74,6 +68,7 @@ static int jz4740_wdt_set_timeout(struct watchdog_device *wdt_dev,
unsigned int rtc_clk_rate;
unsigned int timeout_value;
unsigned short clock_div = JZ_WDT_CLOCK_DIV_1;
+ u8 tcer;
rtc_clk_rate = clk_get_rate(drvdata->rtc_clk);
@@ -86,18 +81,19 @@ static int jz4740_wdt_set_timeout(struct watchdog_device *wdt_dev,
break;
}
timeout_value >>= 2;
- clock_div += (1 << JZ_WDT_CLOCK_DIV_SHIFT);
+ clock_div += (1 << TCU_TCSR_PRESCALE_LSB);
}
- writeb(0x0, drvdata->base + JZ_REG_WDT_COUNTER_ENABLE);
- writew(clock_div, drvdata->base + JZ_REG_WDT_TIMER_CONTROL);
+ tcer = readb(drvdata->base + TCU_REG_WDT_TCER);
+ writeb(0x0, drvdata->base + TCU_REG_WDT_TCER);
+ writew(clock_div, drvdata->base + TCU_REG_WDT_TCSR);
- writew((u16)timeout_value, drvdata->base + JZ_REG_WDT_TIMER_DATA);
- writew(0x0, drvdata->base + JZ_REG_WDT_TIMER_COUNTER);
- writew(clock_div | JZ_WDT_CLOCK_RTC,
- drvdata->base + JZ_REG_WDT_TIMER_CONTROL);
+ writew((u16)timeout_value, drvdata->base + TCU_REG_WDT_TDR);
+ writew(0x0, drvdata->base + TCU_REG_WDT_TCNT);
+ writew(clock_div | JZ_WDT_CLOCK_RTC, drvdata->base + TCU_REG_WDT_TCSR);
- writeb(0x1, drvdata->base + JZ_REG_WDT_COUNTER_ENABLE);
+ if (tcer & TCU_WDT_TCER_TCEN)
+ writeb(TCU_WDT_TCER_TCEN, drvdata->base + TCU_REG_WDT_TCER);
wdt_dev->timeout = new_timeout;
return 0;
@@ -105,9 +101,18 @@ static int jz4740_wdt_set_timeout(struct watchdog_device *wdt_dev,
static int jz4740_wdt_start(struct watchdog_device *wdt_dev)
{
+ struct jz4740_wdt_drvdata *drvdata = watchdog_get_drvdata(wdt_dev);
+ u8 tcer;
+
+ tcer = readb(drvdata->base + TCU_REG_WDT_TCER);
+
jz4740_timer_enable_watchdog();
jz4740_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
+ /* Start watchdog if it wasn't started already */
+ if (!(tcer & TCU_WDT_TCER_TCEN))
+ writeb(TCU_WDT_TCER_TCEN, drvdata->base + TCU_REG_WDT_TCER);
+
return 0;
}
@@ -115,7 +120,7 @@ static int jz4740_wdt_stop(struct watchdog_device *wdt_dev)
{
struct jz4740_wdt_drvdata *drvdata = watchdog_get_drvdata(wdt_dev);
- writeb(0x0, drvdata->base + JZ_REG_WDT_COUNTER_ENABLE);
+ writeb(0x0, drvdata->base + TCU_REG_WDT_TCER);
jz4740_timer_disable_watchdog();
return 0;
@@ -187,11 +192,7 @@ static int jz4740_wdt_probe(struct platform_device *pdev)
return PTR_ERR(drvdata->rtc_clk);
}
- ret = devm_watchdog_register_device(dev, &drvdata->wdt);
- if (ret < 0)
- return ret;
-
- return 0;
+ return devm_watchdog_register_device(dev, &drvdata->wdt);
}
static struct platform_driver jz4740_wdt_driver = {
diff --git a/drivers/watchdog/loongson1_wdt.c b/drivers/watchdog/loongson1_wdt.c
index c8c2b8a88fc2..bb3d075c0633 100644
--- a/drivers/watchdog/loongson1_wdt.c
+++ b/drivers/watchdog/loongson1_wdt.c
@@ -132,10 +132,8 @@ static int ls1x_wdt_probe(struct platform_device *pdev)
watchdog_set_drvdata(ls1x_wdt, drvdata);
err = devm_watchdog_register_device(dev, &drvdata->wdt);
- if (err) {
- dev_err(dev, "failed to register watchdog device\n");
+ if (err)
return err;
- }
platform_set_drvdata(pdev, drvdata);
diff --git a/drivers/watchdog/max77620_wdt.c b/drivers/watchdog/max77620_wdt.c
index 9937f9fccd2e..be6a53c30002 100644
--- a/drivers/watchdog/max77620_wdt.c
+++ b/drivers/watchdog/max77620_wdt.c
@@ -182,13 +182,7 @@ static int max77620_wdt_probe(struct platform_device *pdev)
watchdog_set_drvdata(wdt_dev, wdt);
watchdog_stop_on_unregister(wdt_dev);
- ret = devm_watchdog_register_device(dev, wdt_dev);
- if (ret < 0) {
- dev_err(dev, "watchdog registration failed: %d\n", ret);
- return ret;
- }
-
- return 0;
+ return devm_watchdog_register_device(dev, wdt_dev);
}
static const struct platform_device_id max77620_wdt_devtype[] = {
diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c
index 96a770938ff0..5391bf3e6b11 100644
--- a/drivers/watchdog/mei_wdt.c
+++ b/drivers/watchdog/mei_wdt.c
@@ -384,10 +384,8 @@ static int mei_wdt_register(struct mei_wdt *wdt)
watchdog_stop_on_reboot(&wdt->wdd);
ret = watchdog_register_device(&wdt->wdd);
- if (ret) {
- dev_err(dev, "unable to register watchdog device = %d.\n", ret);
+ if (ret)
watchdog_set_drvdata(&wdt->wdd, NULL);
- }
wdt->state = MEI_WDT_IDLE;
diff --git a/drivers/watchdog/mena21_wdt.c b/drivers/watchdog/mena21_wdt.c
index e9ca4e0e25dc..99d2359d5a8a 100644
--- a/drivers/watchdog/mena21_wdt.c
+++ b/drivers/watchdog/mena21_wdt.c
@@ -190,10 +190,8 @@ static int a21_wdt_probe(struct platform_device *pdev)
dev_set_drvdata(dev, drv);
ret = devm_watchdog_register_device(dev, &a21_wdt);
- if (ret) {
- dev_err(dev, "Cannot register watchdog device\n");
+ if (ret)
return ret;
- }
dev_info(dev, "MEN A21 watchdog timer driver enabled\n");
diff --git a/drivers/watchdog/menf21bmc_wdt.c b/drivers/watchdog/menf21bmc_wdt.c
index 7766d7361d3b..81ebdfc371f4 100644
--- a/drivers/watchdog/menf21bmc_wdt.c
+++ b/drivers/watchdog/menf21bmc_wdt.c
@@ -152,10 +152,8 @@ static int menf21bmc_wdt_probe(struct platform_device *pdev)
}
ret = devm_watchdog_register_device(dev, &drv_data->wdt);
- if (ret) {
- dev_err(dev, "failed to register Watchdog device\n");
+ if (ret)
return ret;
- }
dev_info(dev, "MEN 14F021P00 BMC Watchdog device enabled\n");
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index b6ffad421bd0..3fc457bc16db 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -201,11 +201,8 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev)
ddata->wdd.timeout = ddata->wdd.min_timeout;
ret = devm_watchdog_register_device(dev, &ddata->wdd);
- if (ret) {
- dev_err(dev, "cannot register watchdog device (err=%d)\n",
- ret);
+ if (ret)
return ret;
- }
dev_info(dev,
"WDT driver for MPC8xxx initialized. mode:%s timeout=%d sec\n",
diff --git a/drivers/watchdog/mv64x60_wdt.c b/drivers/watchdog/mv64x60_wdt.c
index c785f4f0a196..74bf7144a970 100644
--- a/drivers/watchdog/mv64x60_wdt.c
+++ b/drivers/watchdog/mv64x60_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* mv64x60_wdt.c - MV64X60 (Marvell Discovery) watchdog userspace interface
*
@@ -9,10 +10,7 @@
*
* Derived from mpc8xx_wdt.c, with the following copyright.
*
- * 2002 (c) Florian Schirmer <jolt@tuxbox.org> This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2002 (c) Florian Schirmer <jolt@tuxbox.org>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/watchdog/ni903x_wdt.c b/drivers/watchdog/ni903x_wdt.c
index 60f5608af2a8..4cebad324b20 100644
--- a/drivers/watchdog/ni903x_wdt.c
+++ b/drivers/watchdog/ni903x_wdt.c
@@ -211,10 +211,8 @@ static int ni903x_acpi_add(struct acpi_device *device)
watchdog_init_timeout(wdd, timeout, dev);
ret = watchdog_register_device(wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog\n");
+ if (ret)
return ret;
- }
/* Switch from boot mode to user mode */
outb(NIWD_CONTROL_RESET | NIWD_CONTROL_MODE,
diff --git a/drivers/watchdog/nic7018_wdt.c b/drivers/watchdog/nic7018_wdt.c
index 2e1a2a3d4ec9..2a46cc662943 100644
--- a/drivers/watchdog/nic7018_wdt.c
+++ b/drivers/watchdog/nic7018_wdt.c
@@ -210,7 +210,6 @@ static int nic7018_probe(struct platform_device *pdev)
ret = watchdog_register_device(wdd);
if (ret) {
outb(LOCK, wdt->io_base + WDT_REG_LOCK);
- dev_err(dev, "failed to register watchdog\n");
return ret;
}
diff --git a/drivers/watchdog/npcm_wdt.c b/drivers/watchdog/npcm_wdt.c
index 9d6c1689b12c..9c773c3d6d5d 100644
--- a/drivers/watchdog/npcm_wdt.c
+++ b/drivers/watchdog/npcm_wdt.c
@@ -220,10 +220,8 @@ static int npcm_wdt_probe(struct platform_device *pdev)
return ret;
ret = devm_watchdog_register_device(dev, &wdt->wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog\n");
+ if (ret)
return ret;
- }
dev_info(dev, "NPCM watchdog driver enabled\n");
diff --git a/drivers/watchdog/nv_tco.h b/drivers/watchdog/nv_tco.h
index c2d1d04e055b..d325e528010f 100644
--- a/drivers/watchdog/nv_tco.h
+++ b/drivers/watchdog/nv_tco.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* nv_tco: TCO timer driver for nVidia chipsets.
*
@@ -10,11 +11,6 @@
* Reserved.
* http://www.kernelconcepts.de
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither kernel concepts nor Nils Faerber admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/octeon-wdt-main.c b/drivers/watchdog/octeon-wdt-main.c
index 0ec419a3f7ed..fde9e739b436 100644
--- a/drivers/watchdog/octeon-wdt-main.c
+++ b/drivers/watchdog/octeon-wdt-main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Octeon Watchdog driver
*
@@ -10,22 +11,12 @@
* (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
*
* (c) Copyright 1995 Alan Cox <alan@lxorguk.ukuu.org.uk>
*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- *
* The OCTEON watchdog has a maximum timeout of 2^32 * io_clock.
* For most systems this is less than 10 seconds, so to allow for
* software to request longer watchdog heartbeats, we maintain software
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
index 03786992b701..7fe4f7c3f7ce 100644
--- a/drivers/watchdog/of_xilinx_wdt.c
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -238,10 +238,8 @@ static int xwdt_probe(struct platform_device *pdev)
}
rc = devm_watchdog_register_device(dev, xilinx_wdt_wdd);
- if (rc) {
- dev_err(dev, "Cannot register watchdog (err=%d)\n", rc);
+ if (rc)
return rc;
- }
clk_disable(xdev->clk);
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index d49688d93f6a..9b91882fe3c4 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* omap_wdt.c
*
@@ -6,10 +7,7 @@
* Author: MontaVista Software, Inc.
* <gdavis@mvista.com> or <source@mvista.com>
*
- * 2003 (c) MontaVista Software, Inc. This file is licensed under the
- * terms of the GNU General Public License version 2. This program is
- * licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2003 (c) MontaVista Software, Inc.
*
* History:
*
diff --git a/drivers/watchdog/omap_wdt.h b/drivers/watchdog/omap_wdt.h
index 42f31ec5e90d..950b4643f3e7 100644
--- a/drivers/watchdog/omap_wdt.h
+++ b/drivers/watchdog/omap_wdt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* linux/drivers/char/watchdog/omap_wdt.h
*
@@ -5,26 +6,6 @@
* OMAP Watchdog timer register definitions
*
* Copyright (C) 2004 Texas Instruments.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _OMAP_WATCHDOG_H
diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c
index ca21d6c240a3..2af1a8b3f973 100644
--- a/drivers/watchdog/pc87413_wdt.c
+++ b/drivers/watchdog/pc87413_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* NS pc87413-wdt Watchdog Timer driver for Linux 2.6.x.x
*
@@ -6,11 +7,6 @@
* (C) Copyright 2006 Sven Anders, <anders@anduras.de>
* and Marcus Junker, <junker@anduras.de>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Sven Anders, Marcus Junker nor ANDURAS AG
* admit liability nor provide warranty for any of this software.
* This material is provided "AS-IS" and at no charge.
diff --git a/drivers/watchdog/pcwd_pci.c b/drivers/watchdog/pcwd_pci.c
index 5773d2591d3f..e30c1f762045 100644
--- a/drivers/watchdog/pcwd_pci.c
+++ b/drivers/watchdog/pcwd_pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Berkshire PCI-PC Watchdog Card Driver
*
@@ -10,11 +11,6 @@
* Matt Domsch <Matt_Domsch@dell.com>,
* Rob Radez <rob@osinvestor.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor
* provide warranty for any of this software. This material is
* provided "AS-IS" and at no charge.
diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
index 5de6182dae33..6727f8ab2d18 100644
--- a/drivers/watchdog/pcwd_usb.c
+++ b/drivers/watchdog/pcwd_usb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Berkshire USB-PC Watchdog Card Driver
*
@@ -10,11 +11,6 @@
* Rob Radez <rob@osinvestor.com>,
* Greg Kroah-Hartman <greg@kroah.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor
* provide warranty for any of this software. This material is
* provided "AS-IS" and at no charge.
diff --git a/drivers/watchdog/pic32-dmt.c b/drivers/watchdog/pic32-dmt.c
index 4f2aca78f13a..f43062b3c4c8 100644
--- a/drivers/watchdog/pic32-dmt.c
+++ b/drivers/watchdog/pic32-dmt.c
@@ -212,10 +212,8 @@ static int pic32_dmt_probe(struct platform_device *pdev)
watchdog_set_drvdata(wdd, dmt);
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "watchdog register failed, err %d\n", ret);
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdd);
return 0;
diff --git a/drivers/watchdog/pic32-wdt.c b/drivers/watchdog/pic32-wdt.c
index 5ecdd880f0b7..41715d68d9e9 100644
--- a/drivers/watchdog/pic32-wdt.c
+++ b/drivers/watchdog/pic32-wdt.c
@@ -221,10 +221,8 @@ static int pic32_wdt_drv_probe(struct platform_device *pdev)
watchdog_set_drvdata(wdd, wdt);
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "watchdog register failed, err %d\n", ret);
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdd);
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c
index d9e03544aeae..7b446b696f2b 100644
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/char/watchdog/pnx4008_wdt.c
*
@@ -11,10 +12,6 @@
* 2005-2006 (c) MontaVista Software, Inc.
*
* (C) 2012 Wolfram Sang, Pengutronix
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -221,10 +218,8 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
set_bit(WDOG_HW_RUNNING, &pnx4008_wdd.status);
ret = devm_watchdog_register_device(dev, &pnx4008_wdd);
- if (ret < 0) {
- dev_err(dev, "cannot register watchdog device\n");
+ if (ret < 0)
return ret;
- }
dev_info(dev, "heartbeat %d sec\n", pnx4008_wdd.timeout);
diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c
index fc0f7e5de38d..7be7f87be28f 100644
--- a/drivers/watchdog/qcom-wdt.c
+++ b/drivers/watchdog/qcom-wdt.c
@@ -223,10 +223,8 @@ static int qcom_wdt_probe(struct platform_device *pdev)
watchdog_init_timeout(&wdt->wdd, 0, dev);
ret = devm_watchdog_register_device(dev, &wdt->wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog\n");
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdt);
return 0;
diff --git a/drivers/watchdog/rave-sp-wdt.c b/drivers/watchdog/rave-sp-wdt.c
index 35db173252f9..2c95615b6354 100644
--- a/drivers/watchdog/rave-sp-wdt.c
+++ b/drivers/watchdog/rave-sp-wdt.c
@@ -310,7 +310,6 @@ static int rave_sp_wdt_probe(struct platform_device *pdev)
ret = devm_watchdog_register_device(dev, wdd);
if (ret) {
- dev_err(dev, "Failed to register watchdog device\n");
rave_sp_wdt_stop(wdd);
return ret;
}
diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c
index 565dbc1ec638..00662a8e039c 100644
--- a/drivers/watchdog/renesas_wdt.c
+++ b/drivers/watchdog/renesas_wdt.c
@@ -7,6 +7,7 @@
*/
#include <linux/bitops.h>
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -70,6 +71,15 @@ static int rwdt_init_timeout(struct watchdog_device *wdev)
return 0;
}
+static void rwdt_wait_cycles(struct rwdt_priv *priv, unsigned int cycles)
+{
+ unsigned int delay;
+
+ delay = DIV_ROUND_UP(cycles * 1000000, priv->clk_rate);
+
+ usleep_range(delay, 2 * delay);
+}
+
static int rwdt_start(struct watchdog_device *wdev)
{
struct rwdt_priv *priv = watchdog_get_drvdata(wdev);
@@ -80,6 +90,8 @@ static int rwdt_start(struct watchdog_device *wdev)
/* Stop the timer before we modify any register */
val = readb_relaxed(priv->base + RWTCSRA) & ~RWTCSRA_TME;
rwdt_write(priv, val, RWTCSRA);
+ /* Delay 2 cycles before setting watchdog counter */
+ rwdt_wait_cycles(priv, 2);
rwdt_init_timeout(wdev);
rwdt_write(priv, priv->cks, RWTCSRA);
@@ -98,6 +110,8 @@ static int rwdt_stop(struct watchdog_device *wdev)
struct rwdt_priv *priv = watchdog_get_drvdata(wdev);
rwdt_write(priv, priv->cks, RWTCSRA);
+ /* Delay 3 cycles before disabling module clock */
+ rwdt_wait_cycles(priv, 3);
pm_runtime_put(wdev->parent);
return 0;
@@ -175,15 +189,16 @@ static inline bool rwdt_blacklisted(struct device *dev) { return false; }
static int rwdt_probe(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
struct rwdt_priv *priv;
struct clk *clk;
unsigned long clks_per_sec;
int ret, i;
- if (rwdt_blacklisted(&pdev->dev))
+ if (rwdt_blacklisted(dev))
return -ENODEV;
- priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
@@ -191,16 +206,16 @@ static int rwdt_probe(struct platform_device *pdev)
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
- clk = devm_clk_get(&pdev->dev, NULL);
+ clk = devm_clk_get(dev, NULL);
if (IS_ERR(clk))
return PTR_ERR(clk);
- pm_runtime_enable(&pdev->dev);
- pm_runtime_get_sync(&pdev->dev);
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
priv->clk_rate = clk_get_rate(clk);
priv->wdev.bootstatus = (readb_relaxed(priv->base + RWTCSRA) &
RWTCSRA_WOVF) ? WDIOF_CARDRESET : 0;
- pm_runtime_put(&pdev->dev);
+ pm_runtime_put(dev);
if (!priv->clk_rate) {
ret = -ENOENT;
@@ -216,14 +231,14 @@ static int rwdt_probe(struct platform_device *pdev)
}
if (i < 0) {
- dev_err(&pdev->dev, "Can't find suitable clock divider\n");
+ dev_err(dev, "Can't find suitable clock divider\n");
ret = -ERANGE;
goto out_pm_disable;
}
priv->wdev.info = &rwdt_ident;
priv->wdev.ops = &rwdt_ops;
- priv->wdev.parent = &pdev->dev;
+ priv->wdev.parent = dev;
priv->wdev.min_timeout = 1;
priv->wdev.max_timeout = DIV_BY_CLKS_PER_SEC(priv, 65536);
priv->wdev.timeout = min(priv->wdev.max_timeout, RWDT_DEFAULT_TIMEOUT);
@@ -235,7 +250,7 @@ static int rwdt_probe(struct platform_device *pdev)
watchdog_stop_on_unregister(&priv->wdev);
/* This overrides the default timeout only if DT configuration was found */
- watchdog_init_timeout(&priv->wdev, 0, &pdev->dev);
+ watchdog_init_timeout(&priv->wdev, 0, dev);
ret = watchdog_register_device(&priv->wdev);
if (ret < 0)
@@ -244,7 +259,7 @@ static int rwdt_probe(struct platform_device *pdev)
return 0;
out_pm_disable:
- pm_runtime_disable(&pdev->dev);
+ pm_runtime_disable(dev);
return ret;
}
diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c
index 39cd51df2ffc..258dfcf9cbda 100644
--- a/drivers/watchdog/retu_wdt.c
+++ b/drivers/watchdog/retu_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Retu watchdog driver
*
@@ -5,15 +6,6 @@
*
* Based on code written by Amit Kucheria and Michael Buesch.
* Rewritten by Aaro Koskinen.
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/slab.h>
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index daf3bf0d86b8..2395f353e52d 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -606,10 +606,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
wdt->wdt_device.parent = dev;
ret = watchdog_register_device(&wdt->wdt_device);
- if (ret) {
- dev_err(dev, "cannot register watchdog (%d)\n", ret);
+ if (ret)
goto err_cpufreq;
- }
ret = s3c2410wdt_mask_and_disable_reset(wdt, false);
if (ret < 0)
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index bfa035e1a75e..cbd8c957182f 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Watchdog driver for the SA11x0/PXA2xx
*
* (c) Copyright 2000 Oleg Drokin <green@crimea.edu>
* Based on SoftDog driver by Alan Cox <alan@lxorguk.ukuu.org.uk>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Oleg Drokin nor iXcelerator.com admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index b8da1bf21e12..d193a60430b2 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -110,9 +110,7 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd,
u32 value = WDT_SEC2TICKS(timeout);
wdt->mr &= ~AT91_WDT_WDV;
- wdt->mr &= ~AT91_WDT_WDD;
wdt->mr |= AT91_WDT_SET_WDV(value);
- wdt->mr |= AT91_WDT_SET_WDD(value);
/*
* WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When
@@ -248,7 +246,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
timeout = WDT_SEC2TICKS(wdd->timeout);
- wdt->mr |= AT91_WDT_SET_WDD(timeout);
+ wdt->mr |= AT91_WDT_SET_WDD(WDT_SEC2TICKS(MAX_WDT_TIMEOUT));
wdt->mr |= AT91_WDT_SET_WDV(timeout);
ret = sama5d4_wdt_init(wdt);
@@ -259,10 +257,8 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_unregister(wdd);
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog device\n");
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdt);
@@ -279,7 +275,17 @@ static const struct of_device_id sama5d4_wdt_of_match[] = {
MODULE_DEVICE_TABLE(of, sama5d4_wdt_of_match);
#ifdef CONFIG_PM_SLEEP
-static int sama5d4_wdt_resume(struct device *dev)
+static int sama5d4_wdt_suspend_late(struct device *dev)
+{
+ struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
+
+ if (watchdog_active(&wdt->wdd))
+ sama5d4_wdt_stop(&wdt->wdd);
+
+ return 0;
+}
+
+static int sama5d4_wdt_resume_early(struct device *dev)
{
struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
@@ -290,12 +296,17 @@ static int sama5d4_wdt_resume(struct device *dev)
*/
sama5d4_wdt_init(wdt);
+ if (watchdog_active(&wdt->wdd))
+ sama5d4_wdt_start(&wdt->wdd);
+
return 0;
}
#endif
-static SIMPLE_DEV_PM_OPS(sama5d4_wdt_pm_ops, NULL,
- sama5d4_wdt_resume);
+static const struct dev_pm_ops sama5d4_wdt_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(sama5d4_wdt_suspend_late,
+ sama5d4_wdt_resume_early)
+};
static struct platform_driver sama5d4_wdt_driver = {
.probe = sama5d4_wdt_probe,
diff --git a/drivers/watchdog/sbc7240_wdt.c b/drivers/watchdog/sbc7240_wdt.c
index efc81b318939..12cdee7d5069 100644
--- a/drivers/watchdog/sbc7240_wdt.c
+++ b/drivers/watchdog/sbc7240_wdt.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NANO7240 SBC Watchdog device driver
*
* Based on w83877f.c by Scott Jennings,
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation;
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * rights and limitations under the License.
- *
* (c) Copyright 2007 Gilles GIGAN <gilles.gigan@jcu.edu.au>
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/watchdog/sbc8360.c b/drivers/watchdog/sbc8360.c
index 3396024e7b76..4f8b9912fc51 100644
--- a/drivers/watchdog/sbc8360.c
+++ b/drivers/watchdog/sbc8360.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* SBC8360 Watchdog driver
*
@@ -19,11 +20,6 @@
* (c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index ed6e9fac5d74..3612f1df381b 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* sch311x_wdt.c - Driver for the SCH311x Super-I/O chips
* integrated watchdog.
*
* (c) Copyright 2008 Wim Van Sebroeck <wim@iguana.be>.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor
* provide warranty for any of this software. This material is
* provided "AS-IS" and at no charge.
diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
index 060740625485..3e4885c1545e 100644
--- a/drivers/watchdog/softdog.c
+++ b/drivers/watchdog/softdog.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* SoftDog: A Software Watchdog Device
*
* (c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c
index cd4430ff9b1c..93bd302ae7c5 100644
--- a/drivers/watchdog/sp5100_tco.c
+++ b/drivers/watchdog/sp5100_tco.c
@@ -402,10 +402,8 @@ static int sp5100_tco_probe(struct platform_device *pdev)
return ret;
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "cannot register watchdog device (err=%d)\n", ret);
+ if (ret)
return ret;
- }
/* Show module parameters */
dev_info(dev, "initialized. heartbeat=%d sec (nowayout=%d)\n",
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index 072986d461b7..53e04926a7b2 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -288,11 +288,8 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id)
}
ret = watchdog_register_device(&wdt->wdd);
- if (ret) {
- dev_err(&adev->dev, "watchdog_register_device() failed: %d\n",
- ret);
+ if (ret)
goto err;
- }
amba_set_drvdata(adev, wdt);
dev_info(&adev->dev, "registration successful\n");
diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
index 916fb3f96bdc..edba4e278685 100644
--- a/drivers/watchdog/sprd_wdt.c
+++ b/drivers/watchdog/sprd_wdt.c
@@ -320,7 +320,6 @@ static int sprd_wdt_probe(struct platform_device *pdev)
ret = devm_watchdog_register_device(dev, &wdt->wdd);
if (ret) {
sprd_wdt_disable(wdt);
- dev_err(dev, "failed to register watchdog\n");
return ret;
}
platform_set_drvdata(pdev, wdt);
diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c
index 7a90184eb950..14ab6559c748 100644
--- a/drivers/watchdog/st_lpc_wdt.c
+++ b/drivers/watchdog/st_lpc_wdt.c
@@ -228,10 +228,8 @@ static int st_wdog_probe(struct platform_device *pdev)
return ret;
ret = devm_watchdog_register_device(dev, &st_wdog_dev);
- if (ret) {
- dev_err(dev, "Unable to register watchdog\n");
+ if (ret)
return ret;
- }
st_wdog_setup(st_wdog, true);
diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c
index d569a3634d9b..a3a329011a06 100644
--- a/drivers/watchdog/stm32_iwdg.c
+++ b/drivers/watchdog/stm32_iwdg.c
@@ -263,10 +263,8 @@ static int stm32_iwdg_probe(struct platform_device *pdev)
watchdog_init_timeout(wdd, 0, dev);
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog device\n");
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdt);
diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c
index 671f4ba7b4ed..7caf3aa71c6a 100644
--- a/drivers/watchdog/stmp3xxx_rtc_wdt.c
+++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c
@@ -98,10 +98,8 @@ static int stmp3xxx_wdt_probe(struct platform_device *pdev)
stmp3xxx_wdd.parent = dev;
ret = devm_watchdog_register_device(dev, &stmp3xxx_wdd);
- if (ret < 0) {
- dev_err(dev, "cannot register watchdog device\n");
+ if (ret < 0)
return ret;
- }
if (register_reboot_notifier(&wdt_notifier))
dev_warn(dev, "cannot register reboot notifier\n");
diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c
index a58b000acc4f..dfe06e506cad 100644
--- a/drivers/watchdog/tegra_wdt.c
+++ b/drivers/watchdog/tegra_wdt.c
@@ -219,10 +219,8 @@ static int tegra_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_unregister(wdd);
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog device\n");
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdt);
diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c
index 9dc6d7f45806..c137ad2bd5c3 100644
--- a/drivers/watchdog/ts4800_wdt.c
+++ b/drivers/watchdog/ts4800_wdt.c
@@ -171,10 +171,8 @@ static int ts4800_wdt_probe(struct platform_device *pdev)
ts4800_wdt_stop(wdd);
ret = devm_watchdog_register_device(dev, wdd);
- if (ret) {
- dev_err(dev, "failed to register watchdog device\n");
+ if (ret)
return ret;
- }
platform_set_drvdata(pdev, wdt);
diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index 3a49ba9ea608..38b31e9947aa 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* w83627hf/thf WDT driver
*
@@ -17,11 +18,6 @@
* (c) Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c
index 0a8073b419f8..6d2071a0590d 100644
--- a/drivers/watchdog/wafer5823wdt.c
+++ b/drivers/watchdog/wafer5823wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* ICP Wafer 5823 Single Board Computer WDT driver
* http://www.icpamerica.com/wafer_5823.php
@@ -13,11 +14,6 @@
* (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 62be9e52a4de..21e8085b848b 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* watchdog_core.c
*
@@ -16,11 +17,6 @@
* Satyam Sharma <satyam@infradead.org>
* Randy Dunlap <randy.dunlap@oracle.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
* admit liability nor provide warranty for any of this software.
* This material is provided "AS-IS" and at no charge.
@@ -60,11 +56,10 @@ static DEFINE_MUTEX(wtd_deferred_reg_mutex);
static LIST_HEAD(wtd_deferred_reg_list);
static bool wtd_deferred_reg_done;
-static int watchdog_deferred_registration_add(struct watchdog_device *wdd)
+static void watchdog_deferred_registration_add(struct watchdog_device *wdd)
{
list_add_tail(&wdd->deferred,
&wtd_deferred_reg_list);
- return 0;
}
static void watchdog_deferred_registration_del(struct watchdog_device *wdd)
@@ -265,14 +260,23 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
int watchdog_register_device(struct watchdog_device *wdd)
{
- int ret;
+ const char *dev_str;
+ int ret = 0;
mutex_lock(&wtd_deferred_reg_mutex);
if (wtd_deferred_reg_done)
ret = __watchdog_register_device(wdd);
else
- ret = watchdog_deferred_registration_add(wdd);
+ watchdog_deferred_registration_add(wdd);
mutex_unlock(&wtd_deferred_reg_mutex);
+
+ if (ret) {
+ dev_str = wdd->parent ? dev_name(wdd->parent) :
+ (const char *)wdd->info->identity;
+ pr_err("%s: failed to register watchdog device (err = %d)\n",
+ dev_str, ret);
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(watchdog_register_device);
diff --git a/drivers/watchdog/watchdog_core.h b/drivers/watchdog/watchdog_core.h
index 86ff962d1e15..a5062e8e0d13 100644
--- a/drivers/watchdog/watchdog_core.h
+++ b/drivers/watchdog/watchdog_core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* watchdog_core.h
*
@@ -16,11 +17,6 @@
* Satyam Sharma <satyam@infradead.org>
* Randy Dunlap <randy.dunlap@oracle.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
* admit liability nor provide warranty for any of this software.
* This material is provided "AS-IS" and at no charge.
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 252a7c7b6592..dbd2ad4c9294 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* watchdog_dev.c
*
@@ -20,11 +21,6 @@
* Satyam Sharma <satyam@infradead.org>
* Randy Dunlap <randy.dunlap@oracle.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
* admit liability nor provide warranty for any of this software.
* This material is provided "AS-IS" and at no charge.
@@ -69,6 +65,7 @@ struct watchdog_core_data {
struct mutex lock;
ktime_t last_keepalive;
ktime_t last_hw_keepalive;
+ ktime_t open_deadline;
struct hrtimer timer;
struct kthread_work work;
unsigned long status; /* Internal status bits */
@@ -87,6 +84,19 @@ static struct kthread_worker *watchdog_kworker;
static bool handle_boot_enabled =
IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED);
+static unsigned open_timeout = CONFIG_WATCHDOG_OPEN_TIMEOUT;
+
+static bool watchdog_past_open_deadline(struct watchdog_core_data *data)
+{
+ return ktime_after(ktime_get(), data->open_deadline);
+}
+
+static void watchdog_set_open_deadline(struct watchdog_core_data *data)
+{
+ data->open_deadline = open_timeout ?
+ ktime_get() + ktime_set(open_timeout, 0) : KTIME_MAX;
+}
+
static inline bool watchdog_need_worker(struct watchdog_device *wdd)
{
/* All variables in milli-seconds */
@@ -119,14 +129,15 @@ static ktime_t watchdog_next_keepalive(struct watchdog_device *wdd)
ktime_t virt_timeout;
unsigned int hw_heartbeat_ms;
- virt_timeout = ktime_add(wd_data->last_keepalive,
- ms_to_ktime(timeout_ms));
+ if (watchdog_active(wdd))
+ virt_timeout = ktime_add(wd_data->last_keepalive,
+ ms_to_ktime(timeout_ms));
+ else
+ virt_timeout = wd_data->open_deadline;
+
hw_heartbeat_ms = min_not_zero(timeout_ms, wdd->max_hw_heartbeat_ms);
keepalive_interval = ms_to_ktime(hw_heartbeat_ms / 2);
- if (!watchdog_active(wdd))
- return keepalive_interval;
-
/*
* To ensure that the watchdog times out wdd->timeout seconds
* after the most recent ping from userspace, the last
@@ -211,7 +222,13 @@ static bool watchdog_worker_should_ping(struct watchdog_core_data *wd_data)
{
struct watchdog_device *wdd = wd_data->wdd;
- return wdd && (watchdog_active(wdd) || watchdog_hw_running(wdd));
+ if (!wdd)
+ return false;
+
+ if (watchdog_active(wdd))
+ return true;
+
+ return watchdog_hw_running(wdd) && !watchdog_past_open_deadline(wd_data);
}
static void watchdog_ping_work(struct kthread_work *work)
@@ -824,6 +841,15 @@ static int watchdog_open(struct inode *inode, struct file *file)
if (!hw_running)
kref_get(&wd_data->kref);
+ /*
+ * open_timeout only applies for the first open from
+ * userspace. Set open_deadline to infinity so that the kernel
+ * will take care of an always-running hardware watchdog in
+ * case the device gets magic-closed or WDIOS_DISABLECARD is
+ * applied.
+ */
+ wd_data->open_deadline = KTIME_MAX;
+
/* dev/watchdog is a virtual (and thus non-seekable) filesystem */
return stream_open(inode, file);
@@ -983,6 +1009,7 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
/* Record time of most recent heartbeat as 'just before now'. */
wd_data->last_hw_keepalive = ktime_sub(ktime_get(), 1);
+ watchdog_set_open_deadline(wd_data);
/*
* If the watchdog is running, prevent its driver from being unloaded,
@@ -1181,3 +1208,8 @@ module_param(handle_boot_enabled, bool, 0444);
MODULE_PARM_DESC(handle_boot_enabled,
"Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
__MODULE_STRING(IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED)) ")");
+
+module_param(open_timeout, uint, 0644);
+MODULE_PARM_DESC(open_timeout,
+ "Maximum time (in seconds, 0 means infinity) for userspace to take over a running watchdog (default="
+ __MODULE_STRING(CONFIG_WATCHDOG_OPEN_TIMEOUT) ")");
diff --git a/drivers/watchdog/wd501p.h b/drivers/watchdog/wd501p.h
index 0e3a497d5626..43a4d88fd363 100644
--- a/drivers/watchdog/wd501p.h
+++ b/drivers/watchdog/wd501p.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-1.0+ */
/*
* Industrial Computer Source WDT500/501 driver
*
@@ -11,12 +12,7 @@
*
* http://www.cymru.net
*
- * This driver is provided under the GNU General Public License,
- * incorporated herein by reference. The driver is provided without
- * warranty or support.
- *
* Release 0.04.
- *
*/
diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c
index 3d2f5ed60e88..0650100fad00 100644
--- a/drivers/watchdog/wdt.c
+++ b/drivers/watchdog/wdt.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Industrial Computer Source WDT501 driver
*
* (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c
index ff3a41f47127..66303ab95685 100644
--- a/drivers/watchdog/wdt_pci.c
+++ b/drivers/watchdog/wdt_pci.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Industrial Computer Source PCI-WDT500/501 driver
*
* (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Neither Alan Cox nor CymruNet Ltd. admit liability nor provide
* warranty for any of this software. This material is provided
* "AS-IS" and at no charge.
diff --git a/drivers/watchdog/wm831x_wdt.c b/drivers/watchdog/wm831x_wdt.c
index 9b6565a3fab4..030ce240620d 100644
--- a/drivers/watchdog/wm831x_wdt.c
+++ b/drivers/watchdog/wm831x_wdt.c
@@ -267,14 +267,7 @@ static int wm831x_wdt_probe(struct platform_device *pdev)
}
}
- ret = devm_watchdog_register_device(dev, &driver_data->wdt);
- if (ret != 0) {
- dev_err(wm831x->dev, "watchdog_register_device() failed: %d\n",
- ret);
- return ret;
- }
-
- return 0;
+ return devm_watchdog_register_device(dev, &driver_data->wdt);
}
static struct platform_driver wm831x_wdt_driver = {
diff --git a/drivers/watchdog/xen_wdt.c b/drivers/watchdog/xen_wdt.c
index 2ba0a3c4523c..b343f421dc72 100644
--- a/drivers/watchdog/xen_wdt.c
+++ b/drivers/watchdog/xen_wdt.c
@@ -138,10 +138,8 @@ static int xen_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_unregister(&xen_wdt_dev);
ret = devm_watchdog_register_device(dev, &xen_wdt_dev);
- if (ret) {
- dev_err(dev, "cannot register watchdog device (%d)\n", ret);
+ if (ret)
return ret;
- }
dev_info(dev, "initialized (timeout=%ds, nowayout=%d)\n",
xen_wdt_dev.timeout, nowayout);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index d53f3493a6b9..cfbe46785a3b 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -402,7 +402,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir,
attrs);
- if (map == DMA_MAPPING_ERROR)
+ if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
dev_addr = xen_phys_to_bus(map);