aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/blktrace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/blktrace.c')
-rw-r--r--kernel/trace/blktrace.c216
1 files changed, 104 insertions, 112 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1183c88634aa..a995ea1ef849 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -34,7 +34,7 @@ static struct trace_array *blk_tr;
static bool blk_tracer_enabled __read_mostly;
static LIST_HEAD(running_trace_list);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock);
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -121,12 +121,12 @@ static void trace_note_tsk(struct task_struct *tsk)
struct blk_trace *bt;
tsk->btrace_seq = blktrace_seq;
- spin_lock_irqsave(&running_trace_lock, flags);
+ raw_spin_lock_irqsave(&running_trace_lock, flags);
list_for_each_entry(bt, &running_trace_list, running_list) {
trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
sizeof(tsk->comm), 0);
}
- spin_unlock_irqrestore(&running_trace_lock, flags);
+ raw_spin_unlock_irqrestore(&running_trace_lock, flags);
}
static void trace_note_time(struct blk_trace *bt)
@@ -145,13 +145,14 @@ static void trace_note_time(struct blk_trace *bt)
local_irq_restore(flags);
}
-void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
- const char *fmt, ...)
+void __blk_trace_note_message(struct blk_trace *bt,
+ struct cgroup_subsys_state *css, const char *fmt, ...)
{
int n;
va_list args;
unsigned long flags;
char *buf;
+ u64 cgid = 0;
if (unlikely(bt->trace_state != Blktrace_running &&
!blk_tracer_enabled))
@@ -170,17 +171,16 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
- if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
- blkcg = NULL;
#ifdef CONFIG_BLK_CGROUP
- trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
- blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
-#else
- trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
+ if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+ cgid = cgroup_id(css->cgroup);
+ else
+ cgid = 1;
#endif
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid);
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(__trace_note_message);
+EXPORT_SYMBOL_GPL(__blk_trace_note_message);
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
pid_t pid)
@@ -205,7 +205,7 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
#define BLK_TC_PREFLUSH BLK_TC_FLUSH
/* The ilog2() calls fall out because they're constant */
-#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
+#define MASK_TC_BIT(rw, __name) ((__force u32)(rw & REQ_ ## __name) << \
(ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
/*
@@ -213,8 +213,8 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
* blk_io_trace structure and places it in a per-cpu subbuffer.
*/
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
- int op, int op_flags, u32 what, int error, int pdu_len,
- void *pdu_data, u64 cgid)
+ const blk_opf_t opf, u32 what, int error,
+ int pdu_len, void *pdu_data, u64 cgid)
{
struct task_struct *tsk = current;
struct ring_buffer_event *event = NULL;
@@ -227,16 +227,17 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
int cpu;
bool blk_tracer = blk_tracer_enabled;
ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
+ const enum req_op op = opf & REQ_OP_MASK;
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
return;
what |= ddir_act[op_is_write(op) ? WRITE : READ];
- what |= MASK_TC_BIT(op_flags, SYNC);
- what |= MASK_TC_BIT(op_flags, RAHEAD);
- what |= MASK_TC_BIT(op_flags, META);
- what |= MASK_TC_BIT(op_flags, PREFLUSH);
- what |= MASK_TC_BIT(op_flags, FUA);
+ what |= MASK_TC_BIT(opf, SYNC);
+ what |= MASK_TC_BIT(opf, RAHEAD);
+ what |= MASK_TC_BIT(opf, META);
+ what |= MASK_TC_BIT(opf, PREFLUSH);
+ what |= MASK_TC_BIT(opf, FUA);
if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
what |= BLK_TC_ACT(BLK_TC_DISCARD);
if (op == REQ_OP_FLUSH)
@@ -310,10 +311,20 @@ record_it:
local_irq_restore(flags);
}
-static void blk_trace_free(struct blk_trace *bt)
+static void blk_trace_free(struct request_queue *q, struct blk_trace *bt)
{
relay_close(bt->rchan);
- debugfs_remove(bt->dir);
+
+ /*
+ * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created
+ * under 'q->debugfs_dir', thus lookup and remove them.
+ */
+ if (!bt->dir) {
+ debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir));
+ debugfs_remove(debugfs_lookup("msg", q->debugfs_dir));
+ } else {
+ debugfs_remove(bt->dir);
+ }
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -335,10 +346,42 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
-static void blk_trace_cleanup(struct blk_trace *bt)
+static int blk_trace_start(struct blk_trace *bt)
+{
+ if (bt->trace_state != Blktrace_setup &&
+ bt->trace_state != Blktrace_stopped)
+ return -EINVAL;
+
+ blktrace_seq++;
+ smp_mb();
+ bt->trace_state = Blktrace_running;
+ raw_spin_lock_irq(&running_trace_lock);
+ list_add(&bt->running_list, &running_trace_list);
+ raw_spin_unlock_irq(&running_trace_lock);
+ trace_note_time(bt);
+
+ return 0;
+}
+
+static int blk_trace_stop(struct blk_trace *bt)
+{
+ if (bt->trace_state != Blktrace_running)
+ return -EINVAL;
+
+ bt->trace_state = Blktrace_stopped;
+ raw_spin_lock_irq(&running_trace_lock);
+ list_del_init(&bt->running_list);
+ raw_spin_unlock_irq(&running_trace_lock);
+ relay_flush(bt->rchan);
+
+ return 0;
+}
+
+static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
+ blk_trace_stop(bt);
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
put_probe_ref();
}
@@ -351,8 +394,7 @@ static int __blk_trace_remove(struct request_queue *q)
if (!bt)
return -EINVAL;
- if (bt->trace_state != Blktrace_running)
- blk_trace_cleanup(bt);
+ blk_trace_cleanup(q, bt);
return 0;
}
@@ -401,7 +443,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
return PTR_ERR(msg);
bt = filp->private_data;
- __trace_note_message(bt, NULL, "%s", msg);
+ __blk_trace_note_message(bt, NULL, "%s", msg);
kfree(msg);
return count;
@@ -572,7 +614,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = 0;
err:
if (ret)
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
@@ -647,7 +689,6 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
static int __blk_trace_startstop(struct request_queue *q, int start)
{
- int ret;
struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace,
@@ -655,36 +696,10 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
if (bt == NULL)
return -EINVAL;
- /*
- * For starting a trace, we can transition from a setup or stopped
- * trace. For stopping a trace, the state must be running
- */
- ret = -EINVAL;
- if (start) {
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped) {
- blktrace_seq++;
- smp_mb();
- bt->trace_state = Blktrace_running;
- spin_lock_irq(&running_trace_lock);
- list_add(&bt->running_list, &running_trace_list);
- spin_unlock_irq(&running_trace_lock);
-
- trace_note_time(bt);
- ret = 0;
- }
- } else {
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- spin_lock_irq(&running_trace_lock);
- list_del_init(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
- relay_flush(bt->rchan);
- ret = 0;
- }
- }
-
- return ret;
+ if (start)
+ return blk_trace_start(bt);
+ else
+ return blk_trace_stop(bt);
}
int blk_trace_startstop(struct request_queue *q, int start)
@@ -726,12 +741,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
switch (cmd) {
case BLKTRACESETUP:
- bdevname(bdev, b);
+ snprintf(b, sizeof(b), "%pg", bdev);
ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
case BLKTRACESETUP32:
- bdevname(bdev, b);
+ snprintf(b, sizeof(b), "%pg", bdev);
ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
#endif
@@ -760,19 +775,15 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
**/
void blk_trace_shutdown(struct request_queue *q)
{
- mutex_lock(&q->debugfs_mutex);
if (rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->debugfs_mutex))) {
- __blk_trace_startstop(q, 0);
+ lockdep_is_held(&q->debugfs_mutex)))
__blk_trace_remove(q);
- }
-
- mutex_unlock(&q->debugfs_mutex);
}
#ifdef CONFIG_BLK_CGROUP
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
+ struct cgroup_subsys_state *blkcg_css;
struct blk_trace *bt;
/* We don't use the 'bt' value here except as an optimization... */
@@ -780,9 +791,10 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return 0;
- if (!bio->bi_blkg)
+ blkcg_css = bio_blkcg_css(bio);
+ if (!blkcg_css)
return 0;
- return cgroup_id(bio_blkcg(bio)->css.cgroup);
+ return cgroup_id(blkcg_css->cgroup);
}
#else
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
@@ -833,9 +845,8 @@ static void blk_add_trace_rq(struct request *rq, blk_status_t error,
else
what |= BLK_TC_ACT(BLK_TC_FS);
- __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
- rq->cmd_flags, what, blk_status_to_errno(error), 0,
- NULL, cgid);
+ __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, rq->cmd_flags,
+ what, blk_status_to_errno(error), 0, NULL, cgid);
rcu_read_unlock();
}
@@ -894,7 +905,7 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
}
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
- bio_op(bio), bio->bi_opf, what, error, 0, NULL,
+ bio->bi_opf, what, error, 0, NULL,
blk_trace_bio_get_cgid(q, bio));
rcu_read_unlock();
}
@@ -940,7 +951,7 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
rcu_read_lock();
bt = rcu_dereference(q->blk_trace);
if (bt)
- __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
rcu_read_unlock();
}
@@ -960,7 +971,7 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
else
what = BLK_TA_UNPLUG_TIMER;
- __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
+ __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
}
rcu_read_unlock();
}
@@ -976,8 +987,7 @@ static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu)
__be64 rpdu = cpu_to_be64(pdu);
__blk_add_trace(bt, bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
- BLK_TA_SPLIT,
+ bio->bi_iter.bi_size, bio->bi_opf, BLK_TA_SPLIT,
blk_status_to_errno(bio->bi_status),
sizeof(rpdu), &rpdu,
blk_trace_bio_get_cgid(q, bio));
@@ -1013,7 +1023,7 @@ static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev,
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
- bio_op(bio), bio->bi_opf, BLK_TA_REMAP,
+ bio->bi_opf, BLK_TA_REMAP,
blk_status_to_errno(bio->bi_status),
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
rcu_read_unlock();
@@ -1045,11 +1055,11 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
}
r.device_from = cpu_to_be32(dev);
- r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
+ r.device_to = cpu_to_be32(disk_devt(rq->q->disk));
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
- rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
+ rq->cmd_flags, BLK_TA_REMAP, 0,
sizeof(r), &r, blk_trace_request_get_cgid(rq));
rcu_read_unlock();
}
@@ -1075,7 +1085,7 @@ void blk_add_driver_data(struct request *rq, void *data, size_t len)
return;
}
- __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
+ __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0,
BLK_TA_DRV_DATA, 0, len, data,
blk_trace_request_get_cgid(rq));
rcu_read_unlock();
@@ -1606,17 +1616,11 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL)
return -EINVAL;
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- spin_lock_irq(&running_trace_lock);
- list_del_init(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
- relay_flush(bt->rchan);
- }
+ blk_trace_stop(bt);
put_probe_ref();
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return 0;
}
@@ -1647,7 +1651,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
return 0;
free_bt:
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
@@ -1858,17 +1862,6 @@ out_unlock_bdev:
out:
return ret ? ret : count;
}
-
-int blk_trace_init_sysfs(struct device *dev)
-{
- return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
-}
-
-void blk_trace_remove_sysfs(struct device *dev)
-{
- sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
-}
-
#endif /* CONFIG_BLK_DEV_IO_TRACE */
#ifdef CONFIG_EVENT_TRACING
@@ -1876,23 +1869,22 @@ void blk_trace_remove_sysfs(struct device *dev)
/**
* blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string.
* @rwbs: buffer to be filled
- * @op: REQ_OP_XXX for the tracepoint
+ * @opf: request operation type (REQ_OP_XXX) and flags for the tracepoint
*
* Description:
- * Maps the REQ_OP_XXX to character and fills the buffer provided by the
- * caller with resulting string.
+ * Maps each request operation and flag to a single character and fills the
+ * buffer provided by the caller with resulting string.
*
**/
-void blk_fill_rwbs(char *rwbs, unsigned int op)
+void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
{
int i = 0;
- if (op & REQ_PREFLUSH)
+ if (opf & REQ_PREFLUSH)
rwbs[i++] = 'F';
- switch (op & REQ_OP_MASK) {
+ switch (opf & REQ_OP_MASK) {
case REQ_OP_WRITE:
- case REQ_OP_WRITE_SAME:
rwbs[i++] = 'W';
break;
case REQ_OP_DISCARD:
@@ -1912,13 +1904,13 @@ void blk_fill_rwbs(char *rwbs, unsigned int op)
rwbs[i++] = 'N';
}
- if (op & REQ_FUA)
+ if (opf & REQ_FUA)
rwbs[i++] = 'F';
- if (op & REQ_RAHEAD)
+ if (opf & REQ_RAHEAD)
rwbs[i++] = 'A';
- if (op & REQ_SYNC)
+ if (opf & REQ_SYNC)
rwbs[i++] = 'S';
- if (op & REQ_META)
+ if (opf & REQ_META)
rwbs[i++] = 'M';
rwbs[i] = '\0';