--- zzzz-none-000/linux-3.10.107/kernel/trace/blktrace.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/kernel/trace/blktrace.c 2021-02-04 17:41:59.000000000 +0000 @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -38,6 +39,9 @@ static struct trace_array *blk_tr; static bool blk_tracer_enabled __read_mostly; +static LIST_HEAD(running_trace_list); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock); + /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 @@ -99,7 +103,7 @@ memcpy((void *) t + sizeof(*t), data, len); if (blk_tracer) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); } } @@ -107,10 +111,18 @@ * Send out a notify for this process, if we haven't done so since a trace * started */ -static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +static void trace_note_tsk(struct task_struct *tsk) { + unsigned long flags; + struct blk_trace *bt; + tsk->btrace_seq = blktrace_seq; - trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); + spin_lock_irqsave(&running_trace_lock, flags); + list_for_each_entry(bt, &running_trace_list, running_list) { + trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, + sizeof(tsk->comm)); + } + spin_unlock_irqrestore(&running_trace_lock, flags); } static void trace_note_time(struct blk_trace *bt) @@ -229,16 +241,15 @@ goto record_it; } + if (unlikely(tsk->btrace_seq != blktrace_seq)) + trace_note_tsk(tsk); + /* * A word about the locking here - we disable interrupts to reserve * some space in the relay per-cpu buffer, to prevent an irq * from coming in and stepping on our toes. */ local_irq_save(flags); - - if (unlikely(tsk->btrace_seq != blktrace_seq)) - trace_note_tsk(bt, tsk); - t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); if (t) { sequence = per_cpu_ptr(bt->sequence, cpu); @@ -267,7 +278,7 @@ memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); return; } } @@ -426,9 +437,9 @@ struct block_device *bdev, struct blk_user_trace_setup *buts) { - struct blk_trace *old_bt, *bt = NULL; + struct blk_trace *bt = NULL; struct dentry *dir = NULL; - int ret, i; + int ret; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -440,9 +451,7 @@ * some device names have larger paths - convert the slashes * to underscores for this to work as expected */ - for (i = 0; i < strlen(buts->name); i++) - if (buts->name[i] == '/') - buts->name[i] = '_'; + strreplace(buts->name, '/', '_'); bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) @@ -477,6 +486,7 @@ bt->dir = dir; bt->dev = dev; atomic_set(&bt->dropped, 0); + INIT_LIST_HEAD(&bt->running_list); ret = -EIO; bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, @@ -509,11 +519,8 @@ bt->trace_state = Blktrace_setup; ret = -EBUSY; - old_bt = xchg(&q->blk_trace, bt); - if (old_bt) { - (void) xchg(&q->blk_trace, old_bt); + if (cmpxchg(&q->blk_trace, NULL, bt)) goto err; - } if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints(); @@ -567,13 +574,12 @@ .end_lba = cbuts.end_lba, .pid = cbuts.pid, }; - memcpy(&buts.name, &cbuts.name, 32); ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; - if (copy_to_user(arg, &buts.name, 32)) { + if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { blk_trace_remove(q); return -EFAULT; } @@ -601,6 +607,9 @@ blktrace_seq++; smp_mb(); bt->trace_state = Blktrace_running; + spin_lock_irq(&running_trace_lock); + list_add(&bt->running_list, &running_trace_list); + spin_unlock_irq(&running_trace_lock); trace_note_time(bt); ret = 0; @@ -608,6 +617,9 @@ } else { if (bt->trace_state == Blktrace_running) { bt->trace_state = Blktrace_stopped; + spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + spin_unlock_irq(&running_trace_lock); relay_flush(bt->rchan); ret = 0; } @@ -763,11 +775,8 @@ if (likely(!bt)) return; - if (!error && !bio_flagged(bio, BIO_UPTODATE)) - error = EIO; - - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, - error, 0, NULL); + __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, + bio->bi_rw, what, error, 0, NULL); } static void blk_add_trace_bio_bounce(void *ignore, @@ -870,9 +879,9 @@ if (bt) { __be64 rpdu = cpu_to_be64(pdu); - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, - BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), - sizeof(rpdu), &rpdu); + __blk_add_trace(bt, bio->bi_iter.bi_sector, + bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT, + bio->bi_error, sizeof(rpdu), &rpdu); } } @@ -903,8 +912,8 @@ r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); r.sector_from = cpu_to_be64(from); - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, - BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), + __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, + bio->bi_rw, BLK_TA_REMAP, bio->bi_error, sizeof(r), &r); } @@ -1124,9 +1133,9 @@ r->sector_from = be64_to_cpu(sector_from); } -typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); +typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act); -static int blk_log_action_classic(struct trace_iterator *iter, const char *act) +static void blk_log_action_classic(struct trace_iterator *iter, const char *act) { char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; @@ -1136,33 +1145,33 @@ fill_rwbs(rwbs, t); - return trace_seq_printf(&iter->seq, - "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", - MAJOR(t->device), MINOR(t->device), iter->cpu, - secs, nsec_rem, iter->ent->pid, act, rwbs); + trace_seq_printf(&iter->seq, + "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", + MAJOR(t->device), MINOR(t->device), iter->cpu, + secs, nsec_rem, iter->ent->pid, act, rwbs); } -static int blk_log_action(struct trace_iterator *iter, const char *act) +static void blk_log_action(struct trace_iterator *iter, const char *act) { char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); - return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", - MAJOR(t->device), MINOR(t->device), act, rwbs); + trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", + MAJOR(t->device), MINOR(t->device), act, rwbs); } -static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) { const unsigned char *pdu_buf; int pdu_len; - int i, end, ret; + int i, end; pdu_buf = pdu_start(ent); pdu_len = te_blk_io_trace(ent)->pdu_len; if (!pdu_len) - return 1; + return; /* find the last zero that needs to be printed */ for (end = pdu_len - 1; end >= 0; end--) @@ -1170,119 +1179,107 @@ break; end++; - if (!trace_seq_putc(s, '(')) - return 0; + trace_seq_putc(s, '('); for (i = 0; i < pdu_len; i++) { - ret = trace_seq_printf(s, "%s%02x", - i == 0 ? "" : " ", pdu_buf[i]); - if (!ret) - return ret; + trace_seq_printf(s, "%s%02x", + i == 0 ? "" : " ", pdu_buf[i]); /* * stop when the rest is just zeroes and indicate so * with a ".." appended */ - if (i == end && end != pdu_len - 1) - return trace_seq_puts(s, " ..) "); + if (i == end && end != pdu_len - 1) { + trace_seq_puts(s, " ..) "); + return; + } } - return trace_seq_puts(s, ") "); + trace_seq_puts(s, ") "); } -static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { - int ret; - - ret = trace_seq_printf(s, "%u ", t_bytes(ent)); - if (!ret) - return 0; - ret = blk_log_dump_pdu(s, ent); - if (!ret) - return 0; - return trace_seq_printf(s, "[%s]\n", cmd); + trace_seq_printf(s, "%u ", t_bytes(ent)); + blk_log_dump_pdu(s, ent); + trace_seq_printf(s, "[%s]\n", cmd); } else { if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%s]\n", + trace_seq_printf(s, "%llu + %u [%s]\n", t_sector(ent), t_sec(ent), cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + else + trace_seq_printf(s, "[%s]\n", cmd); } } -static int blk_log_with_error(struct trace_seq *s, +static void blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) { if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { - int ret; - - ret = blk_log_dump_pdu(s, ent); - if (ret) - return trace_seq_printf(s, "[%d]\n", t_error(ent)); - return 0; + blk_log_dump_pdu(s, ent); + trace_seq_printf(s, "[%d]\n", t_error(ent)); } else { if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%d]\n", - t_sector(ent), - t_sec(ent), t_error(ent)); - return trace_seq_printf(s, "%llu [%d]\n", - t_sector(ent), t_error(ent)); + trace_seq_printf(s, "%llu + %u [%d]\n", + t_sector(ent), + t_sec(ent), t_error(ent)); + else + trace_seq_printf(s, "%llu [%d]\n", + t_sector(ent), t_error(ent)); } } -static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) { struct blk_io_trace_remap r = { .device_from = 0, }; get_pdu_remap(ent, &r); - return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", - t_sector(ent), t_sec(ent), - MAJOR(r.device_from), MINOR(r.device_from), - (unsigned long long)r.sector_from); + trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", + t_sector(ent), t_sec(ent), + MAJOR(r.device_from), MINOR(r.device_from), + (unsigned long long)r.sector_from); } -static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + trace_seq_printf(s, "[%s]\n", cmd); } -static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); + trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); } -static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), - get_pdu_int(ent), cmd); + trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), + get_pdu_int(ent), cmd); } -static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) { - int ret; const struct blk_io_trace *t = te_blk_io_trace(ent); - ret = trace_seq_putmem(s, t + 1, t->pdu_len); - if (ret) - return trace_seq_putc(s, '\n'); - return ret; + trace_seq_putmem(s, t + 1, t->pdu_len); + trace_seq_putc(s, '\n'); } /* @@ -1321,7 +1318,7 @@ static const struct { const char *act[2]; - int (*print)(struct trace_seq *s, const struct trace_entry *ent); + void (*print)(struct trace_seq *s, const struct trace_entry *ent); } what2act[] = { [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, @@ -1343,34 +1340,31 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, bool classic) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; const struct blk_io_trace *t; u16 what; - int ret; bool long_act; blk_log_action_t *log_action; t = te_blk_io_trace(iter->ent); what = t->action & ((1 << BLK_TC_SHIFT) - 1); - long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + long_act = !!(tr->trace_flags & TRACE_ITER_VERBOSE); log_action = classic ? &blk_log_action_classic : &blk_log_action; if (t->action == BLK_TN_MESSAGE) { - ret = log_action(iter, long_act ? "message" : "m"); - if (ret) - ret = blk_log_msg(s, iter->ent); - goto out; + log_action(iter, long_act ? "message" : "m"); + blk_log_msg(s, iter->ent); } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) - ret = trace_seq_printf(s, "Unknown action %x\n", what); + trace_seq_printf(s, "Unknown action %x\n", what); else { - ret = log_action(iter, what2act[what].act[long_act]); - if (ret) - ret = what2act[what].print(s, iter->ent); + log_action(iter, what2act[what].act[long_act]); + what2act[what].print(s, iter->ent); } -out: - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + + return trace_handle_return(s); } static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, @@ -1379,7 +1373,7 @@ return print_one_line(iter, false); } -static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) +static void blk_trace_synthesize_old_trace(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; @@ -1389,18 +1383,18 @@ .time = iter->ts, }; - if (!trace_seq_putmem(s, &old, offset)) - return 0; - return trace_seq_putmem(s, &t->sector, - sizeof(old) - offset + t->pdu_len); + trace_seq_putmem(s, &old, offset); + trace_seq_putmem(s, &t->sector, + sizeof(old) - offset + t->pdu_len); } static enum print_line_t blk_trace_event_print_binary(struct trace_iterator *iter, int flags, struct trace_event *event) { - return blk_trace_synthesize_old_trace(iter) ? - TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + blk_trace_synthesize_old_trace(iter); + + return trace_handle_return(&iter->seq); } static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) @@ -1411,14 +1405,15 @@ return print_one_line(iter, true); } -static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +static int +blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { /* don't output context-info for blk_classic output */ if (bit == TRACE_BLK_OPT_CLASSIC) { if (set) - trace_flags &= ~TRACE_ITER_CONTEXT_INFO; + tr->trace_flags &= ~TRACE_ITER_CONTEXT_INFO; else - trace_flags |= TRACE_ITER_CONTEXT_INFO; + tr->trace_flags |= TRACE_ITER_CONTEXT_INFO; } return 0; } @@ -1447,14 +1442,14 @@ static int __init init_blk_tracer(void) { - if (!register_ftrace_event(&trace_blk_event)) { + if (!register_trace_event(&trace_blk_event)) { pr_warning("Warning: could not register block events\n"); return 1; } if (register_tracer(&blk_tracer) != 0) { pr_warning("Warning: could not register the block tracer\n"); - unregister_ftrace_event(&trace_blk_event); + unregister_trace_event(&trace_blk_event); return 1; } @@ -1484,7 +1479,7 @@ static int blk_trace_setup_queue(struct request_queue *q, struct block_device *bdev) { - struct blk_trace *old_bt, *bt = NULL; + struct blk_trace *bt = NULL; int ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); @@ -1500,12 +1495,9 @@ blk_trace_setup_lba(bt, bdev); - old_bt = xchg(&q->blk_trace, bt); - if (old_bt != NULL) { - (void)xchg(&q->blk_trace, old_bt); - ret = -EBUSY; + ret = -EBUSY; + if (cmpxchg(&q->blk_trace, NULL, bt)) goto free_bt; - } if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints();