/*
 * Xtensa Performance Monitor Module driver
 * See Tensilica Debug User's Guide for PMU registers documentation.
 *
 * Copyright (C) 2015 Cadence Design Systems Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#include <asm/processor.h>
#include <asm/stacktrace.h>

/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG			0x1000
/* Perf counter values */
#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)

#define XTENSA_PMU_PMG_PMEN		0x1

#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
#define XTENSA_PMU_COUNTER_MAX		0x7fffffff

#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
#define XTENSA_PMU_PMCTRL_MASK		0xffff0000

/*
 * Build a PMCTRL value for the given event select/mask pair, with the
 * interrupt enable bit and all trace level bits set.
 */
#define XTENSA_PMU_MASK(select, mask) \
	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
	 XTENSA_PMU_PMCTRL_INTEN)

#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010

struct xtensa_pmu_events {
	/* Array of events currently on this core */
	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
	/* Bitmap of used hardware counters */
	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);

/*
 * PMCTRL values for the generic PERF_TYPE_HARDWARE events. Entries that
 * are not listed stay zero and are rejected by xtensa_pmu_event_init().
 */
static const u32 xtensa_hw_ctl[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
	/* Taken and non-taken branches + taken loop ends */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
	/* Instruction-related + other global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
	/* Data-related global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
};

#define C(_x) PERF_COUNT_HW_CACHE_##_x

/*
 * PMCTRL values for PERF_TYPE_HW_CACHE events, indexed by cache type,
 * operation and result. Zero entries mean "not supported".
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
		},
	},
};

/*
 * Translate a PERF_TYPE_HW_CACHE config into a PMCTRL value.
 *
 * The config carries the cache type in bits 0-7, the operation in bits
 * 8-15 and the result in bits 16-23. Returns the PMCTRL value from
 * xtensa_cache_ctl, or -EINVAL when any field is out of range or the
 * combination has no table entry.
 *
 * NOTE: the table value is returned through an int, so an entry with
 * bit 31 set would be indistinguishable from an error code; none of the
 * current entries has that bit set.
 */
static int xtensa_pmu_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	int ret;

	cache_type = (config >>  0) & 0xff;
	cache_op = (config >>  8) & 0xff;
	cache_result = (config >> 16) & 0xff;

	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
	    cache_op >= C(OP_MAX) ||
	    cache_result >= C(RESULT_MAX))
		return -EINVAL;

	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];

	if (ret == 0)
		return -EINVAL;

	return ret;
}

/* Read the current value of hardware counter @idx. */
static inline uint32_t xtensa_pmu_read_counter(int idx)
{
	return get_er(XTENSA_PMU_PM(idx));
}

/* Load hardware counter @idx with @v. */
static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
	set_er(v, XTENSA_PMU_PM(idx));
}

/*
 * Fold the progress of hardware counter @idx since the last update into
 * the event: the 32-bit delta between the previously recorded and the
 * current raw value is added to event->count and subtracted from
 * hwc->period_left.
 */
static void xtensa_perf_event_update(struct perf_event *event,
				     struct hw_perf_event *hwc, int idx)
{
	uint64_t prev_raw_count, new_raw_count;
	int64_t delta;

	/* Retry until prev_count is swapped without a concurrent update. */
	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = xtensa_pmu_read_counter(idx);
	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
				 new_raw_count) != prev_raw_count);

	/* The hardware counter is 32 bits wide; drop anything above. */
	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

/*
 * Program hardware counter @idx with the number of events left in the
 * current period (capped at XTENSA_PMU_COUNTER_MAX). Non-sampling events
 * always get the maximum.
 *
 * Returns true when the previous sampling period had run out and a new
 * one was started, false otherwise.
 */
static bool xtensa_perf_event_set_period(struct perf_event *event,
					 struct hw_perf_event *hwc, int idx)
{
	bool rc = false;
	s64 left;

	if (!is_sampling_event(event)) {
		left = XTENSA_PMU_COUNTER_MAX;
	} else {
		s64 period = hwc->sample_period;

		left = local64_read(&hwc->period_left);
		if (left <= -period) {
			/* Overshot by a full period or more: start afresh. */
			left = period;
			local64_set(&hwc->period_left, left);
			hwc->last_period = period;
			rc = true;
		} else if (left <= 0) {
			/* Carry the overshoot into the next period. */
			left += period;
			local64_set(&hwc->period_left, left);
			hwc->last_period = period;
			rc = true;
		}
		if (left > XTENSA_PMU_COUNTER_MAX)
			left = XTENSA_PMU_COUNTER_MAX;
	}

	/*
	 * The counter is loaded with -left; presumably it counts up and
	 * overflows after 'left' events (see the PMU documentation).
	 */
	local64_set(&hwc->prev_count, -left);
	xtensa_pmu_write_counter(idx, -left);
	perf_event_update_userpage(event);

	return rc;
}

/* Set the global PMEN bit in the PMG register. */
static void xtensa_pmu_enable(struct pmu *pmu)
{
	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
}

/* Clear the global PMEN bit in the PMG register. */
static void xtensa_pmu_disable(struct pmu *pmu)
{
	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
}

/*
 * Validate the event attributes and compute the PMCTRL value that will
 * be used for it (stored in event->hw.config).
 *
 * Returns 0 on success, -EINVAL for an unsupported or invalid config and
 * -ENOENT for event types this PMU does not handle.
 */
static int xtensa_pmu_event_init(struct perf_event *event)
{
	int ret;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
		    xtensa_hw_ctl[event->attr.config] == 0)
			return -EINVAL;
		event->hw.config = xtensa_hw_ctl[event->attr.config];
		return 0;

	case PERF_TYPE_HW_CACHE:
		ret = xtensa_pmu_cache_event(event->attr.config);
		if (ret < 0)
			return ret;
		event->hw.config = ret;
		return 0;

	case PERF_TYPE_RAW:
		/* Not 'previous counter' select */
		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
			return -EINVAL;
		/* Keep only the known PMCTRL fields; always enable the IRQ. */
		event->hw.config = (event->attr.config &
				    (XTENSA_PMU_PMCTRL_KRNLCNT |
				     XTENSA_PMU_PMCTRL_TRACELEVEL |
				     XTENSA_PMU_PMCTRL_SELECT |
				     XTENSA_PMU_PMCTRL_MASK)) |
			XTENSA_PMU_PMCTRL_INTEN;
		return 0;

	default:
		return -ENOENT;
	}
}

/*
 * Starts/Stops a counter present on the PMU. The PMI handler
 * should stop the counter when perf_event_overflow() returns
 * !0. ->start() will be used to continue.
 */
static void xtensa_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		xtensa_perf_event_set_period(event, hwc, idx);
	}

	hwc->state = 0;

	/* Writing the event's config to PMCTRL starts the counter. */
	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
}

/*
 * Stop the event's counter (unless already stopped) and, when
 * PERF_EF_UPDATE is requested and the count is stale, bring
 * event->count up to date.
 */
static void xtensa_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		set_er(0, XTENSA_PMU_PMCTRL(idx));
		/*
		 * Write the status value back to PMSTAT (presumably
		 * write-one-to-clear; see the PMU documentation).
		 */
		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
		       XTENSA_PMU_PMSTAT(idx));
		hwc->state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		xtensa_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

/*
 * Adds/Removes a counter to/from the PMU, can be done inside
 * a transaction, see the ->*_txn() methods.
 */
static int xtensa_pmu_add(struct perf_event *event, int flags)
{
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/*
	 * Prefer the counter index already recorded in the event; if it
	 * is taken on this CPU, fall back to the first free counter.
	 */
	if (__test_and_set_bit(idx, ev->used_mask)) {
		idx = find_first_zero_bit(ev->used_mask,
					  XCHAL_NUM_PERF_COUNTERS);
		if (idx == XCHAL_NUM_PERF_COUNTERS)
			return -EAGAIN;

		__set_bit(idx, ev->used_mask);
		hwc->idx = idx;
	}
	ev->event[idx] = event;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		xtensa_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
	return 0;
}

/* Stop the event, update its count and release its hardware counter. */
static void xtensa_pmu_del(struct perf_event *event, int flags)
{
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);

	xtensa_pmu_stop(event, PERF_EF_UPDATE);
	__clear_bit(event->hw.idx, ev->used_mask);
	perf_event_update_userpage(event);
}

/* Bring event->count up to date with the hardware counter. */
static void xtensa_pmu_read(struct perf_event *event)
{
	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
}

/* Backtrace callback: record one frame's PC in the callchain entry. */
static int callchain_trace(struct stackframe *frame, void *data)
{
	struct perf_callchain_entry *entry = data;

	perf_callchain_store(entry, frame->pc);
	return 0;
}

/* Collect the kernel-side callchain for @regs into @entry. */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
				callchain_trace, NULL, entry);
}

/* Collect the user-side callchain for @regs into @entry. */
void perf_callchain_user(struct perf_callchain_entry *entry,
			 struct pt_regs *regs)
{
	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
			      callchain_trace, entry);
}

/* Dump PMG and every counter's PM/PMCTRL/PMSTAT registers to the log. */
void perf_event_print_debug(void)
{
	unsigned long flags;
	unsigned i;

	/* Keep interrupts off so the dump comes from a single CPU. */
	local_irq_save(flags);
	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
		get_er(XTENSA_PMU_PMG));
	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
			i, get_er(XTENSA_PMU_PM(i)),
			i, get_er(XTENSA_PMU_PMCTRL(i)),
			i, get_er(XTENSA_PMU_PMSTAT(i)));
	local_irq_restore(flags);
}

/*
 * PMU interrupt handler. For every counter in use on this CPU whose
 * status has the overflow flag set: write the status back, update the
 * event count, re-arm the counter and, when a new sampling period was
 * started, report the overflow to the perf core (stopping the counter
 * if the core asks for it).
 *
 * Returns IRQ_HANDLED if at least one counter had overflowed, IRQ_NONE
 * otherwise.
 */
irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
	irqreturn_t rc = IRQ_NONE;
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
	unsigned i;

	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
	     i < XCHAL_NUM_PERF_COUNTERS;
	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
		struct perf_event *event = ev->event[i];
		struct hw_perf_event *hwc = &event->hw;
		u64 last_period;

		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
			continue;

		set_er(v, XTENSA_PMU_PMSTAT(i));
		xtensa_perf_event_update(event, hwc, i);
		/* Sample before set_period() may overwrite last_period. */
		last_period = hwc->last_period;
		if (xtensa_perf_event_set_period(event, hwc, i)) {
			struct perf_sample_data data;
			struct pt_regs *regs = get_irq_regs();

			perf_sample_data_init(&data, 0, last_period);
			if (perf_event_overflow(event, &data, regs))
				xtensa_pmu_stop(event, 0);
		}

		rc = IRQ_HANDLED;
	}
	return rc;
}

static struct pmu xtensa_pmu = {
	.pmu_enable = xtensa_pmu_enable,
	.pmu_disable = xtensa_pmu_disable,
	.event_init = xtensa_pmu_event_init,
	.add = xtensa_pmu_add,
	.del = xtensa_pmu_del,
	.start = xtensa_pmu_start,
	.stop = xtensa_pmu_stop,
	.read = xtensa_pmu_read,
};

/*
 * Put the PMU of the current CPU into a known state: global control
 * zeroed, every counter's control register zeroed and its status
 * register written back with its own value.
 */
static void xtensa_pmu_setup(void)
{
	unsigned i;

	set_er(0, XTENSA_PMU_PMG);
	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
		set_er(0, XTENSA_PMU_PMCTRL(i));
		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
	}
}

/* CPU notifier: reset the PMU of a CPU that is starting up. */
static int xtensa_pmu_notifier(struct notifier_block *self,
			       unsigned long action, void *data)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_STARTING:
		xtensa_pmu_setup();
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}

/*
 * Map the profiling interrupt, hook the CPU notifier, set up the
 * interrupt (enable only under XTENSA_FAKE_NMI, request a handler
 * otherwise) and register the PMU with the perf core.
 *
 * Returns 0 on success or the error from request_irq() /
 * perf_pmu_register().
 */
static int __init xtensa_pmu_init(void)
{
	int ret;
	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);

	perf_cpu_notifier(xtensa_pmu_notifier);
#if XTENSA_FAKE_NMI
	enable_irq(irq);
#else
	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
			  "pmu", NULL);
	if (ret < 0)
		return ret;
#endif

	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
	if (ret) {
		/*
		 * Undo exactly what was done above: under XTENSA_FAKE_NMI
		 * the IRQ was only enabled, never requested, so it must
		 * not be passed to free_irq().
		 */
#if XTENSA_FAKE_NMI
		disable_irq(irq);
#else
		free_irq(irq, NULL);
#endif
	}

	return ret;
}
early_initcall(xtensa_pmu_init);