/*--------------------------------------------------------------------------------*\
\*--------------------------------------------------------------------------------*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "avm_profile.h"
#include "arch_profile.h"
#include
#include
#include
#include
#include "asm/stacktrace.h"
#include "asm/traps.h"
#if defined(PROFILING_PERFORMANCE_COUNTER_SUPPORT)
#ifdef CONFIG_AVM_FASTIRQ_TZ
#ifdef CONFIG_AVM_FASTIRQ_ARCH_ARM_COMMON
#include
#else
#include
#endif
#endif
static void arm_profiling_special_enable(enum _simple_profile_enable_mode on, unsigned int enable_perfcnt);
/*--- #define DBG_PERF(args...) printk(KERN_INFO args) ---*/
/* Debug output is compiled out; enable the variant above to trace register setup. */
#define DBG_PERF(args...)
/* Name of performance event x, or NULL when x is outside performance_counter_options[]. */
#define GET_ARM_PERFORMANCE_EVENT_NAME(x) ((x) >= ARRAY_SIZE(performance_counter_options) ? NULL : performance_counter_options[x].name)
/* Norm factor of performance event x, or NORMED_MAX when x is outside performance_counter_options[]. */
#define GET_ARM_PERFORMANCE_EVENT_NORMFACTOR(x) ((x) >= ARRAY_SIZE(performance_counter_options) ? NORMED_MAX : performance_counter_options[x].norm_factor)
/*--------------------------------------------------------------------------------*\
 * Parameter block for read_perfcounter_per_cpu().
 * counting_reg_mask: bit n selects performance counter n
 * str_buf:           optional output buffer; when NULL the values are printed via printk
 * str_len:           in: size of str_buf, out: number of bytes consumed in str_buf
\*--------------------------------------------------------------------------------*/
struct _perfctl_param_read {
    unsigned int counting_reg_mask;
    char *str_buf; /* optional: write out in str_buf */
    int str_len;
};
/*--------------------------------------------------------------------------------*\
 * Parameter block for setup_perfcounter_per_cpu().
 * counting_reg_mask: bit n selects performance counter n
 * set_val:           bit combination of the perf_* actions below
 * preset_ctl:        control value written when perf_set_ctl is requested
\*--------------------------------------------------------------------------------*/
struct _perfctl_param_write {
    struct _fiq_profiling *fiq_profile; /*--- if set, the corresponding statistics structure is updated as well / values are restored via perf_preset_ctl ---*/
    unsigned int counting_reg_mask;
    enum {
        perf_set_ctl = 0x1, /*--- set the complete ctrl register ---*/
        perf_reset_cnt = 0x2, /*--- reset the count register ---*/
        perf_preset_ctl = 0x4, /*--- ctrl register from simple_perfstat (only if fiq_profile is set) ---*/
    } set_val;
    unsigned int preset_ctl;
};
static void setup_all_perfcounter(struct _perfctl_param_write *perfctl_param, unsigned int cpu);
#if defined(PROFILING_IN_FIQ)
/*--------------------------------------------------------------------------------*\
 * Simple measurement with "fixed" performance counters
\*--------------------------------------------------------------------------------*/
struct _simple_perfstat {
    unsigned int perf_ctrl[PROFILING_MAX_PERF_REGISTER]; /*--- performance counter mode ---*/
    unsigned long long sum_perf_count[PROFILING_MAX_PERF_REGISTER]; /*--- accumulated counter values per register ---*/
};
/*--------------------------------------------------------------------------------*\
 * Per-CPU profiler state (one entry of gFiqProfiler[] per CPU).
\*--------------------------------------------------------------------------------*/
struct _fiq_profiling {
    int id;
    int cpu;
    unsigned int long long tsum;
    unsigned int last_time;
    unsigned int perf_readmask; /*--- while profiling, do not trace the first 2 performance registers because the profiler uses them ---*/
    struct _simple_perfstat simple_perfstat;
    struct _roundrobin_perf_ctrlstat *act_roundrobin_perfstat; /*--- entry handed to clean_roundrobin_perfstat() on counter reset ---*/
    struct _roundrobin_perf_ctrlstat roundrobin_perfstat[48];
};
static struct _fiq_profiling gFiqProfiler[NR_CPUS];
/*--------------------------------------------------------------------------------*\
 * Initial preparation for round-robin mode
 * roundrobin_perfstat_table: table to initialise
 * perf_ctrl_ref: round-robin ctrl presets
 * entries: number of elements in both arrays
 * The list is initialised as a ring list.
 * An event without a name in performance_counter_options[] is reported and its
 * perf_ctrl slot is left untouched.
\*--------------------------------------------------------------------------------*/
static void __init init_roundrobin_perfctrlstat(struct _roundrobin_perf_ctrlstat roundrobin_perfstat_table[], const struct _roundrobin_perf_ctrlstat perf_ctrl_ref[], unsigned int entries) {
    unsigned int reg, i;
    for( i = 0; i < entries; i++) {
        struct _roundrobin_perf_ctrlstat *table = &roundrobin_perfstat_table[i];
        table->prefix = perf_ctrl_ref[i].prefix;
        for(reg = 0; reg < ARRAY_SIZE(table->perf_ctrl); reg++) {
            unsigned int event;
            table->sum_perf_count[reg] = 0;
            table->sum_perf_time[reg] = 0;
            event = perf_ctrl_ref[i].perf_ctrl[reg];
            if(GET_ARM_PERFORMANCE_EVENT_NAME(event) == NULL) {
                printk(KERN_ERR"[simple-profiling]Warning: Performance-Counter ctrl%u Option %u do not exist \n", reg, event);
                continue;
            }
            table->perf_ctrl[reg] = event;
            /*--- printk(KERN_ERR"ctrl%x Option %2u (%25s) : %08x\n", reg, event, GET_ARM_PERFORMANCE_EVENT_NAME(event), table->perf_ctrl[reg]); ---*/
        }
        table->next = &roundrobin_perfstat_table[(i + 1) % entries]; /*--- as ring list: the last entry points back to the first ---*/
    }
}
/*--------------------------------------------------------------------------------*\
 * Formats cnt as a percentage of norm with two decimals into txt (at most size bytes).
 * return: txt, or a constant placeholder string when norm is 0
\*--------------------------------------------------------------------------------*/
static char *get_percent(char *txt, int size, unsigned long cnt, unsigned long norm) {
    unsigned long long val_main, val_remainder;
    if(norm == 0) {
        return " ? ";
    }
    /* integer part: cnt * 100 / norm */
    val_main = (unsigned long long)cnt * 100;
    do_div(val_main, norm);
    /* same ratio scaled by another 100; its last two digits are the decimals */
    val_remainder = (unsigned long long)cnt * (100 * 100);
    do_div(val_remainder, norm);
    snprintf(txt, size, "%3lu.%02lu %%", (unsigned long)val_main, (unsigned long)val_remainder % 100);
    return txt;
}
/*--------------------------------------------------------------------------------*\
 * Scales count to a per-second rate for a measurement that lasted cycle cycles.
 * The elapsed time is computed in units of 1/2^SHIFT_FACTOR second from
 * gCycle_per_usec (presumably CPU cycles per microsecond - confirm at its definition).
 * return: count per second, or 0 when less than one time unit has elapsed
\*--------------------------------------------------------------------------------*/
#define SHIFT_FACTOR 8 /*--- accurate to 1/2^x of a second ---*/
static inline unsigned long norm_per_sec(unsigned long long count, unsigned long long cycle) {
    unsigned long long sec = cycle;
    do_div(sec, ((gCycle_per_usec * 1000 * 1000) >> SHIFT_FACTOR));
    if(sec == 0) {
        return 0;
    }
    count <<= SHIFT_FACTOR;
    do_div(count, sec);
    return (unsigned long)count;
}
/*--------------------------------------------------------------------------------*\
 * Cleanup of the ring list: zeroes the accumulated counts and times of every entry,
 * starting at roundrobin_perfstat and stopping once the walk returns to it
 * (or reaches a NULL next pointer). A NULL argument is accepted.
\*--------------------------------------------------------------------------------*/
static void clean_roundrobin_perfstat(struct _roundrobin_perf_ctrlstat *roundrobin_perfstat) {
    struct _roundrobin_perf_ctrlstat *table = roundrobin_perfstat;
    unsigned int reg;
    while(table) {
        for(reg = 0; reg < ARRAY_SIZE(table->perf_ctrl); reg++) {
            table->sum_perf_count[reg] = 0;
            table->sum_perf_time[reg] = 0;
        }
        table = table->next;
        if(table == roundrobin_perfstat) {
            break;
        }
    }
}
/*--------------------------------------------------------------------------------*\
 * Adds the last measurement of the current performance counters to entry and
 * programs the registers selected by perf_readmask with the events of the next entry
 * return: pointer to the next entry
 * call from FIQ context
\*--------------------------------------------------------------------------------*/
static struct _roundrobin_perf_ctrlstat *update_roundrobin_perfstat(struct _roundrobin_perf_ctrlstat *entry, unsigned int perf_cnt[], unsigned int perf_readmask, unsigned int meassure_time) {
    struct _roundrobin_perf_ctrlstat *next = entry->next;
    unsigned int reg;
    for(reg = 0; reg < PROFILING_MAX_PERF_REGISTER; reg++) {
        entry->sum_perf_count[reg] += perf_cnt[reg];
        entry->sum_perf_time[reg] += (unsigned long long)(meassure_time);
        if(((1 << reg) & (perf_readmask))) {
            if(next) {
                write_p15_performance_event_type_with_cnt_reset(reg, next->perf_ctrl[reg]);
            }
        }
    }
    return next;
}
/*--------------------------------------------------------------------------------*\
 * Initialises the simple performance counter table: clears the sums and records the
 * event type currently programmed in every register of PROFILING_PERF_REGISTERMASK
 * call only from the context of the corresponding CPU
\*--------------------------------------------------------------------------------*/
static void __init init_simple_perfstat_per_cpu(struct _simple_perfstat *psimple_perfstat) {
    unsigned int reg;
    int this_cpu __maybe_unused = get_cpu(); /* only used by DBG_PERF; get_cpu()/put_cpu() bracket the register reads */
    for(reg = 0; reg < ARRAY_SIZE(psimple_perfstat->sum_perf_count); reg++) {
        psimple_perfstat->sum_perf_count[reg] = 0;
        if(((1 << reg) & (PROFILING_PERF_REGISTERMASK))) {
            psimple_perfstat->perf_ctrl[reg] = read_p15_performance_event_type(reg);
            DBG_PERF("%s: cpu=%u reg[%u] = %x\n", __func__, this_cpu, reg, psimple_perfstat->perf_ctrl[reg]);
        }
    }
    put_cpu();
    psimple_perfstat->perf_ctrl[PROFILING_CYCLE_REGISTER] = PM_EVENT_CPU_CYCLES; /*--- not really a performance register but needed ---*/
}
/*--------------------------------------------------------------------------------*\
 * Simple measurement with "fixed" performance counters: clears the sums and rewrites
 * the stored event type (with counter reset) to every register of
 * PROFILING_PERF_REGISTERMASK
 * call only from the context of the corresponding CPU
\*--------------------------------------------------------------------------------*/
static void clean_simple_perfstat_per_cpu(struct _simple_perfstat *psimple_perfstat) {
    unsigned int reg;
    int this_cpu __maybe_unused = get_cpu(); /* only used by DBG_PERF; get_cpu()/put_cpu() bracket the register writes */
    for(reg = 0; reg < ARRAY_SIZE(psimple_perfstat->sum_perf_count); reg++) {
        psimple_perfstat->sum_perf_count[reg] = 0;
        if(((1 << reg) & (PROFILING_PERF_REGISTERMASK))) {
            write_p15_performance_event_type_with_cnt_reset(reg, psimple_perfstat->perf_ctrl[reg]);
            DBG_PERF("%s: cpu=%u reg[%u] = %x\n", __func__, this_cpu, reg, psimple_perfstat->perf_ctrl[reg]);
        }
    }
    put_cpu();
}
/*--------------------------------------------------------------------------------*\ * Simple Messung mit "festen" Performance-Countern * aus FIQ-Kontext aufrufen \*--------------------------------------------------------------------------------*/ static void update_simple_perfstat(struct _simple_perfstat *simple_perfstat, unsigned int perf_cnt[]) { struct _simple_perfstat *psimple_perfstat = simple_perfstat; unsigned int reg; for(reg = 0; reg < ARRAY_SIZE(psimple_perfstat->sum_perf_count); reg++) { psimple_perfstat->sum_perf_count[reg] += perf_cnt[reg]; } } #endif/*--- #if defined(PROFILING_IN_FIQ) ---*/ /*--------------------------------------------------------------------------------*\ * liefert Anzahl der Performance-Counter \*--------------------------------------------------------------------------------*/ static unsigned int arm_get_performance_counter_nr(void){ #ifdef CONFIG_MACH_PUMA6 uint32_t p_cnts = 2; #else uint32_t p_cnts = (read_p15_performance_monitor_control() >> 11) & 0x1F; #endif return p_cnts; } /*--------------------------------------------------------------------------------*\ * Lese perf_ctrl und perf_cnt * Welche Register mit Option param->counting_reg_mask * Ausgabe wahlweise per printk oder per param->str_buf \*--------------------------------------------------------------------------------*/ static void read_perfcounter_per_cpu(struct _perfctl_param_read *param __maybe_unused) { #if defined(PROFILING_IN_FIQ) unsigned int reg_cnt; unsigned int max_perf; char *str_buf = param->str_buf; int str_len = param->str_len; max_perf = arm_get_performance_counter_nr(); for(reg_cnt = 0; reg_cnt < max_perf; reg_cnt++) { unsigned int cnt, ctl; if ((( 1 << reg_cnt) & param->counting_reg_mask) == 0) { continue; } cnt = read_p15_performance_counter(reg_cnt); ctl = read_p15_performance_event_type(reg_cnt); if(str_buf == NULL) { printk(KERN_INFO"perf%u: cnt=0x%08x ctl=0x%08x \"%s\" \n", reg_cnt, cnt, ctl, GET_ARM_PERFORMANCE_EVENT_NAME(ctl) ); } else { 
if(str_len > 0) { int len = snprintf(str_buf, str_len, "perf%u: cnt=0x%08x ctl=0x%08x \"%s\"\n", reg_cnt, cnt, ctl, GET_ARM_PERFORMANCE_EVENT_NAME(ctl) ); len = min(len, str_len); str_buf += len; str_len -= len; } } } if(str_buf) { param->str_len = str_buf - param->str_buf; } else { param->str_len = 0; } #endif/*--- #if defined(PROFILING_IN_FIQ) ---*/ } /*--------------------------------------------------------------------------------*\ * Setzt perf_ctrl und/oder perf_cnt (Option param->set_val) * Welche Register mit Option param->counting_reg_mask * * Wenn auch perf-Statistik initialisiert werden soll: param->fiq_profile gesetzt * in simple_perfstat wird das event vermerkt \*--------------------------------------------------------------------------------*/ static void setup_perfcounter_per_cpu(struct _perfctl_param_write *param __maybe_unused) { #if defined(PROFILING_IN_FIQ) int cnt = param->counting_reg_mask; int reg = 0; while(cnt) { if(cnt & 0x1) { if((1 << reg) & PROFILING_PERF_REGISTERMASK) { if (param->set_val & perf_preset_ctl && param->fiq_profile) { write_p15_performance_event_type(reg, param->fiq_profile->simple_perfstat.perf_ctrl[reg]); DBG_PERF("%s: %x -> reg[%u]\n", __func__, read_p15_performance_event_type(reg), reg); } else if (param->set_val & perf_set_ctl) { write_p15_performance_event_type(reg, param->preset_ctl); if(param->fiq_profile){ param->fiq_profile->simple_perfstat.perf_ctrl[reg] = param->preset_ctl; DBG_PERF("%s: reg[%u] = %x\n", __func__, reg, param->preset_ctl); } } if (param->set_val & perf_reset_cnt) { write_p15_performance_counter(reg, 0); } } } reg++; cnt >>= 1; } if((param->set_val & perf_reset_cnt) && param->fiq_profile) { clean_roundrobin_perfstat(param->fiq_profile->act_roundrobin_perfstat); clean_simple_perfstat_per_cpu(¶m->fiq_profile->simple_perfstat); } #endif/*--- #if defined(PROFILING_IN_FIQ) ---*/ } /*--------------------------------------------------------------------------------*\ 
\*--------------------------------------------------------------------------------*/ static void print_performance_options(void) { unsigned int max_perf, i; max_perf = arm_get_performance_counter_nr(); if(max_perf == 0) { return; } for(i = 0; i < ARRAY_SIZE(performance_counter_options); i++) { if(GET_ARM_PERFORMANCE_EVENT_NAME(i) == NULL) { continue; } printk(KERN_INFO "\tctrl:[%3d]: %s\n", i, GET_ARM_PERFORMANCE_EVENT_NAME(i)); } } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int print_all_performance_options_cb(unsigned int param1 __maybe_unused, unsigned int param2 __maybe_unused){ print_performance_options(); return 1; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int print_config_cb(unsigned int cpu __maybe_unused, unsigned int param2 __maybe_unused){ struct _perfctl_param_read perfctl_param; perfctl_param.counting_reg_mask = (1 << arm_get_performance_counter_nr()) - 1; perfctl_param.str_buf = NULL; perfctl_param.str_len = 0; #if defined(PROFILING_IN_FIQ) if(cpu >= NR_CPUS) { for(cpu = 0; cpu < NR_CPUS; cpu++) { smp_call_function_single(cpu, (smp_call_func_t)read_perfcounter_per_cpu, &perfctl_param, true); } } else { smp_call_function_single(cpu, (smp_call_func_t)read_perfcounter_per_cpu, &perfctl_param, true); } #else/*--- #if defined(PROFILING_IN_FIQ) ---*/ read_perfcounter_per_cpu(&perfctl_param); #endif/*--- #else ---*//*--- #if defined(PROFILING_IN_FIQ) ---*/ return 1; } /*--------------------------------------------------------------------------------*\ * ret: len of str \*--------------------------------------------------------------------------------*/ static int arm_get_performance_counter_mode(char *str, int str_len, unsigned int nr){ int len = 0; struct _perfctl_param_read perfctl_param; 
unsigned int cpu __maybe_unused; perfctl_param.counting_reg_mask = 0x1 << nr; #if defined(PROFILING_IN_FIQ) for(cpu = 0; cpu < NR_CPUS; cpu++) { int mlen = snprintf(str, str_len, "CPU%u:\n", cpu); mlen = min(str_len, mlen); str_len -= mlen, str += mlen, len += mlen; perfctl_param.str_len = str_len; perfctl_param.str_buf = str; smp_call_function_single(gFiqProfiler[cpu].cpu, (smp_call_func_t)read_perfcounter_per_cpu, &perfctl_param, true); /*--- printk("%s: smp-retlen: %u\n", __func__, perfctl_param.str_len); ---*/ str_len -= perfctl_param.str_len, str += perfctl_param.str_len, len += perfctl_param.str_len; } #else/*--- #if defined(PROFILING_IN_FIQ) ---*/ perfctl_param.str_len = str_len; perfctl_param.str_buf = str; read_perfcounter_per_cpu(&perfctl_param); len = perfctl_param.str_len; #endif/*--- #else ---*//*--- #if defined(PROFILING_IN_FIQ) ---*/ /*--- printk("%s: len: %u\n", __func__, len); ---*/ return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int check_setting_param_cb(unsigned int perf_reg, unsigned int option){ unsigned int perf_max_registers = arm_get_performance_counter_nr(); if(perf_reg >= perf_max_registers) { printk(KERN_ERR"error: invalid param1 on set %u