/*--------------------------------------------------------------------------------*\
 * Performance-counter control-word macros and human readable event-name tables
 * used by the simple profiler.
\*--------------------------------------------------------------------------------*/
/*--- NOTE(review): the header names of the following includes are missing in this copy of the file ---*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "avm_profile.h"
#define ARCH_MIPS_PROFILE_C
#include "arch_profile.h"

#if defined(PROFILING_CPU_HAS_TC)
/*--- wrappers around mttc0()/mftc0() for register 25, selects 0..3 (control 0, counter 0, control 1, counter 1) ---*/
#define write_tc_c0_perfcntr0(val) mttc0(25, 1, val)
#define write_tc_c0_perfcntr1(val) mttc0(25, 3, val)
#define write_tc_c0_perfctrl0(val) mttc0(25, 0, val)
#define write_tc_c0_perfctrl1(val) mttc0(25, 2, val)
#define read_tc_c0_perfcntr0() mftc0(25, 1)
#define read_tc_c0_perfcntr1() mftc0(25, 3)
#define read_tc_c0_perfctrl0() mftc0(25, 0)
#define read_tc_c0_perfctrl1() mftc0(25, 2)
static DEFINE_SPINLOCK(perfcnt_lock);
static DEFINE_SPINLOCK(trace_lock);
static void mips_profiling_special_enable(enum _simple_profile_enable_mode on, unsigned int enable_perfcnt);
extern u32 kernelsp[NR_CPUS];
#else /*--- #if defined(PROFILING_CPU_HAS_TC) ---*/
/*--- fallback sizes when PROFILING_CPU_HAS_TC is not defined: one TC, one core, two counter registers ---*/
#define PROFILING_MAX_COUNT_TCS 1
#define PROFILING_CORES 1
#define PROFILING_MAX_PERF_REGISTER 2
#endif/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/

/*--- single-bit flags of a performance control word ---*/
#define MIPS_PERFORMANCE_HAS_MORE (1 << 31)
#define MIPS_PERFORMANCE_IRQ_ENABLE (1 << 4)
#define MIPS_PERFORMANCE_USER_MODE_ENABLE (1 << 3)
#define MIPS_PERFORMANCE_SUPERVISOR_MODE_ENABLE (1 << 2)
#define MIPS_PERFORMANCE_KERNEL_MODE_ENABLE (1 << 1)
#define MIPS_PERFORMANCE_EXCEPTION_ENABLE (1 << 0)
/*--- all four mode-enable bits (bits 0..3) ---*/
#define MIPS_PERFORMANCE_MODE_MASK (MIPS_PERFORMANCE_USER_MODE_ENABLE | MIPS_PERFORMANCE_SUPERVISOR_MODE_ENABLE | MIPS_PERFORMANCE_KERNEL_MODE_ENABLE | MIPS_PERFORMANCE_EXCEPTION_ENABLE)
/*--- two-bit selector at bits 20..21: VPE-specific or TC-specific counting ---*/
#define MIPS_PERFORMANCE_VPE_SPECIFIC_ENABLE (0x01 << 20)
#define MIPS_PERFORMANCE_TC_SPECIFIC_ENABLE (0x02 << 20)
/*--- field encoders: VPEID = 4 bits at bit 16, TCID = 8 bits at bit 22, event = 7 bits at bit 5 ---*/
#define MIPS_PERFORMANCE_VPEID(x) (((x) & 0x0F) << 16)
#define MIPS_PERFORMANCE_TCID(x) (((x) & 0xFF) << 22)
#define MIPS_PERFORMANCE_EVENT(x) (((x) & 0x7F) << 5)
/*--- field decoders, inverse of the encoders above ---*/
#define GET_MIPS_PERFORMANCE_EVENT(x) (((x) >> 5) & 0x7F)
#define GET_MIPS_PERFORMANCE_TCID(x) (((x) >> 22) & 0xFF)
#define GET_MIPS_PERFORMANCE_VPEID(x) (((x) >> 16) & 0x0F)

/*--- debug output is compiled out; the commented variant below prints via printk ---*/
/*--- #define DBG_PERF(args...) printk(KERN_INFO args) ---*/
#define DBG_PERF(args...)
/*--------------------------------------------------------------------------------*\
 * Event-name lookup: first index is the counter register (masked to 0..1),
 * second index the event number (masked to 0..127). Events without an entry
 * in the table yield NULL.
 * init_tc_stat() searches the names with strstr() for the tags "(P)" and "(V)",
 * so these tags are part of the program logic and must stay in the strings.
\*--------------------------------------------------------------------------------*/
#define performance_counter_options(reg, ctrl) _performance_counter_options[(reg) & 0x1][(ctrl) & 0x7F]
#if defined(CONFIG_CPU_MIPS_74K)
/*--- event names used when CONFIG_CPU_MIPS_74K is set ---*/
static const char *_performance_counter_options[2][128] = {
	[0] = {
		[ 0] = "Cycles",
		[ 1] = "Instructions completed",
		[ 2] = "JR r31 (return) instructions",
		[ 3] = "Cycles where no instn is fetched or after wait",
		[ 4] = "ITLB accesses",
		[ 6] = "Instruction cache accesses",
		[ 7] = "Cycles without Instn fetch due to I-cache miss",
		[ 8] = "Cycles waiting for direct Instn fetch",
		[ 9] = "Replays in IFU due to full Instn buffer",
		[13] = "Cycles with no Instn to ALU due to full buffer",
		[14] = "Cycles with no Instn to ALU due to no free ALU CB",
		[15] = "Cycles without Instn added to ALU due to no free FIFOs",
		[16] = "Cycles with no ALU-pipe issue: no Instn avail.",
		[17] = "Cycles with no ALU-pipe issue: no operands ready",
		[18] = "Cycles with no ALU-pipe issue: ressource busy",
		[19] = "ALU pipe-bubbles issued",
		[20] = "Cycles with no Instn issued",
		[21] = "Out-of-order ALU issue",
		[22] = "Graduated JAR/JALR.HB",
		[23] = "Cacheable loads",
		[24] = "D-Cache writebacks",
		[26] = "D-side JTLB accesses",
		[28] = "L2 cache writebacks",
		[29] = "L2 cache misses",
		[30] = "Pipe stalls due to full FSB",
		[31] = "Pipe stalls due to full LDQ",
		[32] = "Pipe stalls due to full WBB",
		[35] = "Redirects following optimistic instn issue which failed",
		[36] = "JR (not r31) instructions",
		[37] = "Branch-likely instns graduated",
		[38] = "L2 I-miss stall cycles",
		[39] = "Branches graduated",
		[40] = "Integer instns graduated",
		[41] = "Loads graduated",
		[42] = "j/ja1 graduated",
		[43] = "Co-ops graduated",
		[44] = "DSP instructions graduated",
		[45] = "DSP branch instructions graduated",
		[46] = "Uncached loads graduated",
		[49] = "EJTAG Instruction Triggerpoints",
		[50] = "CP1 branches mispredicted",
		[51] = "sc instructions graduated",
		[52] = "prefetch instns graduated top of LSGB",
		[53] = "Cycles where no instns graduated",
		[54] = "Cycles where one instn graduated",
		[55] = "GFifo blocked cycles",
		[56] = "Cycles where 0 instns graduated",
		[58] = "Exceptions taken",
		[59] = "Impl. specific CorExtend event",
		[62] = "Impl. specific ISPRAM event",
		[63] = "L2 single bit errors corrected"
	},
	[1] = {
		[ 0] = "Cycles",
		[ 1] = "Instructions completed",
		[ 2] = "JR r31 mispredictions",
		[ 3] = "JR r31 not predicted",
		[ 4] = "ITLB misses",
		[ 5] = "JTLB instruction access fails",
		[ 6] = "Instruction cache misses",
		[ 7] = "L2 I-miss cycles",
		[ 8] = "PDTrace back stalls",
		[ 9] = "Fetch slots killed in IFU",
		[13] = "AGEN issue pool full",
		[14] = "run out of AGEN CBs",
		[15] = "IOIQ FIFO full",
		[16] = "No instns avail. for AGEN-pipe issue",
		[17] = "No operands avail. for AGEN-pipe issue",
		[18] = "No AGEN-pipe issue, waiting for data",
		[20] = "Cycles with two instns issued",
		[21] = "Out-of-order AGEN issue",
		[22] = "D-cache line refill (not LD/ST misses)",
		[23] = "All D-cache accesses",
		[24] = "D-Cache misses",
		[25] = "D-side JTBL translt. fails",
		[26] = "Bogus D-ache misses",
		[28] = "L2 cache accesses",
		[29] = "L2 cache misses",
		[30] = "FSB >1/2 full",
		[31] = "LDQ >1/2 full",
		[32] = "WBB >1/2 full",
		[35] = "Copro. load instns.",
		[36] = "jr $31 graduated after mispredict",
		[37] = "CP1/CP2 conditional branch instns. graduated",
		[38] = "Mispredicted branch-like ins. graduated",
		[39] = "Mispredicted branches graduated",
		[40] = "FPU instructions graduated",
		[41] = "Stores graduated",
		[42] = "MIPS16 instn. graduated",
		[43] = "integer multiply/divide graduated",
		[44] = "ALU-DSP graduated, result saturated",
		[45] = "MDU-DSP graduated, result saturated",
		[46] = "Uncached stores graduated",
		[49] = "EJTAG data triggers",
		[51] = "sc instrns. failed",
		[52] = "prefetch instns. cache hits",
		[53] = "load misses graduated",
		[54] = "Two instns. graduated",
		[55] = "Floating point stores graduated",
		[56] = "Cycles where 0 instns. graduated",
		[58] = "Replays initiated from graduation",
		[59] = "Impl. specific system event",
		[61] = "Reserved for CP2 event",
		[62] = "Impl. specific DSPRAM block event"
	}
};
#else/*--- #if defined(CONFIG_CPU_MIPS_74K) ---*/
/*--- event names used for all other CPUs; "(P)" / "(V)" tags are evaluated by init_tc_stat() ---*/
static const char *_performance_counter_options[2][128] = {
	[0] = {
		[ 0] = "Cycles (P)",
		[ 1] = "Instructions completed",
		[ 2] = "branch instructions completed",
		[ 3] = "JR r31 (return) instructions",
		[ 4] = "JR (not r31) instructions",
		[ 5] = "ITLB accesses",
		[ 6] = "DTLB accesses",
		[ 7] = "JTLB instruction accesses",
		[ 8] = "JTLB data accesses",
		[ 9] = "Instruction Cache accesses",
		[10] = "Data cache load/stores",
		[11] = "Data cache load/store misses",
		[13] = "Store misses",
		[14] = "integer instructions completed",
		[15] = "loads completed",
		[16] = "J/JAL completed",
		[17] = "no-ops completed",
		[18] = "Main pipeline stalls (P)",
		[19] = "SC instructions completed",
		[20] = "Prefetch instructions to cached addresses",
		[21] = "L2 cache writebacks (P)",
		[22] = "L2 cache misses (P)",
		[23] = "Exceptions taken",
		[24] = "cache fixup",
		[25] = "IFU stall cycles (P)",
		[26] = "DSP Instructions Completed",
		[28] = "Impl. specific PM event",
		[29] = "Impl. specific ISPRAM event",
		[30] = "Impl. specific CorExtend event",
		[31] = "Impl. specific customer yield manager event",
		[32] = "ITC loads",
		[33] = "Uncached loads",
		[34] = "fork instructions completed",
		[35] = "CP2 register-to-register Instns Completed",
		[36] = "Intervention stall main pipe (P)",
		[37] = "I$ Miss Stall cycles",
		[38] = "SYNC stall cycles",
		[39] = "D$ miss cycles (P)",
		[40] = "Uncached stall cycles",
		[41] = "MDU stall cycles",
		[42] = "CP2 stall cycles",
		[43] = "ISPRAM Stall Cycles",
		[44] = "CACHE Instn stall cycles (P)",
		[45] = "Load to Use stalls",
		[46] = "Read-CP0-value interlock stalls",
		[47] = "Relax bubbles (V)",
		[48] = "IFU FB full refetches",
		[49] = "EJTAG Instruction Triggerpoints",
		[50] = "FSB < 1/4 full (P)",
		[51] = "FSB > 1/2 full (P)",
		[52] = "LDQ < 1/4 full (P)",
		[53] = "LDQ > 1/2 full (P)",
		[54] = "WBB < 1/4 full (P)",
		[55] = "WBB > 1/2 full (P)",
		[56] = "Intervention Hits (P)",
		[57] = "All Invalidates (P)",
		[58] = "Evictions (P)",
		[59] = "ST_Inval (P)",
		[60] = "ST_Store_S",
		[61] = "Request Latency to Self Intervention (P)",
		[62] = "Request Latency to Read Response (P)",
		[64] = "System Specific event 0 (P)",
		[65] = "System Specific event 2 (P)",
		[66] = "System Specific event 4 (P)",
		[67] = "System Specific event 6 (P)",
	},
	[1] = {
		[ 0] = "Cycles (P)",
		[ 1] = "Instructions completed",
		[ 2] = "Branch mispredictions",
		[ 3] = "JR r31 mispredictions",
		[ 4] = "JR r31 not predicted",
		[ 5] = "ITLB misses",
		[ 6] = "DTLB misses",
		[ 7] = "JTLB instruction misses",
		[ 8] = "JTLB data misses",
		[ 9] = "Instruction cache misses",
		[10] = "Data cache writebacks",
		[11] = "Data cache load/store misses",
		[13] = "Load misses",
		[14] = "FPU instructions completed",
		[15] = "stores completed",
		[16] = "MIPS16 instructions completed",
		[17] = "integer multiply/divide completed",
		[18] = "replay traps (other than uTLB)",
		[19] = "SC instructions failed",
		[20] = "Prefetch instructions completed with cache hit",
		[21] = "L2 cache accesses (P)",
		[22] = "L2 cache single bit errors corrected (P)",
		[23] = "Cycles spent in Single Threaded Mode",
		[24] = "Refetches: refetched and reissued by IFU",
		[25] = "ALU stalls (P)",
		[26] = "ALU-DSP Saturations Done",
		[27] = "MDU-DSP Saturations Done",
		[28] = "Impl. specific Cp2 event",
		[29] = "Impl. specific DSPRAM event",
		[31] = "Custom ITC event",
		[32] = "ITC Stores",
		[33] = "Uncached Stores",
		[34] = "yield instructions completed",
		[35] = "CP2 To/From Instns completed",
		[36] = "Intevention response stalled on miss (P)",
		[37] = "D$ miss stall cycles",
		[38] = "FSB stall cycles",
		[39] = "L2 miss cycles (P)",
		[40] = "ITC stall cycles",
		[41] = "FPU stall cycles",
		[42] = "CorExtend stall cycles",
		[43] = "DSPRAM stall cycles",
		[44] = "Long stall cycle",
		[45] = "ALU to AGEN stalls",
		[46] = "Branch mispredict stalls (P)",
		[47] = "Number of corrected ECC errors in the L1 Data Cache or DSPRAM (V)",
		[48] = "FB entry allocated (P)",
		[49] = "EJTAG Data Triggerpoints",
		[50] = "FSB 1/4-1/2 full (P)",
		[51] = "FSB full pipeline stalls (P)",
		[52] = "LDQ 1/4-1/2 full (P)",
		[53] = "LDQ full pipeline stalls (P)",
		[54] = "WBB 1/4-1/2 full (P)",
		[55] = "WBB full pipeline stalls (P)",
		[56] = "All Intervention (P)",
		[57] = "Invalidate Hits (P)",
		[58] = "Writebacks (P)",
		[59] = "ST_Exclusive (P)",
		[60] = "ST_Downgrade (P)",
		[61] = "Request Count for SI Latency (P)",
		[62] = "Request Count for Resp Latency (P)",
		[64] = "System Specific event 1 (P)",
		[65] = "System Specific event 3 (P)",
		[66] = "System Specific event 5 (P)",
		[67] = "System Specific event 7 (P)",
	}
};
#endif/*--- #else ---*//*--- #if defined(CONFIG_CPU_MIPS_74K) ---*/
/*--------------------------------------------------------------------------------*\
 * Simple measurement with "fixed" performance counters
\*--------------------------------------------------------------------------------*/
struct _performance_count_tc_stat {
	unsigned int perf_ctrl[PROFILING_MAX_PERF_REGISTER]; /*--- performance counter mode ---*/
	unsigned int last_perf_count[PROFILING_MAX_PERF_REGISTER];
	unsigned long long sum_perf_count[PROFILING_MAX_PERF_REGISTER];
};
#define MAX_PERF_REG 2 struct _yield_profiling; /*--------------------------------------------------------------------------------*\ * Liste mit Round-Robin-Performance-Counter pro TC \*--------------------------------------------------------------------------------*/ struct _perf_ctrl_tc_stat { unsigned int perf_ctrl[MAX_PERF_REG]; unsigned long long sum_perf_time[MAX_PERF_REG]; unsigned long long sum_perf_count[MAX_PERF_REG]; void (*proceed)(struct seq_file *seq, struct _yield_profiling *profile_entry, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec); const char *prefix; const char *prefix2; struct _perf_ctrl_tc_stat *next; }; #if defined(PROFILING_IN_YIELD) /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ struct _yield_profiling { int yield_handle; int yield_signal; int yield_profile_id; unsigned int linux_os_cpu; unsigned int core; unsigned int uart_trace; /*--- UART-Trace ueber GPIO's ---*/ unsigned int uart_trace_cnt; unsigned int ntc; unsigned int profiling_tc; unsigned int long long tsum; unsigned int last_time; unsigned int tc_to_cpu_nr[PROFILING_MAX_COUNT_TCS]; struct _performance_count_tc_stat perf_tcstat[PROFILING_MAX_COUNT_TCS]; struct _perf_ctrl_tc_stat *act_tc_stat[PROFILING_MAX_COUNT_TCS]; struct _perf_ctrl_tc_stat tc_stat[PROFILING_MAX_COUNT_TCS][32]; }; static struct _yield_profiling gYieldProfiler[PROFILING_CORES]; /*--------------------------------------------------------------------------------*\ * Per-TC-Table initial fuer Round-Robin Mode vorbereiten * mit dem Flag wird gesagt, ob dieser Thread nur Performance-Counter pro Prozessor * nehmen soll (der Monitor-TC ist dafuer vorgesehen) * tc_stat_table: zu initialisierende Table * Die Liste wird als Ringliste initialisiert \*--------------------------------------------------------------------------------*/ static void init_tc_stat(struct _perf_ctrl_tc_stat 
tc_stat_table[], const struct _perf_ctrl_tc_stat tc_ctrl_ref[], unsigned int entries, unsigned int tc, unsigned int per_processor) { unsigned int reg, i; for( i = 0; i < entries; i++) { struct _perf_ctrl_tc_stat *table = &tc_stat_table[i]; table->proceed = tc_ctrl_ref[i].proceed; table->prefix = tc_ctrl_ref[i].prefix; table->prefix2 = tc_ctrl_ref[i].prefix2; for(reg = 0; reg < ARRAY_SIZE(table->perf_ctrl); reg++) { unsigned int event; table->sum_perf_count[reg] = 0; table->sum_perf_time[reg] = 0; event = GET_MIPS_PERFORMANCE_EVENT(tc_ctrl_ref[i].perf_ctrl[reg]); if(performance_counter_options(reg, event) == NULL) { printk(KERN_ERR"[simple-profiling]Warning: Performance-Counter ctrl%u Option %u do not exist \n", reg, event); continue; } if(per_processor) { if(strstr(performance_counter_options(reg, event), "(P)") == NULL) { printk(KERN_ERR"[simple-profiling]Warning: Performance-Counter ctrl%u Option %u (%s) doesn't support per-Processor-Tracing\n", reg, event, performance_counter_options(reg, event)); } } else { if(strstr(performance_counter_options(reg, event), "(P)") || strstr(performance_counter_options(reg, event), "(V)")) { printk(KERN_ERR"[simple-profiling]Warning: Performance-Counter ctrl%u Option %u (%s) doesn't support per-TC-Tracing\n", reg, event, performance_counter_options(reg, event)); } } table->perf_ctrl[reg] = MIPS_PERFORMANCE_EVENT(event) | MIPS_PERFORMANCE_TC_SPECIFIC_ENABLE | MIPS_PERFORMANCE_TCID(tc) | MIPS_PERFORMANCE_USER_MODE_ENABLE | MIPS_PERFORMANCE_KERNEL_MODE_ENABLE | MIPS_PERFORMANCE_EXCEPTION_ENABLE | MIPS_PERFORMANCE_SUPERVISOR_MODE_ENABLE; /*--- printk(KERN_ERR"TC%x ctrl%x Option %2u (%25s) : %08x\n", tc, reg, event, performance_counter_options(reg, event), table->perf_ctrl[reg]); ---*/ } table->next = &tc_stat_table[(i + 1) % entries]; /*--- als Ringliste ---*/ } } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ 
static char *get_percent(char *txt, int size, unsigned long cnt, unsigned long norm) { unsigned long long val_main, val_remainder; if(norm == 0) { return " ? "; } val_main = (unsigned long long)cnt * 100; do_div(val_main, norm); val_remainder = (unsigned long long)cnt * (100 * 100); do_div(val_remainder, norm); snprintf(txt, size, "%3lu.%02lu %%", (unsigned long)val_main, (unsigned long)val_remainder % 100); return txt; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static char *get_relation(char *txt, int size, unsigned long cnt, unsigned long norm) { unsigned long long val_main, val_remainder; if(norm == 0) { return " ? %"; } val_main = (long long)cnt; do_div(val_main, norm); val_remainder = (long long)cnt * 1000; do_div(val_remainder, norm); snprintf(txt, size, "%3lu.%03lu", (unsigned long)val_main, (unsigned long)val_remainder % 1000); return txt; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ #define SHIFT_FACTOR 8 /*--- auf 2^x-tel Sekunde genau ---*/ static inline unsigned long norm_per_sec(unsigned long long count, unsigned long long cycle) { unsigned long long sec = cycle; do_div(sec, ((gCycle_per_usec * 1000 * 1000) >> SHIFT_FACTOR)); if(sec == 0) { return 0; } count <<= SHIFT_FACTOR; do_div(count, sec); return (unsigned long)count; } /*--------------------------------------------------------------------------------*\ * Achtung! fixe Definition Entry0, Reg1 des PROFILING_MAX_COUNT_TCS muss Cycle-Counter sein ! 
\*--------------------------------------------------------------------------------*/ static unsigned long norm_cycle_sec(struct _yield_profiling *profile_entry) { struct _perf_ctrl_tc_stat *pstat = &profile_entry->tc_stat[profile_entry->profiling_tc][0]; if(GET_MIPS_PERFORMANCE_EVENT(pstat->perf_ctrl[1]) != 0) { printk("[simple-profiling]%s: error: cycle-counter not on position 0 - reg1\n", __func__); return 0; } return norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static unsigned long long all_instruction_completed(struct _yield_profiling *profile_entry) { unsigned int tc; unsigned long long all_instruction = 0; for(tc = 0; tc < profile_entry->ntc; tc++) { struct _performance_count_tc_stat *pperf_tcstat = &profile_entry->perf_tcstat[tc]; all_instruction += pperf_tcstat->sum_perf_count[2]; } return all_instruction; } #if 0 /*--------------------------------------------------------------------------------*\ * eine Option ueber alle TC's aufsummieren \*--------------------------------------------------------------------------------*/ static unsigned long long sum_tc_option(struct _yield_profiling *profile_entry, unsigned int reg, unsigned int option, unsigned long long *_tsum) { struct _perf_ctrl_tc_stat *table, *tc_stat; unsigned long long sum = 0, tsum = 0; unsigned int tc; if(reg >= ARRAY_SIZE(table->perf_ctrl)) { *_tsum = 0; return 0; } for ( tc = 0; tc < PROFILING_MAX_COUNT_TCS; tc++){ table = profile_entry->tc_stat[tc]; tc_stat = table; while(table) { if(table->perf_ctrl[reg] == option) { sum += table->sum_perf_count[reg]; tsum += table->sum_perf_time[reg]; } table = table->next; if(table == tc_stat) { break; } } } *_tsum = tsum; return sum; } #endif /*--------------------------------------------------------------------------------*\ 
\*--------------------------------------------------------------------------------*/ static void instruction_per_cycle(struct seq_file *seq, struct _yield_profiling *profile_entry, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec __maybe_unused){ char txt[3][128]; unsigned long stall_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long cycle_norm = norm_cycle_sec(profile_entry); unsigned long cpu_freq = avm_get_clock(avm_clock_id_cpu); unsigned long instruction_norm = norm_per_sec(all_instruction_completed(profile_entry), profile_entry->tsum); if(cycle_norm == 0) { return; } seq_printf(seq, "%s %lu MHz: Run: %s Instructions/Cycle %s Stalls/Instruction: %s\n", pstat->prefix ? pstat->prefix : "", cpu_freq / (1000 * 1000), get_percent(txt[0], sizeof(txt[0]), cycle_norm, cpu_freq), get_relation(txt[1], sizeof(txt[1]), instruction_norm, cycle_norm), get_relation(txt[2], sizeof(txt[2]), stall_norm, instruction_norm) ); } #if 0 /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void relation(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec __maybe_unused){ char txt[128]; unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); if(p1_norm == 0) { return; } seq_printf(seq, "%-33s: 1.00 : %s\n", pstat->prefix, get_relation(txt, sizeof(txt), p2_norm, p1_norm)); } #endif /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void sum_instruction_relation(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long 
inst_per_sec __maybe_unused){ char txt[128]; unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); seq_printf(seq, "%-33s: %s (normed on instr)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm + p2_norm, inst_per_sec)); } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void param_instruction_relation(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec __maybe_unused){ char txt[128]; if(pstat->prefix) { unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); seq_printf(seq, "%-33s: %s (normed on instr)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm, inst_per_sec)); } if(pstat->prefix2) { unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); seq_printf(seq, "%-33s: %s (normed on instr)\n", pstat->prefix2, get_percent(txt, sizeof(txt), p2_norm, inst_per_sec)); } } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void param_cycle_relation(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec __maybe_unused){ unsigned long cycle_norm = norm_cycle_sec(profile_entry); char txt[128]; if(pstat->prefix) { unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); /*--- seq_printf(seq, "debug:%s: p1_norm=%lu cycle_per_sec: %lu\n", pstat->prefix, p1_norm, cycle_norm); ---*/ seq_printf(seq, "%-33s: %s (normed on cycle)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm, cycle_norm)); } if(pstat->prefix2) { unsigned 
long p2_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); /*--- seq_printf(seq, "debug:%s: p1_norm=%lu cycle_per_sec: %lu\n", pstat->prefix, p2_norm, cycle_norm); ---*/ seq_printf(seq, "%-33s: %s (normed on cycle)\n", pstat->prefix2, get_percent(txt, sizeof(txt), p2_norm, cycle_norm)); } } /*--------------------------------------------------------------------------------*\ * Cnt im Verhaeltnis zu den Instructions \*--------------------------------------------------------------------------------*/ static void relation_instruction_and_pair(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec){ char txt[128], txt2[128]; unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); /*--- seq_printf(seq, "debug:%s: p1_norm=%lu p2_norm=%lu inst_per_sec: %lu\n", pstat->prefix, p1_norm, p2_norm, inst_per_sec); ---*/ seq_printf(seq, "%-33s: %s (normed on instr) %s: %s (normed on option)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm, inst_per_sec), pstat->prefix2 ? 
pstat->prefix2 : "misses ", get_percent(txt2, sizeof(txt2), p2_norm, p1_norm) ); } /*--------------------------------------------------------------------------------*\ * Cnt im Verhaeltnis zu den Instructions \*--------------------------------------------------------------------------------*/ static void relation_instruction_and_pair_swap(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec){ char txt[128], txt2[128]; unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); seq_printf(seq, "%-33s: %s (normed on instr) %s: %s (normed on option)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm, inst_per_sec), pstat->prefix2 ? pstat->prefix2 : "misses ", get_percent(txt2, sizeof(txt2), p2_norm, p1_norm) ); } #if 0 /*--------------------------------------------------------------------------------*\ * Cnt im Verhaeltnis zu den Cycles \*--------------------------------------------------------------------------------*/ static void relation_cycle_and_pair(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec __maybe_unused) { char txt[128], txt2[128]; unsigned long cycle_norm = norm_cycle_sec(profile_entry); unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); seq_printf(seq, "%-33s: %s (normed on cycle) %s: %s (normed on option)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm, cycle_norm), pstat->prefix2 ? 
pstat->prefix2 : "misses ", get_percent(txt2, sizeof(txt2), p2_norm, p1_norm) ); } /*--------------------------------------------------------------------------------*\ * Cnt im Verhaeltnis zu den Cycles \*--------------------------------------------------------------------------------*/ static void relation_cycle_and_pair_swap(struct seq_file *seq, struct _yield_profiling *profile_entry __maybe_unused, struct _perf_ctrl_tc_stat *pstat, unsigned long inst_per_sec __maybe_unused) { char txt[128], txt2[128]; unsigned long cycle_norm = norm_cycle_sec(profile_entry); unsigned long p2_norm = norm_per_sec(pstat->sum_perf_count[0], pstat->sum_perf_time[0]); unsigned long p1_norm = norm_per_sec(pstat->sum_perf_count[1], pstat->sum_perf_time[1]); seq_printf(seq, "%-33s: %s (normed on cycle) %s: %s (normed on option)\n", pstat->prefix, get_percent(txt, sizeof(txt), p1_norm, cycle_norm), pstat->prefix2 ? pstat->prefix2 : "misses ", get_percent(txt2, sizeof(txt2), p2_norm, p1_norm) ); } #endif /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static const struct _perf_ctrl_tc_stat performance_per_processor[] = { { .prefix = "Summary:", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(18), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(0) , .proceed = instruction_per_cycle }, /*--- Stall Cycles, Cycles ---*/ #if defined(CONFIG_SOC_GRX500) { .prefix ="L2 Cache Access", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(22), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(21), .proceed = relation_instruction_and_pair_swap }, /*--- L2 Cache Misses , L2 Cache accesses ---*/ { .prefix ="L2 Cache WB", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(21), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(22), .proceed = param_instruction_relation }, /*--- L2 Cache WB, L2 Cache Single Bit Error Corrected ---*/ #endif/*--- #if defined(CONFIG_SOC_GRX500) ---*/ { .prefix = "Fetch-Unit-Stalls", .prefix2 = "ALU-Stalls", 
.perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(25), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(25), .proceed = param_cycle_relation }, /*--- IFU stall cycles , ALU stall cycles---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(39), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(39), .proceed = NULL }, /*--- D$ miss cycles , L2 miss cycles---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(44), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(46), .proceed = NULL }, /*--- CACHE Instn stall cycles , Branch misspredict stall cycles ---*/ /*--- { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(50), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(50), .proceed = NULL }, ---*//*--- FSB < 1/4 , 1/4 < FSB > 1/2 ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(51), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(51), .proceed = NULL }, /*--- FSB > 1/2 , 1/4 < FSB full pipline stall cycles ---*/ /*--- { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(52), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(52), .proceed = NULL }, ---*//*--- LDQ < 1/4 , 1/4 < LDQ > 1/2 ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(53), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(53), .proceed = NULL }, /*--- LDQ > 1/2 , 1/4 < LDQ full pipline stall cycles ---*/ /*--- { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(54), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(54), .proceed = NULL }, ---*//*--- WBB < 1/4 , 1/4 < WBB > 1/2 ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(55), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(55), .proceed = NULL }, /*--- WBB > 1/2 , 1/4 < WBB full pipline stall cycles ---*/ /*--- { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(56), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(56), .proceed = NULL }, ---*//*--- Intervention Hits, All Interventions ---*/ /*--- { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(57), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(57), .proceed = NULL }, ---*//*--- All Invalidates, Invalidate Hits ---*/ /*--- { .prefix = "", .perf_ctrl[0] 
= MIPS_PERFORMANCE_EVENT(58), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(58), .proceed = NULL }, ---*//*--- Evictions , Writebacks ---*/ }; #if defined(CONFIG_SOC_GRX500) #define MIP16_INSTRUCTION_STRING NULL #else/*--- #if defined(CONFIG_SOC_GRX500) ---*/ #define MIP16_INSTRUCTION_STRING "MIPS16-Instruction" #endif/*--- #else ---*//*--- #if defined(CONFIG_SOC_GRX500) ---*/ /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static const struct _perf_ctrl_tc_stat performance_per_tc[] = { { .prefix = "Branch Instruction", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(2), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(2), .proceed = relation_instruction_and_pair }, /*--- branch instructions, branch mispredictions ---*/ { .prefix = "Jump Return", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(3), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(3), .proceed = relation_instruction_and_pair }, /*--- JR r31 (return) instructions, JR r31 mispredictions ---*/ { .prefix = "J/JAL", .prefix2 = MIP16_INSTRUCTION_STRING, .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(16), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(16), .proceed = param_instruction_relation }, /*--- J/JAL completed, MIP16 instructions completed ---*/ /*--- { .prefix = "JR not ra", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(4), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(4), .proceed = relation_instruction_and_pair}, ---*//*--- JR not r31 instructions, JR r31 not predicted ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(5), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(5), .proceed = NULL }, /*--- ITLB accesses, ITLB misses ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(6), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(6), .proceed = NULL }, /*--- DTLB accesses, DTLB misses ---*/ { .prefix = "Instruction-TLB accesses", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(7), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(7), .proceed = 
relation_instruction_and_pair }, /*--- JTLB instructions accesses, JTLB instructions misses ---*/ { .prefix = "Data-TLB accesses", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(8), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(8), .proceed = relation_instruction_and_pair }, /*--- JTLB data accesses, JTLB data misses ---*/ { .prefix = "Instruction cache accesses", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(9), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(9), .proceed = relation_instruction_and_pair }, /*--- Instn Cache accesses, Instn cache misses ---*/ { .prefix = "Data cache access", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(10), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(11), .proceed = relation_instruction_and_pair }, /*--- Data Cache accesses, Data cache misses ---*/ { .prefix = "Store access", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(13), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(15), .proceed = relation_instruction_and_pair_swap }, /*--- Store misses, Store completed ---*/ { .prefix = "Load access", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(15), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(13), .proceed = relation_instruction_and_pair}, /*--- loads completed, load miss ---*/ { .prefix = "DSP Inst + Mul/Div-Instruction", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(26), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(17), .proceed = sum_instruction_relation }, /*--- integer instruction completed, integer/divide completed ---*/ /*--- { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(20), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(20), .proceed = NULL }, ---*//*--- Prefetch instruction completed, Prefetch instruction completed with cache hit ---*/ { .prefix = "Exceptions", .prefix2 = "Data cache WB", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(23), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(10), .proceed = param_instruction_relation }, /*--- Exception taken, Data cache WB ---*/ { .prefix = "Uncached Load/Stores", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(33), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(33), .proceed = 
sum_instruction_relation }, /*--- Uncached Load, Uncached Stored ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(37), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(37), .proceed = NULL }, /*--- I$ Miss stall cycles, D$ Miss stall cycles ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(38), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(38), .proceed = NULL }, /*--- SYNC stall cycles, FSB conflict stall cycles ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(40), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(40), .proceed = NULL }, /*--- Uncached stall cycles, ITC stall cycles ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(41), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(42), .proceed = NULL }, /*--- MDU stall cycles, CoreExtend stall cycles ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(43), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(43), .proceed = NULL }, /*--- ISPRAM stall cycles, DSPRAM stall cycles ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(45), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(44), .proceed = NULL }, /*--- Load to Use stall, Long stall cycle ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(48), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(45), .proceed = NULL }, /*--- IFU FB full refetches, ALU to AGEN stalls cycle ---*/ { .prefix = "", .perf_ctrl[0] = MIPS_PERFORMANCE_EVENT(19), .perf_ctrl[1] = MIPS_PERFORMANCE_EVENT(19), .proceed = NULL }, /*--- SC, failed SC ---*/ }; /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void init_all_tc_stat(struct _yield_profiling *profile_entry) { unsigned int tc; for(tc = 0; tc < PROFILING_MAX_COUNT_TCS; tc++) { if(tc == profile_entry->profiling_tc) { init_tc_stat(profile_entry->tc_stat[tc], performance_per_processor, min(ARRAY_SIZE(profile_entry->tc_stat[tc]), ARRAY_SIZE(performance_per_processor)), tc, 1); } else { 
init_tc_stat(profile_entry->tc_stat[tc], performance_per_tc, min(ARRAY_SIZE(profile_entry->tc_stat[tc]), ARRAY_SIZE(performance_per_tc)), tc, 0); } } } #endif/*--- #if defined(PROFILING_IN_YIELD) ---*/ #if defined(PROFILING_IN_YIELD) /*--------------------------------------------------------------------------------*\ * Cleanup der Ringliste \*--------------------------------------------------------------------------------*/ static void clean_tc_stat(struct _perf_ctrl_tc_stat *tc_stat) { struct _perf_ctrl_tc_stat *table = tc_stat; unsigned int reg; while(table) { for(reg = 0; reg < ARRAY_SIZE(table->perf_ctrl); reg++) { table->sum_perf_count[reg] = 0; table->sum_perf_time[reg] = 0; } table = table->next; if(table == tc_stat) { break; } } } /*--------------------------------------------------------------------------------*\ * setze neue Ctl's-Modes \*--------------------------------------------------------------------------------*/ static void set_tc_perf_mod(struct _perf_ctrl_tc_stat *tc_stat, unsigned int mode) { struct _perf_ctrl_tc_stat *table = tc_stat; unsigned int reg; while(table) { for(reg = 0; reg < ARRAY_SIZE(table->perf_ctrl); reg++) { table->perf_ctrl[reg] &= ~MIPS_PERFORMANCE_MODE_MASK; table->perf_ctrl[reg] |= (mode & MIPS_PERFORMANCE_MODE_MASK); } table = table->next; if(table == tc_stat) { break; } } } #else/*--- #if defined(PROFILING_IN_YIELD) ---*/ #define clean_tc_stat(a) #define set_tc_perf_mod(a,b) #endif/*--- #else ---*//*--- #if defined(PROFILING_IN_YIELD) ---*/ #if defined(PROFILING_IN_YIELD) /*--------------------------------------------------------------------------------*\ * Summiert die letzte Messung des aktuellen Perofrmanc-Counters und setzt auf neuen * Counter * return: Zeiger auf naechsten Eintrag \*--------------------------------------------------------------------------------*/ static struct _perf_ctrl_tc_stat *set_tc_stat(struct _perf_ctrl_tc_stat *entry, unsigned int perf_cnt[], unsigned int last_perf_count[], unsigned int 
meassure_time) { struct _perf_ctrl_tc_stat *next = entry->next; unsigned int reg; for(reg = 0; reg < ARRAY_SIZE(entry->sum_perf_count); reg++) { unsigned int diff = (perf_cnt[reg] - last_perf_count[reg]); if(diff < (1U << 31)) { entry->sum_perf_count[reg] += (unsigned long long)(diff); entry->sum_perf_time[reg] += (unsigned long long)(meassure_time); } else { printk(KERN_ERR"[simple-profiling]%s: perf%u ovr %u > %u: ignore\n", __func__, reg, last_perf_count[reg], perf_cnt[reg]); } if(next) { switch(reg) { case 0: write_tc_c0_perfctrl0(next->perf_ctrl[0]); break; case 1: write_tc_c0_perfctrl1(next->perf_ctrl[1]); break; default: printk(KERN_ERR"[simple-profiling]%s: no suported reg %u\n", __func__, reg); } } } return next; } #endif/*--- #if defined(PROFILING_IN_YIELD) ---*/ #if defined(PROFILING_IN_YIELD) /*--------------------------------------------------------------------------------*\ * Simple Messung mit "festen" Performance-Countern \*--------------------------------------------------------------------------------*/ static void init_simple_performance_count_tc_stat(struct _performance_count_tc_stat *ptcstat, unsigned int ctrl, unsigned int cnt, unsigned int reg) { ptcstat->sum_perf_count[reg] = 0; ptcstat->last_perf_count[reg] = cnt; ptcstat->perf_ctrl[reg] = ctrl; } #else/*--- #if defined(PROFILING_IN_YIELD) ---*/ #define init_simple_performance_count_tc_stat(a, ctrl,cnt, reg) #endif/*--- #else ---*//*--- #if defined(PROFILING_IN_YIELD) ---*/ #if defined(PROFILING_IN_YIELD) /*--------------------------------------------------------------------------------*\ * Simple Messung mit "festen" Performance-Countern \*--------------------------------------------------------------------------------*/ static void set_simple_performance_count_tc_stat(int tc, struct _performance_count_tc_stat tcstat[], unsigned int perf_cnt[]) { struct _performance_count_tc_stat *ptcstat = &tcstat[tc]; unsigned int i; for(i = 0; i < ARRAY_SIZE(ptcstat->sum_perf_count); i++) { unsigned int 
diff = (perf_cnt[i] - ptcstat->last_perf_count[i]); if(diff < (1U << 31)) { ptcstat->sum_perf_count[i] += (unsigned long long)(diff); } else { printk(KERN_ERR"[simple-profiling]%s: tc=%u perf%u ovr %u > %u: ignore\n", __func__, tc, i, ptcstat->last_perf_count[i], perf_cnt[i]); } ptcstat->last_perf_count[i] = perf_cnt[i]; } } #endif/*--- #if defined(PROFILING_IN_YIELD) ---*/ /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static char *print_performance_counter_mode(char *str, int str_len, unsigned int mode) { char txt[32]; if(mode & MIPS_PERFORMANCE_TC_SPECIFIC_ENABLE) { sprintf(txt, "TC-ID=%u", GET_MIPS_PERFORMANCE_TCID(mode)); } else if(mode & MIPS_PERFORMANCE_TC_SPECIFIC_ENABLE) { sprintf(txt, "VPE-ID=%u", GET_MIPS_PERFORMANCE_VPEID(mode)); } else { txt[0] = 0; } snprintf(str, str_len, "%s%s%s%s%s", (MIPS_PERFORMANCE_USER_MODE_ENABLE & mode) ? "User " : "", (MIPS_PERFORMANCE_SUPERVISOR_MODE_ENABLE & mode) ? "Supervisor " : "", (MIPS_PERFORMANCE_KERNEL_MODE_ENABLE & mode) ? "Kernel " : "", (MIPS_PERFORMANCE_EXCEPTION_ENABLE & mode) ? 
"Exception " : "", txt ); return str; } /*--------------------------------------------------------------------------------*\ * liefert Anzahl der Performance-Counter * bei InterAptiv/34K inlusive tcschedFeedBack \*--------------------------------------------------------------------------------*/ static unsigned int mips_get_performance_counter_nr(void){ unsigned int perf_max_registers = 0; unsigned int val; val = read_c0_config1(); /*--- printk(KERN_ERR"%s: config1: %08x\n", __func__, val); ---*/ if((val & (1 << 4)) == 0) { return perf_max_registers; } for(;;){ perf_max_registers++; switch(perf_max_registers) { case 1: val = read_c0_perfctrl0(); break; case 2: val = read_c0_perfctrl1(); break; case 3: val = read_c0_perfctrl2(); break; case 4: val = read_c0_perfctrl3(); break; default: val = 0; } /*--- printk(KERN_ERR"%s: perfctl%u: %08x\n", __func__, perf_max_registers - 1, val); ---*/ if(!(val & MIPS_PERFORMANCE_HAS_MORE)) { break; } } #if defined(PROFILING_CPU_HAS_TC) perf_max_registers += 1; /*--- tcschedFeedBack hinzu ---*/ #endif/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ return perf_max_registers; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static unsigned int supported_tcs(void){ #if defined(PROFILING_CPU_HAS_TC) unsigned int mvpconf0; mvpconf0 = read_c0_mvpconf0(); return ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; #else/*--- #if defined(CONFIG_CPU_MIPS_34K) ---*/ return 1; #endif/*--- #else ---*//*--- #if defined(CONFIG_CPU_MIPS_34K) ---*/ } #define vpflags flags[0] #define sys_flag flags[1] #define old_tc flags[2] #define haltval flags[3] /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static inline unsigned int set_tcmode(unsigned int tc __maybe_unused, unsigned long flags[] __maybe_unused, int 
lock __maybe_unused) { #if defined(PROFILING_CPU_HAS_TC) if(lock) spin_lock_irqsave(&perfcnt_lock, sys_flag); vpflags = dvpe(); old_tc = read_c0_vpecontrol() & VPECONTROL_TARGTC; settc(tc); if(!(read_tc_c0_tcstatus() & TCSTATUS_A)){ settc(old_tc); evpe(vpflags); if(lock) spin_unlock_irqrestore(&perfcnt_lock, sys_flag); return 1; } if (read_tc_c0_tcbind() == (unsigned)read_c0_tcbind()) { /* Are we dumping ourself? */ haltval = 0; /* Then we're not halted, and mustn't be */ } else { haltval = read_tc_c0_tchalt(); write_tc_c0_tchalt(1); } #endif/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ return 0; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static inline void restore_tcmode(unsigned long flags[] __maybe_unused, int lock __maybe_unused) { #if defined(PROFILING_CPU_HAS_TC) if (!haltval) { write_tc_c0_tchalt(0); } settc(old_tc); evpe(vpflags); if(lock) spin_unlock_irqrestore(&perfcnt_lock, sys_flag); #endif/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ } #undef vpflags #undef sys_flag #undef old_tc #undef haltval /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static unsigned int read_c0_perfctl(unsigned int ctl_reg, unsigned int tc, unsigned int *valid){ unsigned long flags[4] = {0, 0, 0, 0}; unsigned int res = 0; if(tc < PROFILING_MAX_COUNT_TCS) { if(set_tcmode(tc, flags, 1)) { *valid = 0; return res; } } *valid = 1; switch(ctl_reg){ #if defined(PROFILING_CPU_HAS_TC) case 0: res = read_tc_c0_perfctrl0(); break; case 1: res = read_tc_c0_perfctrl1(); break; case 2: res = MIPS_PERFORMANCE_TC_SPECIFIC_ENABLE | MIPS_PERFORMANCE_TCID(tc) | MIPS_PERFORMANCE_EVENT(1); break; /*--- dummy: TCSchedFeedBack liefert Instruction completed per TC ---*/ #else/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ case 0: res = read_c0_perfctrl0(); 
break; case 1: res = read_c0_perfctrl1(); break; case 2: res = read_c0_perfctrl2(); break; case 3: res = read_c0_perfctrl3(); break; #endif/*--- #else ---*//*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ default: *valid = 0; } if(tc < PROFILING_MAX_COUNT_TCS) { restore_tcmode(flags, 1); } /*--- printk(KERN_INFO "%s: tc=%u reg=%x val=%#08x\n", __func__, tc, ctl_reg, res); ---*/ return res; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void write_c0_perfctl(unsigned int ctl_reg, unsigned int tc, unsigned int val){ unsigned long flags[4]; unsigned int dummy __maybe_unused; if(set_tcmode(tc, flags, 1)) { return; } /*--- if(val & MIPS_PERFORMANCE_TC_SPECIFIC_ENABLE) { ---*/ /*--- val &= ~MIPS_PERFORMANCE_TCID(0xFF); ---*/ /*--- val |= MIPS_PERFORMANCE_TCID(tc); ---*/ /*--- } ---*/ DBG_PERF("%s: tc=%u ctl_reg: %x 0x%08x -> val=0x%08x epc=%pS\n", __func__, tc, ctl_reg, read_c0_perfctl(ctl_reg, PROFILING_MAX_COUNT_TCS, &dummy), val, (void *)read_tc_c0_tcrestart()); switch(ctl_reg){ #if defined(PROFILING_CPU_HAS_TC) case 0: write_tc_c0_perfctrl0(val); break; case 1: write_tc_c0_perfctrl1(val); break; #else/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ case 0: write_c0_perfctrl0(val); break; case 1: write_c0_perfctrl1(val); break; case 2: write_c0_perfctrl2(val); break; case 3: write_c0_perfctrl3(val); break; #endif/*--- #else ---*//*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ } restore_tcmode(flags, 1); } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void write_c0_perfcnt(unsigned int count_reg, unsigned int tc, unsigned int val){ unsigned long flags[4]; if(set_tcmode(tc, flags, 1)) { return; } DBG_PERF("%s: tc=%u count_reg: %x val=0x%x\n", __func__, tc, count_reg, val); switch(count_reg){ #if 
defined(PROFILING_CPU_HAS_TC) case 0: write_tc_c0_perfcntr0(val); break; case 1: write_tc_c0_perfcntr1(val); break; case 2: write_tc_c0_tcschefback(val); break; /*--- wird als perfcnt2 missbraucht ---*/ #else/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ case 0: write_c0_perfcntr0(val); break; case 1: write_c0_perfcntr1(val); break; case 2: write_c0_perfcntr2(val); break; case 3: write_c0_perfcntr3(val); break; #endif/*--- #else ---*//*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ } restore_tcmode(flags, 1); } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static unsigned int read_c0_perfcnt(unsigned int count_reg, unsigned int tc){ unsigned long flags[4] = {0, 0, 0, 0}; unsigned int res = 0; if(tc < PROFILING_MAX_COUNT_TCS) { if(set_tcmode(tc, flags, 1)) { return res; } } switch(count_reg){ #if defined(PROFILING_CPU_HAS_TC) case 0: res = read_tc_c0_perfcntr0(); break; case 1: res = read_tc_c0_perfcntr1(); break; case 2: res = read_tc_c0_tcschefback(); break; /*--- wird als perfcnt2 missbraucht ---*/ #else/*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ case 0: res = read_c0_perfcntr0(); break; case 1: res = read_c0_perfcntr1(); break; case 2: res = read_c0_perfcntr2(); break; case 3: res = read_c0_perfcntr3(); break; #endif/*--- #else ---*//*--- #if defined(PROFILING_CPU_HAS_TC) ---*/ } if(tc < PROFILING_MAX_COUNT_TCS) { restore_tcmode(flags, 1); } DBG_PERF("%s: tc=%u count_reg: %x val=0x%x\n", __func__, tc, count_reg, res); return res; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ struct _perfctl_param_read { unsigned int counting_tc_mask; unsigned int counting_reg_mask; char *str_buf; /* optional: write out in str_buf */ int str_len; }; /*--------------------------------------------------------------------------------*\ 
\*--------------------------------------------------------------------------------*/ struct _perfctl_param_write { struct _yield_profiling *yield_profile; /*--- wenn dieser Wert gesetzt, wird auch die jeweilige Statisik-Struktur performance_count_tc_stat[] geupdatet ---*/ unsigned int counting_tc_mask; unsigned int counting_reg_mask; enum { perf_set_ctl = 0x1, /*--- setze komplettes Ctrl-Register ---*/ perf_set_cnt = 0x2, /*--- setze Count-Register ---*/ /*--- folgende funktionieren nur, wenn perf_set_ctl == 0: ---*/ perf_set_mode = 0x4, /*--- modifiziere nure Ctrl-Register (Mode Enable) ---*/ } set_val; unsigned int preset_cnt; unsigned int preset_ctl; }; /*--------------------------------------------------------------------------------*\ * Lese perf_ctrl und perf_cnt * Welche Register mit Option param->counting_reg_mask * welche TC's mit Option param->counting_tc_mask * Ausgabe wahlweise per printk oder per param->str_buf \*--------------------------------------------------------------------------------*/ static void read_perfcounter_per_core(struct _perfctl_param_read *param) { char txt[128]; unsigned int tc, reg_cnt; unsigned int ntc = supported_tcs(); unsigned int max_perf, valid; char *str_buf = param->str_buf; int str_len = param->str_len; max_perf = mips_get_performance_counter_nr(); for ( tc = 0; tc < ntc; tc++){ if ((( 1 << tc) & param->counting_tc_mask) == 0) { continue; } for(reg_cnt = 0; reg_cnt < max_perf; reg_cnt++) { unsigned int cnt, ctl; if ((( 1 << reg_cnt) & param->counting_reg_mask) == 0) { continue; } cnt = read_c0_perfcnt(reg_cnt, tc); ctl = read_c0_perfctl(reg_cnt, tc, &valid); if(!valid) { continue; } if(str_buf == NULL) { printk(KERN_INFO"[TC=%u] perf%u: cnt=0x%08x ctl=0x%08x \"%s\" (%s)\n", tc, reg_cnt, cnt, ctl, performance_counter_options(reg_cnt, GET_MIPS_PERFORMANCE_EVENT(ctl)), print_performance_counter_mode(txt, sizeof(txt), ctl)); } else { if(str_len > 0) { int len = snprintf(str_buf, str_len, "[TC=%u] perf%u: cnt=0x%08x ctl=0x%08x 
\"%s\" (%s)\n", tc, reg_cnt, cnt, ctl, performance_counter_options(reg_cnt, GET_MIPS_PERFORMANCE_EVENT(ctl)), print_performance_counter_mode(txt, sizeof(txt), ctl)); len = min(len, str_len); str_buf += len; str_len -= len; } } } } if(str_buf) { param->str_len = str_buf - param->str_buf; } else { param->str_len = 0; } } /*--------------------------------------------------------------------------------*\ * Setzt perf_ctrl und/oder perf_cnt (Option param->set_val) * Welche Register mit Option param->counting_reg_mask * Welche TC's mit Option param->counting_tc_mask * * Wenn aucn tc-perf-Statistik initialisiert werden soll: param->yield_profile gesetzt \*--------------------------------------------------------------------------------*/ static void setup_perfcounter_per_core(struct _perfctl_param_write *param) { unsigned int tc, ntc = supported_tcs(); unsigned int valid __maybe_unused; int ctl = param->preset_ctl; int cnt = param->counting_reg_mask; int reg = 0; while(cnt) { if(cnt & 0x1) { if(param->set_val & (perf_set_ctl | perf_set_mode)) { for ( tc = 0; tc < ntc; tc++){ if (( 1 << tc) & param->counting_tc_mask) { if((param->set_val & perf_set_ctl) == 0) { /*--- nur den Mode setzen: ---*/ ctl = read_c0_perfctl(reg, tc, &valid); ctl &= ~MIPS_PERFORMANCE_MODE_MASK; ctl |= (param->preset_ctl & MIPS_PERFORMANCE_MODE_MASK); if(param->yield_profile) { set_tc_perf_mod(param->yield_profile->act_tc_stat[tc], param->preset_ctl); } } write_c0_perfctl(reg, tc, ctl); } } } if (param->set_val & perf_set_cnt) { for ( tc = 0; tc < ntc; tc++){ if (( 1 << tc) & param->counting_tc_mask ) { write_c0_perfcnt(reg, tc, param->preset_cnt); if(param->yield_profile) { init_simple_performance_count_tc_stat(¶m->yield_profile->perf_tcstat[tc], read_c0_perfctl(reg, tc, &valid), param->preset_cnt, reg); clean_tc_stat(param->yield_profile->act_tc_stat[tc]); /*--- printk(KERN_INFO"%s: core=%u tc=%u ctrl[%x] %x\n", __func__, param->yield_profile->core, tc, reg, 
param->yield_profile->perf_tcstat[tc].perf_ctrl[reg]); ---*/ } } } } } reg++; cnt >>= 1; } } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static void print_performance_options(unsigned int perf_reg) { char txt[32]; unsigned int max_perf, i; max_perf = mips_get_performance_counter_nr(); if(max_perf == 0) { return; } for(i = 0; i < ARRAY_SIZE(_performance_counter_options[0]); i++) { if(performance_counter_options(perf_reg, i) == NULL) { continue; } if(max_perf == 4) { snprintf(txt, sizeof(txt), "%u/%u", perf_reg & 1, (perf_reg & 1) + 2); } else { snprintf(txt, sizeof(txt), "%u", perf_reg); } printk(KERN_INFO "\tctrl%s:[%2d]: %s\n", txt, i, performance_counter_options(perf_reg, i)); } } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int print_all_performance_options_cb(unsigned int param1 __maybe_unused, unsigned int param2 __maybe_unused){ print_performance_options(0); print_performance_options(1); return 1; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int print_config_cb(unsigned int core __maybe_unused, unsigned int param2 __maybe_unused){ struct _perfctl_param_read perfctl_param; perfctl_param.counting_tc_mask = (1 << PROFILING_MAX_COUNT_TCS) - 1; perfctl_param.counting_reg_mask = (1 << mips_get_performance_counter_nr()) - 1; perfctl_param.str_buf = NULL; perfctl_param.str_len = 0; #if defined(PROFILING_IN_YIELD) if(core >= PROFILING_CORES) { for(core = 0; core < PROFILING_CORES; core++) { preempt_disable(); smp_call_function_single(gYieldProfiler[core].linux_os_cpu, (smp_call_func_t)read_perfcounter_per_core, &perfctl_param, true); preempt_enable(); } } else { preempt_disable(); 
smp_call_function_single(gYieldProfiler[core].linux_os_cpu, (smp_call_func_t)read_perfcounter_per_core, &perfctl_param, true); preempt_enable(); } #else/*--- #if defined(PROFILING_IN_YIELD) ---*/ read_perfcounter_per_core(&perfctl_param); #endif/*--- #else ---*//*--- #if defined(PROFILING_IN_YIELD) ---*/ return 1; } /*--------------------------------------------------------------------------------*\ * ret: len of str \*--------------------------------------------------------------------------------*/ int mips_get_performance_counter_mode(char *str, int str_len, unsigned int nr){ int len = 0; struct _perfctl_param_read perfctl_param; unsigned int core __maybe_unused; /*--- printk("%s: init-len: %u\n", __func__, str_len); ---*/ perfctl_param.counting_tc_mask = (1 << PROFILING_MAX_COUNT_TCS) - 1; perfctl_param.counting_reg_mask = 0x1 << nr; #if defined(PROFILING_IN_YIELD) for(core = 0; core < PROFILING_CORES; core++) { int mlen = snprintf(str, str_len, "CORE%u:\n", core); mlen = min(str_len, mlen); str_len -= mlen, str += mlen, len += mlen; perfctl_param.str_len = str_len; perfctl_param.str_buf = str; preempt_disable(); smp_call_function_single(gYieldProfiler[core].linux_os_cpu, (smp_call_func_t)read_perfcounter_per_core, &perfctl_param, true); preempt_enable(); /*--- printk("%s: smp-retlen: %u\n", __func__, perfctl_param.str_len); ---*/ str_len -= perfctl_param.str_len, str += perfctl_param.str_len, len += perfctl_param.str_len; } #else/*--- #if defined(PROFILING_IN_YIELD) ---*/ perfctl_param.str_len = str_len; perfctl_param.str_buf = str; read_perfcounter_per_core(&perfctl_param); len = perfctl_param.str_len; #endif/*--- #else ---*//*--- #if defined(PROFILING_IN_YIELD) ---*/ /*--- printk("%s: len: %u\n", __func__, len); ---*/ return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int check_setting_param_cb(unsigned int perf_reg, 
unsigned int option){ unsigned int perf_max_registers = mips_get_performance_counter_nr(); if(perf_reg >= perf_max_registers) { printk(KERN_ERR"error: invalid param1 on set %u