/****************************************************************************** ** ** FILE NAME : avm_membench.c ** AUTHOR : Christoph Buettner & Heiko Blobner *******************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(CONFIG_ATH79) #include #include #include #include #endif #if defined(CONFIG_MACH_FUSIV) #include #endif/*--- #if defined(CONFIG_MIPS_FUSIV) ---*/ #include /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ #define BLOCK_ELEMENT_BYTE 64 #define TOTAL_BLOCKS (BLOCK_ELEMENT_BYTE * 1) /*--- in KiB ---*/ #if defined(CONFIG_ATH79) #define CPU_CLOCK ath_get_clock(avm_clock_id_cpu) #define BUS_CLOCK ath_get_clock(avm_clock_id_ddr) #if defined(CONFIG_SOC_AR724X) #define WORTBREITE 16 #elif defined(CONFIG_SOC_AR934X) || defined(CONFIG_SOC_QCA953X) #define WORTBREITE (soc_is_ar9341() ? 16 : 32) #elif defined(CONFIG_SOC_QCA955X) || defined(CONFIG_SOC_QCA956X) #define WORTBREITE 16 #else #error "Unknown SOC!!!" #endif #elif defined(CONFIG_LANTIQ) extern u32 ifx_get_ddr_hz(void); extern unsigned int ifx_get_cpu_hz(void); #define CPU_CLOCK ifx_get_cpu_hz() #define BUS_CLOCK ifx_get_ddr_hz() * 2 #define WORTBREITE 16 #elif defined(CONFIG_MACH_FUSIV) #define CPU_CLOCK avm_get_clock(avm_clock_id_cpu) #define BUS_CLOCK avm_get_clock(avm_clock_id_ddr) #define WORTBREITE 16 #else #error "Unknown Architecture!!!" #endif #define MESS_LAENGE ((CPU_CLOCK / 2) * 1) #define ZEIT_S (MESS_LAENGE / (CPU_CLOCK >> 1)) #define ZEIT_MS ((MESS_LAENGE % (CPU_CLOCK >> 1)) / ((CPU_CLOCK >> 1) / 1000)) #define KB_PRO_SEC ((kb / loops) * 1000/(ZEIT_S * 1000 + ZEIT_MS)) #define WORTE_PRO_SEC(wortbreite) (KB_PRO_SEC * (1024 / (wortbreite / 8))) #define WORTE_PRO_CLOCK_1(wortbreite) (BUS_CLOCK / WORTE_PRO_SEC(wortbreite)) #define WORTE_PRO_CLOCK_10(wortbreite) ((BUS_CLOCK / (WORTE_PRO_SEC(wortbreite) / 1000)) % 1000) /*------------------------------------------------------------------------------------------*\ * Pipeline-friendly Read * * -16x 4-byte-Werte pro Schleifendurchlauf * -> 16 Lesezugriffe pro Schleifendurchlauf * -4 Register werden abwechselnd als Ziel genutzt -> kein unnoetiges Pipeline-Leeren wg. doppelt genutzter Register \*------------------------------------------------------------------------------------------*/ static unsigned long do_measure__read_pipe(char *mem, int irqsave, int loops, unsigned long * stall_cycles, unsigned long * instr_compl) { unsigned long start_stall_cycles, start_instr_compl; int i; unsigned long flags; unsigned long kb = 0; for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); do { u32 tick_value; u32 tick_value_end; register unsigned int p = (unsigned int)mem; register unsigned int p_end = p + (TOTAL_BLOCKS * 1024); /*--- ---- ---*/ tick_value = read_c0_count(); start_stall_cycles = read_c0_perfcntr0(); start_instr_compl = read_c0_perfcntr1(); for( ; p < p_end; p += BLOCK_ELEMENT_BYTE ) { /*--- 64 Byte pro Durchlauf ---*/ register int dummy0 asm("v0") ; register int dummy1 asm("v1") ; register int dummy2 asm("a0") ; register int dummy3 asm("a1") ; __asm__ __volatile__ (" lw %0, 0(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" lw %0, 4(%1) \n" : "=r" (dummy1) : "r" (p)); __asm__ __volatile__ (" lw %0, 8(%1) \n" : "=r" (dummy2) : "r" (p)); __asm__ __volatile__ (" lw %0,12(%1) \n" : "=r" (dummy3) : "r" (p)); __asm__ __volatile__ (" lw %0,16(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" lw %0,20(%1) \n" : "=r" (dummy1) : "r" (p)); __asm__ __volatile__ (" lw %0,24(%1) \n" : "=r" (dummy2) : "r" (p)); __asm__ __volatile__ (" lw %0,28(%1) \n" : "=r" (dummy3) : "r" (p)); __asm__ __volatile__ (" lw %0,32(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" lw %0,36(%1) \n" : "=r" (dummy1) : "r" (p)); __asm__ __volatile__ (" lw %0,40(%1) \n" : "=r" (dummy2) : "r" (p)); __asm__ __volatile__ (" lw %0,44(%1) \n" : "=r" (dummy3) : "r" (p)); __asm__ __volatile__ (" lw %0,48(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" lw %0,52(%1) \n" : "=r" (dummy1) : "r" (p)); __asm__ __volatile__ (" lw %0,56(%1) \n" : "=r" (dummy2) : "r" (p)); __asm__ __volatile__ (" lw %0,60(%1) \n" : "=r" (dummy3) : "r" (p)); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); tick_value_end = read_c0_count(); *stall_cycles += (read_c0_perfcntr0() - start_stall_cycles); *instr_compl += (read_c0_perfcntr1() - start_instr_compl); /*--- --- ---*/ time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*------------------------------------------------------------------------------------------*\ * Extreme Read * * -16x 4-byte-Werte werden jeweils von 4 unterschiedlichen Adressen gelesen * -> 16*4 Lesezugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ //static unsigned long do_measure__read_extreme(int memsize_byte, int irqsave, int loops) { static unsigned long do_measure__read_extreme(char *mem, int irqsave, int loops, unsigned long * stall_cycles, unsigned long * instr_compl) { unsigned long start_stall_cycles, start_instr_compl; int i; int x; unsigned int *local_mem[4]; unsigned long flags; unsigned long kb = 0; for (x = 0; x < loops; x++){ u32 time_in_double_cpu_clocks = 0; u32 tick_value; u32 tick_value_end; if(irqsave) { local_irq_save(flags); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); do { unsigned int p_end; for (i = 0; i < 4 ; i++) { local_mem[i] = (unsigned int*)(mem + (i * (TOTAL_BLOCKS * 1024 / 4))); } p_end = (unsigned int)local_mem[1]; tick_value = read_c0_count(); start_stall_cycles = read_c0_perfcntr0(); start_instr_compl = read_c0_perfcntr1(); for(; (unsigned int)local_mem[0] < p_end ; local_mem[0] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int), local_mem[1] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int), local_mem[2] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int), local_mem[3] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int)) { register int dummy0 asm("v0"); register int dummy1 asm("v1"); register int dummy2 asm("a0"); register int dummy3 asm("a1"); register int p0 = (int)local_mem[0]; register int p1 = (int)local_mem[1]; register int p2 = (int)local_mem[2]; register int p3 = (int)local_mem[3]; __asm__ __volatile__ (" lw %0, 0(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 0(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 0(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 0(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 4(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 4(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 4(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 4(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 8(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 8(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 8(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 8(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 12(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 12(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 12(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 12(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 16(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 16(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 16(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 16(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 20(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 20(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 20(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 20(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 24(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 24(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 24(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 24(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 28(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 28(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 28(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 28(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 32(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 32(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 32(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 32(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 36(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 36(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 36(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 36(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 40(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 40(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 40(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 40(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 44(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 44(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 44(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 44(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 48(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 48(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 48(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 48(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 52(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 52(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 52(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 52(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 56(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 56(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 56(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 56(%1) \n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" lw %0, 60(%1) \n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" lw %0, 60(%1) \n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" lw %0, 60(%1) \n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" lw %0, 60(%1) \n" : "=r" (dummy3) : "r" (p3)); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); tick_value_end = read_c0_count(); *stall_cycles += (read_c0_perfcntr0() - start_stall_cycles); *instr_compl += (read_c0_perfcntr1() - start_instr_compl); time_in_double_cpu_clocks += ( tick_value_end - tick_value ); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("."); } printk("\n"); return kb ; } /*------------------------------------------------------------------------------------------*\ * Mixture Read/Write * * -1x 4-Byte Lesen + 1x 4-Byte Schreiben * -> 2 Zugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ static unsigned long do_measure__read_mixture(char *mem, int irqsave, int loops, unsigned long * stall_cycles, unsigned long * instr_compl) { unsigned long start_stall_cycles, start_instr_compl; int i; unsigned long flags; unsigned long kb = 0; for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); do { u32 tick_value; u32 tick_value_end; register unsigned int p = (unsigned int)mem; register unsigned int p_end = p + (TOTAL_BLOCKS * 1024); tick_value = read_c0_count(); start_stall_cycles = read_c0_perfcntr0(); start_instr_compl = read_c0_perfcntr1(); for(; p < p_end ; p += sizeof(unsigned int) * 4) { register int dummy0 asm("v0") ; __asm__ __volatile__ (" lw %0, 0(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" sw %0, 0(%1) \n" : : "r" (dummy0), "r" (p)); __asm__ __volatile__ (" lw %0, 4(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" sw %0, 4(%1) \n" : : "r" (dummy0), "r" (p)); __asm__ __volatile__ (" lw %0, 8(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" sw %0, 8(%1) \n" : : "r" (dummy0), "r" (p)); __asm__ __volatile__ (" lw %0, 12(%1) \n" : "=r" (dummy0) : "r" (p)); __asm__ __volatile__ (" sw %0, 12(%1) \n" : : "r" (dummy0), "r" (p)); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); tick_value_end = read_c0_count(); *stall_cycles += (read_c0_perfcntr0() - start_stall_cycles); *instr_compl += (read_c0_perfcntr1() - start_instr_compl); time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*------------------------------------------------------------------------------------------*\ * Simple Write * * -1x 4-Byte Schreiben * -> 1 Zugriff pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ static unsigned long do_measure__write(char *mem, int irqsave, int loops, unsigned long * stall_cycles, unsigned long * instr_compl) { unsigned long start_stall_cycles, start_instr_compl; int i; unsigned long flags; unsigned long kb = 0; for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); do { u32 tick_value; u32 tick_value_end; register unsigned int p = (unsigned int)mem; register unsigned int p_end = p + (TOTAL_BLOCKS * 1024); start_stall_cycles = read_c0_perfcntr0(); start_instr_compl = read_c0_perfcntr1(); tick_value = read_c0_count(); for(; p < p_end; p += BLOCK_ELEMENT_BYTE) { register int dummy0 = 23; __asm__ __volatile__ (" sw %0, 0(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0, 4(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0, 8(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,12(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,16(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,20(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,24(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,28(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,32(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,36(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,40(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,44(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,48(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,52(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,56(%1) \n" : : "r" (dummy0), "r" (p) ); __asm__ __volatile__ (" sw %0,60(%1) \n" : : "r" (dummy0), "r" (p) ); } if(((unsigned long)mem & 0xE0000000UL) != 0xA0000000UL) dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); tick_value_end = read_c0_count(); *stall_cycles += (read_c0_perfcntr0() - start_stall_cycles); *instr_compl += (read_c0_perfcntr1() - start_instr_compl); time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*------------------------------------------------------------------------------------------*\ * Mem-Test von Knut Dettmer (Lantiq-Mitarbeiter) * * KD: simple memory thruput test (MIPS cached memory address space). \*------------------------------------------------------------------------------------------*/ static inline signed long get_timer(long value) { return (signed long)(read_c0_count()) - value; } int do_dettmer_bench(char *argv_read, char* argv_mb, char *argv_mhz) { register ulong data=0xdeadbeaf; volatile ulong *p_addr; signed long start; ulong stop = 0; ulong read, MB,freq; int i=1; int rcode = 0; ulong mSeconds, thruput; // 1st parameter defines whether read or write should be tested (0-> read, 1->write) /*--- if (argc > 1) { ---*/ read = (ulong)simple_strtoul(argv_read, NULL, 16); /*--- } else { ---*/ /*--- read=0; ---*/ /*--- } ---*/ // 2nd parameter defines how many 500MBs should be read/written in hex /*--- if (argc > 2) { ---*/ MB = (ulong)simple_strtoul(argv_mb, NULL, 16); MB = MB * 512; /*--- } else { ---*/ /*--- MB = 4000; ---*/ /*--- } ---*/ //3rd parameters tells whether this is for 166.5 or 196.6MHz (0->166.5, 1->196.6) /*--- if (argc > 3) { ---*/ freq = (ulong)simple_strtoul(argv_mhz, NULL, 16); /*--- } else { ---*/ /*--- freq = 0; ---*/ /*--- } ---*/ printk(KERN_EMERG "Starting ddr "); if (read==0) printk(KERN_EMERG "write "); else printk(KERN_EMERG "read "); printk(KERN_EMERG "stress test for "); if (freq == 0) printk(KERN_EMERG "166.5 MHz "); else printk(KERN_EMERG "196.6 MHz "); printk(KERN_EMERG "ddr clock frequency\n"); printk(KERN_EMERG "%liMB will be ",MB); if (read==0) printk(KERN_EMERG "written\n"); else printk(KERN_EMERG "read\n"); if (read==0) { char *kmem = kmalloc(512, GFP_ATOMIC); uint32_t counter = 0; if(!kmem) { printk(KERN_EMERG "[%s:%d]error: kmalloc failed\n", __FUNCTION__, __LINE__); return -1; } // set start address // get starting time start = get_timer(0); //printk(KERN_EMERG "get_timer(0)=0x%8.8x\n", start); for (;;) { // 128 writes in loop counter++; p_addr=(ulong*)kmem; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; *p_addr++=data; // reset address if 1MB was written if (counter >= 2048) { counter = 0; // decide here how many MBs should be written stop += (unsigned long)get_timer(start); if (i==MB) { // get end time //printk(KERN_EMERG "get_timer(start)=0x%8.8x\n", stop); printk(KERN_EMERG "%uMB written\n", i); // calculate thruput if (freq==0) mSeconds=stop/166500; else mSeconds=stop/196699; printk(KERN_EMERG "in %li mseconds\n", mSeconds); thruput=i*1024/mSeconds; printk(KERN_EMERG "==>> %liMB/s thruput\n", thruput); return 1; } i++; start = get_timer(0); } } kfree(kmem); }else { //start read address p_addr=(ulong*)0x80100000; //get start time start = get_timer(0); for (;;) { //128 reads in loop data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; data=*p_addr++; // reset address when 1 MB was read if (p_addr>=(ulong*)0x80200000) { p_addr=(ulong*)0x80100000; // decide here how many MBs should be read stop += (unsigned long long)get_timer(start); if (i==MB) { // get end time and calculate thruput //printk(KERN_EMERG "get_timer(start)=0x%8.8x\n", stop); printk(KERN_EMERG "%uMB read\n", i); if (freq==0) mSeconds=stop/166500; else mSeconds=stop/196600; printk(KERN_EMERG "in %li mseconds\n", mSeconds); thruput=i*1024/mSeconds; printk(KERN_EMERG "==>> %liMB/s thruput\n", thruput); return 1; } i++; start = get_timer(0); } } } return rcode; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_head(struct seq_file *s, int loops, int wortbreite) { seq_puts(s, "\n\n"); seq_puts(s, "AVM-RAM-Benchmark\n"); seq_puts(s, "=============================================\n"); seq_puts(s, "IRQs: off (alle Tests mit deaktivierten IRQs)\n"); seq_printf(s, "CPU-Clock: %u\n", CPU_CLOCK); seq_printf(s, "RAM-Clock: %u (eff. Datentaktrate)\n", BUS_CLOCK); seq_printf(s, "BUS-Breite (Word=): %d Bit\n", wortbreite); seq_printf(s, "Measure-Time: %d * %d.%ds\n\n", loops, ZEIT_S, ZEIT_MS); seq_puts(s, " -- Results --\n"); seq_puts(s, "========================================================================================\n"); seq_printf(s," type | total read | loops | DDR-Ticks | %2dBit | | Stalls per |\n", wortbreite); seq_printf(s," | in kb | | /%2dBit | Worte/s | kB/s | Instruction |\n", wortbreite); seq_puts(s, "========================================================================================\n"); udelay(100); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_read_pipe(struct seq_file *s, int loops, int wortbreite, char *kmem) { unsigned long kb; unsigned long stall_cycles = 0; unsigned long instr_compl = 0; unsigned long stalls_per_instr; unsigned long stalls_per_instr_dot; kb = do_measure__read_pipe(kmem, 1, loops, &stall_cycles, &instr_compl); instr_compl /= 1000; stalls_per_instr = stall_cycles / instr_compl; stalls_per_instr_dot = stalls_per_instr % 1000; stalls_per_instr /= 1000; seq_printf(s, "read | %7lu | %1d | %5lu.%03lu | %9lu | %6lu | %4lu.%3lu |\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC, stalls_per_instr, stalls_per_instr_dot); seq_puts(s, "Pipeline-friendly | | | | | | |\n"); seq_puts(s, "Lesen aus dem RAM mit optimaler Unterstuetzung der Pipline. | | |\n"); seq_puts(s, "D.h. der Code ist gewaehlt, dass die Pipeline nicht geleert | | |\n"); seq_puts(s, "werden muss und so keine Zeit verschwendet wird. | | |\n"); seq_puts(s, "----------------------------------------------------------------------------------------\n"); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_read_extreme(struct seq_file *s, int loops, int wortbreite, char *kmem) { unsigned long kb; unsigned long stall_cycles = 0; unsigned long instr_compl = 0; unsigned long stalls_per_instr; unsigned long stalls_per_instr_dot; kb = do_measure__read_extreme(kmem, 1, loops, &stall_cycles, &instr_compl); instr_compl /= 1000; stalls_per_instr = stall_cycles / instr_compl; stalls_per_instr_dot = stalls_per_instr % 1000; stalls_per_instr /= 1000; seq_printf(s, "read | %7lu | %1d | %5lu.%03lu | %9lu | %6lu | %4lu.%3lu |\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC, stalls_per_instr, stalls_per_instr_dot); seq_puts(s, "extrema | | | | | | |\n"); seq_puts(s, "Die gelesenen Werte stehen im Speicher nicht hintereinander. | | |\n"); seq_puts(s, "D.h. die CPU kann den Cache nicht nutzen. | | |\n"); seq_puts(s, "----------------------------------------------------------------------------------------\n"); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_readwrite(struct seq_file *s, int loops, int wortbreite, char *kmem) { unsigned long kb; unsigned long stall_cycles = 0; unsigned long instr_compl = 0; unsigned long stalls_per_instr; unsigned long stalls_per_instr_dot; kb = do_measure__read_mixture(kmem, 1, loops, &stall_cycles, &instr_compl); instr_compl /= 1000; stalls_per_instr = stall_cycles / instr_compl; stalls_per_instr_dot = stalls_per_instr % 1000; stalls_per_instr /= 1000; seq_printf(s, "read/write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu | %4lu.%3lu |\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC, stalls_per_instr, stalls_per_instr_dot); seq_puts(s, " | | | | | | |\n"); seq_puts(s, "Immer schoen im Wechsel 1x Lesen und 1x Schreiben. | | |\n"); seq_puts(s, "----------------------------------------------------------------------------------------\n"); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_write(struct seq_file *s, int loops, int wortbreite, char *kmem) { unsigned long kb; unsigned long stall_cycles = 0; unsigned long instr_compl = 0; unsigned long stalls_per_instr; unsigned long stalls_per_instr_dot; kb = do_measure__write(kmem, 1, loops, &stall_cycles, &instr_compl); instr_compl /= 1000; stalls_per_instr = stall_cycles / instr_compl; stalls_per_instr_dot = stalls_per_instr % 1000; stalls_per_instr /= 1000; seq_printf(s, "write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu | %4lu.%3lu |\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC, stalls_per_instr,stalls_per_instr_dot); seq_puts(s, " | | | | | | |\n"); seq_puts(s, "Einfaches Schreiben. | | |\n"); seq_puts(s, "----------------------------------------------------------------------------------------\n"); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static int avm_mb_proc_complete_membench(struct seq_file *s, void *data __maybe_unused) { int loops = 1; int wortbreite = WORTBREITE; char *kmem; unsigned int perf_ctrl0; unsigned int perf_ctrl1; unsigned int tc_id; kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if (!kmem) { seq_puts(s, "No memory for test\n"); return 0; } tc_id = smp_processor_id() % 2; //configure perfomance counter perf_ctrl0 = read_c0_perfctrl0(); perf_ctrl1 = read_c0_perfctrl1(); //Stall Cycles with counter 0 write_c0_perfctrl0((18 << 5) | (2 << 20) | (tc_id << 22) | 0xF); //Instructions completed with counter 1 write_c0_perfctrl1((1 << 5) | (2 << 20) | (tc_id << 22) | 0xF); print_head(s, loops, wortbreite); print_read_pipe(s, loops, wortbreite, kmem); print_read_extreme(s, loops, wortbreite, kmem); print_readwrite(s, loops, wortbreite, kmem); print_write(s, loops, wortbreite, kmem); seq_puts(s, "\n\n"); //restore performance cntr write_c0_perfctrl0(perf_ctrl0); write_c0_perfctrl1(perf_ctrl1); kfree(kmem); return 0; } static int avm_mb_proc_help(struct seq_file *s, void *data) { seq_printf(s, "CP0-Config0: 0x%08x\n", read_c0_config()); seq_puts(s, "\n"); seq_puts(s, "\n"); seq_puts(s, "AVM-RAM-Benchmark (HELP)\n"); seq_puts(s, "=============================================\n"); seq_puts(s, "cat /proc/avm/complete -> Durchfuehrung aller Benchmarks\n"); seq_puts(s, "cat /proc/avm/help -> Anzeige dieser Hilfe\n"); seq_puts(s, "\n"); seq_puts(s, "cat /proc/avm/do_read_extreme -> Read Bench\n"); seq_puts(s, " Lese benchmark fuer nicht-lineares Lesen.\n"); seq_puts(s, "cat /proc/avm/do_read_pipe -> Read Bench\n"); seq_puts(s, " Pipeline orientierter Lese benchmark\n"); seq_puts(s, "cat /proc/avm/do_read_write -> Read/Schreib Bench\n"); seq_puts(s, "cat /proc/avm/do_write -> Schreib Bench\n"); seq_puts(s, "\n"); seq_puts(s, "cat /proc/avm/do_dettmer_read -> Lantiq Read Bench\n"); seq_puts(s, "cat /proc/avm/do_dettmer_write-> Lantiq Schreib Bench\n"); seq_puts(s, "\n"); #ifdef CONFIG_MACH_AR934x seq_puts(s, "cat /proc/avm/do_atheros_pctrace -> performance counter log (on idle)\n"); seq_puts(s, "cat /proc/avm/do_atheros__hogging -> ddr hogging test\n"); #endif /* CONFIG_MACH_AR934x */ seq_puts(s, "\n"); return 0; } static int avm_mb_proc_generic_membench(struct seq_file *s, void *data) { int loops = 1; int wortbreite = WORTBREITE; char *kmem; void (*membench_func)(struct seq_file *, int, int, char *); kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if (!kmem) { seq_puts(s, "No memory for test\n"); return 0; } membench_func = s->private; BUG_ON(!membench_func); print_head(s, loops, wortbreite); membench_func(s, loops, wortbreite, kmem); kfree(kmem); seq_puts(s, "\n\n"); return 0; } static int print_dettmer_read_info(struct seq_file *s, void *data __maybe_unused) { /* 1 .. read | 0x1 .. 500MB Test | 0x1 .. 196.6MHz */ do_dettmer_bench("1", "0x1", "0x1"); seq_puts(s, "Lantiq Read Benchmark. (Set LogLevel 0 to see the " "results..)\n"); return 0; } static int print_dettmer_write_info(struct seq_file *s, void *data __maybe_unused) { /* 0 .. write | 0x1 .. 500MB Test | 0x1 .. 196.6MHz */ do_dettmer_bench("0", "0x1", "0x1"); seq_puts(s, "Lantiq Write Benchmark. (Set LogLevel 0 to see the " "results..)\n"); return 0; } /*------------------------------------------------------------------------------------------*\ * ATHEROS \*------------------------------------------------------------------------------------------*/ #ifdef CONFIG_MACH_AR934x #define DDRMON_CTL_CLEAR_ALL_CNT (1 << 0) #define DDRMON_CTL_CLEAR_MAX_LATENCY_CNT (1 << 1) #define DDRMON_CTL_DISABLE_LATENCY_REFRESH (1 << 2) #define DDRMON_CTL_CLIENT_SEL_CPU (0 << 8) #define DDRMON_CTL_CLIENT_SEL_USB_MBOX (3 << 8) #define CLEAR_ALL (DDRMON_CTL_CLEAR_ALL_CNT | DDRMON_CTL_CLEAR_MAX_LATENCY_CNT | DDRMON_CTL_DISABLE_LATENCY_REFRESH) // Optional noinline int ddr_act_init(void) { asm( " li $t0, 0xbd007f00\n" " li $t1, 0x500ddeed\n" " sw $t1, 0x0($t0)\n" "li $t0, 0x82100000\n" /* DDR_START_ADDRESS */ " li $t1, 0x821a0000\n" /* DDR_END_ADDRESS */ "_init_seq:\n" " sw $t0, 0x0($t0)\n" " addiu $t0, $t0, 4\n" " bne $t0, $t1, _init_seq\n" " nop\n"); return 0; } /*------------------------------------------------------------------------------------------*\ * DDR CPU HOG \*------------------------------------------------------------------------------------------*/ noinline void ddr_act(unsigned int start, unsigned int end) { asm("STRT_DDR_TXNS:\n" " li $t9, 0x100\n" "_outer_ddr_rw_loop:\n" " ori $t0, %[ddr_start], 0\n" " addiu $t8, %[ddr_start], 0x8000\n" " li $t1, 0x8\n" /* NO_CHNGES_WITHIN_CACHE_LINE */ "_inner_ddr_rw_loop:\n" " lw $t2, 0x0($t0)\n" " lw $t5, 0x2000($t0)\n" " lw $t6, 0x4000($t0)\n" " lw $t7, 0x6000($t0)\n" " addiu $t2, $t2, 1\n" " sw $t2, 0x0($t8)\n" " sw $t5, 0x2000($t8)\n" " sw $t6, 0x4000($t8)\n" " sw $t7, 0x6000($t8)\n" " addiu $t0, $t0, 4\n" " addiu $t8, $t8, 4\n" " addiu $t1, $t1, -1\n" " nop\n" " bnez $t1, _inner_ddr_rw_loop\n" " nop\n" "_changes_before_outer_loop:\n" " addiu %[ddr_start], %[ddr_start], 32\n" /* DDR_INCR_COUNT */ " bne %[ddr_start], %[ddr_end], _outer_ddr_rw_loop\n" " nop\n" " nop\n" : : [ddr_start] "r" (start), [ddr_end] "r" (end) ); } /*------------------------------------------------------------------------------------------*\ * ISR for timer \*------------------------------------------------------------------------------------------*/ #define TEST_ELEMENTS 6 #define TEST_SIZE (1024 * TEST_ELEMENTS) static volatile unsigned int ath_pctrace_mode = 3; static unsigned int gcnt = 0; volatile unsigned int soff = 0; unsigned int test_buffer[TEST_SIZE]; irqreturn_t ath_timer_intr(int irq, void *dev_id) { #if 0 ath_pctrace_mode++; if (ath_pctrace_mode > 7) { ath_pctrace_mode = 0; } ath_pctrace_mode = 3; #endif #if 0 /*--- Performace count registers for DDR ---*/ *(test_buffer + soff + 0) = ath_reg_rd(0xb80000ec); /*--- ALL_GRANTS ---*/ *(test_buffer + soff + 1) = ath_reg_rd(0xb80000f0); /*--- ALL_DUR_L ---*/ /*--- *(test_buffer + soff + 2) = ath_reg_rd(0xb80000f4); ---*/ *(test_buffer + soff + 2) = ath_reg_rd(0xb80000f8); /*--- SEL_GRANTS ---*/ *(test_buffer + soff + 3) = ath_reg_rd(0xb80000fc); /*--- SEL_DUR_L ---*/ /*--- *(test_buffer + soff + 5) = ath_reg_rd(0xb8000100); ---*/ *(test_buffer + soff + 4) = ath_reg_rd(0xb8000104); /*--- MAX_LATENCY ---*/ /*--- *(test_buffer + soff + 5) = gcnt | (ath_pctrace_mode << 16); ---*/ *(test_buffer + soff + 5) = ath_pctrace_mode; /*--- *(test_buffer + soff + 7) = gcnt | (gcntinvalid << 28); ---*/ #else test_buffer[soff + 0] = ath_reg_rd(0xb80000ec); /*--- ALL_GRANTS ---*/ test_buffer[soff + 1] = ath_reg_rd(0xb80000f0); /*--- ALL_DUR_L ---*/ test_buffer[soff + 2] = ath_reg_rd(0xb80000f8); /*--- SEL_GRANTS ---*/ test_buffer[soff + 3] = ath_reg_rd(0xb80000fc); /*--- SEL_DUR_L ---*/ test_buffer[soff + 4] = ath_reg_rd(0xb8000104); /*--- MAX_LATENCY ---*/ /*--- test_buffer + soff + 5) = gcnt | (ath_pctrace_mode << 16); ---*/ test_buffer[soff + 5] = ath_pctrace_mode; #endif /*--- Reset Performance Counters ---*/ #if 0 ath_reg_wr(0xb80000e8, DDRMON_CTL_CLIENT_SEL_USB_MBOX | CLEAR_ALL); ath_reg_wr(0xb80000e8, DDRMON_CTL_CLIENT_SEL_USB_MBOX); #else ath_reg_wr(0xb80000e8, (ath_pctrace_mode << 8) | CLEAR_ALL); ath_reg_wr(0xb80000e8, (ath_pctrace_mode << 8)); #endif gcnt++; soff += TEST_ELEMENTS; if (soff > TEST_SIZE) { gcnt = 0; soff = 0; } return IRQ_HANDLED; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static int do_atheros_pctrace_read(char *page, char **start, off_t off, int count, int *eof, void *data) { unsigned int index = soff, loop = 0; printk( "Atheros DMA Benchmark\n" "SELECTION = USB/MBOX(SLIC)\n\n" "ERROR COUNT ALL_GRANTS ALL_DUR_L SEL_GRANTS SEL_DUR_L MAX_LATENCY\n" " 500us 0xB80000EC 0xB80000F0 0xB80000F8 0xB80000FC 0xB8000104 \n"); /*--- printk( KERN_ERR "mode %d\n", ath_pctrace_mode); ---*/ while (loop < TEST_SIZE) { printk( "0x%05x: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", test_buffer[index+5], test_buffer[index+0], test_buffer[index+1], test_buffer[index+2], test_buffer[index+3], test_buffer[index+4]); index += TEST_ELEMENTS; loop += TEST_ELEMENTS; if (index > TEST_SIZE) { index = 0; } } *eof = 1; return 0; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ #define PCTRACE_MAX_BUFFER 32 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) static int do_atheros_hogging(struct file *file, const char *buffer, size_t count, loff_t *_off __maybe_unused) #else static int do_atheros_hogging(struct file *file, const char *buffer, unsigned long count, void *_data __maybe_unused) #endif { char local_buffer[PCTRACE_MAX_BUFFER]; unsigned char *memarea; unsigned int len; long cnt; memarea = kmalloc(0x10000 +(2 * PAGE_SIZE), GFP_ATOMIC); if(!memarea) { printk(KERN_ERR "Hogging test: Failed to reserve memory!\n"); return -1; } if (count > PCTRACE_MAX_BUFFER) len = PCTRACE_MAX_BUFFER; else len = count; if(copy_from_user(local_buffer, buffer, len)) { kfree(memarea); return -EFAULT; } local_buffer[len] = '\0'; cnt = simple_strtol(local_buffer, NULL, 10); if(cnt < 1) { printk(KERN_ERR "Error: Invalid hogging test count %ld\n", cnt); kfree(memarea); return -1; } while(cnt) { ddr_act((unsigned int)memarea + PAGE_SIZE, (unsigned int)memarea + PAGE_SIZE + 0x10000); /*--- if(cnt%1000 == 0) { ---*/ /*--- schedule(); ---*/ /*--- } ---*/ cnt--; } kfree(memarea); return len; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static int do_atheros_pctrace(struct file *file, const char *buffer, unsigned long count, void *data) { int len; char local_buffer[PCTRACE_MAX_BUFFER]; if (count > PCTRACE_MAX_BUFFER) len = PCTRACE_MAX_BUFFER; else len = count; if(copy_from_user(local_buffer, buffer, len)) return -EFAULT; local_buffer[len] = '\0'; if ( ! strncmp(local_buffer, "start", sizeof("start") - 1)) { /*--- init ---*/ printk(KERN_ERR "%s start\n", __func__); /* Reset Performance Counters */ ath_reg_wr(0xb80000e8, DDRMON_CTL_CLIENT_SEL_USB_MBOX | CLEAR_ALL); ath_reg_wr(0xb80000e8, DDRMON_CTL_CLIENT_SEL_USB_MBOX); ath_reg_wr(ATH_GENERAL_TMR2_RELOAD, 0x4e20); // 500 us //ath_reg_wr(ATH_GENERAL_TMR_RELOAD, 0x2625a00); // 1s if(request_irq(ATH_MISC_IRQ_TIMER2, ath_timer_intr, IRQF_DISABLED, "ath_pctrace_timer", NULL)) { printk(KERN_ERR "[%s]: Failed to register general purpose timer interrupt\n", __FUNCTION__); } return len; } if ( ! strncmp(local_buffer, "stop", sizeof("stop") - 1)) { /*--- exit ---*/ printk(KERN_ERR "%s stop\n", __func__); free_irq(ATH_MISC_IRQ_TIMER2, NULL); return len; } if ( ! strncmp(local_buffer, "mode", sizeof("mode") - 1)) { /*--- exit ---*/ ath_pctrace_mode = (unsigned int)(local_buffer[5] - '0') & 0xF; /*--- printk(KERN_ERR "%s mode -%s- \"%d\"\n", __func__, local_buffer, ath_pctrace_mode); ---*/ if ((ath_pctrace_mode > 0) && (ath_pctrace_mode < 8)) { return len; } else { printk(KERN_ERR "%s: mode not set\n", __func__); ath_pctrace_mode = 0; } } printk(KERN_ERR "%s: use \"start\", \"stop\" or \"mode [0..7]\"\n", __func__); return len; } #endif /*--- #ifdef CONFIG_MACH_AR934x ---*/ static inline __attribute__((pure)) unsigned long _kb_value_per_sec(unsigned long x, unsigned loops) { return ((x / loops) * 1000 / (ZEIT_S * 1000 + ZEIT_MS)); } static int avm_mb_proc_performance_index(struct seq_file *s, void *data __maybe_unused) { unsigned long kb_r_burst; unsigned long kb_w_burst; unsigned long kb_rw; unsigned long kb_r; unsigned int perf_ctrl0; unsigned int perf_ctrl1; unsigned int tc_id; int loops = 1; char *kmem; unsigned long stall_cycles = 0; unsigned long instr_compl = 0; unsigned long stalls_per_instr; unsigned long stalls_per_instr_dot; kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if (!kmem) { seq_puts(s, "No memory for test\n"); return 0; } tc_id = smp_processor_id() % 2; //configure perfomance counter perf_ctrl0 = read_c0_perfctrl0(); perf_ctrl1 = read_c0_perfctrl1(); //Stall Cycles with counter 0 write_c0_perfctrl0((18 << 5) | (2 << 20) | (tc_id << 22) | 0xF); //Instructions completed with counter 1 write_c0_perfctrl1((1 << 5) | (2 << 20) | (tc_id << 22) | 0xF); kb_r_burst = do_measure__read_pipe(kmem, 1, loops, &stall_cycles, &instr_compl); kb_w_burst = do_measure__write(kmem, 1, loops, &stall_cycles, &instr_compl); kb_rw = do_measure__read_mixture(kmem, 1, loops, &stall_cycles, &instr_compl); kb_r = do_measure__read_extreme(kmem, 1, loops, &stall_cycles, &instr_compl); instr_compl /= 1000; stalls_per_instr = stall_cycles / instr_compl; stalls_per_instr_dot = stalls_per_instr % 1000; stalls_per_instr /= 1000; kfree(kmem); seq_printf(s, "Performance-Index: %lu\n", _kb_value_per_sec(kb_r_burst, loops)/1000*10 + _kb_value_per_sec(kb_w_burst, loops)/1000*10 + _kb_value_per_sec(kb_rw, loops)/1000*1 + _kb_value_per_sec(kb_r, loops)/1000*1); seq_printf(s, "CPU-Clock: %u MHz\n", CPU_CLOCK/(1000*1000)); seq_printf(s, "RAM-Clock: %u MHz\n", BUS_CLOCK/(1000*1000)); seq_printf(s, "Stalls per Instruction %lu.%03lu\n", stalls_per_instr, stalls_per_instr_dot); //restore performance cntr write_c0_perfctrl0(perf_ctrl0); write_c0_perfctrl1(perf_ctrl1); return 0; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ void early_membench(void) { static char buffer[1024]; struct seq_file sf = { .buf = buffer, .size = sizeof(buffer), }; printk(KERN_ERR "%s: running membench\n", __func__); avm_mb_proc_complete_membench(&sf, NULL); BUG_ON(sf.count == sf.size); printk(KERN_ERR "%s", buffer); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ struct proc_dir_entry *proc_avm __attribute__((weak)); #ifdef CONFIG_MACH_AR934x static struct proc_dir_entry *ath_pctrace; #endif struct avm_mb_seq_info { const char *name; int (*show)(struct seq_file *, void *); struct file_operations *fops; union { void *data; void (*mb_print_func)(struct seq_file *, int, int, char *); }; struct proc_dir_entry *pde; }; static int avm_mb_proc_seq_open(struct inode *inode, struct file *file) { struct avm_mb_seq_info *si; si = PDE_DATA(inode); if (!si) { return -ENOENT; } return single_open(file, si->show, si->data); } static struct file_operations avm_mb_proc_seq_fops = { .open = avm_mb_proc_seq_open, .llseek = seq_lseek, .read = seq_read, .release = single_release, }; #ifdef CONFIG_MACH_AR934x static struct file_operations avm_mb_proc_atheros_hogging_fops = { .write = do_atheros_hogging, }; static struct file_operations avm_mb_proc_atheros_pctrace_fops = { .open = avm_mb_proc_seq_open, .llseek = seq_lseek, .read = seq_read, .release = single_release, .write = do_atheros_pctrace, }; #endif static struct avm_mb_seq_info avm_mb_seq_info[] = { { .name = "complete", .show = avm_mb_proc_complete_membench, }, { .name = "help", .show = avm_mb_proc_help, }, { .name = "do_read_extreme", .show = avm_mb_proc_generic_membench, .mb_print_func = print_read_extreme, }, { .name = "do_read_pipe", .show = avm_mb_proc_generic_membench, .mb_print_func = print_read_pipe, }, { .name = "do_read_write", .show = avm_mb_proc_generic_membench, .mb_print_func = print_readwrite, }, { .name = "do_write", .show = avm_mb_proc_generic_membench, .mb_print_func = print_write, }, { .name = "do_dettmer_read", .show = print_dettmer_read_info, }, { .name = "do_dettmer_write", .show = print_dettmer_write_info, }, { .name = "performance_index", .show = avm_mb_proc_performance_index, }, #ifdef CONFIG_MACH_AR934x { .name = "do_atheros_hogging", .fops = avm_mb_proc_atheros_hogging_fops, }, { .name = "do_atheros_pctrace", .show = avm_mb_proc_atheros_pctrace_read, .fops = avm_mb_proc_atheros_pctrace_fops, }, #endif { .name = NULL, } }; struct proc_dir_entry *proc_avm __attribute__((weak)); static int __init avm_membench_init(void) { struct avm_mb_seq_info *si; if (!proc_avm) proc_avm = proc_mkdir("avm", NULL); printk("[%s]\n",__FUNCTION__); for (si = &avm_mb_seq_info[0]; si->name; si++) { umode_t mode = 0; mode |= (si->show || (si->fops && si->fops->read)) ? 0444 : 0; mode |= (si->fops && si->fops->write) ? 0222 : 0; if (si->show && !si->fops) si->fops = &avm_mb_proc_seq_fops; si->pde = proc_create_data(si->name, mode, proc_avm, &avm_mb_proc_seq_fops, si->show ? si : si->data); } return 0; } static void avm_membench_exit(void) { struct avm_mb_seq_info *si; for (si = &avm_mb_seq_info[0]; si->name; si++) { if (si->pde) { remove_proc_entry(si->name, proc_avm); } } } module_init(avm_membench_init); module_exit(avm_membench_exit)