/****************************************************************************** ** ** FILE NAME : avm_membench.c ** AUTHOR (MIPS): Christoph Buettner & Heiko Blobner ** ADJUSTED for ARM: Mario Bahr *******************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ #define BLOCK_ELEMENT_BYTE 64 #define TOTAL_BLOCKS (BLOCK_ELEMENT_BYTE * 1) /*--- in KiB ---*/ #if defined(CONFIG_MACH_PUMA6) #include #define CPU_CLOCK (450 * 1000 * 1000) #define BUS_CLOCK (250 * 1000 * 1000) #define WORTBREITE 32 #elif defined(CONFIG_ARCH_PUMA5) || defined(CONFIG_MACH_PUMA5) #define CPU_CLOCK (400 * 1000 * 1000) #define BUS_CLOCK (200 * 1000 * 1000) #define WORTBREITE 16 #else #error "Unknown Architecture!!!" #endif static unsigned long Cycle_Shift; #define CYCLE_TO_SEC(a) ((a) * ((Cycle_Shift) ? CPU_CLOCK / 2 : CPU_CLOCK)) /*--------------------------------------------------------------------------------*\ * der performance-Counter wird auch fuer get_cycles() verwendet! 
\*--------------------------------------------------------------------------------*/ static void arm_performance_counter_init(void) { union __performance_monitor_control C; write_secure_debug_enable_register(0, 1); C.Register = read_p15_performance_monitor_control(); if(C.Bits.EnableCounters && C.Bits.CycleCounterDivider) { Cycle_Shift = 5; return; } C.Bits.CycleCounterDivider = 0; /*--- entspricht damit CPU-Takt ---*/ C.Bits.EnableCounters = 1; write_p15_performance_monitor_control(C.Register); /*--- printk(KERN_INFO"%s: enable cycle_count performance-monitor-register: %x\n", __func__, read_p15_performance_monitor_control()); ---*/ } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ #define arm_cpu_cycles() (read_p15_cycle_counter() << Cycle_Shift) #define MESS_LAENGE CYCLE_TO_SEC(1) #define ZEIT_S (MESS_LAENGE / (CYCLE_TO_SEC(1))) #define ZEIT_MS ((MESS_LAENGE % (CYCLE_TO_SEC(1))) / ((CYCLE_TO_SEC(1)) / 1000)) #define KB_PRO_SEC ((kb / loops) * 1000/(ZEIT_S * 1000 + ZEIT_MS)) #define WORTE_PRO_SEC(wortbreite) (KB_PRO_SEC * (1024 / (wortbreite / 8))) #define WORTE_PRO_CLOCK_1(wortbreite) (BUS_CLOCK / WORTE_PRO_SEC(wortbreite)) #define WORTE_PRO_CLOCK_10(wortbreite) ((BUS_CLOCK / (WORTE_PRO_SEC(wortbreite) / 1000)) % 1000) /*------------------------------------------------------------------------------------------*\ * Pipeline-friendly Read * * -16x 4-byte-Werte pro Schleifendurchlauf * -> 16 Lesezugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ static unsigned long do_measure__read_pipe(char *mem, int irqsave, int loops) { int i; unsigned long flags; unsigned long kb = 0; arm_performance_counter_init(); for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); do { u32 
tick_value; u32 tick_value_end; register int p = (unsigned int)mem; register int p_end = p + (TOTAL_BLOCKS * 1024); tick_value = arm_cpu_cycles(); /*--------------------------------------------------------------------------------*\ * So sieht Schleife aus: f48: e8bc00ff ldm ip!, {r0, r1, r2, r3, r4, r5, r6, r7} f4c: e8bc00ff ldm ip!, {r0, r1, r2, r3, r4, r5, r6, r7} f50: e24ee001 sub lr, lr, #1 f54: e6ffe07e uxth lr, lr f58: e35e0000 cmp lr, #0 f5c: 1afffff9 bne f48 \*--------------------------------------------------------------------------------*/ while(p < p_end) { /*--- 64 Byte pro Durchlauf ---*/ register int dummy0 asm("r0") ; register int dummy1 asm("r1") ; register int dummy2 asm("r2") ; register int dummy3 asm("r3") ; register int dummy4 asm("r4") ; register int dummy5 asm("r5") ; register int dummy6 asm("r6") ; register int dummy7 asm("r7") ; __asm__ __volatile__ (" ldmia %8!, { %0,%1,%2,%3,%4,%5,%6,%7 }\n" : "=r" (dummy0), "=r" (dummy1), "=r" (dummy2), "=r" (dummy3), "=r" (dummy4), "=r" (dummy5), "=r" (dummy6), "=r" (dummy7), "+r" (p) : ); __asm__ __volatile__ (" ldmia %8!, { %0,%1,%2,%3,%4,%5,%6,%7 }\n" : "=r" (dummy0), "=r" (dummy1), "=r" (dummy2), "=r" (dummy3), "=r" (dummy4), "=r" (dummy5), "=r" (dummy6), "=r" (dummy7), "+r" (p) : ); } dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); tick_value_end = arm_cpu_cycles(); time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*------------------------------------------------------------------------------------------*\ * Extreme Read * * -16x 4-byte-Werte werden jeweils von 4 unterschiedlichen Adressen gelesen * -> 16*4 Lesezugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ //static unsigned long do_measure__read_extreme(int memsize_byte, int irqsave, int 
//   loops) {     <- tail of the disabled legacy prototype started on the previous line
/*
 * do_measure__read_extreme() - read benchmark with four interleaved streams.
 *
 * The buffer is split into four quarters. Four pointers start at the beginning
 * of each quarter and advance in lock-step by BLOCK_ELEMENT_BYTE until the
 * first pointer reaches the start of the second quarter. Per iteration every
 * word offset 0..60 is loaded from each of the four blocks in turn.
 *
 * mem:     buffer of TOTAL_BLOCKS * 1024 bytes
 * irqsave: non-zero -> interrupts are disabled for each measurement window
 * loops:   number of measurement windows (each lasts MESS_LAENGE counter units)
 *
 * Returns the total amount of data read, in KiB. Prints one '.' per window.
 * The cache write-back/invalidate after each pass is inside the timed region.
 */
static unsigned long do_measure__read_extreme(char *mem, int irqsave, int loops) {
    int i;
    int x;
    unsigned int *local_mem[4];
    unsigned long flags;
    unsigned long kb = 0;

    arm_performance_counter_init();

    for (x = 0; x < loops; x++){
        u32 time_in_double_cpu_clocks = 0;
        u32 tick_value;
        u32 tick_value_end;

        if(irqsave) { local_irq_save(flags); }

        dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
        do {
            unsigned int p_end;
            /* (re)start the four streams at the quarter boundaries */
            for (i = 0; i < 4 ; i++) {
                local_mem[i] = (unsigned int*)(mem + (i * (TOTAL_BLOCKS * 1024 / 4)));
            }
            /* stream 0 stops where stream 1 started */
            p_end = (unsigned int)local_mem[1];

            tick_value = arm_cpu_cycles();
            for(; (unsigned int)local_mem[0] < p_end ;
                    local_mem[0] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int),
                    local_mem[1] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int),
                    local_mem[2] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int),
                    local_mem[3] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int)) {
                register int dummy0 asm("r0");
                register int dummy1 asm("r1");
                register int dummy2 asm("r2");
                register int dummy3 asm("r3");
                register unsigned int p0 = (unsigned int)local_mem[0];
                register unsigned int p1 = (unsigned int)local_mem[1];
                register unsigned int p2 = (unsigned int)local_mem[2];
                register unsigned int p3 = (unsigned int)local_mem[3];

                __asm__ __volatile__ (" ldr %0, [%1, #0]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #0]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #0]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #0]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #4]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #4]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #4]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #4]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #8]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #8]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #8]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #8]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #12]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #12]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #12]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #12]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #16]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #16]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #16]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #16]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #20]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #20]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #20]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #20]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #24]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #24]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #24]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #24]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #28]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #28]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #28]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #28]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #32]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #32]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #32]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #32]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #36]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #36]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #36]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #36]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #40]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #40]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #40]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #40]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #44]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #44]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #44]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #44]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #48]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #48]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #48]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #48]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #52]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #52]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #52]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #52]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #56]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #56]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #56]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #56]\n" : "=r" (dummy3) : "r" (p3));

                __asm__ __volatile__ (" ldr %0, [%1, #60]\n" : "=r" (dummy0) : "r" (p0));
                __asm__ __volatile__ (" ldr %0, [%1, #60]\n" : "=r" (dummy1) : "r" (p1));
                __asm__ __volatile__ (" ldr %0, [%1, #60]\n" : "=r" (dummy2) : "r" (p2));
                __asm__ __volatile__ (" ldr %0, [%1, #60]\n" : "=r" (dummy3) : "r" (p3));
            }
            dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
            tick_value_end = arm_cpu_cycles();

            time_in_double_cpu_clocks += ( tick_value_end - tick_value );
            kb += TOTAL_BLOCKS;
        } while(time_in_double_cpu_clocks < MESS_LAENGE);

        if(irqsave) { local_irq_restore(flags); }
        printk(".");
    }
    printk("\n");

    return kb ;
}

/*------------------------------------------------------------------------------------------*\
 * Mixed read/write
 *
 *  - every 4-byte word is loaded and immediately stored back to the same address
 *  - the loop body handles four consecutive words, i.e. 4 loads + 4 stores
 *    (16 bytes) per loop iteration
 *
 * mem:     buffer of TOTAL_BLOCKS * 1024 bytes
 * irqsave: non-zero -> interrupts are disabled for each measurement window
 * loops:   number of measurement windows (each lasts MESS_LAENGE counter units)
 *
 * Returns the amount of buffer data processed, in KiB (TOTAL_BLOCKS per pass).
\*------------------------------------------------------------------------------------------*/
static unsigned long do_measure__read_mixture(char *mem, int irqsave, int loops) {
    int i;
    unsigned long flags;
    unsigned long kb = 0;

    arm_performance_counter_init();

    for (i = 0; i < loops; i++){
        u32 time_in_double_cpu_clocks = 0;

        if(irqsave) { local_irq_save(flags); }

        dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
        do {
            u32 tick_value;
            u32 tick_value_end;
            register unsigned int p = (unsigned int)mem;
            register unsigned int p_end = (unsigned int)mem + (TOTAL_BLOCKS * 1024);

            tick_value = arm_cpu_cycles();
            for(; p < p_end ; p += sizeof(unsigned int) * 4) {
                register int dummy0 asm("r0") ;
                __asm__ __volatile__ (" ldr %0, [%1, #0] \n" : "=r" (dummy0) : "r" (p));
                __asm__ __volatile__ (" str %0, [%1, #0] \n" : : "r" (dummy0), "r" (p) );
                __asm__ __volatile__ (" ldr %0, [%1, #4] \n" : "=r" (dummy0) : "r" (p));
                __asm__ __volatile__ (" str %0, [%1, #4] \n" : : "r" (dummy0), "r" (p) );
                __asm__ __volatile__ (" ldr %0, [%1, #8] \n" : "=r" (dummy0) : "r" (p));
                __asm__ __volatile__ (" str %0, [%1, #8] \n" : : "r" (dummy0), "r" (p) );
                __asm__ __volatile__ (" ldr %0, [%1, #12] \n" : "=r" (dummy0) : "r" (p));
                __asm__ __volatile__ (" str %0, [%1, #12] \n" : : "r" (dummy0), "r" (p) );
            }
            dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
            tick_value_end = arm_cpu_cycles();

            time_in_double_cpu_clocks += (tick_value_end - tick_value);
            kb += TOTAL_BLOCKS;
        } while(time_in_double_cpu_clocks < MESS_LAENGE);

        if(irqsave) { local_irq_restore(flags); }
        printk("*");
    }
    printk("\n");

    return kb;
}

/*--------------------------------------------------------------------------------*\
 * Simple write
 *
 *  - one 4-byte store per instruction (single-register stmia with write-back)
 *  - the loop body is unrolled 16 times -> 64 bytes per loop iteration
 *
 * mem:     buffer of TOTAL_BLOCKS * 1024 bytes that is overwritten
 * irqsave: non-zero -> interrupts are disabled for each measurement window
 * loops:   number of measurement windows (each lasts MESS_LAENGE counter units)
 *
 * Returns the total amount of data written, in KiB.
\*--------------------------------------------------------------------------------*/
static unsigned long do_measure__write(char *mem, int irqsave, int loops) {
    int i;
    unsigned long flags;
    unsigned long kb = 0;

    arm_performance_counter_init();

    for (i = 0; i < loops; i++){
        u32 time_in_double_cpu_clocks = 0;

        if(irqsave) { local_irq_save(flags); }

        dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
        do {
            u32 tick_value;
            u32 tick_value_end;
            register unsigned int p = (unsigned int)mem;
            register unsigned int p_end = p + (TOTAL_BLOCKS * 1024);

            tick_value = arm_cpu_cycles();
            while(p < p_end) {
                register int dummy0 = 23;
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
                __asm__ __volatile__ (" stmia %0!, { %1 }\n" : "+r" (p) : "r" (dummy0));
            }
            dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
            tick_value_end = arm_cpu_cycles();

            time_in_double_cpu_clocks += (tick_value_end - tick_value);
            kb += TOTAL_BLOCKS;
        } while(time_in_double_cpu_clocks < MESS_LAENGE);

        if(irqsave) { local_irq_restore(flags); }
        printk("*");
    }
    printk("\n");

    return kb;
}

/*------------------------------------------------------------------------------------------*\
 * Burst write
 *
 *  - two store-multiple instructions of eight registers each per loop iteration
 *    -> 64 bytes per loop iteration
 *
 * mem:     buffer of TOTAL_BLOCKS * 1024 bytes that is overwritten
 * irqsave: non-zero -> interrupts are disabled for each measurement window
 * loops:   number of measurement windows (each lasts MESS_LAENGE counter units)
 *
 * Returns the total amount of data written, in KiB.
 *
 * NOTE(review): unlike the ldmia kernel, the source registers are not pinned,
 * so the order of the stmia register list is up to the register allocator.
\*------------------------------------------------------------------------------------------*/
static unsigned long do_measure__writeburst(char *mem, int irqsave, int loops) {
    int i;
    unsigned long flags;
    unsigned long kb = 0;

    arm_performance_counter_init();

    for (i = 0; i < loops; i++){
        u32 time_in_double_cpu_clocks = 0;

        if(irqsave) { local_irq_save(flags); }

        dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
        do {
            u32 tick_value;
            u32 tick_value_end;
            register unsigned int p = (unsigned int)mem;
            register unsigned int p_end = p + (TOTAL_BLOCKS * 1024);

            tick_value = arm_cpu_cycles();
/*--------------------------------------------------------------------------------*\
 * This is what the loop looks like in assembler (great!):
 540:   e8a35273    stmia   r3!, {r0, r1, r4, r5, r6, r9, ip, lr}
 544:   e8a35273    stmia   r3!, {r0, r1, r4, r5, r6, r9, ip, lr}
 548:   e2422001    sub r2, r2, #1
 54c:   e6ff2072    uxth    r2, r2
 550:   e3520000    cmp r2, #0
 554:   1afffff9    bne 540
\*--------------------------------------------------------------------------------*/
            while(p < p_end) {
                register int dummy0 = 23;
                register int dummy1 = 24;
                register int dummy2 = 25;
                register int dummy3 = 26;
                register int dummy4 = 27;
                register int dummy5 = 28;
                register int dummy6 = 29;
                register int dummy7 = 30;
                __asm__ __volatile__ (" stmia %0!, { %1,%2,%3,%4,%5,%6,%7,%8 }\n"
                        : "+r" (p)
                        : "r" (dummy0), "r" (dummy1), "r" (dummy2), "r" (dummy3),
                          "r" (dummy4), "r" (dummy5), "r" (dummy6), "r" (dummy7) );
                __asm__ __volatile__ (" stmia %0!, { %1,%2,%3,%4,%5,%6,%7,%8 }\n"
                        : "+r" (p)
                        : "r" (dummy0), "r" (dummy1), "r" (dummy2), "r" (dummy3),
                          "r" (dummy4), "r" (dummy5), "r" (dummy6), "r" (dummy7) );
            }
            dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024);
            tick_value_end = arm_cpu_cycles();

            time_in_double_cpu_clocks += (tick_value_end - tick_value);
            kb += TOTAL_BLOCKS;
        } while(time_in_double_cpu_clocks < MESS_LAENGE);

        if(irqsave) { local_irq_restore(flags); }
        printk("*");
    }
    printk("\n");

    return kb;
}

/*------------------------------------------------------------------------------------------*\
 * print_head() - append the report header (clocks, bus width, measurement
 * time, table heading) to buf.
 *
 * Text is written at buf + off + *len and *len is advanced by the number of
 * characters written. There is no bounds check; the caller must provide a
 * buffer that is large enough.
\*------------------------------------------------------------------------------------------*/
static void print_head(char *buf, off_t off, int *len, int loops, int wortbreite) {
    *len += sprintf(buf + off + *len, "\n");
    *len += sprintf(buf + off + *len, "\n");
    *len += sprintf(buf + off + *len, "AVM-RAM-Benchmark\n");
    *len += sprintf(buf + off + *len, "=============================================\n");
    *len += sprintf(buf + off + *len, "IRQs: off (alle Tests mit deaktivierten IRQs)\n");
    *len += sprintf(buf + off + *len, "CPU-Clock: %u\n", CPU_CLOCK);
    *len += sprintf(buf + off + *len, "RAM-Clock: %u (eff. Datentaktrate)\n", BUS_CLOCK);
    *len += sprintf(buf + off + *len, "BUS-Breite (Word=): %d Bit\n", wortbreite);
    *len += sprintf(buf + off + *len, "Measure-Time: %d * %d.%ds\n\n", loops, ZEIT_S, ZEIT_MS);
    *len += sprintf(buf + off + *len, " -- Results --\n");
    *len += sprintf(buf + off + *len, "=============================================================================\n");
    *len += sprintf(buf + off + *len, " type | total read | loops | DDR-Ticks | %2dBit |\n", wortbreite);
    *len += sprintf(buf + off + *len, " | in kb | | /%2dBit | Worte/s | kB/s\n", wortbreite);
    *len += sprintf(buf + off + *len, "=============================================================================\n");
    udelay(100);
}

/*------------------------------------------------------------------------------------------*\
 * print_read_pipe() - run the load-multiple read benchmark (interrupts
 * disabled) and append its result row plus a short description to buf.
 *
 * The local must be called 'kb' and the parameter 'loops': KB_PRO_SEC and the
 * macros built on it refer to those identifiers by name.
\*------------------------------------------------------------------------------------------*/
static void print_read_pipe(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) {
    unsigned long kb;

    kb = do_measure__read_pipe(kmem, 1, loops);
    {
        *len += sprintf(buf + off + *len, "read | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n",
                kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC);
        *len += sprintf(buf + off + *len, " | | | | |\n");
        *len += sprintf(buf + off + *len, "Burstartiges Lesen aus dem RAM unter Nutzung von load multiple.\n");
        *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n");
    }
}

/*------------------------------------------------------------------------------------------*\
 * print_read_extreme() - run the four-stream read benchmark (interrupts
 * disabled) and append its result row plus a short description to buf.
 *
 * 'kb' and 'loops' are picked up by name by the KB_PRO_SEC macro family.
\*------------------------------------------------------------------------------------------*/
static void print_read_extreme(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) {
    unsigned long kb;

    /* disabled legacy call that passed a size in bytes instead of a buffer: */
    //kb = do_measure__read_extreme(64*1024*1024, 1, loops);
    kb = do_measure__read_extreme(kmem, 1, loops);
    {
        *len += sprintf(buf + off + *len, "read | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n",
                kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC);
        *len += sprintf(buf + off + *len, " | | | | |\n");
        *len += sprintf(buf + off + *len, "Die gelesenen Werte stehen im Speicher nicht hintereinander.\n");
        *len += sprintf(buf + off + *len, "D.h. die CPU kann den Cache nicht nutzen.\n");
        *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n");
    }
}

/*------------------------------------------------------------------------------------------*\
 * print_readwrite() - run the alternating load/store benchmark (interrupts
 * disabled) and append its result row plus a short description to buf.
 *
 * 'kb' and 'loops' are picked up by name by the KB_PRO_SEC macro family.
\*------------------------------------------------------------------------------------------*/
static void print_readwrite(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) {
    unsigned long kb;

    kb = do_measure__read_mixture(kmem, 1, loops);
    {
        *len += sprintf(buf + off + *len, "read/write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n",
                kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC);
        *len += sprintf(buf + off + *len, " | | | | |\n");
        *len += sprintf(buf + off + *len, "Immer schoen im Wechsel 1x Lesen und 1x Schreiben.\n");
        *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n");
    }
}

/*------------------------------------------------------------------------------------------*\
 * print_write() - run the single-word store benchmark (interrupts disabled)
 * and append its result row plus a short description to buf.
 *
 * 'kb' and 'loops' are picked up by name by the KB_PRO_SEC macro family.
\*------------------------------------------------------------------------------------------*/
static void print_write(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) {
    unsigned long kb;

    kb = do_measure__write(kmem, 1, loops);
    {
        *len += sprintf(buf + off + *len, "write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n",
                kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC);
        *len += sprintf(buf + off + *len, " | | | | |\n");
        *len += sprintf(buf + off + *len, "Einfaches Schreiben (Cache-Nutzung).\n");
        *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n");
    }
}

/*------------------------------------------------------------------------------------------*\
 * print_writeburst() - run the store-multiple benchmark (interrupts disabled)
 * and append its result row plus a short description to buf.
 *
 * 'kb' and 'loops' are picked up by name by the KB_PRO_SEC macro family.
\*------------------------------------------------------------------------------------------*/
static void print_writeburst(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) {
    unsigned long kb;

    kb = do_measure__writeburst(kmem, 1, loops);
    {
        *len += sprintf(buf + off + *len, "write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n",
                kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC);
        *len += sprintf(buf + off + *len, " | | | | |\n");
        *len += sprintf(buf + off + *len, "Burst-Schreiben unter Nutzung von store multiple.\n");
        *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n");
    }
}

/*------------------------------------------------------------------------------------------*\
 * do_complete_membench() - produce the full report: header followed by all
 * five benchmarks, one measurement window each.
 *
 * buf/off: output is written starting at buf + off
 * eof:     set to 1 before returning
 * start, count, data: not used by this function
 *
 * Returns the number of characters written. If the TOTAL_BLOCKS * 1024 byte
 * work buffer cannot be allocated, only "No memory for test" is written.
 *
 * NOTE(review): output is produced with unbounded sprintf(); the caller's
 * buffer must be able to hold the whole report.
\*------------------------------------------------------------------------------------------*/
static int do_complete_membench(char *buf, char **start, off_t off, int count, int *eof, void *data) {
    int len = 0;
    int loops = 1;
    int wortbreite = WORTBREITE;
    char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC);

    if(kmem) {
        print_head(buf, off, &len, loops, wortbreite);
        print_read_pipe(buf, off, &len, loops, wortbreite, kmem);
        print_read_extreme(buf, off, &len, loops, wortbreite, kmem);
        print_readwrite(buf, off, &len, loops, wortbreite, kmem);
        print_write(buf, off, &len, loops, wortbreite, kmem);
        print_writeburst(buf, off, &len, loops, wortbreite, kmem);
        len += sprintf(buf + off + len, "\n");
        len += sprintf(buf + off + len, "\n");
        kfree(kmem);
    } else {
        len += sprintf(buf + off + len, "No memory for test\n");
    }

    *eof = 1;
    return len;
}

/*--------------------------------------------------------------------------------*\
\*--------------------------------------------------------------------------------*/ static int do_help(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "AVM-RAM-Benchmark (HELP)\n"); len += sprintf(buf + off + len, "=============================================\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/complete -> Durchfuehrung aller Benchmarks\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/help -> Anzeige dieser Hilfe\n"); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_read_extreme -> Read Bench\n"); len += sprintf(buf + off + len, " Lese Bench fuer nicht-lineares Lesen.\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_read_pipe -> Read Bench (unter Nutzung von Load-Multiple)\n"); len += sprintf(buf + off + len, " Pipeline orientierter Lese benchmark\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_read_write -> Read/Schreib Bench\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_write -> Schreib Bench\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_burstwrite -> Schreib Bench (unter Nutzung von Store-Multiple)\n"); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_read_extreme(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_read_extreme(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + 
len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_read_pipe(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_read_pipe(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_read_write(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_readwrite(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_write(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_write(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len 
+= sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_writeburst(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_writeburst(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int performance_index(char *buf, char **start, off_t off, int count, int *eof, void *data) { #define KB_VALUE_PRO_SEC(x) ((x / loops) * 1000/(ZEIT_S * 1000 + ZEIT_MS)) int len = 0; int loops = 1; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { unsigned long kb_r_burst; unsigned long kb_w_burst; unsigned long kb_w_burst_enh; unsigned long kb_rw; unsigned long kb_r; unsigned int irqsave = 1; kb_r_burst = do_measure__read_pipe(kmem, irqsave, loops); kb_w_burst = do_measure__write(kmem, irqsave, loops); kb_w_burst_enh = do_measure__writeburst(kmem, irqsave, loops); kb_rw = do_measure__read_mixture(kmem, irqsave, loops); kb_r = do_measure__read_extreme(kmem, irqsave, loops); len += sprintf(buf + off + len, "Performance-Index: %lu\n", KB_VALUE_PRO_SEC(kb_r_burst)/1000*10 + KB_VALUE_PRO_SEC(kb_w_burst)/1000*8 + KB_VALUE_PRO_SEC(kb_w_burst_enh)/1000*2 + KB_VALUE_PRO_SEC(kb_rw)/1000*1 + KB_VALUE_PRO_SEC(kb_r)/1000*1 ); len += sprintf(buf + off + len, "CPU-Clock: %u MHz\n", 
CPU_CLOCK/(1000*1000)); len += sprintf(buf + off + len, "RAM-Clock: %u MHz\n", BUS_CLOCK/(1000*1000)); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ void early_membench(void) { static char buffer[1024]; int eof; int len; printk( KERN_ERR "running membench\n"); len = do_complete_membench(buffer, NULL, 0, 0, &eof, NULL); BUG_ON(len >= 1024); buffer[len] = '\0'; printk( KERN_ERR "%s", buffer); } #define PROC_BENCHDIR "avm/benchmark" static struct proc_dir_entry *benchprocdir; /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ int __init avm_membench_init(void) { benchprocdir = proc_mkdir(PROC_BENCHDIR, NULL); if(benchprocdir == NULL) { return -ENOMEM; } create_proc_read_entry("complete", 0444, benchprocdir, do_complete_membench, NULL); create_proc_read_entry("help", 0444, benchprocdir, do_help, NULL); create_proc_read_entry("do_read_extreme", 0444, benchprocdir, do_read_extreme, NULL); create_proc_read_entry("do_read_pipe", 0444, benchprocdir, do_read_pipe, NULL); create_proc_read_entry("do_read_write", 0444, benchprocdir, do_read_write, NULL); create_proc_read_entry("do_write", 0444, benchprocdir, do_write, NULL); create_proc_read_entry("do_writeburst", 0444, benchprocdir, do_writeburst, NULL); create_proc_read_entry("performance_index",0444, benchprocdir, performance_index, NULL); return 0; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ void avm_membench_exit(void) { if(benchprocdir) { remove_proc_entry("complete", benchprocdir); remove_proc_entry("help", 
benchprocdir); remove_proc_entry("do_read_extreme", benchprocdir); remove_proc_entry("do_read_pipe", benchprocdir); remove_proc_entry("do_read_write", benchprocdir); remove_proc_entry("do_writeburst", benchprocdir); remove_proc_entry("performance_index", benchprocdir); remove_proc_entry("benchmark", benchprocdir); benchprocdir = NULL; } } module_init(avm_membench_init); module_exit(avm_membench_exit)