/******************************************************************************
**
** FILE NAME        : avm_membench.c
** AUTHOR (MIPS)    : Christoph Buettner & Heiko Blobner
** ADJUSTED for X86 : Mario Bahr
**
** Simple RAM benchmark; the results are published through proc entries.
*******************************************************************************/
/*
 * NOTE(review): the header names of the following include directives are not
 * present in this copy of the file (presumably lost during extraction) --
 * they must be restored before the file can be built.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*------------------------------------------------------------------------------------------*\
 * Geometry of the test buffer.
 *
 * BLOCK_ELEMENT_BYTE: number of bytes touched per pass of the unrolled inner loops.
 * TOTAL_BLOCKS:       size of the test buffer in KiB; users allocate and flush
 *                     TOTAL_BLOCKS * 1024 bytes.
\*------------------------------------------------------------------------------------------*/
#define BLOCK_ELEMENT_BYTE 64
#define TOTAL_BLOCKS (BLOCK_ELEMENT_BYTE * 8) /*--- in KiB ---*/
/* CPU clock as determined by x86_counter_init(); reported in MHz after dividing by 10^6. */
unsigned long cpu_takt;
#define CPU_CLOCK cpu_takt
/* Fixed bus/RAM clock used for the "ticks per word" figures. */
#define BUS_CLOCK (100UL * 1000 * 1000)
/* Word width in bits used for the words-per-second figures ("Wortbreite" = word width). */
#define WORTBREITE 32
/*
 * Despite its name this multiplies by the CPU clock, i.e. it yields the number
 * of CPU cycles that make up `a` seconds.
 */
#define CYCLE_TO_SEC(a) (CPU_CLOCK * (a))
/*--------------------------------------------------------------------------------*\
 * Determine the CPU clock and store it in cpu_takt.
 *
 * Takes the value of cpufreq_quick_get() for the current CPU and falls back to
 * cpu_khz when that returns 0; the result is scaled by 1000 (presumably both
 * sources report kHz, so cpu_takt ends up in Hz).
\*--------------------------------------------------------------------------------*/
static void x86_counter_init(void) {
    cpu_takt = cpufreq_quick_get(raw_smp_processor_id());
    if (!cpu_takt) {
        cpu_takt= cpu_khz;
    }
    cpu_takt *= 1000UL;
    /*--- printk(KERN_ERR"cpu_takt=%lu\n", cpu_takt); ---*/
}
/*--------------------------------------------------------------------------------*\
 * Return the current cycle counter value as delivered by rdtscl().
 * Callers store the result in u32 variables and only use differences.
\*--------------------------------------------------------------------------------*/
static inline unsigned long cpu_cycles(void) {
    register unsigned long cycle;
    rdtscl(cycle);
    return cycle;
}
/* Length of one measurement in CPU cycles ("Messlaenge" = measurement length): 1 second. */
#define MESS_LAENGE CYCLE_TO_SEC(1)
/* Whole seconds and remaining milliseconds of MESS_LAENGE ("Zeit" = time). */
#define ZEIT_S (MESS_LAENGE / (CYCLE_TO_SEC(1)))
#define ZEIT_MS ((MESS_LAENGE % (CYCLE_TO_SEC(1))) / ((CYCLE_TO_SEC(1)) / 1000))
/*
 * Result helpers. KB_PRO_SEC is not self-contained: it expands to an expression
 * that uses the identifiers `kb` and `loops`, which must exist at the place of
 * use (KiB transferred in total and number of measurement rounds).
 */
#define KB_PRO_SEC ((kb / loops) * 1000/(ZEIT_S * 1000 + ZEIT_MS))
/* Words of `wortbreite` bits transferred per second. */
#define WORTE_PRO_SEC(wortbreite) (KB_PRO_SEC * (1024 / (wortbreite / 8)))
/*
 * Bus clocks per transferred word: _1 is the integer part, _10 the three
 * digits printed after the decimal point (callers format it with %03lu).
 */
#define WORTE_PRO_CLOCK_1(wortbreite) (BUS_CLOCK / WORTE_PRO_SEC(wortbreite))
#define WORTE_PRO_CLOCK_10(wortbreite) ((BUS_CLOCK / (WORTE_PRO_SEC(wortbreite) / 1000)) % 1000)
/*------------------------------------------------------------------------------------------*\ * Pipeline-friendly Read * * -16x 4-byte-Werte pro Schleifendurchlauf * -> 16 Lesezugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ static unsigned long do_measure__read_pipe(char *mem, int irqsave, int loops) { int i; unsigned long flags; unsigned long kb = 0; x86_counter_init(); for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } /*--- dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ clflush_cache_range(mem, TOTAL_BLOCKS * 1024); do { u32 tick_value; u32 tick_value_end; register unsigned int p = (unsigned int)mem; register unsigned int p_end = p + (TOTAL_BLOCKS * 1024); tick_value = cpu_cycles(); for( ; p < p_end; p += BLOCK_ELEMENT_BYTE ) { /*--- 64 Byte pro Durchlauf ---*/ register int dummy asm("ax") ; __asm__ __volatile__ (" mov %0, 0(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 4(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 8(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 12(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 16(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 20(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 24(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 28(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 32(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 36(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 40(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 44(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 48(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 52(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov %0, 56(%1)\n" : "=r" (dummy) : "r" (p)); __asm__ 
__volatile__ (" mov %0, 60(%1)\n" : "=r" (dummy) : "r" (p)); } clflush_cache_range(mem, TOTAL_BLOCKS * 1024); /*--- dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ tick_value_end = cpu_cycles(); time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*------------------------------------------------------------------------------------------*\ * Extreme Read * * -16x 4-byte-Werte werden jeweils von 4 unterschiedlichen Adressen gelesen * -> 16*4 Lesezugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ //static unsigned long do_measure__read_extreme(int memsize_byte, int irqsave, int loops) { static unsigned long do_measure__read_extreme(char *mem, int irqsave, int loops) { int i; int x; unsigned int *local_mem[4]; unsigned long flags; unsigned long kb = 0; x86_counter_init(); for (x = 0; x < loops; x++){ u32 time_in_double_cpu_clocks = 0; u32 tick_value; u32 tick_value_end; if(irqsave) { local_irq_save(flags); } /*--- dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ clflush_cache_range(mem, TOTAL_BLOCKS * 1024); do { unsigned int p_end; for (i = 0; i < 4 ; i++) { local_mem[i] = (unsigned int*)(mem + (i * (TOTAL_BLOCKS * 1024 / 4))); } p_end = (unsigned int)local_mem[1]; tick_value = cpu_cycles(); for(; (unsigned int)local_mem[0] < p_end ; local_mem[0] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int), local_mem[1] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int), local_mem[2] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int), local_mem[3] += BLOCK_ELEMENT_BYTE / sizeof(unsigned int)) { #if 0 register int dummy0 asm("ax"); register int dummy1 asm("bx"); register int dummy2 asm("cx"); register int dummy3 asm("dx"); register unsigned int p0 = (unsigned int)local_mem[0]; register unsigned int p1 = (unsigned 
int)local_mem[1]; register unsigned int p2 = (unsigned int)local_mem[2]; register unsigned int p3 = (unsigned int)local_mem[3]; __asm__ __volatile__ (" mov %0, 0(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 0(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 0(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 0(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 4(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 4(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 4(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 4(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 8(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 8(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 8(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 8(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 12(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 12(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 12(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 12(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 16(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 16(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 16(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 16(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 20(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 20(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 20(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 20(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 24(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 24(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 
24(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 24(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 28(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 28(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 28(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 28(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 32(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 32(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 32(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 32(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 36(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 36(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 36(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 36(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 40(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 40(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 40(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 40(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 44(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 44(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 44(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 44(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 48(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 48(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 48(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 48(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 52(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 52(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 52(%1)\n" : 
"=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 52(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 56(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 56(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 56(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 56(%1)\n" : "=r" (dummy3) : "r" (p3)); __asm__ __volatile__ (" mov %0, 60(%1)\n" : "=r" (dummy0) : "r" (p0)); __asm__ __volatile__ (" mov %0, 60(%1)\n" : "=r" (dummy1) : "r" (p1)); __asm__ __volatile__ (" mov %0, 60(%1)\n" : "=r" (dummy2) : "r" (p2)); __asm__ __volatile__ (" mov %0, 60(%1)\n" : "=r" (dummy3) : "r" (p3)); #else /*--------------------------------------------------------------------------------*\ a08: 8b 10 mov (%eax),%edx a0a: 8b 90 00 40 00 00 mov 0x4000(%eax),%edx a10: 8b 90 00 80 00 00 mov 0x8000(%eax),%edx a16: 8b 90 00 c0 00 00 mov 0xc000(%eax),%edx a1c: 8b 50 04 mov 0x4(%eax),%edx a1f: 8b 90 04 40 00 00 mov 0x4004(%eax),%edx a25: 8b 90 04 80 00 00 mov 0x8004(%eax),%edx a2b: 8b 90 04 c0 00 00 mov 0xc004(%eax),%edx a31: 8b 50 08 mov 0x8(%eax),%edx a34: 8b 90 08 40 00 00 mov 0x4008(%eax),%edx a3a: 8b 90 08 80 00 00 mov 0x8008(%eax),%edx a40: 8b 90 08 c0 00 00 mov 0xc008(%eax),%edx a46: 8b 50 0c mov 0xc(%eax),%edx a49: 8b 90 0c 40 00 00 mov 0x400c(%eax),%edx a4f: 8b 90 0c 80 00 00 mov 0x800c(%eax),%edx a55: 8b 90 0c c0 00 00 mov 0xc00c(%eax),%edx a5b: 8b 50 10 mov 0x10(%eax),%edx a5e: 8b 90 10 40 00 00 mov 0x4010(%eax),%edx a64: 8b 90 10 80 00 00 mov 0x8010(%eax),%edx a6a: 8b 90 10 c0 00 00 mov 0xc010(%eax),%edx a70: 8b 50 14 mov 0x14(%eax),%edx a73: 8b 90 14 40 00 00 mov 0x4014(%eax),%edx a79: 8b 90 14 80 00 00 mov 0x8014(%eax),%edx a7f: 8b 90 14 c0 00 00 mov 0xc014(%eax),%edx a85: 8b 50 18 mov 0x18(%eax),%edx a88: 8b 90 18 40 00 00 mov 0x4018(%eax),%edx a8e: 8b 90 18 80 00 00 mov 0x8018(%eax),%edx a94: 8b 90 18 c0 00 00 mov 0xc018(%eax),%edx a9a: 8b 50 1c mov 0x1c(%eax),%edx a9d: 8b 90 1c 40 00 00 mov 
0x401c(%eax),%edx aa3: 8b 90 1c 80 00 00 mov 0x801c(%eax),%edx aa9: 8b 90 1c c0 00 00 mov 0xc01c(%eax),%edx aaf: 8b 50 20 mov 0x20(%eax),%edx ab2: 8b 90 20 40 00 00 mov 0x4020(%eax),%edx ab8: 8b 90 20 80 00 00 mov 0x8020(%eax),%edx abe: 8b 90 20 c0 00 00 mov 0xc020(%eax),%edx ac4: 8b 50 24 mov 0x24(%eax),%edx ac7: 8b 90 24 40 00 00 mov 0x4024(%eax),%edx acd: 8b 90 24 80 00 00 mov 0x8024(%eax),%edx ad3: 8b 90 24 c0 00 00 mov 0xc024(%eax),%edx ad9: 8b 50 28 mov 0x28(%eax),%edx adc: 8b 90 28 40 00 00 mov 0x4028(%eax),%edx ae2: 8b 90 28 80 00 00 mov 0x8028(%eax),%edx ae8: 8b 90 28 c0 00 00 mov 0xc028(%eax),%edx aee: 8b 50 2c mov 0x2c(%eax),%edx af1: 8b 90 2c 40 00 00 mov 0x402c(%eax),%edx af7: 8b 90 2c 80 00 00 mov 0x802c(%eax),%edx afd: 8b 90 2c c0 00 00 mov 0xc02c(%eax),%edx b03: 8b 50 30 mov 0x30(%eax),%edx b06: 8b 90 30 40 00 00 mov 0x4030(%eax),%edx b0c: 8b 90 30 80 00 00 mov 0x8030(%eax),%edx b12: 8b 90 30 c0 00 00 mov 0xc030(%eax),%edx b18: 8b 50 34 mov 0x34(%eax),%edx b1b: 8b 90 34 40 00 00 mov 0x4034(%eax),%edx b21: 8b 90 34 80 00 00 mov 0x8034(%eax),%edx b27: 8b 90 34 c0 00 00 mov 0xc034(%eax),%edx b2d: 8b 50 38 mov 0x38(%eax),%edx b30: 8b 90 38 40 00 00 mov 0x4038(%eax),%edx b36: 8b 90 38 80 00 00 mov 0x8038(%eax),%edx b3c: 8b 90 38 c0 00 00 mov 0xc038(%eax),%edx b42: 8b 50 3c mov 0x3c(%eax),%edx b45: 8b 90 3c 40 00 00 mov 0x403c(%eax),%edx b4b: 8b 90 3c 80 00 00 mov 0x803c(%eax),%edx b51: 8b 90 3c c0 00 00 mov 0xc03c(%eax),%edx b57: 83 c0 40 add $0x40,%eax b5a: 39 d8 cmp %ebx,%eax b5c: 0f 85 a6 fe ff ff jne a08 \*--------------------------------------------------------------------------------*/ register int dummy0 __attribute__((unused)); register int dummy1 __attribute__((unused)); register int dummy2 __attribute__((unused)); register int dummy3 __attribute__((unused)); register int p0 = (int)local_mem[0]; register int p1 = (int)local_mem[1]; register int p2 = (int)local_mem[2]; register int p3 = (int)local_mem[3]; dummy0 = ((volatile unsigned int *)p0)[ 
0]; dummy1 = ((volatile unsigned int *)p1)[ 0]; dummy2 = ((volatile unsigned int *)p2)[ 0]; dummy3 = ((volatile unsigned int *)p3)[ 0]; dummy0 = ((volatile unsigned int *)p0)[ 1]; dummy1 = ((volatile unsigned int *)p1)[ 1]; dummy2 = ((volatile unsigned int *)p2)[ 1]; dummy3 = ((volatile unsigned int *)p3)[ 1]; dummy0 = ((volatile unsigned int *)p0)[ 2]; dummy1 = ((volatile unsigned int *)p1)[ 2]; dummy2 = ((volatile unsigned int *)p2)[ 2]; dummy3 = ((volatile unsigned int *)p3)[ 2]; dummy0 = ((volatile unsigned int *)p0)[ 3]; dummy1 = ((volatile unsigned int *)p1)[ 3]; dummy2 = ((volatile unsigned int *)p2)[ 3]; dummy3 = ((volatile unsigned int *)p3)[ 3]; dummy0 = ((volatile unsigned int *)p0)[ 4]; dummy1 = ((volatile unsigned int *)p1)[ 4]; dummy2 = ((volatile unsigned int *)p2)[ 4]; dummy3 = ((volatile unsigned int *)p3)[ 4]; dummy0 = ((volatile unsigned int *)p0)[ 5]; dummy1 = ((volatile unsigned int *)p1)[ 5]; dummy2 = ((volatile unsigned int *)p2)[ 5]; dummy3 = ((volatile unsigned int *)p3)[ 5]; dummy0 = ((volatile unsigned int *)p0)[ 6]; dummy1 = ((volatile unsigned int *)p1)[ 6]; dummy2 = ((volatile unsigned int *)p2)[ 6]; dummy3 = ((volatile unsigned int *)p3)[ 6]; dummy0 = ((volatile unsigned int *)p0)[ 7]; dummy1 = ((volatile unsigned int *)p1)[ 7]; dummy2 = ((volatile unsigned int *)p2)[ 7]; dummy3 = ((volatile unsigned int *)p3)[ 7]; dummy0 = ((volatile unsigned int *)p0)[ 8]; dummy1 = ((volatile unsigned int *)p1)[ 8]; dummy2 = ((volatile unsigned int *)p2)[ 8]; dummy3 = ((volatile unsigned int *)p3)[ 8]; dummy0 = ((volatile unsigned int *)p0)[ 9]; dummy1 = ((volatile unsigned int *)p1)[ 9]; dummy2 = ((volatile unsigned int *)p2)[ 9]; dummy3 = ((volatile unsigned int *)p3)[ 9]; dummy0 = ((volatile unsigned int *)p0)[10]; dummy1 = ((volatile unsigned int *)p1)[10]; dummy2 = ((volatile unsigned int *)p2)[10]; dummy3 = ((volatile unsigned int *)p3)[10]; dummy0 = ((volatile unsigned int *)p0)[11]; dummy1 = ((volatile unsigned int *)p1)[11]; dummy2 = 
((volatile unsigned int *)p2)[11]; dummy3 = ((volatile unsigned int *)p3)[11]; dummy0 = ((volatile unsigned int *)p0)[12]; dummy1 = ((volatile unsigned int *)p1)[12]; dummy2 = ((volatile unsigned int *)p2)[12]; dummy3 = ((volatile unsigned int *)p3)[12]; dummy0 = ((volatile unsigned int *)p0)[13]; dummy1 = ((volatile unsigned int *)p1)[13]; dummy2 = ((volatile unsigned int *)p2)[13]; dummy3 = ((volatile unsigned int *)p3)[13]; dummy0 = ((volatile unsigned int *)p0)[14]; dummy1 = ((volatile unsigned int *)p1)[14]; dummy2 = ((volatile unsigned int *)p2)[14]; dummy3 = ((volatile unsigned int *)p3)[14]; dummy0 = ((volatile unsigned int *)p0)[15]; dummy1 = ((volatile unsigned int *)p1)[15]; dummy2 = ((volatile unsigned int *)p2)[15]; dummy3 = ((volatile unsigned int *)p3)[15]; #endif } clflush_cache_range(mem, TOTAL_BLOCKS * 1024); /*--- dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ tick_value_end = cpu_cycles(); time_in_double_cpu_clocks += ( tick_value_end - tick_value ); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("."); } printk("\n"); return kb ; } /*------------------------------------------------------------------------------------------*\ * Mixture Read/Write * * -1x 4-Byte Lesen + 1x 4-Byte Schreiben * -> 2 Zugriffe pro Schleifendurchlauf \*------------------------------------------------------------------------------------------*/ static unsigned long do_measure__read_mixture(char *mem, int irqsave, int loops) { int i; unsigned long flags; unsigned long kb = 0; x86_counter_init(); for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } /*--- dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ clflush_cache_range(mem, TOTAL_BLOCKS * 1024); do { u32 tick_value; u32 tick_value_end; volatile unsigned int p = (unsigned int )mem; register unsigned int p_end = p + (TOTAL_BLOCKS * 1024); tick_value = cpu_cycles(); 
p_end = (unsigned int)mem + (TOTAL_BLOCKS * 1024); for(; p < p_end ; p += sizeof(unsigned int)) { register int dummy asm("ax") ; __asm__ __volatile__ (" mov %0, (%1)\n" : "=r" (dummy) : "r" (p)); __asm__ __volatile__ (" mov (%0), %1 \n" : : "r" (p), "r" (dummy)); } clflush_cache_range(mem, TOTAL_BLOCKS * 1024); /*--- dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ tick_value_end = cpu_cycles(); time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static unsigned long do_measure__write(char *mem, int irqsave, int loops) { int i; unsigned long flags; unsigned long kb = 0; x86_counter_init(); for (i = 0; i < loops; i++){ u32 time_in_double_cpu_clocks = 0; if(irqsave) { local_irq_save(flags); } /*--- dma_cache_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ clflush_cache_range(mem, TOTAL_BLOCKS * 1024); do { register int dummy = 23; u32 tick_value; u32 tick_value_end; register unsigned int p = (unsigned int)mem; register unsigned int p_end = p + (TOTAL_BLOCKS * 1024); tick_value = cpu_cycles(); p_end = p + (TOTAL_BLOCKS * 1024); for(; p < p_end; p += BLOCK_ELEMENT_BYTE) { __asm__ __volatile__ (" mov 0(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 4(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 8(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 12(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 16(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 20(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 24(%0), %1 \n" : : "r" 
((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 28(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 32(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 36(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 40(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 44(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 48(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 52(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 56(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); __asm__ __volatile__ (" mov 60(%0), %1 \n" : : "r" ((unsigned int)p), "r" (dummy)); } clflush_cache_range(mem, TOTAL_BLOCKS * 1024); /*--- dma_cache_wback_inv((unsigned long)mem, TOTAL_BLOCKS * 1024); ---*/ tick_value_end = cpu_cycles(); time_in_double_cpu_clocks += (tick_value_end - tick_value); kb += TOTAL_BLOCKS; } while(time_in_double_cpu_clocks < MESS_LAENGE); if(irqsave) { local_irq_restore(flags); } printk("*"); } printk("\n"); return kb; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_head(char *buf, off_t off, int *len, int loops, int wortbreite) { *len += sprintf(buf + off + *len, "\n"); *len += sprintf(buf + off + *len, "\n"); *len += sprintf(buf + off + *len, "AVM-RAM-Benchmark\n"); *len += sprintf(buf + off + *len, "=============================================\n"); *len += sprintf(buf + off + *len, "IRQs: off (alle Tests mit deaktivierten IRQs)\n"); *len += sprintf(buf + off + *len, "CPU-Clock: %lu\n", CPU_CLOCK); *len += sprintf(buf + off + *len, "RAM-Clock: %lu (eff. 
Datentaktrate)\n", BUS_CLOCK); *len += sprintf(buf + off + *len, "BUS-Breite (Word=): %d Bit\n", wortbreite); *len += sprintf(buf + off + *len, "Measure-Time: %d * %lu.%lus\n\n", loops, ZEIT_S, ZEIT_MS); *len += sprintf(buf + off + *len, " -- Results --\n"); *len += sprintf(buf + off + *len, "=============================================================================\n"); *len += sprintf(buf + off + *len, " type | total read | loops | DDR-Ticks | %2dBit |\n", wortbreite); *len += sprintf(buf + off + *len, " | in kb | | /%2dBit | Worte/s | kB/s\n", wortbreite); *len += sprintf(buf + off + *len, "=============================================================================\n"); udelay(100); } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_read_pipe(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) { unsigned long kb; kb = do_measure__read_pipe(kmem, 1, loops); { *len += sprintf(buf + off + *len, "read | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC); *len += sprintf(buf + off + *len, " | | | | |\n"); *len += sprintf(buf + off + *len, "Lineares Lesen aus dem RAM\n"); *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n"); } } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_read_extreme(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) { unsigned long kb; //kb = do_measure__read_extreme(64*1024*1024, 1, loops); kb = do_measure__read_extreme(kmem, 1, loops); { *len += sprintf(buf + off + *len, "read | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n", 
kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC); *len += sprintf(buf + off + *len, " | | | | |\n"); *len += sprintf(buf + off + *len, "Die gelesenen Werte stehen im Speicher nicht hintereinander.\n"); *len += sprintf(buf + off + *len, "D.h. die CPU kann den Cache nicht nutzen.\n"); *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n"); } } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_readwrite(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) { unsigned long kb; kb = do_measure__read_mixture(kmem, 1, loops); { *len += sprintf(buf + off + *len, "read/write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC); *len += sprintf(buf + off + *len, " | | | | |\n"); *len += sprintf(buf + off + *len, "Immer schoen im Wechsel 1x Lesen und 1x Schreiben.\n"); *len += sprintf(buf + off + *len, "-----------------------------------------------------------------------------\n"); } } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static void print_write(char *buf, off_t off, int *len, int loops, int wortbreite, char *kmem) { unsigned long kb; kb = do_measure__write(kmem, 1, loops); { *len += sprintf(buf + off + *len, "write | %7lu | %1d | %5lu.%03lu | %9lu | %6lu\n", kb, loops, WORTE_PRO_CLOCK_1(wortbreite), WORTE_PRO_CLOCK_10(wortbreite), WORTE_PRO_SEC(wortbreite), KB_PRO_SEC); *len += sprintf(buf + off + *len, " | | | | |\n"); *len += sprintf(buf + off + *len, "Lineares Schreiben in den RAM.\n"); *len += sprintf(buf + off + *len, 
"-----------------------------------------------------------------------------\n"); } } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ static int do_complete_membench(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_read_pipe(buf, off, &len, loops, wortbreite, kmem); print_read_extreme(buf, off, &len, loops, wortbreite, kmem); print_readwrite(buf, off, &len, loops, wortbreite, kmem); print_write(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_help(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "AVM-RAM-Benchmark (HELP)\n"); len += sprintf(buf + off + len, "=============================================\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/complete -> Durchfuehrung aller Benchmarks\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/help -> Anzeige dieser Hilfe\n"); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_read_extreme -> Read Bench\n"); len += sprintf(buf + off + len, " Lese Bench fuer nicht-lineares Lesen.\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_read_pipe -> Read Bench (unter Nutzung von Load-Multiple)\n"); len += 
sprintf(buf + off + len, " Pipeline orientierter Lese benchmark\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_read_write -> Read/Schreib Bench\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_write -> Schreib Bench\n"); len += sprintf(buf + off + len, "cat /proc/avm/benchmark/do_burstwrite -> Schreib Bench (unter Nutzung von Store-Multiple)\n"); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_read_extreme(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_read_extreme(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_read_pipe(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_read_pipe(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ 
\*--------------------------------------------------------------------------------*/ static int do_read_write(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_readwrite(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int do_write(char *buf, char **start, off_t off, int count, int *eof, void *data) { int len = 0; int loops = 1; int wortbreite = WORTBREITE; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { print_head(buf, off, &len, loops, wortbreite); print_write(buf, off, &len, loops, wortbreite, kmem); len += sprintf(buf + off + len, "\n"); len += sprintf(buf + off + len, "\n"); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ static int performance_index(char *buf, char **start, off_t off, int count, int *eof, void *data) { #define KB_VALUE_PRO_SEC(x) ((x / loops) * 1000/(ZEIT_S * 1000 + ZEIT_MS)) int len = 0; int loops = 1; char *kmem = kmalloc(TOTAL_BLOCKS * 1024, GFP_ATOMIC); if(kmem) { unsigned long kb_r_burst; unsigned long kb_w_burst; unsigned long kb_rw; unsigned long kb_r; unsigned int irqsave = 1; kb_r_burst = do_measure__read_pipe(kmem, irqsave, loops); kb_w_burst = do_measure__write(kmem, irqsave, loops); kb_rw = do_measure__read_mixture(kmem, irqsave, loops); kb_r = 
do_measure__read_extreme(kmem, irqsave, loops); len += sprintf(buf + off + len, "Performance-Index: %lu\n", KB_VALUE_PRO_SEC(kb_r_burst)/1000*10 + KB_VALUE_PRO_SEC(kb_w_burst)/1000*10 + KB_VALUE_PRO_SEC(kb_rw)/1000*1 + KB_VALUE_PRO_SEC(kb_r)/1000*1 ); len += sprintf(buf + off + len, "CPU-Clock: %lu MHz\n", CPU_CLOCK/(1000*1000)); len += sprintf(buf + off + len, "RAM-Clock: %lu MHz\n", BUS_CLOCK/(1000*1000)); kfree(kmem); } else { len += sprintf(buf + off + len, "No memory for test\n"); } *eof = 1; return len; } /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ void early_membench(void) { static char buffer[1024]; int eof; int len; printk( KERN_ERR "running membench\n"); len = do_complete_membench(buffer, NULL, 0, 0, &eof, NULL); BUG_ON(len >= 1024); buffer[len] = '\0'; printk( KERN_ERR "%s", buffer); } #define PROC_BENCHDIR "avm/benchmark" static struct proc_dir_entry *benchprocdir; /*------------------------------------------------------------------------------------------*\ \*------------------------------------------------------------------------------------------*/ int __init avm_membench_init(void) { x86_counter_init(); benchprocdir = proc_mkdir(PROC_BENCHDIR, NULL); if(benchprocdir == NULL) { return -ENOMEM; } create_proc_read_entry("complete", 0444, benchprocdir, do_complete_membench, NULL); create_proc_read_entry("help", 0444, benchprocdir, do_help, NULL); create_proc_read_entry("do_read_extreme", 0444, benchprocdir, do_read_extreme, NULL); create_proc_read_entry("do_read_pipe", 0444, benchprocdir, do_read_pipe, NULL); create_proc_read_entry("do_read_write", 0444, benchprocdir, do_read_write, NULL); create_proc_read_entry("do_write", 0444, benchprocdir, do_write, NULL); create_proc_read_entry("performance_index",0444, benchprocdir, performance_index, NULL); /*--- early_membench(); ---*/ return 0; } 
/*--------------------------------------------------------------------------------*\ \*--------------------------------------------------------------------------------*/ void avm_membench_exit(void) { if(benchprocdir) { remove_proc_entry("complete", benchprocdir); remove_proc_entry("help", benchprocdir); remove_proc_entry("do_read_extreme", benchprocdir); remove_proc_entry("do_read_pipe", benchprocdir); remove_proc_entry("do_read_write", benchprocdir); remove_proc_entry("performance_index", benchprocdir); remove_proc_entry("benchmark", benchprocdir); benchprocdir = NULL; } } module_init(avm_membench_init); module_exit(avm_membench_exit)