/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * * r5432.c: NEC Vr5432 processor. We cannot use r4xx0.c because of * its unique way-selection method for indexed operations. * * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle (ralf@gnu.org) * Copyright (C) 2000 Jun Sun (jsun@mvista.com) * */ /* * [jsun] * In a sense, this is really silly. We cannot re-use r4xx0.c because * the lowest-level indexed cache operation does not take way-selection * into account. So all what I am doing here is to copy all the funcs * and macros (in r4kcache.h) relavent to R5432 to this file, and then * modify a few indexed cache operations. *sigh* */ #include #include #include #include #include #include #include #include #include #include #include /* CP0 hazard avoidance. */ #define BARRIER __asm__ __volatile__(".set noreorder\n\t" \ "nop; nop; nop; nop; nop; nop;\n\t" \ ".set reorder\n\t") #include #include #undef DEBUG_CACHE /* Primary cache parameters. */ static int icache_size, dcache_size; /* Size in bytes */ static int ic_lsize, dc_lsize; /* LineSize in bytes */ /* -------------------------------------------------------------------- */ /* #include */ extern inline void flush_icache_line_indexed(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n\t" "cache %1, (%0)\n\t" "cache %1, 1(%0)\n\t" ".set mips0\n\t" ".set reorder" : : "r" (addr), "i" (Index_Invalidate_I)); } extern inline void flush_dcache_line_indexed(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n\t" "cache %1, (%0)\n\t" "cache %1, 1(%0)\n\t" ".set mips0\n\t" ".set reorder" : : "r" (addr), "i" (Index_Writeback_Inv_D)); } extern inline void flush_icache_line(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n\t" "cache %1, (%0)\n\t" ".set mips0\n\t" ".set reorder" : : "r" (addr), "i" (Hit_Invalidate_I)); } extern inline void flush_dcache_line(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n\t" "cache %1, (%0)\n\t" ".set mips0\n\t" ".set reorder" : : "r" (addr), "i" (Hit_Writeback_Inv_D)); } extern inline void invalidate_dcache_line(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n\t" "cache %1, (%0)\n\t" ".set mips0\n\t" ".set reorder" : : "r" (addr), "i" (Hit_Invalidate_D)); } /* * The next two are for badland addresses like signal trampolines. */ extern inline void protected_flush_icache_line(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n" "1:\tcache %1,(%0)\n" "2:\t.set mips0\n\t" ".set reorder\n\t" ".section\t__ex_table,\"a\"\n\t" STR(PTR)"\t1b,2b\n\t" ".previous" : : "r" (addr), "i" (Hit_Invalidate_I)); } extern inline void protected_writeback_dcache_line(unsigned long addr) { __asm__ __volatile__( ".set noreorder\n\t" ".set mips3\n" "1:\tcache %1,(%0)\n" "2:\t.set mips0\n\t" ".set reorder\n\t" ".section\t__ex_table,\"a\"\n\t" STR(PTR)"\t1b,2b\n\t" ".previous" : : "r" (addr), "i" (Hit_Writeback_D)); } #define cache32_unroll32(base,op) \ __asm__ __volatile__(" \ .set noreorder; \ .set mips3; \ cache %1, 0x000(%0); cache %1, 0x020(%0); \ cache %1, 0x040(%0); cache %1, 0x060(%0); \ cache %1, 0x080(%0); cache %1, 0x0a0(%0); \ cache %1, 0x0c0(%0); cache %1, 0x0e0(%0); \ cache %1, 0x100(%0); cache %1, 0x120(%0); \ cache %1, 0x140(%0); cache %1, 0x160(%0); \ cache %1, 0x180(%0); cache %1, 0x1a0(%0); \ cache %1, 0x1c0(%0); cache %1, 0x1e0(%0); \ cache %1, 0x200(%0); cache %1, 0x220(%0); \ cache %1, 0x240(%0); cache %1, 0x260(%0); \ cache %1, 0x280(%0); cache %1, 0x2a0(%0); \ cache %1, 0x2c0(%0); cache %1, 0x2e0(%0); \ cache %1, 0x300(%0); cache %1, 0x320(%0); \ cache %1, 0x340(%0); cache %1, 0x360(%0); \ cache %1, 0x380(%0); cache %1, 0x3a0(%0); \ cache %1, 0x3c0(%0); cache %1, 0x3e0(%0); \ .set mips0; \ .set reorder" \ : \ : "r" (base), \ "i" (op)); extern inline void blast_dcache32(void) { unsigned long start = KSEG0; unsigned long end = (start + dcache_size/2); while(start < end) { cache32_unroll32(start,Index_Writeback_Inv_D); cache32_unroll32(start+1,Index_Writeback_Inv_D); start += 0x400; } } extern inline void blast_dcache32_page(unsigned long page) { unsigned long start = page; unsigned long end = (start + PAGE_SIZE); while(start < end) { cache32_unroll32(start,Hit_Writeback_Inv_D); start += 0x400; } } extern inline void blast_dcache32_page_indexed(unsigned long page) { unsigned long start = page; unsigned long end = (start + PAGE_SIZE); while(start < end) { cache32_unroll32(start,Index_Writeback_Inv_D); cache32_unroll32(start+1,Index_Writeback_Inv_D); start += 0x400; } } extern inline void blast_icache32(void) { unsigned long start = KSEG0; unsigned long end = (start + icache_size/2); while(start < end) { cache32_unroll32(start,Index_Invalidate_I); cache32_unroll32(start+1,Index_Invalidate_I); start += 0x400; } } extern inline void blast_icache32_page(unsigned long page) { unsigned long start = page; unsigned long end = (start + PAGE_SIZE); while(start < end) { cache32_unroll32(start,Hit_Invalidate_I); start += 0x400; } } extern inline void blast_icache32_page_indexed(unsigned long page) { unsigned long start = page; unsigned long end = (start + PAGE_SIZE); while(start < end) { cache32_unroll32(start,Index_Invalidate_I); cache32_unroll32(start+1,Index_Invalidate_I); start += 0x400; } } /* -------------------------------------------------------------------- */ static void r5432_clear_page_d32(void * page) { __asm__ __volatile__( ".set\tnoreorder\n\t" ".set\tnoat\n\t" ".set\tmips3\n\t" "daddiu\t$1,%0,%2\n" "1:\tcache\t%3,(%0)\n\t" "sd\t$0,(%0)\n\t" "sd\t$0,8(%0)\n\t" "sd\t$0,16(%0)\n\t" "sd\t$0,24(%0)\n\t" "daddiu\t%0,64\n\t" "cache\t%3,-32(%0)\n\t" "sd\t$0,-32(%0)\n\t" "sd\t$0,-24(%0)\n\t" "sd\t$0,-16(%0)\n\t" "bne\t$1,%0,1b\n\t" "sd\t$0,-8(%0)\n\t" ".set\tmips0\n\t" ".set\tat\n\t" ".set\treorder" :"=r" (page) :"0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_D) :"$1","memory"); } /* * This is still inefficient. We only can do better if we know the * virtual address where the copy will be accessed. */ static void r5432_copy_page_d32(void * to, void * from) { unsigned long dummy1, dummy2; unsigned long reg1, reg2, reg3, reg4; __asm__ __volatile__( ".set\tnoreorder\n\t" ".set\tnoat\n\t" ".set\tmips3\n\t" "daddiu\t$1,%0,%8\n" "1:\tcache\t%9,(%0)\n\t" "lw\t%2,(%1)\n\t" "lw\t%3,4(%1)\n\t" "lw\t%4,8(%1)\n\t" "lw\t%5,12(%1)\n\t" "sw\t%2,(%0)\n\t" "sw\t%3,4(%0)\n\t" "sw\t%4,8(%0)\n\t" "sw\t%5,12(%0)\n\t" "lw\t%2,16(%1)\n\t" "lw\t%3,20(%1)\n\t" "lw\t%4,24(%1)\n\t" "lw\t%5,28(%1)\n\t" "sw\t%2,16(%0)\n\t" "sw\t%3,20(%0)\n\t" "sw\t%4,24(%0)\n\t" "sw\t%5,28(%0)\n\t" "cache\t%9,32(%0)\n\t" "daddiu\t%0,64\n\t" "daddiu\t%1,64\n\t" "lw\t%2,-32(%1)\n\t" "lw\t%3,-28(%1)\n\t" "lw\t%4,-24(%1)\n\t" "lw\t%5,-20(%1)\n\t" "sw\t%2,-32(%0)\n\t" "sw\t%3,-28(%0)\n\t" "sw\t%4,-24(%0)\n\t" "sw\t%5,-20(%0)\n\t" "lw\t%2,-16(%1)\n\t" "lw\t%3,-12(%1)\n\t" "lw\t%4,-8(%1)\n\t" "lw\t%5,-4(%1)\n\t" "sw\t%2,-16(%0)\n\t" "sw\t%3,-12(%0)\n\t" "sw\t%4,-8(%0)\n\t" "bne\t$1,%0,1b\n\t" "sw\t%5,-4(%0)\n\t" ".set\tmips0\n\t" ".set\tat\n\t" ".set\treorder" :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2), "=&r" (reg3), "=&r" (reg4) :"0" (to), "1" (from), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_D)); } /* * If you think for one second that this stuff coming up is a lot * of bulky code eating too many kernel cache lines. Think _again_. * * Consider: * 1) Taken branches have a 3 cycle penalty on R4k * 2) The branch itself is a real dead cycle on even R4600/R5000. * 3) Only one of the following variants of each type is even used by * the kernel based upon the cache parameters we detect at boot time. * * QED. */ static inline void r5432_flush_cache_all_d32i32(void) { blast_dcache32(); blast_icache32(); } static void r5432_flush_cache_range_d32i32(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm->context != 0) { #ifdef DEBUG_CACHE printk("crange[%d,%08lx,%08lx]", (int)mm->context, start, end); #endif blast_dcache32(); blast_icache32(); } } /* * On architectures like the Sparc, we could get rid of lines in * the cache created only by a certain context, but on the MIPS * (and actually certain Sparc's) we cannot. */ static void r5432_flush_cache_mm_d32i32(struct mm_struct *mm) { if (mm->context != 0) { #ifdef DEBUG_CACHE printk("cmm[%d]", (int)mm->context); #endif r5432_flush_cache_all_d32i32(); } } static void r5432_flush_cache_page_d32i32(struct vm_area_struct *vma, unsigned long page) { struct mm_struct *mm = vma->vm_mm; pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; /* * If ownes no valid ASID yet, cannot possibly have gotten * this page into the cache. */ if (mm->context == 0) return; #ifdef DEBUG_CACHE printk("cpage[%d,%08lx]", (int)mm->context, page); #endif page &= PAGE_MASK; pgdp = pgd_offset(mm, page); pmdp = pmd_offset(pgdp, page); ptep = pte_offset(pmdp, page); /* * If the page isn't marked valid, the page cannot possibly be * in the cache. */ if (!(pte_val(*ptep) & _PAGE_PRESENT)) return; /* * Doing flushes for another ASID than the current one is * too difficult since stupid R4k caches do a TLB translation * for every cache flush operation. So we do indexed flushes * in that case, which doesn't overly flush the cache too much. */ if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { blast_dcache32_page(page); } else { /* * Do indexed flush, too much work to get the (possible) * tlb refills to work correctly. */ page = (KSEG0 + (page & (dcache_size - 1))); blast_dcache32_page_indexed(page); } } /* If the addresses passed to these routines are valid, they are * either: * * 1) In KSEG0, so we can do a direct flush of the page. * 2) In KSEG2, and since every process can translate those * addresses all the time in kernel mode we can do a direct * flush. * 3) In KSEG1, no flush necessary. */ static void r5432_flush_page_to_ram_d32(struct page *page) { blast_dcache32_page((unsigned long)page_address(page)); } static void r5432_flush_icache_range(unsigned long start, unsigned long end) { r5432_flush_cache_all_d32i32(); } /* * Ok, this seriously sucks. We use them to flush a user page but don't * know the virtual address, so we have to blast away the whole icache * which is significantly more expensive than the real thing. */ static void r5432_flush_icache_page_i32(struct vm_area_struct *vma, struct page *page) { if (!(vma->vm_flags & VM_EXEC)) return; r5432_flush_cache_all_d32i32(); } /* * Writeback and invalidate the primary cache dcache before DMA. */ static void r5432_dma_cache_wback_inv_pc(unsigned long addr, unsigned long size) { unsigned long end, a; if (size >= dcache_size) { flush_cache_all(); } else { a = addr & ~(dc_lsize - 1); end = (addr + size) & ~(dc_lsize - 1); while (1) { flush_dcache_line(a); /* Hit_Writeback_Inv_D */ if (a == end) break; a += dc_lsize; } } bc_wback_inv(addr, size); } static void r5432_dma_cache_inv_pc(unsigned long addr, unsigned long size) { unsigned long end, a; if (size >= dcache_size) { flush_cache_all(); } else { a = addr & ~(dc_lsize - 1); end = (addr + size) & ~(dc_lsize - 1); while (1) { flush_dcache_line(a); /* Hit_Writeback_Inv_D */ if (a == end) break; a += dc_lsize; } } bc_inv(addr, size); } static void r5432_dma_cache_wback(unsigned long addr, unsigned long size) { panic("r5432_dma_cache called - should not happen.\n"); } /* * While we're protected against bad userland addresses we don't care * very much about what happens in that case. Usually a segmentation * fault will dump the process later on anyway ... */ static void r5432_flush_cache_sigtramp(unsigned long addr) { protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); protected_flush_icache_line(addr & ~(ic_lsize - 1)); } #undef DEBUG_TLB #undef DEBUG_TLBUPDATE #define NTLB_ENTRIES 48 /* Fixed on all R4XX0 variants... */ #define NTLB_ENTRIES_HALF 24 /* Fixed on all R4XX0 variants... */ void flush_tlb_all(void) { unsigned long old_ctx; int entry; unsigned long flags; #ifdef DEBUG_TLB printk("[tlball]"); #endif __save_and_cli(flags); /* Save old context and create impossible VPN2 value */ old_ctx = (get_entryhi() & 0xff); set_entryhi(KSEG0); set_entrylo0(0); set_entrylo1(0); BARRIER; entry = get_wired(); /* Blast 'em all away. */ while(entry < NTLB_ENTRIES) { set_index(entry); BARRIER; tlb_write_indexed(); BARRIER; entry++; } BARRIER; set_entryhi(old_ctx); __restore_flags(flags); } void flush_tlb_mm(struct mm_struct *mm) { if (mm->context != 0) { unsigned long flags; #ifdef DEBUG_TLB printk("[tlbmm<%d>]", mm->context); #endif __save_and_cli(flags); get_new_mmu_context(mm, asid_cache); if (mm == current->active_mm) set_entryhi(mm->context & 0xff); __restore_flags(flags); } } void flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) { if(mm->context != 0) { unsigned long flags; int size; #ifdef DEBUG_TLB printk("[tlbrange<%02x,%08lx,%08lx>]", (mm->context & 0xff), start, end); #endif __save_and_cli(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if(size <= NTLB_ENTRIES_HALF) { int oldpid = (get_entryhi() & 0xff); int newpid = (mm->context & 0xff); start &= (PAGE_MASK << 1); end += ((PAGE_SIZE << 1) - 1); end &= (PAGE_MASK << 1); while(start < end) { int idx; set_entryhi(start | newpid); start += (PAGE_SIZE << 1); BARRIER; tlb_probe(); BARRIER; idx = get_index(); set_entrylo0(0); set_entrylo1(0); set_entryhi(KSEG0); BARRIER; if(idx < 0) continue; tlb_write_indexed(); BARRIER; } set_entryhi(oldpid); } else { get_new_mmu_context(mm, asid_cache); if (mm == current->active_mm) set_entryhi(mm->context & 0xff); } __restore_flags(flags); } } void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { if (vma->vm_mm->context != 0) { unsigned long flags; int oldpid, newpid, idx; #ifdef DEBUG_TLB printk("[tlbpage<%d,%08lx>]", vma->vm_mm->context, page); #endif newpid = (vma->vm_mm->context & 0xff); page &= (PAGE_MASK << 1); __save_and_cli(flags); oldpid = (get_entryhi() & 0xff); set_entryhi(page | newpid); BARRIER; tlb_probe(); BARRIER; idx = get_index(); set_entrylo0(0); set_entrylo1(0); set_entryhi(KSEG0); if(idx < 0) goto finish; BARRIER; tlb_write_indexed(); finish: BARRIER; set_entryhi(oldpid); __restore_flags(flags); } } void pgd_init(unsigned long page) { unsigned long *p = (unsigned long *) page; int i; for(i = 0; i < USER_PTRS_PER_PGD; i+=8) { p[i + 0] = (unsigned long) invalid_pte_table; p[i + 1] = (unsigned long) invalid_pte_table; p[i + 2] = (unsigned long) invalid_pte_table; p[i + 3] = (unsigned long) invalid_pte_table; p[i + 4] = (unsigned long) invalid_pte_table; p[i + 5] = (unsigned long) invalid_pte_table; p[i + 6] = (unsigned long) invalid_pte_table; p[i + 7] = (unsigned long) invalid_pte_table; } } /* We will need multiple versions of update_mmu_cache(), one that just * updates the TLB with the new pte(s), and another which also checks * for the R4k "end of page" hardware bug and does the needy. */ void update_mmu_cache(struct vm_area_struct * vma, unsigned long address, pte_t pte) { unsigned long flags; pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; int idx, pid; /* * Handle debugger faulting in for debugee. */ if (current->active_mm != vma->vm_mm) return; pid = get_entryhi() & 0xff; #ifdef DEBUG_TLB if((pid != (vma->vm_mm->context & 0xff)) || (vma->vm_mm->context == 0)) { printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%d tlbpid=%d\n", (int) (vma->vm_mm->context & 0xff), pid); } #endif __save_and_cli(flags); address &= (PAGE_MASK << 1); set_entryhi(address | (pid)); pgdp = pgd_offset(vma->vm_mm, address); BARRIER; tlb_probe(); BARRIER; pmdp = pmd_offset(pgdp, address); idx = get_index(); ptep = pte_offset(pmdp, address); BARRIER; set_entrylo0(pte_val(*ptep++) >> 6); set_entrylo1(pte_val(*ptep) >> 6); set_entryhi(address | (pid)); BARRIER; if(idx < 0) { tlb_write_random(); } else { tlb_write_indexed(); } BARRIER; set_entryhi(pid); BARRIER; __restore_flags(flags); } void show_regs(struct pt_regs * regs) { /* Saved main processor registers. */ printk("$0 : %08lx %08lx %08lx %08lx\n", 0UL, regs->regs[1], regs->regs[2], regs->regs[3]); printk("$4 : %08lx %08lx %08lx %08lx\n", regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); printk("$8 : %08lx %08lx %08lx %08lx\n", regs->regs[8], regs->regs[9], regs->regs[10], regs->regs[11]); printk("$12: %08lx %08lx %08lx %08lx\n", regs->regs[12], regs->regs[13], regs->regs[14], regs->regs[15]); printk("$16: %08lx %08lx %08lx %08lx\n", regs->regs[16], regs->regs[17], regs->regs[18], regs->regs[19]); printk("$20: %08lx %08lx %08lx %08lx\n", regs->regs[20], regs->regs[21], regs->regs[22], regs->regs[23]); printk("$24: %08lx %08lx\n", regs->regs[24], regs->regs[25]); printk("$28: %08lx %08lx %08lx %08lx\n", regs->regs[28], regs->regs[29], regs->regs[30], regs->regs[31]); /* Saved cp0 registers. */ printk("epc : %08lx %s\nStatus: %08lx\nCause : %08lx\n", regs->cp0_epc, print_tainted(), regs->cp0_status, regs->cp0_cause); } void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, unsigned long entryhi, unsigned long pagemask) { unsigned long flags; unsigned long wired; unsigned long old_pagemask; unsigned long old_ctx; __save_and_cli(flags); /* Save old context and create impossible VPN2 value */ old_ctx = (get_entryhi() & 0xff); old_pagemask = get_pagemask(); wired = get_wired(); set_wired (wired + 1); set_index (wired); BARRIER; set_pagemask (pagemask); set_entryhi(entryhi); set_entrylo0(entrylo0); set_entrylo1(entrylo1); BARRIER; tlb_write_indexed(); BARRIER; set_entryhi(old_ctx); BARRIER; set_pagemask (old_pagemask); flush_tlb_all(); __restore_flags(flags); } /* Detect and size the various r4k caches. */ static void __init probe_icache(unsigned long config) { icache_size = 1 << (12 + ((config >> 9) & 7)); ic_lsize = 16 << ((config >> 5) & 1); printk("Primary instruction cache %dkb, linesize %d bytes.\n", icache_size >> 10, ic_lsize); } static void __init probe_dcache(unsigned long config) { dcache_size = 1 << (12 + ((config >> 6) & 7)); dc_lsize = 16 << ((config >> 4) & 1); printk("Primary data cache %dkb, linesize %d bytes.\n", dcache_size >> 10, dc_lsize); } void __init ld_mmu_r5432(void) { unsigned long config = read_32bit_cp0_register(CP0_CONFIG); printk("CPU revision is: %08x\n", read_32bit_cp0_register(CP0_PRID)); change_cp0_config(CONF_CM_CMASK, CONF_CM_CACHABLE_NONCOHERENT); probe_icache(config); probe_dcache(config); _clear_page = r5432_clear_page_d32; _copy_page = r5432_copy_page_d32; _flush_cache_all = r5432_flush_cache_all_d32i32; ___flush_cache_all = r5432_flush_cache_all_d32i32; _flush_page_to_ram = r5432_flush_page_to_ram_d32; _flush_cache_mm = r5432_flush_cache_mm_d32i32; _flush_cache_range = r5432_flush_cache_range_d32i32; _flush_cache_page = r5432_flush_cache_page_d32i32; _flush_icache_page = r5432_flush_icache_page_i32; _dma_cache_wback_inv = r5432_dma_cache_wback_inv_pc; _dma_cache_wback = r5432_dma_cache_wback; _dma_cache_inv = r5432_dma_cache_inv_pc; _flush_cache_sigtramp = r5432_flush_cache_sigtramp; _flush_icache_range = r5432_flush_icache_range; /* Ouch */ __flush_cache_all(); write_32bit_cp0_register(CP0_WIRED, 0); /* * You should never change this register: * - On R4600 1.7 the tlbp never hits for pages smaller than * the value in the c0_pagemask register. * - The entire mm handling assumes the c0_pagemask register to * be set for 4kb pages. */ write_32bit_cp0_register(CP0_PAGEMASK, PM_4K); flush_tlb_all(); }