--- zzzz-none-000/linux-3.10.107/arch/mips/mm/tlbex.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/mips/mm/tlbex.c 2021-02-04 17:41:59.000000000 +0000 @@ -26,15 +26,26 @@ #include #include #include -#include #include #include +#include #include #include #include #include +static int mips_xpa_disabled; + +static int __init xpa_disable(char *s) +{ + mips_xpa_disabled = 1; + + return 1; +} + +__setup("noxpa", xpa_disable); + /* * TLB load/store/modify handlers. * @@ -85,6 +96,7 @@ case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: return 1; default: return 0; @@ -95,6 +107,7 @@ { switch (current_cpu_type()) { case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: return 1; default: return 0; @@ -136,7 +149,7 @@ * why; it's not an issue caused by the core RTL. * */ -static int __cpuinit m4kc_tlbp_war(void) +static int m4kc_tlbp_war(void) { return (current_cpu_data.processor_id & 0xffff00) == (PRID_COMP_MIPS | PRID_IMP_4KC); @@ -181,11 +194,9 @@ UASM_L_LA(_tlb_huge_update) #endif -static int __cpuinitdata hazard_instance; +static int hazard_instance; -static void __cpuinit uasm_bgezl_hazard(u32 **p, - struct uasm_reloc **r, - int instance) +static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance) { switch (instance) { case 0 ... 7: @@ -196,9 +207,7 @@ } } -static void __cpuinit uasm_bgezl_label(struct uasm_label **l, - u32 **p, - int instance) +static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance) { switch (instance) { case 0 ... 7: @@ -233,14 +242,14 @@ pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (cpu_has_rixi) { #ifdef _PAGE_NO_EXEC_SHIFT pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); -#endif -#ifdef _PAGE_NO_READ_SHIFT pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); #endif } +#endif pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); @@ -295,17 +304,29 @@ * We deliberately chose a buffer size of 128, so we won't scribble * over anything important on overflow before we panic. */ -static u32 tlb_handler[128] __cpuinitdata; +static u32 tlb_handler[128]; /* simply assume worst case size for labels and relocs */ -static struct uasm_label labels[128] __cpuinitdata; -static struct uasm_reloc relocs[128] __cpuinitdata; +static struct uasm_label labels[128]; +static struct uasm_reloc relocs[128]; + +static int check_for_high_segbits; +static bool fill_includes_sw_bits; -static int check_for_high_segbits __cpuinitdata; +static unsigned int kscratch_used_mask; -static unsigned int kscratch_used_mask __cpuinitdata; +static inline int __maybe_unused c0_kscratch(void) +{ + switch (current_cpu_type()) { + case CPU_XLP: + case CPU_XLR: + return 22; + default: + return 31; + } +} -static int __cpuinit allocate_kscratch(void) +static int allocate_kscratch(void) { int r; unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask; @@ -322,21 +343,17 @@ return r; } -static int scratch_reg __cpuinitdata; -static int pgd_reg __cpuinitdata; +static int scratch_reg; +static int pgd_reg; enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; -static struct work_registers __cpuinit build_get_work_registers(u32 **p) +static struct work_registers build_get_work_registers(u32 **p) { struct work_registers r; - int smp_processor_id_reg; - int smp_processor_id_sel; - int smp_processor_id_shift; - - if (scratch_reg > 0) { + if (scratch_reg >= 0) { /* Save in CPU local C0_KScratch? */ - UASM_i_MTC0(p, 1, 31, scratch_reg); + UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg); r.r1 = K0; r.r2 = K1; r.r3 = 1; @@ -344,25 +361,9 @@ } if (num_possible_cpus() > 1) { -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT - smp_processor_id_shift = 51; - smp_processor_id_reg = 20; /* XContext */ - smp_processor_id_sel = 0; -#else -# ifdef CONFIG_32BIT - smp_processor_id_shift = 25; - smp_processor_id_reg = 4; /* Context */ - smp_processor_id_sel = 0; -# endif -# ifdef CONFIG_64BIT - smp_processor_id_shift = 26; - smp_processor_id_reg = 4; /* Context */ - smp_processor_id_sel = 0; -# endif -#endif /* Get smp_processor_id */ - UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel); - UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift); + UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG); + UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT); /* handler_reg_save index in K0 */ UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); @@ -382,10 +383,10 @@ return r; } -static void __cpuinit build_restore_work_registers(u32 **p) +static void build_restore_work_registers(u32 **p) { - if (scratch_reg > 0) { - UASM_i_MFC0(p, 1, 31, scratch_reg); + if (scratch_reg >= 0) { + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); return; } /* K0 already points to save area, restore $1 and $2 */ @@ -407,7 +408,7 @@ /* * The R3000 TLB handler is simple. */ -static void __cpuinit build_r3000_tlb_refill_handler(void) +static void build_r3000_tlb_refill_handler(void) { long pgdc = (long)pgd_current; u32 *p; @@ -440,6 +441,7 @@ (unsigned int)(p - tlb_handler)); memcpy((void *)ebase, tlb_handler, 0x80); + local_flush_icache_range(ebase, ebase + 0x80); dump_handler("r3000_tlb_refill", (u32 *)ebase, 32); } @@ -452,7 +454,7 @@ * other one.To keep things simple, we first assume linear space, * then we relocate it to the final handler layout as needed. */ -static u32 final_handler[64] __cpuinitdata; +static u32 final_handler[64]; /* * Hazards @@ -476,7 +478,7 @@ * * As if we MIPS hackers wouldn't know how to nop pipelines happy ... */ -static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) +static void __maybe_unused build_tlb_probe_entry(u32 **p) { switch (current_cpu_type()) { /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ @@ -500,9 +502,9 @@ */ enum tlb_write_entry { tlb_random, tlb_indexed }; -static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, - struct uasm_reloc **r, - enum tlb_write_entry wmode) +static void build_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + enum tlb_write_entry wmode) { void(*tlbw)(u32 **) = NULL; @@ -511,21 +513,9 @@ case tlb_indexed: tlbw = uasm_i_tlbwi; break; } - if (cpu_has_mips_r2) { - /* - * The architecture spec says an ehb is required here, - * but a number of cores do not have the hazard and - * using an ehb causes an expensive pipeline stall. - */ - switch (current_cpu_type()) { - case CPU_M14KC: - case CPU_74K: - break; - - default: + if (cpu_has_mips_r2_r6) { + if (cpu_has_mips_r2_exec_hazard) uasm_i_ehb(p); - break; - } tlbw(p); return; } @@ -574,6 +564,7 @@ case CPU_R10000: case CPU_R12000: case CPU_R14000: + case CPU_R16000: case CPU_4KC: case CPU_4KEC: case CPU_M14KC: @@ -589,6 +580,7 @@ case CPU_BMIPS4380: case CPU_BMIPS5000: case CPU_LOONGSON2: + case CPU_LOONGSON3: case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); @@ -631,18 +623,24 @@ default: panic("No TLB refill handler yet (CPU type: %d)", - current_cpu_data.cputype); + current_cpu_type()); break; } } -static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, - unsigned int reg) +static __maybe_unused void build_convert_pte_to_entrylo(u32 **p, + unsigned int reg) { - if (cpu_has_rixi) { - UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); + if (cpu_has_rixi && _PAGE_NO_EXEC) { + if (fill_includes_sw_bits) { + UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); + } else { + UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC)); + UASM_i_ROTR(p, reg, reg, + ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); + } } else { -#ifdef CONFIG_64BIT_PHYS_ADDR +#ifdef CONFIG_PHYS_ADDR_T_64BIT uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); #else UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); @@ -652,11 +650,9 @@ #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT -static __cpuinit void build_restore_pagemask(u32 **p, - struct uasm_reloc **r, - unsigned int tmp, - enum label_id lid, - int restore_scratch) +static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, + unsigned int tmp, enum label_id lid, + int restore_scratch) { if (restore_scratch) { /* Reset default page size */ @@ -673,8 +669,8 @@ uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg > 0) - UASM_i_MFC0(p, 1, 31, scratch_reg); + if (scratch_reg >= 0) + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); else UASM_i_LW(p, 1, scratchpad_offset(0), 0); } else { @@ -695,12 +691,11 @@ } } -static __cpuinit void build_huge_tlb_write_entry(u32 **p, - struct uasm_label **l, - struct uasm_reloc **r, - unsigned int tmp, - enum tlb_write_entry wmode, - int restore_scratch) +static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + unsigned int tmp, + enum tlb_write_entry wmode, + int restore_scratch) { /* Set huge page tlb entry size */ uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); @@ -715,9 +710,9 @@ /* * Check if Huge PTE is present, if so then jump to LABEL. */ -static void __cpuinit +static void build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, - unsigned int pmd, int lid) + unsigned int pmd, int lid) { UASM_i_LW(p, tmp, 0, pmd); if (use_bbit_insns()) { @@ -728,9 +723,8 @@ } } -static __cpuinit void build_huge_update_entries(u32 **p, - unsigned int pte, - unsigned int tmp) +static void build_huge_update_entries(u32 **p, unsigned int pte, + unsigned int tmp) { int small_sequence; @@ -760,11 +754,10 @@ UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */ } -static __cpuinit void build_huge_handler_tail(u32 **p, - struct uasm_reloc **r, - struct uasm_label **l, - unsigned int pte, - unsigned int ptr) +static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, + struct uasm_label **l, + unsigned int pte, + unsigned int ptr) { #ifdef CONFIG_SMP UASM_i_SC(p, pte, 0, ptr); @@ -783,7 +776,7 @@ * TMP and PTR are scratch. * TMP will be clobbered, PTR will hold the pmd entry. */ -static void __cpuinit +static void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int tmp, unsigned int ptr) { @@ -814,11 +807,11 @@ } /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */ -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT if (pgd_reg != -1) { /* pgd is in pgd_reg */ - UASM_i_MFC0(p, ptr, 31, pgd_reg); + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); } else { +#if defined(CONFIG_MIPS_PGD_C0_CONTEXT) /* * &pgd << 11 stored in CONTEXT [23..63]. */ @@ -830,30 +823,18 @@ /* 1 0 1 0 1 << 6 xkphys cached */ uasm_i_ori(p, ptr, ptr, 0x540); uasm_i_drotr(p, ptr, ptr, 11); - } #elif defined(CONFIG_SMP) -# ifdef CONFIG_MIPS_MT_SMTC - /* - * SMTC uses TCBind value as "CPU" index - */ - uasm_i_mfc0(p, ptr, C0_TCBIND); - uasm_i_dsrl_safe(p, ptr, ptr, 19); -# else - /* - * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 - * stored in CONTEXT. - */ - uasm_i_dmfc0(p, ptr, C0_CONTEXT); - uasm_i_dsrl_safe(p, ptr, ptr, 23); -# endif - UASM_i_LA_mostly(p, tmp, pgdc); - uasm_i_daddu(p, ptr, ptr, tmp); - uasm_i_dmfc0(p, tmp, C0_BADVADDR); - uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); + UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG); + uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_daddu(p, ptr, ptr, tmp); + uasm_i_dmfc0(p, tmp, C0_BADVADDR); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); #else - UASM_i_LA_mostly(p, ptr, pgdc); - uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); + UASM_i_LA_mostly(p, ptr, pgdc); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); #endif + } uasm_l_vmalloc_done(l, *p); @@ -875,7 +856,7 @@ * BVADDR is the faulting address, PTR is scratch. * PTR will hold the pgd for vmalloc. */ -static void __cpuinit +static void build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int bvaddr, unsigned int ptr, enum vmalloc64_mode mode) @@ -929,8 +910,8 @@ uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg > 0) - UASM_i_MFC0(p, 1, 31, scratch_reg); + if (scratch_reg >= 0) + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); else UASM_i_LW(p, 1, scratchpad_offset(0), 0); } else { @@ -945,34 +926,28 @@ * TMP and PTR are scratch. * TMP will be clobbered, PTR will hold the pgd entry. */ -static void __cpuinit __maybe_unused +static void __maybe_unused build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) { - long pgdc = (long)pgd_current; + if (pgd_reg != -1) { + /* pgd is in pgd_reg */ + uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg); + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + } else { + long pgdc = (long)pgd_current; - /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ + /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ #ifdef CONFIG_SMP -#ifdef CONFIG_MIPS_MT_SMTC - /* - * SMTC uses TCBind value as "CPU" index - */ - uasm_i_mfc0(p, ptr, C0_TCBIND); - UASM_i_LA_mostly(p, tmp, pgdc); - uasm_i_srl(p, ptr, ptr, 19); -#else - /* - * smp_processor_id() << 3 is stored in CONTEXT. - */ - uasm_i_mfc0(p, ptr, C0_CONTEXT); - UASM_i_LA_mostly(p, tmp, pgdc); - uasm_i_srl(p, ptr, ptr, 23); -#endif - uasm_i_addu(p, ptr, tmp, ptr); + uasm_i_mfc0(p, ptr, SMP_CPUID_REG); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + uasm_i_addu(p, ptr, tmp, ptr); #else - UASM_i_LA_mostly(p, ptr, pgdc); + UASM_i_LA_mostly(p, ptr, pgdc); #endif - uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ - uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + } uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ @@ -980,7 +955,7 @@ #endif /* !CONFIG_64BIT */ -static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx) +static void build_adjust_context(u32 **p, unsigned int ctx) { unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); @@ -1006,7 +981,7 @@ uasm_i_andi(p, ctx, ctx, mask); } -static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) +static void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) { /* * Bug workaround for the Nevada. It seems as if under certain @@ -1031,61 +1006,53 @@ UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ } -static void __cpuinit build_update_entries(u32 **p, unsigned int tmp, - unsigned int ptep) +static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) { /* * 64bit address support (36bit on a 32bit CPU) in a 32bit * Kernel is a special case. Only a few CPUs use it. */ -#ifdef CONFIG_64BIT_PHYS_ADDR - if (cpu_has_64bits) { - uasm_i_ld(p, tmp, 0, ptep); /* get even pte */ - uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ - if (cpu_has_rixi) { - UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); - } else { - uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ - } - UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ - } else { + if (config_enabled(CONFIG_PHYS_ADDR_T_64BIT) && !cpu_has_64bits) { int pte_off_even = sizeof(pte_t) / 2; int pte_off_odd = pte_off_even + sizeof(pte_t); +#ifdef CONFIG_XPA + const int scratch = 1; /* Our extra working register */ - /* The pte entries are pre-shifted */ - uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ - UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ + uasm_i_addu(p, scratch, 0, ptep); +#endif + uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */ + uasm_i_lw(p, ptep, pte_off_odd, ptep); /* odd pte */ + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); + UASM_i_MTC0(p, ptep, C0_ENTRYLO1); +#ifdef CONFIG_XPA + uasm_i_lw(p, tmp, 0, scratch); + uasm_i_lw(p, ptep, sizeof(pte_t), scratch); + uasm_i_lui(p, scratch, 0xff); + uasm_i_ori(p, scratch, scratch, 0xffff); + uasm_i_and(p, tmp, scratch, tmp); + uasm_i_and(p, ptep, scratch, ptep); + uasm_i_mthc0(p, tmp, C0_ENTRYLO0); + uasm_i_mthc0(p, ptep, C0_ENTRYLO1); +#endif + return; } -#else + UASM_i_LW(p, tmp, 0, ptep); /* get even pte */ UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ if (r45k_bvahwbug()) build_tlb_probe_entry(p); - if (cpu_has_rixi) { - UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); - if (r4k_250MHZhwbug()) - UASM_i_MTC0(p, 0, C0_ENTRYLO0); - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); - } else { - UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ - if (r4k_250MHZhwbug()) - UASM_i_MTC0(p, 0, C0_ENTRYLO0); - UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ - UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ - if (r45k_bvahwbug()) - uasm_i_mfc0(p, tmp, C0_INDEX); - } + build_convert_pte_to_entrylo(p, tmp); + if (r4k_250MHZhwbug()) + UASM_i_MTC0(p, 0, C0_ENTRYLO0); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + build_convert_pte_to_entrylo(p, ptep); + if (r45k_bvahwbug()) + uasm_i_mfc0(p, tmp, C0_INDEX); if (r4k_250MHZhwbug()) UASM_i_MTC0(p, 0, C0_ENTRYLO1); UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ -#endif } struct mips_huge_tlb_info { @@ -1094,10 +1061,10 @@ bool need_reload_pte; }; -static struct mips_huge_tlb_info __cpuinit +static struct mips_huge_tlb_info build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int tmp, - unsigned int ptr, int c0_scratch) + unsigned int ptr, int c0_scratch_reg) { struct mips_huge_tlb_info rv; unsigned int even, odd; @@ -1112,12 +1079,12 @@ UASM_i_MFC0(p, tmp, C0_BADVADDR); if (pgd_reg != -1) - UASM_i_MFC0(p, ptr, 31, pgd_reg); + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); else UASM_i_MFC0(p, ptr, C0_CONTEXT); - if (c0_scratch >= 0) - UASM_i_MTC0(p, scratch, 31, c0_scratch); + if (c0_scratch_reg >= 0) + UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); else UASM_i_SW(p, scratch, scratchpad_offset(0), 0); @@ -1132,14 +1099,14 @@ } } else { if (pgd_reg != -1) - UASM_i_MFC0(p, ptr, 31, pgd_reg); + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); else UASM_i_MFC0(p, ptr, C0_CONTEXT); UASM_i_MFC0(p, tmp, C0_BADVADDR); - if (c0_scratch >= 0) - UASM_i_MTC0(p, scratch, 31, c0_scratch); + if (c0_scratch_reg >= 0) + UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); else UASM_i_SW(p, scratch, scratchpad_offset(0), 0); @@ -1244,8 +1211,8 @@ } UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ - if (c0_scratch >= 0) { - UASM_i_MFC0(p, scratch, 31, c0_scratch); + if (c0_scratch_reg >= 0) { + UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); build_tlb_write_entry(p, l, r, tlb_random); uasm_l_leave(l, *p); rv.restore_scratch = 1; @@ -1273,7 +1240,7 @@ */ #define MIPS64_REFILL_INSNS 32 -static void __cpuinit build_r4000_tlb_refill_handler(void) +static void build_r4000_tlb_refill_handler(void) { u32 *p = tlb_handler; struct uasm_label *l = labels; @@ -1288,7 +1255,7 @@ memset(relocs, 0, sizeof(relocs)); memset(final_handler, 0, sizeof(final_handler)); - if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) { + if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) { htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, scratch_reg); vmalloc_mode = refill_scratch; @@ -1350,132 +1317,139 @@ * need three, with the second nop'ed and the third being * unused. */ - /* Loongson2 ebase is different than r4k, we have more space */ -#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) - if ((p - tlb_handler) > 64) - panic("TLB refill handler space exceeded"); -#else - if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) - || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) - && uasm_insn_has_bdelay(relocs, - tlb_handler + MIPS64_REFILL_INSNS - 3))) - panic("TLB refill handler space exceeded"); -#endif - - /* - * Now fold the handler in the TLB refill handler space. - */ -#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) - f = final_handler; - /* Simplest case, just copy the handler. */ - uasm_copy_handler(relocs, labels, tlb_handler, p, f); - final_len = p - tlb_handler; -#else /* CONFIG_64BIT */ - f = final_handler + MIPS64_REFILL_INSNS; - if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { - /* Just copy the handler. */ - uasm_copy_handler(relocs, labels, tlb_handler, p, f); - final_len = p - tlb_handler; - } else { -#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT - const enum label_id ls = label_tlb_huge_update; -#else - const enum label_id ls = label_vmalloc; -#endif - u32 *split; - int ov = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) - ; - BUG_ON(i == ARRAY_SIZE(labels)); - split = labels[i].addr; - - /* - * See if we have overflown one way or the other. - */ - if (split > tlb_handler + MIPS64_REFILL_INSNS || - split < p - MIPS64_REFILL_INSNS) - ov = 1; - - if (ov) { + switch (boot_cpu_type()) { + default: + if (sizeof(long) == 4) { + case CPU_LOONGSON2: + /* Loongson2 ebase is different than r4k, we have more space */ + if ((p - tlb_handler) > 64) + panic("TLB refill handler space exceeded"); /* - * Split two instructions before the end. One - * for the branch and one for the instruction - * in the delay slot. + * Now fold the handler in the TLB refill handler space. */ - split = tlb_handler + MIPS64_REFILL_INSNS - 2; - + f = final_handler; + /* Simplest case, just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + break; + } else { + if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) + || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) + && uasm_insn_has_bdelay(relocs, + tlb_handler + MIPS64_REFILL_INSNS - 3))) + panic("TLB refill handler space exceeded"); /* - * If the branch would fall in a delay slot, - * we must back up an additional instruction - * so that it is no longer in a delay slot. + * Now fold the handler in the TLB refill handler space. */ - if (uasm_insn_has_bdelay(relocs, split - 1)) - split--; - } - /* Copy first part of the handler. */ - uasm_copy_handler(relocs, labels, tlb_handler, split, f); - f += split - tlb_handler; - - if (ov) { - /* Insert branch. */ - uasm_l_split(&l, final_handler); - uasm_il_b(&f, &r, label_split); - if (uasm_insn_has_bdelay(relocs, split)) - uasm_i_nop(&f); - else { - uasm_copy_handler(relocs, labels, - split, split + 1, f); - uasm_move_labels(labels, f, f + 1, -1); - f++; - split++; + f = final_handler + MIPS64_REFILL_INSNS; + if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { + /* Just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + } else { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + const enum label_id ls = label_tlb_huge_update; +#else + const enum label_id ls = label_vmalloc; +#endif + u32 *split; + int ov = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) + ; + BUG_ON(i == ARRAY_SIZE(labels)); + split = labels[i].addr; + + /* + * See if we have overflown one way or the other. + */ + if (split > tlb_handler + MIPS64_REFILL_INSNS || + split < p - MIPS64_REFILL_INSNS) + ov = 1; + + if (ov) { + /* + * Split two instructions before the end. One + * for the branch and one for the instruction + * in the delay slot. + */ + split = tlb_handler + MIPS64_REFILL_INSNS - 2; + + /* + * If the branch would fall in a delay slot, + * we must back up an additional instruction + * so that it is no longer in a delay slot. + */ + if (uasm_insn_has_bdelay(relocs, split - 1)) + split--; + } + /* Copy first part of the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, split, f); + f += split - tlb_handler; + + if (ov) { + /* Insert branch. */ + uasm_l_split(&l, final_handler); + uasm_il_b(&f, &r, label_split); + if (uasm_insn_has_bdelay(relocs, split)) + uasm_i_nop(&f); + else { + uasm_copy_handler(relocs, labels, + split, split + 1, f); + uasm_move_labels(labels, f, f + 1, -1); + f++; + split++; + } + } + + /* Copy the rest of the handler. */ + uasm_copy_handler(relocs, labels, split, p, final_handler); + final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + + (p - split); } } - - /* Copy the rest of the handler. */ - uasm_copy_handler(relocs, labels, split, p, final_handler); - final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + - (p - split); + break; } -#endif /* CONFIG_64BIT */ uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB refill handler (%u instructions).\n", final_len); memcpy((void *)ebase, final_handler, 0x100); + local_flush_icache_range(ebase, ebase + 0x100); dump_handler("r4000_tlb_refill", (u32 *)ebase, 64); } -/* - * 128 instructions for the fastpath handler is generous and should - * never be exceeded. - */ -#define FASTPATH_SIZE 128 - -u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned; -u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned; -u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned; -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT -u32 tlbmiss_handler_setup_pgd_array[16] __cacheline_aligned; +extern u32 handle_tlbl[], handle_tlbl_end[]; +extern u32 handle_tlbs[], handle_tlbs_end[]; +extern u32 handle_tlbm[], handle_tlbm_end[]; +extern u32 tlbmiss_handler_setup_pgd_start[], tlbmiss_handler_setup_pgd[]; +extern u32 tlbmiss_handler_setup_pgd_end[]; -static void __cpuinit build_r4000_setup_pgd(void) +static void build_setup_pgd(void) { const int a0 = 4; - const int a1 = 5; - u32 *p = tlbmiss_handler_setup_pgd_array; - struct uasm_label *l = labels; - struct uasm_reloc *r = relocs; + const int __maybe_unused a1 = 5; + const int __maybe_unused a2 = 6; + u32 *p = tlbmiss_handler_setup_pgd_start; + const int tlbmiss_handler_setup_pgd_size = + tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd_start; +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + long pgdc = (long)pgd_current; +#endif - memset(tlbmiss_handler_setup_pgd_array, 0, sizeof(tlbmiss_handler_setup_pgd_array)); + memset(tlbmiss_handler_setup_pgd, 0, tlbmiss_handler_setup_pgd_size * + sizeof(tlbmiss_handler_setup_pgd[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - pgd_reg = allocate_kscratch(); - +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT if (pgd_reg == -1) { + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + /* PGD << 11 in c0_Context */ /* * If it is a ckseg0 address, convert to a physical @@ -1495,32 +1469,51 @@ } else { /* PGD in c0_KScratch */ uasm_i_jr(&p, 31); - UASM_i_MTC0(&p, a0, 31, pgd_reg); + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); } - if (p - tlbmiss_handler_setup_pgd_array > ARRAY_SIZE(tlbmiss_handler_setup_pgd_array)) - panic("tlbmiss_handler_setup_pgd_array space exceeded"); +#else +#ifdef CONFIG_SMP + /* Save PGD to pgd_current[smp_processor_id()] */ + UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG); + UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_ADDU(&p, a2, a2, a1); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#else + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#endif /* SMP */ + uasm_i_jr(&p, 31); + + /* if pgd_reg is allocated, save PGD also to scratch register */ + if (pgd_reg != -1) + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + else + uasm_i_nop(&p); +#endif + if (p >= tlbmiss_handler_setup_pgd_end) + panic("tlbmiss_handler_setup_pgd space exceeded"); + uasm_resolve_relocs(relocs, labels); - pr_debug("Wrote tlbmiss_handler_setup_pgd_array (%u instructions).\n", - (unsigned int)(p - tlbmiss_handler_setup_pgd_array)); + pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", + (unsigned int)(p - tlbmiss_handler_setup_pgd)); - dump_handler("tlbmiss_handler", - tlbmiss_handler_setup_pgd_array, - ARRAY_SIZE(tlbmiss_handler_setup_pgd_array)); + dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd, + tlbmiss_handler_setup_pgd_size); } -#endif -static void __cpuinit +static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_lld(p, pte, 0, ptr); else # endif UASM_i_LL(p, pte, 0, ptr); #else -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_ld(p, pte, 0, ptr); else @@ -1529,17 +1522,23 @@ #endif } -static void __cpuinit +static void iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, unsigned int mode) { -#ifdef CONFIG_64BIT_PHYS_ADDR +#ifdef CONFIG_PHYS_ADDR_T_64BIT unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); -#endif + if (!cpu_has_64bits) { + const int scratch = 1; /* Our extra working register */ + + uasm_i_lui(p, scratch, (mode >> 16)); + uasm_i_or(p, pte, pte, scratch); + } else +#endif uasm_i_ori(p, pte, pte, mode); #ifdef CONFIG_SMP -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_scd(p, pte, 0, ptr); else @@ -1551,7 +1550,7 @@ else uasm_il_beqz(p, r, pte, label_smp_pgtable_change); -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (!cpu_has_64bits) { /* no uasm_i_nop needed */ uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); @@ -1566,14 +1565,14 @@ uasm_i_nop(p); # endif #else -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_sd(p, pte, 0, ptr); else # endif UASM_i_SW(p, pte, 0, ptr); -# ifdef CONFIG_64BIT_PHYS_ADDR +# ifdef CONFIG_PHYS_ADDR_T_64BIT if (!cpu_has_64bits) { uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); uasm_i_ori(p, pte, pte, hwmode); @@ -1589,26 +1588,37 @@ * the page table where this PTE is located, PTE will be re-loaded * with it's original value. */ -static void __cpuinit +static void build_pte_present(u32 **p, struct uasm_reloc **r, int pte, int ptr, int scratch, enum label_id lid) { int t = scratch >= 0 ? scratch : pte; + int cur = pte; if (cpu_has_rixi) { if (use_bbit_insns()) { uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); uasm_i_nop(p); } else { - uasm_i_andi(p, t, pte, _PAGE_PRESENT); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, 1); uasm_il_beqz(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ iPTE_LW(p, pte, ptr); } } else { - uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); - uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_READ) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, + (_PAGE_PRESENT | _PAGE_READ) >> _PAGE_PRESENT_SHIFT); uasm_il_bnez(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1617,7 +1627,7 @@ } /* Make PTE valid, store result in PTR. */ -static void __cpuinit +static void build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr) { @@ -1630,15 +1640,22 @@ * Check if PTE can be written to, if not branch to LABEL. Regardless * restore PTE with value from PTR when done. */ -static void __cpuinit +static void build_pte_writable(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, int scratch, enum label_id lid) { int t = scratch >= 0 ? scratch : pte; + int cur = pte; - uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); - uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); uasm_il_bnez(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1650,7 +1667,7 @@ /* Make PTE writable, update software status bits as well, then store * at PTR. */ -static void __cpuinit +static void build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr) { @@ -1664,7 +1681,7 @@ * Check if PTE can be modified, if not branch to LABEL. Regardless * restore PTE with value from PTR when done. */ -static void __cpuinit +static void build_pte_modifiable(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, int scratch, enum label_id lid) @@ -1674,7 +1691,8 @@ uasm_i_nop(p); } else { int t = scratch >= 0 ? scratch : pte; - uasm_i_andi(p, t, pte, _PAGE_WRITE); + uasm_i_srl(p, t, pte, _PAGE_WRITE_SHIFT); + uasm_i_andi(p, t, t, 1); uasm_il_beqz(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1693,7 +1711,7 @@ * This places the pte into ENTRYLO0 and writes it with tlbwi. * Then it returns. */ -static void __cpuinit +static void build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) { uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ @@ -1709,7 +1727,7 @@ * may have the probe fail bit set as a result of a trap on a * kseg2 access, i.e. without refill. Then it returns. */ -static void __cpuinit +static void build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int pte, unsigned int tmp) @@ -1727,7 +1745,7 @@ uasm_i_rfe(p); /* branch delay */ } -static void __cpuinit +static void build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, unsigned int ptr) { @@ -1747,13 +1765,14 @@ uasm_i_tlbp(p); /* load delay */ } -static void __cpuinit build_r3000_tlb_load_handler(void) +static void build_r3000_tlb_load_handler(void) { u32 *p = handle_tlbl; + const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbl, 0, sizeof(handle_tlbl)); + memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -1767,23 +1786,24 @@ uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); - if ((p - handle_tlbl) > FASTPATH_SIZE) + if (p >= handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", (unsigned int)(p - handle_tlbl)); - dump_handler("r3000_tlb_load", handle_tlbl, ARRAY_SIZE(handle_tlbl)); + dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_size); } -static void __cpuinit build_r3000_tlb_store_handler(void) +static void build_r3000_tlb_store_handler(void) { u32 *p = handle_tlbs; + const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbs, 0, sizeof(handle_tlbs)); + memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -1797,23 +1817,24 @@ uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if ((p - handle_tlbs) > FASTPATH_SIZE) + if (p >= handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", (unsigned int)(p - handle_tlbs)); - dump_handler("r3000_tlb_store", handle_tlbs, ARRAY_SIZE(handle_tlbs)); + dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_size); } -static void __cpuinit build_r3000_tlb_modify_handler(void) +static void build_r3000_tlb_modify_handler(void) { u32 *p = handle_tlbm; + const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbm, 0, sizeof(handle_tlbm)); + memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -1827,21 +1848,21 @@ uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if ((p - handle_tlbm) > FASTPATH_SIZE) + if (p >= handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", (unsigned int)(p - handle_tlbm)); - dump_handler("r3000_tlb_modify", handle_tlbm, ARRAY_SIZE(handle_tlbm)); + dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_size); } #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ /* * R4000 style TLB load/store/modify handlers. */ -static struct work_registers __cpuinit +static struct work_registers build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, struct uasm_reloc **r) { @@ -1872,12 +1893,20 @@ uasm_l_smp_pgtable_change(l, *p); #endif iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ - if (!m4kc_tlbp_war()) + if (!m4kc_tlbp_war()) { build_tlb_probe_entry(p); + if (cpu_has_htw) { + /* race condition happens, leaving */ + uasm_i_ehb(p); + uasm_i_mfc0(p, wr.r3, C0_INDEX); + uasm_il_bltz(p, r, wr.r3, label_leave); + uasm_i_nop(p); + } + } return wr; } -static void __cpuinit +static void build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int tmp, unsigned int ptr) @@ -1895,14 +1924,15 @@ #endif } -static void __cpuinit build_r4000_tlb_load_handler(void) +static void build_r4000_tlb_load_handler(void) { u32 *p = handle_tlbl; + const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbl, 0, sizeof(handle_tlbl)); + memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -1925,7 +1955,7 @@ if (m4kc_tlbp_war()) build_tlb_probe_entry(&p); - if (cpu_has_rixi) { + if (cpu_has_rixi && !cpu_has_rixiex) { /* * If the page is not _PAGE_VALID, RI or XI could not * have triggered it. Skip the expensive test.. @@ -1940,6 +1970,19 @@ uasm_i_nop(&p); uasm_i_tlbr(&p); + + switch (current_cpu_type()) { + default: + if (cpu_has_mips_r2_exec_hazard) { + uasm_i_ehb(&p); + + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + break; + } + } + /* Examine entrylo 0 or 1 based on ptr. */ if (use_bbit_insns()) { uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); @@ -1979,7 +2022,7 @@ build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); build_tlb_probe_entry(&p); - if (cpu_has_rixi) { + if (cpu_has_rixi && !cpu_has_rixiex) { /* * If the page is not _PAGE_VALID, RI or XI could not * have triggered it. Skip the expensive test.. @@ -1994,6 +2037,19 @@ uasm_i_nop(&p); uasm_i_tlbr(&p); + + switch (current_cpu_type()) { + default: + if (cpu_has_mips_r2_exec_hazard) { + uasm_i_ehb(&p); + + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + break; + } + } + /* Examine entrylo 0 or 1 based on ptr. */ if (use_bbit_insns()) { uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); @@ -2041,24 +2097,25 @@ uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); - if ((p - handle_tlbl) > FASTPATH_SIZE) + if (p >= handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", (unsigned int)(p - handle_tlbl)); - dump_handler("r4000_tlb_load", handle_tlbl, ARRAY_SIZE(handle_tlbl)); + dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_size); } -static void __cpuinit build_r4000_tlb_store_handler(void) +static void build_r4000_tlb_store_handler(void) { u32 *p = handle_tlbs; + const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbs, 0, sizeof(handle_tlbs)); + memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2095,24 +2152,25 @@ uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if ((p - handle_tlbs) > FASTPATH_SIZE) + if (p >= handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", (unsigned int)(p - handle_tlbs)); - dump_handler("r4000_tlb_store", handle_tlbs, ARRAY_SIZE(handle_tlbs)); + dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_size); } -static void __cpuinit build_r4000_tlb_modify_handler(void) +static void build_r4000_tlb_modify_handler(void) { u32 *p = handle_tlbm; + const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbm, 0, sizeof(handle_tlbm)); + memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2150,17 +2208,185 @@ uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if ((p - handle_tlbm) > FASTPATH_SIZE) + if (p >= handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", (unsigned int)(p - handle_tlbm)); - dump_handler("r4000_tlb_modify", handle_tlbm, ARRAY_SIZE(handle_tlbm)); + dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_size); +} + +static void flush_tlb_handlers(void) +{ + local_flush_icache_range((unsigned long)handle_tlbl, + (unsigned long)handle_tlbl_end); + local_flush_icache_range((unsigned long)handle_tlbs, + (unsigned long)handle_tlbs_end); + local_flush_icache_range((unsigned long)handle_tlbm, + (unsigned long)handle_tlbm_end); + local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd, + (unsigned long)tlbmiss_handler_setup_pgd_end); +} + +static void print_htw_config(void) +{ + unsigned long config; + unsigned int pwctl; + const int field = 2 * sizeof(unsigned long); + + config = read_c0_pwfield(); + pr_debug("PWField (0x%0*lx): GDI: 0x%02lx UDI: 0x%02lx MDI: 0x%02lx PTI: 0x%02lx PTEI: 0x%02lx\n", + field, config, + (config & MIPS_PWFIELD_GDI_MASK) >> MIPS_PWFIELD_GDI_SHIFT, + (config & MIPS_PWFIELD_UDI_MASK) >> MIPS_PWFIELD_UDI_SHIFT, + (config & MIPS_PWFIELD_MDI_MASK) >> MIPS_PWFIELD_MDI_SHIFT, + (config & MIPS_PWFIELD_PTI_MASK) >> MIPS_PWFIELD_PTI_SHIFT, + (config & MIPS_PWFIELD_PTEI_MASK) >> MIPS_PWFIELD_PTEI_SHIFT); + + config = read_c0_pwsize(); + pr_debug("PWSize (0x%0*lx): GDW: 0x%02lx UDW: 0x%02lx MDW: 0x%02lx PTW: 0x%02lx PTEW: 0x%02lx\n", + field, config, + (config & MIPS_PWSIZE_GDW_MASK) >> MIPS_PWSIZE_GDW_SHIFT, + (config & MIPS_PWSIZE_UDW_MASK) >> MIPS_PWSIZE_UDW_SHIFT, + (config & MIPS_PWSIZE_MDW_MASK) >> MIPS_PWSIZE_MDW_SHIFT, + (config & MIPS_PWSIZE_PTW_MASK) >> MIPS_PWSIZE_PTW_SHIFT, + (config & MIPS_PWSIZE_PTEW_MASK) >> MIPS_PWSIZE_PTEW_SHIFT); + + pwctl = read_c0_pwctl(); + pr_debug("PWCtl (0x%x): PWEn: 0x%x DPH: 0x%x HugePg: 0x%x Psn: 0x%x\n", + pwctl, + (pwctl & MIPS_PWCTL_PWEN_MASK) >> MIPS_PWCTL_PWEN_SHIFT, + (pwctl & MIPS_PWCTL_DPH_MASK) >> MIPS_PWCTL_DPH_SHIFT, + (pwctl & MIPS_PWCTL_HUGEPG_MASK) >> MIPS_PWCTL_HUGEPG_SHIFT, + (pwctl & MIPS_PWCTL_PSN_MASK) >> MIPS_PWCTL_PSN_SHIFT); } -void __cpuinit build_tlb_refill_handler(void) +static void config_htw_params(void) +{ + unsigned long pwfield, pwsize, ptei; + unsigned int config; + + /* + * We are using 2-level page tables, so we only need to + * setup GDW and PTW appropriately. UDW and MDW will remain 0. + * The default value of GDI/UDI/MDI/PTI is 0xc. It is illegal to + * write values less than 0xc in these fields because the entire + * write will be dropped. As a result of which, we must preserve + * the original reset values and overwrite only what we really want. + */ + + pwfield = read_c0_pwfield(); + /* re-initialize the GDI field */ + pwfield &= ~MIPS_PWFIELD_GDI_MASK; + pwfield |= PGDIR_SHIFT << MIPS_PWFIELD_GDI_SHIFT; + /* re-initialize the PTI field including the even/odd bit */ + pwfield &= ~MIPS_PWFIELD_PTI_MASK; + pwfield |= PAGE_SHIFT << MIPS_PWFIELD_PTI_SHIFT; + if (CONFIG_PGTABLE_LEVELS >= 3) { + pwfield &= ~MIPS_PWFIELD_MDI_MASK; + pwfield |= PMD_SHIFT << MIPS_PWFIELD_MDI_SHIFT; + } + /* Set the PTEI right shift */ + ptei = _PAGE_GLOBAL_SHIFT << MIPS_PWFIELD_PTEI_SHIFT; + pwfield |= ptei; + write_c0_pwfield(pwfield); + /* Check whether the PTEI value is supported */ + back_to_back_c0_hazard(); + pwfield = read_c0_pwfield(); + if (((pwfield & MIPS_PWFIELD_PTEI_MASK) << MIPS_PWFIELD_PTEI_SHIFT) + != ptei) { + pr_warn("Unsupported PTEI field value: 0x%lx. HTW will not be enabled", + ptei); + /* + * Drop option to avoid HTW being enabled via another path + * (eg htw_reset()) + */ + current_cpu_data.options &= ~MIPS_CPU_HTW; + return; + } + + pwsize = ilog2(PTRS_PER_PGD) << MIPS_PWSIZE_GDW_SHIFT; + pwsize |= ilog2(PTRS_PER_PTE) << MIPS_PWSIZE_PTW_SHIFT; + if (CONFIG_PGTABLE_LEVELS >= 3) + pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT; + + /* If XPA has been enabled, PTEs are 64-bit in size. */ + if (config_enabled(CONFIG_64BITS) || (read_c0_pagegrain() & PG_ELPA)) + pwsize |= 1; + + write_c0_pwsize(pwsize); + + /* Make sure everything is set before we enable the HTW */ + back_to_back_c0_hazard(); + + /* Enable HTW and disable the rest of the pwctl fields */ + config = 1 << MIPS_PWCTL_PWEN_SHIFT; + write_c0_pwctl(config); + pr_info("Hardware Page Table Walker enabled\n"); + + print_htw_config(); +} + +static void config_xpa_params(void) +{ +#ifdef CONFIG_XPA + unsigned int pagegrain; + + if (mips_xpa_disabled) { + pr_info("Extended Physical Addressing (XPA) disabled\n"); + return; + } + + pagegrain = read_c0_pagegrain(); + write_c0_pagegrain(pagegrain | PG_ELPA); + back_to_back_c0_hazard(); + pagegrain = read_c0_pagegrain(); + + if (pagegrain & PG_ELPA) + pr_info("Extended Physical Addressing (XPA) enabled\n"); + else + panic("Extended Physical Addressing (XPA) disabled"); +#endif +} + +static void check_pabits(void) +{ + unsigned long entry; + unsigned pabits, fillbits; + + if (!cpu_has_rixi || !_PAGE_NO_EXEC) { + /* + * We'll only be making use of the fact that we can rotate bits + * into the fill if the CPU supports RIXI, so don't bother + * probing this for CPUs which don't. + */ + return; + } + + write_c0_entrylo0(~0ul); + back_to_back_c0_hazard(); + entry = read_c0_entrylo0(); + + /* clear all non-PFN bits */ + entry &= ~((1 << MIPS_ENTRYLO_PFN_SHIFT) - 1); + entry &= ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); + + /* find a lower bound on PABITS, and upper bound on fill bits */ + pabits = fls_long(entry) + 6; + fillbits = max_t(int, (int)BITS_PER_LONG - pabits, 0); + + /* minus the RI & XI bits */ + fillbits -= min_t(unsigned, fillbits, 2); + + if (fillbits >= ilog2(_PAGE_NO_EXEC)) + fill_includes_sw_bits = true; + + pr_debug("Entry* registers contain %u fill bits\n", fillbits); +} + +void build_tlb_refill_handler(void) { /* * The refill handler is generated per-CPU, multi-node systems @@ -2170,6 +2396,7 @@ static int run_once = 0; output_pgtable_bits_defines(); + check_pabits(); #ifdef CONFIG_64BIT check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); @@ -2189,9 +2416,11 @@ if (!run_once) { if (!cpu_has_local_ebase) build_r3000_tlb_refill_handler(); + build_setup_pgd(); build_r3000_tlb_load_handler(); build_r3000_tlb_store_handler(); build_r3000_tlb_modify_handler(); + flush_tlb_handlers(); run_once++; } #else @@ -2211,31 +2440,20 @@ default: if (!run_once) { scratch_reg = allocate_kscratch(); -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT - build_r4000_setup_pgd(); -#endif + build_setup_pgd(); build_r4000_tlb_load_handler(); build_r4000_tlb_store_handler(); build_r4000_tlb_modify_handler(); if (!cpu_has_local_ebase) build_r4000_tlb_refill_handler(); + flush_tlb_handlers(); run_once++; } if (cpu_has_local_ebase) build_r4000_tlb_refill_handler(); + if (cpu_has_xpa) + config_xpa_params(); + if (cpu_has_htw) + config_htw_params(); } } - -void __cpuinit flush_tlb_handlers(void) -{ - local_flush_icache_range((unsigned long)handle_tlbl, - (unsigned long)handle_tlbl + sizeof(handle_tlbl)); - local_flush_icache_range((unsigned long)handle_tlbs, - (unsigned long)handle_tlbs + sizeof(handle_tlbs)); - local_flush_icache_range((unsigned long)handle_tlbm, - (unsigned long)handle_tlbm + sizeof(handle_tlbm)); -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT - local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd_array, - (unsigned long)tlbmiss_handler_setup_pgd_array + sizeof(handle_tlbm)); -#endif -}