--- zzzz-none-000/linux-3.10.107/arch/x86/mm/init.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/x86/mm/init.c 2021-02-04 17:41:59.000000000 +0000 @@ -18,8 +18,51 @@ #include /* for MAX_DMA_PFN */ #include +/* + * We need to define the tracepoints somewhere, and tlb.c + * is only compiled when SMP=y. + */ +#define CREATE_TRACE_POINTS +#include + #include "mm_internal.h" +/* + * Tables translating between page_cache_type_t and pte encoding. + * + * The default values are defined statically as minimal supported mode; + * WC and WT fall back to UC-. pat_init() updates these values to support + * more cache modes, WC and WT, when it is safe to do so. See pat_init() + * for the details. Note, __early_ioremap() used during early boot-time + * takes pgprot_t (pte encoding) and does not use these tables. + * + * Index into __cachemode2pte_tbl[] is the cachemode. + * + * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. 
+ */ +uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { + [_PAGE_CACHE_MODE_WB ] = 0 | 0 , + [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, + [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD, +}; +EXPORT_SYMBOL(__cachemode2pte_tbl); + +uint8_t __pte2cachemode_tbl[8] = { + [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, + [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, +}; +EXPORT_SYMBOL(__pte2cachemode_tbl); + static unsigned long __initdata pgt_buf_start; static unsigned long __initdata pgt_buf_end; static unsigned long __initdata pgt_buf_top; @@ -53,12 +96,12 @@ if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { unsigned long ret; if (min_pfn_mapped >= max_pfn_mapped) - panic("alloc_low_page: ran out of memory"); + panic("alloc_low_pages: ran out of memory"); ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, max_pfn_mapped << PAGE_SHIFT, PAGE_SIZE * num , PAGE_SIZE); if (!ret) - panic("alloc_low_page: can not alloc memory"); + panic("alloc_low_pages: can not alloc memory"); memblock_reserve(ret, PAGE_SIZE * num); pfn = ret >> PAGE_SHIFT; } else { @@ -95,21 +138,7 @@ int after_bootmem; -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - -static void __init init_gbpages(void) -{ -#ifdef CONFIG_X86_64 - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -#endif -} +early_param_on_off("gbpages", 
"nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES); struct map_range { unsigned long start; @@ -121,28 +150,33 @@ static void __init probe_page_size_mask(void) { - init_gbpages(); - #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (direct_gbpages) - page_size_mask |= 1 << PG_LEVEL_1G; if (cpu_has_pse) page_size_mask |= 1 << PG_LEVEL_2M; #endif /* Enable PSE if available */ if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); + cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); + cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; + } else + __supported_pte_mask &= ~_PAGE_GLOBAL; + + /* Enable 1 GB linear kernel mappings if available: */ + if (direct_gbpages && cpu_has_gbpages) { + printk(KERN_INFO "Using GB pages for direct mapping\n"); + page_size_mask |= 1 << PG_LEVEL_1G; + } else { + direct_gbpages = 0; } } @@ -202,6 +236,31 @@ } } +static const char *page_size_string(struct map_range *mr) +{ + static const char str_1g[] = "1G"; + static const char str_2m[] = "2M"; + static const char str_4m[] = "4M"; + static const char str_4k[] = "4k"; + + if (mr->page_size_mask & (1<page_size_mask & (1<page_size_mask & (1< ISA_END_ADDRESS) { + while (last_start > map_start) { if (last_start > step_size) { start = round_down(last_start - 1, step_size); - if (start < ISA_END_ADDRESS) - start = ISA_END_ADDRESS; + if (start < map_start) + start = map_start; } else - start = ISA_END_ADDRESS; - new_mapped_ram_size = init_range_memory_mapping(start, + start = map_start; + mapped_ram_size += init_range_memory_mapping(start, last_start); last_start = start; min_pfn_mapped = last_start >> PAGE_SHIFT; - /* only increase step_size after big range get mapped */ - if 
(new_mapped_ram_size > mapped_ram_size) - step_size <<= STEP_SIZE_SHIFT; - mapped_ram_size += new_mapped_ram_size; + if (mapped_ram_size >= step_size) + step_size = get_new_step_size(step_size); + } + + if (real_end < map_end) + init_range_memory_mapping(real_end, map_end); +} + +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. + */ +static void __init memory_map_bottom_up(unsigned long map_start, + unsigned long map_end) +{ + unsigned long next, start; + unsigned long mapped_ram_size = 0; + /* step_size need to be small so pgt_buf from BRK could cover it */ + unsigned long step_size = PMD_SIZE; + + start = map_start; + min_pfn_mapped = start >> PAGE_SHIFT; + + /* + * We start from the bottom (@map_start) and go to the top (@map_end). + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. 
+ */ + while (start < map_end) { + if (step_size && map_end - start > step_size) { + next = round_up(start + 1, step_size); + if (next > map_end) + next = map_end; + } else { + next = map_end; + } + + mapped_ram_size += init_range_memory_mapping(start, next); + start = next; + + if (mapped_ram_size >= step_size) + step_size = get_new_step_size(step_size); } +} + +void __init init_mem_mapping(void) +{ + unsigned long end; + + probe_page_size_mask(); + +#ifdef CONFIG_X86_64 + end = max_pfn << PAGE_SHIFT; +#else + end = max_low_pfn << PAGE_SHIFT; +#endif + + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping(0, ISA_END_ADDRESS); - if (real_end < end) - init_range_memory_mapping(real_end, end); + /* + * If the allocation is in bottom-up direction, we setup direct mapping + * in bottom-up, otherwise we setup direct mapping in top-down. + */ + if (memblock_bottom_up()) { + unsigned long kernel_end = __pa_symbol(_end); + + /* + * we need two separate calls here. This is because we want to + * allocate page tables above the kernel. So we first map + * [kernel_end, end) to make memory above the kernel be mapped + * as soon as possible. And then use page tables allocated above + * the kernel to map [ISA_END_ADDRESS, kernel_end). + */ + memory_map_bottom_up(kernel_end, end); + memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); + } else { + memory_map_top_down(ISA_END_ADDRESS, end); + } #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { @@ -475,45 +628,25 @@ * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. * - * On x86, access has to be given to the first megabyte of RAM because that - * area traditionally contains BIOS code and data regions used by X, dosemu, - * and similar apps. 
Since they map the entire memory range, the whole range - * must be allowed (for mapping), but any areas that would otherwise be - * disallowed are flagged as being "zero filled" instead of rejected. - * Access has to be given to non-kernel-ram areas as well, these contain the - * PCI mmio resources as well as potential bios/acpi data regions. + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains BIOS code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. */ int devmem_is_allowed(unsigned long pagenr) { - if (page_is_ram(pagenr)) { - /* - * For disallowed memory regions in the low 1MB range, - * request that the page be shown as all zeros. - */ - if (pagenr < 256) - return 2; - - return 0; - } - - /* - * This must follow RAM test, since System RAM is considered a - * restricted resource under CONFIG_STRICT_IOMEM. - */ - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) { - /* Low 1MB bypasses iomem restrictions. 
*/ - if (pagenr < 256) - return 1; - + if (pagenr < 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; - } - - return 1; + if (!page_is_ram(pagenr)) + return 1; + return 0; } void free_init_pages(char *what, unsigned long begin, unsigned long end) { - unsigned long addr; unsigned long begin_aligned, end_aligned; /* Make sure boundaries are page aligned */ @@ -528,8 +661,6 @@ if (begin >= end) return; - addr = begin; - /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will @@ -548,18 +679,13 @@ set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - - for (; addr < end; addr += PAGE_SIZE) { - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_reserved_page(virt_to_page(addr)); - } + free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); #endif } void free_initmem(void) { - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } @@ -567,14 +693,12 @@ #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { -#ifdef CONFIG_MICROCODE_EARLY /* * Remember, initrd memory may contain microcode or other useful things. * Before we lose initrd mem, we need to find a place to hold them * now that normal virtual memory is enabled. 
*/ save_microcode_in_initrd(); -#endif /* * end could be not aligned, and We can not align that, @@ -585,7 +709,7 @@ * - relocate_initrd() * So here We can do PAGE_ALIGN() safely to get partial page to be freed */ - free_init_pages("initrd memory", start, PAGE_ALIGN(end)); + free_init_pages("initrd", start, PAGE_ALIGN(end)); } #endif @@ -596,10 +720,10 @@ memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; + max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn); #endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; + max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM @@ -609,3 +733,20 @@ free_area_init_nodes(max_zone_pfns); } +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +#ifdef CONFIG_SMP + .active_mm = &init_mm, + .state = 0, +#endif + .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ +}; +EXPORT_SYMBOL_GPL(cpu_tlbstate); + +void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) +{ + /* entry 0 MUST be WB (hardwired to speed up translations) */ + BUG_ON(!entry && cache != _PAGE_CACHE_MODE_WB); + + __cachemode2pte_tbl[cache] = __cm_idx2pte(entry); + __pte2cachemode_tbl[entry] = cache; +}