--- zzzz-none-000/linux-3.10.107/include/linux/mmzone.h	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/include/linux/mmzone.h	2021-02-04 17:41:59.000000000 +0000
@@ -37,10 +37,10 @@
 enum {
 	MIGRATE_UNMOVABLE,
-	MIGRATE_RECLAIMABLE,
 	MIGRATE_MOVABLE,
+	MIGRATE_RECLAIMABLE,
 	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
-	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
+	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
 #ifdef CONFIG_CMA
 	/*
 	 * MIGRATE_CMA migration type is designed to mimic the way
@@ -75,9 +75,18 @@
 
 extern int page_group_by_mobility_disabled;
 
-static inline int get_pageblock_migratetype(struct page *page)
-{
-	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
+#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
+#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
+
+#define get_pageblock_migratetype(page)					\
+	get_pfnblock_flags_mask(page, page_to_pfn(page),		\
+			PB_migrate_end, MIGRATETYPE_MASK)
+
+static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
+{
+	BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
+	return get_pfnblock_flags_mask(page, pfn, PB_migrate_end,
+					MIGRATETYPE_MASK);
 }
 
 struct free_area {
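Two related changes here: the first hunk swaps MIGRATE_MOVABLE and MIGRATE_RECLAIMABLE (so the per-cpu list order changes) and replaces MIGRATE_RESERVE with MIGRATE_HIGHATOMIC, the reserve for high-order atomic allocations that the new zone->nr_reserved_highatomic field (below) accounts for. The second hunk derives the migratetype mask from the PB_migrate bit range: bits 0..2, three bits, which is exactly what the BUILD_BUG_ON pins down. A standalone sketch of the same shift-and-mask arithmetic, with the PB_* values hard-coded as stand-ins for include/linux/pageblock-flags.h rather than taken from the kernel:

/*
 * Minimal sketch of the MIGRATETYPE_MASK arithmetic above; compiles
 * outside the kernel tree. PB_* values are assumptions mirroring
 * pageblock-flags.h.
 */
#include <assert.h>
#include <stdio.h>

#define PB_migrate		0
#define PB_migrate_end		2	/* bits 0..2 hold the migratetype */
#define NR_MIGRATETYPE_BITS	(PB_migrate_end - PB_migrate + 1)
#define MIGRATETYPE_MASK	((1UL << NR_MIGRATETYPE_BITS) - 1)

/* One-word model of what get_pfnblock_flags_mask() boils down to:
 * shift the block's flag word down to the first migrate bit, mask. */
static unsigned long get_flags_mask(unsigned long word,
				    unsigned long end_bit, unsigned long mask)
{
	return (word >> (end_bit + 1 - NR_MIGRATETYPE_BITS)) & mask;
}

int main(void)
{
	unsigned long word = 0x2;	/* MIGRATE_RECLAIMABLE after the reorder */

	assert(MIGRATETYPE_MASK == 0x7);
	printf("migratetype = %lu\n",
	       get_flags_mask(word, PB_migrate_end, MIGRATETYPE_MASK));
	return 0;
}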
@@ -105,6 +114,7 @@
 enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
+	NR_ALLOC_BATCH,
 	NR_LRU_BASE,
 	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
 	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
@@ -133,6 +143,7 @@
 	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
+	NR_PAGES_SCANNED,	/* pages scanned since last reclaim */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
@@ -141,8 +152,12 @@
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
 #endif
+	WORKINGSET_REFAULT,
+	WORKINGSET_ACTIVATE,
+	WORKINGSET_NODERECLAIM,
 	NR_ANON_TRANSPARENT_HUGEPAGES,
 	NR_FREE_CMA_PAGES,
+	NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
 	NR_VM_ZONE_STAT_ITEMS };
 
 /*
@@ -305,67 +320,52 @@
 	ZONE_HIGHMEM,
 #endif
 	ZONE_MOVABLE,
+#ifdef CONFIG_ZONE_DEVICE
+	ZONE_DEVICE,
+#endif
 	__MAX_NR_ZONES
+
 };
 
 #ifndef __GENERATING_BOUNDS_H
 
 struct zone {
-	/* Fields commonly accessed by the page allocator */
+	/* Read-mostly fields */
 
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];
 
-	/*
-	 * When free pages are below this point, additional steps are taken
-	 * when reading the number of free pages to avoid per-cpu counter
-	 * drift allowing watermarks to be breached
-	 */
-	unsigned long percpu_drift_mark;
-
-	/*
-	 * We don't know if the memory that we're going to allocate will be freeable
-	 * or/and it will be released eventually, so to avoid totally wasting several
-	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
-	 * to run OOM on the lower zones despite there's tons of freeable ram
-	 * on the higher zones). This array is recalculated at runtime if the
-	 * sysctl_lowmem_reserve_ratio sysctl changes.
-	 */
-	unsigned long		lowmem_reserve[MAX_NR_ZONES];
+	unsigned long nr_reserved_highatomic;
 
 	/*
-	 * This is a per-zone reserve of pages that should not be
-	 * considered dirtyable memory.
+	 * We don't know if the memory that we're going to allocate will be
+	 * freeable or/and it will be released eventually, so to avoid totally
+	 * wasting several GB of ram we must reserve some of the lower zone
+	 * memory (otherwise we risk to run OOM on the lower zones despite
+	 * there being tons of freeable ram on the higher zones). This array is
+	 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
+	 * changes.
 	 */
-	unsigned long		dirty_balance_reserve;
+	long lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
 	int node;
+#endif
+
 	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU.  Maintained by the pageout code.
 	 */
-	unsigned long		min_unmapped_pages;
-	unsigned long		min_slab_pages;
-#endif
+	unsigned int inactive_ratio;
+
+	struct pglist_data	*zone_pgdat;
 	struct per_cpu_pageset __percpu *pageset;
+
 	/*
-	 * free areas of different sizes
+	 * This is a per-zone reserve of pages that should not be
+	 * considered dirtyable memory.
 	 */
-	spinlock_t		lock;
-	int			all_unreclaimable; /* All pages pinned */
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-	/* Set to true when the PG_migrate_skip bits should be cleared */
-	bool			compact_blockskip_flush;
-
-	/* pfns where compaction scanners should start */
-	unsigned long		compact_cached_free_pfn;
-	unsigned long		compact_cached_migrate_pfn;
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* see spanned/present_pages for more description */
-	seqlock_t		span_seqlock;
-#endif
-	struct free_area	free_area[MAX_ORDER];
+	unsigned long		dirty_balance_reserve;
 
 #ifndef CONFIG_SPARSEMEM
 	/*
@@ -375,71 +375,14 @@
 	unsigned long		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_COMPACTION
-	/*
-	 * On compaction failure, 1<<compact_defer_shift compactions
-	 * are skipped before trying again. The number attempted since
-	 * last failure is tracked with compact_considered.
-	 */
-	unsigned int		compact_considered;
-	unsigned int		compact_defer_shift;
-	int			compact_order_failed;
-#endif
-
-	ZONE_PADDING(_pad1_)
-
-	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;
-	struct lruvec		lruvec;
-
-	unsigned long		pages_scanned;	   /* since last reclaim */
-	unsigned long		flags;		   /* zone flags, see below */
-
-	/* Zone statistics */
-	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
-	/*
-	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
-	 * this zone's LRU.  Maintained by the pageout code.
-	 */
-	unsigned int inactive_ratio;
-
-	ZONE_PADDING(_pad2_)
-	/* Rarely used or read-mostly fields */
-
-	/*
-	 * wait_table		-- the array holding the hash table
-	 * wait_table_hash_nr_entries	-- the size of the hash table array
-	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
-	 *
-	 * The purpose of all these is to keep track of the people
-	 * waiting for a page to become available and make them
-	 * runnable again when possible. The trouble is that this
-	 * consumes a lot of space, especially when so few things
-	 * wait on pages at a given time. So instead of using
-	 * per-page waitqueues, we use a waitqueue hash table.
-	 *
-	 * The bucket discipline is to sleep on the same queue when
-	 * colliding and wake all in that wait queue when removing.
-	 * When something wakes, it must check to be sure its page is
-	 * truly available, a la thundering herd. The cost of a
-	 * collision is great, but given the expected load of the
-	 * table, they should be so rare as to be outweighed by the
-	 * benefits from the saved space.
-	 *
-	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-	 * primary users of these fields, and in mm/page_alloc.c
-	 * free_area_init_core() performs the initialization of them.
-	 */
-	wait_queue_head_t	* wait_table;
-	unsigned long		wait_table_hash_nr_entries;
-	unsigned long		wait_table_bits;
-
-	/*
-	 * Discontig memory support fields.
-	 */
-	struct pglist_data	*zone_pgdat;
+#ifdef CONFIG_NUMA
+	/*
+	 * zone reclaim becomes active if more unmapped pages exist.
+	 */
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
+#endif /* CONFIG_NUMA */
+
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
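The relocated comment is the whole story of lowmem_reserve[]: allocations that could have been satisfied from a higher zone must leave headroom in the lower zones they fall back to. The type change from unsigned long to plain long lets watermark code compare against it without unsigned-underflow surprises. A toy recomputation in the spirit of setup_per_zone_lowmem_reserve() in mm/page_alloc.c; the zone set, page counts and ratios below are invented for the example:

/*
 * Sketch: derive per-zone protection from sysctl_lowmem_reserve_ratio,
 * mirroring the "protection:" lines of /proc/zoneinfo. Not kernel code.
 */
#include <stdio.h>

#define MAX_NR_ZONES 3

static const char *names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
static long managed[MAX_NR_ZONES]      = { 3976, 219378, 294912 }; /* pages */
static int ratio[MAX_NR_ZONES]         = { 256, 32, 32 };

int main(void)
{
	long reserve[MAX_NR_ZONES][MAX_NR_ZONES] = { { 0 } };

	/* An allocation allowed to use zone j but falling back to a lower
	 * zone i must leave managed[i+1..j] / ratio[i] pages free in i. */
	for (int i = 0; i < MAX_NR_ZONES; i++) {
		long upper = 0;
		for (int j = i + 1; j < MAX_NR_ZONES; j++) {
			upper += managed[j];
			reserve[i][j] = upper / ratio[i];
		}
	}

	for (int i = 0; i < MAX_NR_ZONES; i++) {
		printf("%-8s protection:", names[i]);
		for (int j = 0; j < MAX_NR_ZONES; j++)
			printf(" %ld", reserve[i][j]);
		printf("\n");
	}
	return 0;
}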
@@ -474,58 +417,135 @@
 	 * frequently read in proximity to zone->lock.  It's good to
 	 * give them a chance of being in the same cacheline.
 	 *
-	 * Write access to present_pages and managed_pages at runtime should
-	 * be protected by lock_memory_hotplug()/unlock_memory_hotplug().
-	 * Any reader who can't tolerant drift of present_pages and
-	 * managed_pages should hold memory hotplug lock to get a stable value.
+	 * Write access to present_pages at runtime should be protected by
+	 * mem_hotplug_begin/end(). Any reader who can't tolerant drift of
+	 * present_pages should get_online_mems() to get a stable value.
+	 *
+	 * Read access to managed_pages should be safe because it's unsigned
+	 * long. Write access to zone->managed_pages and totalram_pages are
+	 * protected by managed_page_count_lock at runtime. Idealy only
+	 * adjust_managed_page_count() should be used instead of directly
+	 * touching zone->managed_pages and totalram_pages.
 	 */
+	unsigned long		managed_pages;
 	unsigned long		spanned_pages;
 	unsigned long		present_pages;
-	unsigned long		managed_pages;
+
+	const char		*name;
+
+#ifdef CONFIG_MEMORY_ISOLATION
 	/*
-	 * rarely used fields:
+	 * Number of isolated pageblock. It is used to solve incorrect
+	 * freepage counting problem due to racy retrieving migratetype
+	 * of pageblock. Protected by zone->lock.
 	 */
-	const char		*name;
-} ____cacheline_internodealigned_in_smp;
+	unsigned long		nr_isolate_pageblock;
+#endif
 
-typedef enum {
-	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
-	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
-	ZONE_CONGESTED,			/* zone has many dirty pages backed by
-					 * a congested BDI
-					 */
-} zone_flags_t;
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/* see spanned/present_pages for more description */
+	seqlock_t		span_seqlock;
+#endif
 
-static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
-{
-	set_bit(flag, &zone->flags);
-}
+	/*
+	 * wait_table		-- the array holding the hash table
+	 * wait_table_hash_nr_entries	-- the size of the hash table array
+	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
+	 *
+	 * The purpose of all these is to keep track of the people
+	 * waiting for a page to become available and make them
+	 * runnable again when possible. The trouble is that this
+	 * consumes a lot of space, especially when so few things
+	 * wait on pages at a given time. So instead of using
+	 * per-page waitqueues, we use a waitqueue hash table.
+	 *
+	 * The bucket discipline is to sleep on the same queue when
+	 * colliding and wake all in that wait queue when removing.
+	 * When something wakes, it must check to be sure its page is
+	 * truly available, a la thundering herd. The cost of a
+	 * collision is great, but given the expected load of the
+	 * table, they should be so rare as to be outweighed by the
+	 * benefits from the saved space.
+	 *
+	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+	 * primary users of these fields, and in mm/page_alloc.c
+	 * free_area_init_core() performs the initialization of them.
+	 */
+	wait_queue_head_t	*wait_table;
+	unsigned long		wait_table_hash_nr_entries;
+	unsigned long		wait_table_bits;
 
-static inline int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag)
-{
-	return test_and_set_bit(flag, &zone->flags);
-}
+	ZONE_PADDING(_pad1_)
+	/* free areas of different sizes */
+	struct free_area	free_area[MAX_ORDER];
 
-static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
-{
-	clear_bit(flag, &zone->flags);
-}
+	/* zone flags, see below */
+	unsigned long		flags;
 
-static inline int zone_is_reclaim_congested(const struct zone *zone)
-{
-	return test_bit(ZONE_CONGESTED, &zone->flags);
-}
+	/* Write-intensive fields used from the page allocator */
+	spinlock_t		lock;
 
-static inline int zone_is_reclaim_locked(const struct zone *zone)
-{
-	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
-}
+	ZONE_PADDING(_pad2_)
 
-static inline int zone_is_oom_locked(const struct zone *zone)
-{
-	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
-}
+	/* Write-intensive fields used by page reclaim */
+
+	/* Fields commonly accessed by the page reclaim scanner */
+	spinlock_t		lru_lock;
+	struct lruvec		lruvec;
+
+	/* Evictions & activations on the inactive file list */
+	atomic_long_t		inactive_age;
+
+	/*
+	 * When free pages are below this point, additional steps are taken
+	 * when reading the number of free pages to avoid per-cpu counter
+	 * drift allowing watermarks to be breached
+	 */
+	unsigned long		percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* pfn where compaction free scanner should start */
+	unsigned long		compact_cached_free_pfn;
+	/* pfn where async and sync compaction migration scanner should start */
+	unsigned long		compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+	/*
+	 * On compaction failure, 1<<compact_defer_shift compactions
+	 * are skipped before trying again. The number attempted since
+	 * last failure is tracked with compact_considered.
+	 */
+	unsigned int		compact_considered;
+	unsigned int		compact_defer_shift;
+	int			compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* Set to true when the PG_migrate_skip bits should be cleared */
+	bool			compact_blockskip_flush;
+#endif
+
+	ZONE_PADDING(_pad3_)
+	/* Zone statistics */
+	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
+} ____cacheline_internodealigned_in_smp;
+
+enum zone_flags {
+	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
+	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
+	ZONE_CONGESTED,			/* zone has many dirty pages backed by
+					 * a congested BDI
+					 */
+	ZONE_DIRTY,			/* reclaim scanning has recently found
+					 * many dirty file pages at the tail
+					 * of the LRU.
+					 */
+	ZONE_WRITEBACK,			/* reclaim scanning has recently found
+					 * many pages under writeback
+					 */
+	ZONE_FAIR_DEPLETED,		/* fair zone policy batch depleted */
+};
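The big hunk above is a cache-layout change rather than a behavioural one: struct zone is regrouped into read-mostly fields, write-intensive allocator fields, and write-intensive reclaim fields, with ZONE_PADDING pushing each hot lock group onto its own cache line. A plain C11 sketch of that idiom; struct zoneish and the 64-byte line size are stand-ins, not the kernel's ZONE_PADDING or ____cacheline_internodealigned_in_smp machinery:

/*
 * Sketch: keep hot locks on cache lines of their own so allocator-side
 * stores do not bounce the read-mostly fields between CPUs.
 */
#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

#define CACHELINE 64	/* assumed line size for the example */

struct zoneish {
	/* Read-mostly: every allocation reads the watermarks. */
	unsigned long watermark[3];
	long lowmem_reserve[4];

	/* New cache line: write-intensive allocator lock. */
	alignas(CACHELINE) int lock;

	/* New cache line: write-intensive reclaim lock and counters. */
	alignas(CACHELINE) int lru_lock;
	long inactive_age;
};

int main(void)
{
	printf("watermark at %zu, lock at %zu, lru_lock at %zu\n",
	       offsetof(struct zoneish, watermark),
	       offsetof(struct zoneish, lock),
	       offsetof(struct zoneish, lru_lock));
	return 0;
}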
@@ -567,76 +587,9 @@
  */
 #define MAX_ZONELISTS 2
 
-/*
- * We cache key information from each zonelist for smaller cache
- * footprint when scanning for free pages in get_page_from_freelist().
- *
- * 1) The BITMAP fullzones tracks which zones in a zonelist have come
- *    up short of free memory since the last time (last_fullzone_zap)
- *    we zero'd fullzones.
- * 2) The array z_to_n[] maps each zone in the zonelist to its node
- *    id, so that we can efficiently evaluate whether that node is
- *    set in the current tasks mems_allowed.
- *
- * Both fullzones and z_to_n[] are one-to-one with the zonelist,
- * indexed by a zones offset in the zonelist zones[] array.
- *
- * The get_page_from_freelist() routine does two scans.  During the
- * first scan, we skip zones whose corresponding bit in 'fullzones'
- * is set or whose corresponding node in current->mems_allowed (which
- * comes from cpusets) is not set.  During the second scan, we bypass
- * this zonelist_cache, to ensure we look methodically at each zone.
- *
- * Once per second, we zero out (zap) fullzones, forcing us to
- * reconsider nodes that might have regained more free memory.
- * The field last_full_zap is the time we last zapped fullzones.
- *
- * This mechanism reduces the amount of time we waste repeatedly
- * reexaming zones for free memory when they just came up low on
- * memory momentarilly ago.
- *
- * The zonelist_cache struct members logically belong in struct
- * zonelist.  However, the mempolicy zonelists constructed for
- * MPOL_BIND are intentionally variable length (and usually much
- * shorter).  A general purpose mechanism for handling structs with
- * multiple variable length members is more mechanism than we want
- * here.  We resort to some special case hackery instead.
- *
- * The MPOL_BIND zonelists don't need this zonelist_cache (in good
- * part because they are shorter), so we put the fixed length stuff
- * at the front of the zonelist struct, ending in a variable length
- * zones[], as is needed by MPOL_BIND.
- *
- * Then we put the optional zonelist cache on the end of the zonelist
- * struct.  This optional stuff is found by a 'zlcache_ptr' pointer in
- * the fixed length portion at the front of the struct.  This pointer
- * both enables us to find the zonelist cache, and in the case of
- * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
- * to know that the zonelist cache is not there.
- *
- * The end result is that struct zonelists come in two flavors:
- * 1) The full, fixed length version, shown below, and
- * 2) The custom zonelists for MPOL_BIND.
- * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
- *
- * Even though there may be multiple CPU cores on a node modifying
- * fullzones or last_full_zap in the same zonelist_cache at the same
- * time, we don't lock it.  This is just hint data - if it is wrong now
- * and then, the allocator will still function, perhaps a bit slower.
- */
-
-
-struct zonelist_cache {
-	unsigned short z_to_n[MAX_ZONES_PER_ZONELIST];		/* zone->nid */
-	DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST);	/* zone full? */
-	unsigned long last_full_zap;		/* when last zap'd (jiffies) */
-};
 #else
 #define MAX_ZONELISTS 1
-struct zonelist_cache;
 #endif
 
 /*
@@ -653,9 +606,6 @@
  * allocation, the other zones are fallback zones, in decreasing
  * priority.
  *
- * If zlcache_ptr is not NULL, then it is just the address of zlcache,
- * as explained above.  If zlcache_ptr is NULL, there is no zlcache.
- *
  *
  * To speed the reading of the zonelist, the zonerefs contain the zone index
  * of the entry being read. Helper functions to access information given
  * a struct zoneref are
@@ -665,20 +615,8 @@
  * zonelist_node_idx()	- Return the index of the node for an entry
  */
 struct zonelist {
-	struct zonelist_cache *zlcache_ptr;		     // NULL or &zlcache
 	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
-#ifdef CONFIG_NUMA
-	struct zonelist_cache zlcache;			     // optional ...
-#endif
-};
-
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-struct node_active_region {
-	unsigned long start_pfn;
-	unsigned long end_pfn;
-	int nid;
 };
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #ifndef CONFIG_DISCONTIGMEM
 /* The array of struct pages - for discontigmem use pgdat->lmem_map */
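For context on what is being ripped out: zonelist_cache was a best-effort hint layer for get_page_from_freelist(), and upstream dropped it on the grounds that maintaining and consulting stale hints cost more than the skipped zone checks saved. A compressed model of the deleted mechanism, with a single unsigned long standing in for DECLARE_BITMAP and wall-clock seconds standing in for jiffies:

/*
 * Sketch of the deleted zonelist_cache idea: remember which zones
 * recently looked full, and forget everything once per second so the
 * zones get reconsidered. Hint data only; no locking, may be stale.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct zlc {
	unsigned long fullzones;	/* bit set => zone looked full */
	time_t last_full_zap;		/* when the hints were last cleared */
};

static bool zlc_zone_worth_trying(struct zlc *zlc, int zi)
{
	time_t now = time(NULL);

	/* Once per second, zap the hints. */
	if (now != zlc->last_full_zap) {
		zlc->fullzones = 0;
		zlc->last_full_zap = now;
	}
	return !(zlc->fullzones & (1UL << zi));
}

static void zlc_mark_zone_full(struct zlc *zlc, int zi)
{
	zlc->fullzones |= 1UL << zi;
}

int main(void)
{
	struct zlc zlc = { 0, 0 };

	printf("zone 3 worth trying? %d\n", zlc_zone_worth_trying(&zlc, 3));
	zlc_mark_zone_full(&zlc, 3);
	printf("after marking full:  %d\n", zlc_zone_worth_trying(&zlc, 3));
	return 0;
}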
@@ -703,8 +641,8 @@
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
-#ifdef CONFIG_MEMCG
-	struct page_cgroup *node_page_cgroup;
+#ifdef CONFIG_PAGE_EXTENSION
+	struct page_ext *node_page_ext;
 #endif
 #endif
 #ifndef CONFIG_NO_BOOTMEM
@@ -716,7 +654,10 @@
 	 * or node_spanned_pages stay constant.  Holding this will also
 	 * guarantee that any pfn_valid() stays that way.
 	 *
-	 * Nests above zone->lock and zone->size_seqlock.
+	 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
+	 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG.
+	 *
+	 * Nests above zone->lock and zone->span_seqlock
 	 */
 	spinlock_t node_size_lock;
 #endif
@@ -725,17 +666,14 @@
 	unsigned long node_spanned_pages; /* total size of physical page
					     range, including holes */
 	int node_id;
-	nodemask_t reclaim_nodes;	/* Nodes allowed to reclaim from */
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
-	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
+	struct task_struct *kswapd;	/* Protected by
+					   mem_hotplug_begin/end() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
 #ifdef CONFIG_NUMA_BALANCING
-	/*
-	 * Lock serializing the per destination node AutoNUMA memory
-	 * migration rate limiting data.
-	 */
+	/* Lock serializing the migrate rate limiting window */
 	spinlock_t numabalancing_migrate_lock;
 
 	/* Rate limiting time interval */
@@ -744,6 +682,14 @@
 	/* Number of pages migrated during the rate limiting time interval */
 	unsigned long numabalancing_migrate_nr_pages;
 #endif
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+	/*
+	 * If memory initialisation on large machines is deferred then this
+	 * is the first PFN that needs to be initialised.
+	 */
+	unsigned long first_deferred_pfn;
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
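first_deferred_pfn is the cut-off left behind when early boot initialises only part of the node's memmap; everything at or beyond it is filled in later by deferred initialisation. A mock of the guard this field enables; the real check lives in mm/internal.h, and the types and the helper name here are stand-ins rather than the kernel's:

/* Sketch: pfns at or beyond the cut-off have no initialised struct
 * page yet and must be skipped during early boot. */
#include <stdbool.h>
#include <stdio.h>

struct pgdat_mock {
	unsigned long first_deferred_pfn;	/* ~0UL once init completes */
};

static bool early_page_uninitialised(const struct pgdat_mock *pgdat,
				     unsigned long pfn)
{
	return pfn >= pgdat->first_deferred_pfn;
}

int main(void)
{
	struct pgdat_mock pgdat = { .first_deferred_pfn = 0x100000 };

	printf("pfn 0xfffff deferred? %d\n",
	       early_page_uninitialised(&pgdat, 0xfffff));
	printf("pfn 0x100000 deferred? %d\n",
	       early_page_uninitialised(&pgdat, 0x100000));
	return 0;
}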
@@ -768,22 +714,40 @@
 	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
 }
 
+static inline int zone_id(const struct zone *zone)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+
+	return zone - pgdat->node_zones;
+}
+
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return zone_id(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return false;
+}
+#endif
+
 #include <linux/memory_hotplug.h>
 
 extern struct mutex zonelists_mutex;
 void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
 void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		int classzone_idx, int alloc_flags);
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
-		int classzone_idx, int alloc_flags);
+bool zone_watermark_ok(struct zone *z, unsigned int order,
+		unsigned long mark, int classzone_idx, int alloc_flags);
+bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+		unsigned long mark, int classzone_idx);
 enum memmap_context {
 	MEMMAP_EARLY,
 	MEMMAP_HOTPLUG,
 };
 extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
-				     unsigned long size,
-				     enum memmap_context context);
+				     unsigned long size);
 
 extern void lruvec_init(struct lruvec *lruvec);
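zone_id() is pure pointer arithmetic: subtracting the node_zones[] array base from a zone pointer yields the zone's index, which is_dev_zone() then compares against ZONE_DEVICE. The same trick reduced to standalone C, with mock types in place of struct zone and pg_data_t:

/* Sketch: element pointer minus array base yields the element index. */
#include <assert.h>

#define MAX_NR_ZONES 4

struct zone_m { unsigned long start_pfn; };
struct node_m { struct zone_m node_zones[MAX_NR_ZONES]; };

static int zone_id_m(const struct node_m *node, const struct zone_m *zone)
{
	return (int)(zone - node->node_zones);
}

int main(void)
{
	struct node_m node = { 0 };

	assert(zone_id_m(&node, &node.node_zones[2]) == 2);
	return 0;
}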
@@ -824,14 +788,16 @@
 
 extern int movable_zone;
 
+#ifdef CONFIG_HIGHMEM
 static inline int zone_movable_is_highmem(void)
 {
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	return movable_zone == ZONE_HIGHMEM;
 #else
-	return 0;
+	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #endif
 }
+#endif
 
 static inline int is_highmem_idx(enum zone_type idx)
 {
@@ -843,11 +809,6 @@
 #endif
 }
 
-static inline int is_normal_idx(enum zone_type idx)
-{
-	return (idx == ZONE_NORMAL);
-}
-
 /**
  * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
@@ -866,33 +827,12 @@
 #endif
 }
 
-static inline int is_normal(struct zone *zone)
-{
-	return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
-}
-
-static inline int is_dma32(struct zone *zone)
-{
-#ifdef CONFIG_ZONE_DMA32
-	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
-#else
-	return 0;
-#endif
-}
-
-static inline int is_dma(struct zone *zone)
-{
-#ifdef CONFIG_ZONE_DMA
-	return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
-#else
-	return 0;
-#endif
-}
-
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
 int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
+int low_free_kbytes_ratio_sysctl_handler(struct ctl_table *, int,
+					void __user *, size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
@@ -977,7 +917,6 @@
  * @z - The cursor used as a starting point for the search
  * @highest_zoneidx - The zone index of the highest zone to return
  * @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
  *
  * This function returns the next zone at or below a given zone index that is
 * within the allowed nodemask using a cursor as the starting point for the
@@ -987,8 +926,7 @@
  */
 struct zoneref *next_zones_zonelist(struct zoneref *z,
 					enum zone_type highest_zoneidx,
-					nodemask_t *nodes,
-					struct zone **zone);
+					nodemask_t *nodes);
 
 /**
  * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
@@ -1007,8 +945,10 @@
 					nodemask_t *nodes,
 					struct zone **zone)
 {
-	return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
-								zone);
+	struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs,
+							highest_zoneidx, nodes);
+	*zone = zonelist_zone(z);
+	return z;
 }
 
 /**
@@ -1025,7 +965,8 @@
 #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
 	for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone);	\
 		zone;							\
-		z = next_zones_zonelist(++z, highidx, nodemask, &zone))	\
+		z = next_zones_zonelist(++z, highidx, nodemask),	\
+			zone = zonelist_zone(z))			\
 
 /**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
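The last three hunks are one refactor: next_zones_zonelist() now hands back the zoneref cursor and callers derive the zone from it via zonelist_zone(), instead of filling in a struct zone ** out-parameter, and the iteration macro keeps the NULL zone of the terminating zoneref as its loop condition. A mock of the resulting loop shape; all types and the trivial next function are stand-ins, with the filtering logic omitted:

/* Sketch: cursor-returning iteration over a NULL-terminated zoneref
 * array, mirroring the rewritten for_each_zone_zonelist_nodemask(). */
#include <stdio.h>

struct zone_m { int idx; };
struct zoneref_m { struct zone_m *zone; };

static struct zone_m *zonelist_zone(struct zoneref_m *z)
{
	return z->zone;
}

/* Real code would skip entries failing the nodemask/zoneidx filter;
 * with no filter, the first suitable entry is the cursor itself. */
static struct zoneref_m *next_zones_zonelist_m(struct zoneref_m *z)
{
	return z;
}

int main(void)
{
	struct zone_m zones[2] = { { 0 }, { 1 } };
	struct zoneref_m refs[3] = { { &zones[0] }, { &zones[1] }, { NULL } };
	struct zone_m *zone;
	struct zoneref_m *z;

	for (z = next_zones_zonelist_m(refs), zone = zonelist_zone(z);
	     zone;
	     z = next_zones_zonelist_m(++z), zone = zonelist_zone(z))
		printf("zone %d\n", zone->idx);
	return 0;
}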
@@ -1085,7 +1026,7 @@
 #define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
 
 struct page;
-struct page_cgroup;
+struct page_ext;
 struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
@@ -1103,14 +1044,18 @@
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_PAGE_EXTENSION
 	/*
-	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
-	 * section. (see memcontrol.h/page_cgroup.h about this.)
+	 * If !SPARSEMEM, pgdat doesn't have page_ext pointer. We use
+	 * section. (see page_ext.h about this.)
 	 */
-	struct page_cgroup *page_cgroup;
+	struct page_ext *page_ext;
 	unsigned long pad;
 #endif
+	/*
+	 * WARNING: mem_section must be a power-of-2 in size for the
+	 * calculation and use of SECTION_ROOT_MASK to make sense.
+	 */
 };
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
@@ -1219,11 +1164,16 @@
 #define sparse_index_init(_sec, _nid)  do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-bool early_pfn_in_nid(unsigned long pfn, int nid);
-#else
-#define early_pfn_in_nid(pfn, nid)	(1)
-#endif
+/*
+ * During memory init memblocks map pfns to nids. The search is expensive and
+ * this caches recent lookups. The implementation of __early_pfn_to_nid
+ * may treat start/end as pfns or sections.
+ */
+struct mminit_pfnnid_cache {
+	unsigned long last_start;
+	unsigned long last_end;
+	int last_nid;
+};
 
 #ifndef early_pfn_valid
 #define early_pfn_valid(pfn)	(1)
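mminit_pfnnid_cache is a one-entry memoisation of the pfn-to-nid search: if the queried pfn still falls in [last_start, last_end), the cached nid is returned without walking the memblock ranges again. A standalone sketch with the expensive search faked by a trivial range function; only the struct mirrors the header above, everything else is invented for the example:

/* Sketch: cache the last matching pfn range so repeated lookups in
 * the same range skip the search. */
#include <stdio.h>

struct mminit_pfnnid_cache {
	unsigned long last_start;
	unsigned long last_end;
	int last_nid;
};

/* Stand-in for the memblock range search: pretend memory comes in
 * 4096-pfn blocks alternating between two nodes. */
static int slow_pfn_to_nid(unsigned long pfn,
			   unsigned long *start, unsigned long *end)
{
	*start = pfn & ~0xfffUL;
	*end = *start + 0x1000;
	return (int)((*start >> 12) & 1);
}

static int cached_pfn_to_nid(unsigned long pfn,
			     struct mminit_pfnnid_cache *cache)
{
	if (cache->last_start <= pfn && pfn < cache->last_end)
		return cache->last_nid;	/* hit: no search */

	cache->last_nid = slow_pfn_to_nid(pfn, &cache->last_start,
					  &cache->last_end);
	return cache->last_nid;
}

int main(void)
{
	struct mminit_pfnnid_cache cache = { 1, 0, -1 }; /* start > end: empty */

	printf("nid=%d\n", cached_pfn_to_nid(42, &cache)); /* miss: searches */
	printf("nid=%d\n", cached_pfn_to_nid(43, &cache)); /* hit: cached */
	return 0;
}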