--- zzzz-none-000/linux-3.10.107/mm/slab_common.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/mm/slab_common.c 2021-02-04 17:41:59.000000000 +0000 @@ -20,16 +20,61 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include "slab.h" +#if defined(CONFIG_SLUB_AVM_ALLOC_LIST) +#include +#include +#include +#endif + enum slab_state slab_state; LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; +/* + * Set of flags that will prevent slab merging + */ +#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | \ + SLAB_STORE_USER | SLAB_STORE_USER_LITE | \ + SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_FAILSLAB) + +#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK) + +/* + * Merge control. If this is set then no merging of slab caches will occur. + * (Could be removed. This was introduced to pacify the merge skeptics.) + */ +static int slab_nomerge; + +static int __init setup_slab_nomerge(char *str) +{ + slab_nomerge = 1; + return 1; +} + +#ifdef CONFIG_SLUB +__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0); +#endif + +__setup("slab_nomerge", setup_slab_nomerge); + +/* + * Determine the size of a slab object + */ +unsigned int kmem_cache_size(struct kmem_cache *s) +{ + return s->object_size; +} +EXPORT_SYMBOL(kmem_cache_size); + #ifdef CONFIG_DEBUG_VM -static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, - size_t size) +static int kmem_cache_sanity_check(const char *name, size_t size) { struct kmem_cache *s = NULL; @@ -54,62 +99,202 @@ s->object_size); continue; } - -#if !defined(CONFIG_SLUB) - /* - * For simplicity, we won't check this in the list of memcg - * caches. We have control over memcg naming, and if there - * aren't duplicates in the global list, there won't be any - * duplicates in the memcg lists as well. 
- */ - if (!memcg && !strcmp(s->name, name)) { - pr_err("%s (%s): Cache name already exists.\n", - __func__, name); - dump_stack(); - s = NULL; - return -EINVAL; - } -#endif } WARN_ON(strchr(name, ' ')); /* It confuses parsers */ return 0; } #else -static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg, - const char *name, size_t size) +static inline int kmem_cache_sanity_check(const char *name, size_t size) { return 0; } #endif +void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) +{ + size_t i; + + for (i = 0; i < nr; i++) + kmem_cache_free(s, p[i]); +} + +int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, + void **p) +{ + size_t i; + + for (i = 0; i < nr; i++) { + void *x = p[i] = kmem_cache_alloc(s, flags); + if (!x) { + __kmem_cache_free_bulk(s, i, p); + return 0; + } + } + return i; +} + #ifdef CONFIG_MEMCG_KMEM +void slab_init_memcg_params(struct kmem_cache *s) +{ + s->memcg_params.is_root_cache = true; + INIT_LIST_HEAD(&s->memcg_params.list); + RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); +} + +static int init_memcg_params(struct kmem_cache *s, + struct mem_cgroup *memcg, struct kmem_cache *root_cache) +{ + struct memcg_cache_array *arr; + + if (memcg) { + s->memcg_params.is_root_cache = false; + s->memcg_params.memcg = memcg; + s->memcg_params.root_cache = root_cache; + return 0; + } + + slab_init_memcg_params(s); + + if (!memcg_nr_cache_ids) + return 0; + + arr = kzalloc(sizeof(struct memcg_cache_array) + + memcg_nr_cache_ids * sizeof(void *), + GFP_KERNEL); + if (!arr) + return -ENOMEM; + + RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr); + return 0; +} + +static void destroy_memcg_params(struct kmem_cache *s) +{ + if (is_root_cache(s)) + kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); +} + +static int update_memcg_params(struct kmem_cache *s, int new_array_size) +{ + struct memcg_cache_array *old, *new; + + if (!is_root_cache(s)) + return 0; + + new = kzalloc(sizeof(struct memcg_cache_array) + + new_array_size * sizeof(void *), GFP_KERNEL); + if (!new) + return -ENOMEM; + + old = rcu_dereference_protected(s->memcg_params.memcg_caches, + lockdep_is_held(&slab_mutex)); + if (old) + memcpy(new->entries, old->entries, + memcg_nr_cache_ids * sizeof(void *)); + + rcu_assign_pointer(s->memcg_params.memcg_caches, new); + if (old) + kfree_rcu(old, rcu); + return 0; +} + int memcg_update_all_caches(int num_memcgs) { struct kmem_cache *s; int ret = 0; - mutex_lock(&slab_mutex); + mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { - if (!is_root_cache(s)) - continue; - - ret = memcg_update_cache_size(s, num_memcgs); + ret = update_memcg_params(s, num_memcgs); /* - * See comment in memcontrol.c, memcg_update_cache_size: * Instead of freeing the memory, we'll just leave the caches * up to this point in an updated state. */ if (ret) - goto out; + break; } - - memcg_update_array_size(num_memcgs); -out: mutex_unlock(&slab_mutex); return ret; } -#endif +#else +static inline int init_memcg_params(struct kmem_cache *s, + struct mem_cgroup *memcg, struct kmem_cache *root_cache) +{ + return 0; +} + +static inline void destroy_memcg_params(struct kmem_cache *s) +{ +} +#endif /* CONFIG_MEMCG_KMEM */ + +/* + * Find a mergeable slab cache + */ +int slab_unmergeable(struct kmem_cache *s) +{ + if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) + return 1; + + if (!is_root_cache(s)) + return 1; + + if (s->ctor) + return 1; + + /* + * We may have set a slab to be unmergeable during bootstrap. 
+ */ + if (s->refcount < 0) + return 1; + + return 0; +} + +struct kmem_cache *find_mergeable(size_t size, size_t align, + unsigned long flags, const char *name, void (*ctor)(void *)) +{ + struct kmem_cache *s; + + if (slab_nomerge || (flags & SLAB_NEVER_MERGE)) + return NULL; + + if (ctor) + return NULL; + + size = ALIGN(size, sizeof(void *)); + align = calculate_alignment(flags, align, size); + size = ALIGN(size, align); + flags = kmem_cache_flags(size, flags, name, NULL); + + list_for_each_entry_reverse(s, &slab_caches, list) { + if (slab_unmergeable(s)) + continue; + + if (size > s->size) + continue; + + if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME)) + continue; + /* + * Check if alignment is compatible. + * Courtesy of Adrian Drzewiecki + */ + if ((s->size & ~(align - 1)) != s->size) + continue; + + if (s->size - size >= sizeof(void *)) + continue; + + if (IS_ENABLED(CONFIG_SLAB) && align && + (align > s->align || s->align % align)) + continue; + + return s; + } + return NULL; +} /* * Figure out what the alignment of the objects will be given a set of @@ -138,6 +323,45 @@ return ALIGN(align, sizeof(void *)); } +static struct kmem_cache *create_cache(const char *name, + size_t object_size, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *), + struct mem_cgroup *memcg, struct kmem_cache *root_cache) +{ + struct kmem_cache *s; + int err; + + err = -ENOMEM; + s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); + if (!s) + goto out; + + s->name = name; + s->object_size = object_size; + s->size = size; + s->align = align; + s->ctor = ctor; + + err = init_memcg_params(s, memcg, root_cache); + if (err) + goto out_free_cache; + + err = __kmem_cache_create(s, flags); + if (err) + goto out_free_cache; + + s->refcount = 1; + list_add(&s->list, &slab_caches); +out: + if (err) + return ERR_PTR(err); + return s; + +out_free_cache: + destroy_memcg_params(s); + kmem_cache_free(kmem_cache, s); + goto out; +} /* * kmem_cache_create - Create a cache. @@ -163,20 +387,24 @@ * cacheline. This can be beneficial if you're counting cycles as closely * as davem. 
*/ - struct kmem_cache * -kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, - size_t align, unsigned long flags, void (*ctor)(void *), - struct kmem_cache *parent_cache) +kmem_cache_create(const char *name, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s = NULL; - int err = 0; + const char *cache_name; + int err; get_online_cpus(); + get_online_mems(); + memcg_get_cache_ids(); + mutex_lock(&slab_mutex); - if (!kmem_cache_sanity_check(memcg, name, size) == 0) - goto out_locked; + err = kmem_cache_sanity_check(name, size); + if (err) { + goto out_unlock; + } /* * Some allocators will constraint the set of valid flags to a subset @@ -186,47 +414,32 @@ */ flags &= CACHE_CREATE_MASK; - s = __kmem_cache_alias(memcg, name, size, align, flags, ctor); + s = __kmem_cache_alias(name, size, align, flags, ctor); if (s) - goto out_locked; + goto out_unlock; - s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); - if (s) { - s->object_size = s->size = size; - s->align = calculate_alignment(flags, align, size); - s->ctor = ctor; - - if (memcg_register_cache(memcg, s, parent_cache)) { - kmem_cache_free(kmem_cache, s); - err = -ENOMEM; - goto out_locked; - } - - s->name = kstrdup(name, GFP_KERNEL); - if (!s->name) { - kmem_cache_free(kmem_cache, s); - err = -ENOMEM; - goto out_locked; - } - - err = __kmem_cache_create(s, flags); - if (!err) { - s->refcount = 1; - list_add(&s->list, &slab_caches); - memcg_cache_list_add(memcg, s); - } else { - kfree(s->name); - kmem_cache_free(kmem_cache, s); - } - } else + cache_name = kstrdup_const(name, GFP_KERNEL); + if (!cache_name) { err = -ENOMEM; + goto out_unlock; + } -out_locked: + s = create_cache(cache_name, size, size, + calculate_alignment(flags, align, size), + flags, ctor, NULL, NULL); + if (IS_ERR(s)) { + err = PTR_ERR(s); + kfree_const(cache_name); + } + +out_unlock: mutex_unlock(&slab_mutex); + + memcg_put_cache_ids(); + put_online_mems(); put_online_cpus(); if (err) { - if (flags & SLAB_PANIC) panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", name, err); @@ -235,55 +448,320 @@ name, err); dump_stack(); } - return NULL; } - return s; } +EXPORT_SYMBOL(kmem_cache_create); -struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t align, - unsigned long flags, void (*ctor)(void *)) +static int shutdown_cache(struct kmem_cache *s, + struct list_head *release, bool *need_rcu_barrier) { - return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL); + if (__kmem_cache_shutdown(s) != 0) + return -EBUSY; + + if (s->flags & SLAB_DESTROY_BY_RCU) + *need_rcu_barrier = true; + + list_move(&s->list, release); + return 0; +} + +static void release_caches(struct list_head *release, bool need_rcu_barrier) +{ + struct kmem_cache *s, *s2; + + if (need_rcu_barrier) + rcu_barrier(); + + list_for_each_entry_safe(s, s2, release, list) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_remove(s); +#else + slab_kmem_cache_release(s); +#endif + } +} + +#ifdef CONFIG_MEMCG_KMEM +/* + * memcg_create_kmem_cache - Create a cache for a memory cgroup. + * @memcg: The memory cgroup the new cache is for. + * @root_cache: The parent of the new cache. + * + * This function attempts to create a kmem cache that will serve allocation + * requests going from @memcg to @root_cache. The new cache inherits properties + * from its parent. 
+ */ +void memcg_create_kmem_cache(struct mem_cgroup *memcg, + struct kmem_cache *root_cache) +{ + static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ + struct cgroup_subsys_state *css = &memcg->css; + struct memcg_cache_array *arr; + struct kmem_cache *s = NULL; + char *cache_name; + int idx; + + get_online_cpus(); + get_online_mems(); + + mutex_lock(&slab_mutex); + + /* + * The memory cgroup could have been deactivated while the cache + * creation work was pending. + */ + if (!memcg_kmem_is_active(memcg)) + goto out_unlock; + + idx = memcg_cache_id(memcg); + arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches, + lockdep_is_held(&slab_mutex)); + + /* + * Since per-memcg caches are created asynchronously on first + * allocation (see memcg_kmem_get_cache()), several threads can try to + * create the same cache, but only one of them may succeed. + */ + if (arr->entries[idx]) + goto out_unlock; + + cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); + cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name, + css->serial_nr, memcg_name_buf); + if (!cache_name) + goto out_unlock; + + s = create_cache(cache_name, root_cache->object_size, + root_cache->size, root_cache->align, + root_cache->flags, root_cache->ctor, + memcg, root_cache); + /* + * If we could not create a memcg cache, do not complain, because + * that's not critical at all as we can always proceed with the root + * cache. + */ + if (IS_ERR(s)) { + kfree(cache_name); + goto out_unlock; + } + + list_add(&s->memcg_params.list, &root_cache->memcg_params.list); + + /* + * Since readers won't lock (see cache_from_memcg_idx()), we need a + * barrier here to ensure nobody will see the kmem_cache partially + * initialized. + */ + smp_wmb(); + arr->entries[idx] = s; + +out_unlock: + mutex_unlock(&slab_mutex); + + put_online_mems(); + put_online_cpus(); +} + +void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) +{ + int idx; + struct memcg_cache_array *arr; + struct kmem_cache *s, *c; + + idx = memcg_cache_id(memcg); + + get_online_cpus(); + get_online_mems(); + + mutex_lock(&slab_mutex); + list_for_each_entry(s, &slab_caches, list) { + if (!is_root_cache(s)) + continue; + + arr = rcu_dereference_protected(s->memcg_params.memcg_caches, + lockdep_is_held(&slab_mutex)); + c = arr->entries[idx]; + if (!c) + continue; + + __kmem_cache_shrink(c, true); + arr->entries[idx] = NULL; + } + mutex_unlock(&slab_mutex); + + put_online_mems(); + put_online_cpus(); +} + +static int __shutdown_memcg_cache(struct kmem_cache *s, + struct list_head *release, bool *need_rcu_barrier) +{ + BUG_ON(is_root_cache(s)); + + if (shutdown_cache(s, release, need_rcu_barrier)) + return -EBUSY; + + list_del(&s->memcg_params.list); + return 0; +} + +void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) +{ + LIST_HEAD(release); + bool need_rcu_barrier = false; + struct kmem_cache *s, *s2; + + get_online_cpus(); + get_online_mems(); + + mutex_lock(&slab_mutex); + list_for_each_entry_safe(s, s2, &slab_caches, list) { + if (is_root_cache(s) || s->memcg_params.memcg != memcg) + continue; + /* + * The cgroup is about to be freed and therefore has no charges + * left. Hence, all its caches must be empty by now. 
+ */ + BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier)); + } + mutex_unlock(&slab_mutex); + + put_online_mems(); + put_online_cpus(); + + release_caches(&release, need_rcu_barrier); +} + +static int shutdown_memcg_caches(struct kmem_cache *s, + struct list_head *release, bool *need_rcu_barrier) +{ + struct memcg_cache_array *arr; + struct kmem_cache *c, *c2; + LIST_HEAD(busy); + int i; + + BUG_ON(!is_root_cache(s)); + + /* + * First, shutdown active caches, i.e. caches that belong to online + * memory cgroups. + */ + arr = rcu_dereference_protected(s->memcg_params.memcg_caches, + lockdep_is_held(&slab_mutex)); + for_each_memcg_cache_index(i) { + c = arr->entries[i]; + if (!c) + continue; + if (__shutdown_memcg_cache(c, release, need_rcu_barrier)) + /* + * The cache still has objects. Move it to a temporary + * list so as not to try to destroy it for a second + * time while iterating over inactive caches below. + */ + list_move(&c->memcg_params.list, &busy); + else + /* + * The cache is empty and will be destroyed soon. Clear + * the pointer to it in the memcg_caches array so that + * it will never be accessed even if the root cache + * stays alive. + */ + arr->entries[i] = NULL; + } + + /* + * Second, shutdown all caches left from memory cgroups that are now + * offline. + */ + list_for_each_entry_safe(c, c2, &s->memcg_params.list, + memcg_params.list) + __shutdown_memcg_cache(c, release, need_rcu_barrier); + + list_splice(&busy, &s->memcg_params.list); + + /* + * A cache being destroyed must be empty. In particular, this means + * that all per memcg caches attached to it must be empty too. + */ + if (!list_empty(&s->memcg_params.list)) + return -EBUSY; + return 0; +} +#else +static inline int shutdown_memcg_caches(struct kmem_cache *s, + struct list_head *release, bool *need_rcu_barrier) +{ + return 0; +} +#endif /* CONFIG_MEMCG_KMEM */ + +void slab_kmem_cache_release(struct kmem_cache *s) +{ + destroy_memcg_params(s); + kfree_const(s->name); + kmem_cache_free(kmem_cache, s); } -EXPORT_SYMBOL(kmem_cache_create); void kmem_cache_destroy(struct kmem_cache *s) { - /* Destroy all the children caches if we aren't a memcg cache */ - kmem_cache_destroy_memcg_children(s); + LIST_HEAD(release); + bool need_rcu_barrier = false; + int err; + + if (unlikely(!s)) + return; get_online_cpus(); + get_online_mems(); + mutex_lock(&slab_mutex); + s->refcount--; - if (!s->refcount) { - list_del(&s->list); + if (s->refcount) + goto out_unlock; - if (!__kmem_cache_shutdown(s)) { - mutex_unlock(&slab_mutex); - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); - - memcg_release_cache(s); - kfree(s->name); - kmem_cache_free(kmem_cache, s); - } else { - list_add(&s->list, &slab_caches); - mutex_unlock(&slab_mutex); - printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n", - s->name); - dump_stack(); - } - } else { - mutex_unlock(&slab_mutex); + err = shutdown_memcg_caches(s, &release, &need_rcu_barrier); + if (!err) + err = shutdown_cache(s, &release, &need_rcu_barrier); + + if (err) { + pr_err("kmem_cache_destroy %s: " + "Slab cache still has objects\n", s->name); + dump_stack(); } +out_unlock: + mutex_unlock(&slab_mutex); + + put_online_mems(); put_online_cpus(); + + release_caches(&release, need_rcu_barrier); } EXPORT_SYMBOL(kmem_cache_destroy); -int slab_is_available(void) +/** + * kmem_cache_shrink - Shrink a cache. + * @cachep: The cache to shrink. + * + * Releases as many slabs as possible for a cache. 
+ * To help debugging, a zero exit status indicates all slabs were released. + */ +int kmem_cache_shrink(struct kmem_cache *cachep) +{ + int ret; + + get_online_cpus(); + get_online_mems(); + ret = __kmem_cache_shrink(cachep, false); + put_online_mems(); + put_online_cpus(); + return ret; +} +EXPORT_SYMBOL(kmem_cache_shrink); + +bool slab_is_available(void) { return slab_state >= UP; } @@ -298,6 +776,9 @@ s->name = name; s->size = s->object_size = size; s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); + + slab_init_memcg_params(s); + err = __kmem_cache_create(s, flags); if (err) @@ -375,7 +856,7 @@ { int index; - if (size > KMALLOC_MAX_SIZE) { + if (unlikely(size > KMALLOC_MAX_SIZE)) { WARN_ON_ONCE(!(flags & __GFP_NOWARN)); return NULL; } @@ -397,25 +878,45 @@ } /* - * Create the kmalloc array. Some of the regular kmalloc arrays - * may already have been created because they were needed to - * enable allocations for slab creation. + * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time. + * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is + * kmalloc-67108864. */ -void __init create_kmalloc_caches(unsigned long flags) +static struct { + const char *name; + unsigned long size; +} const kmalloc_info[] __initconst = { + {NULL, 0}, {"kmalloc-96", 96}, + {"kmalloc-192", 192}, {"kmalloc-8", 8}, + {"kmalloc-16", 16}, {"kmalloc-32", 32}, + {"kmalloc-64", 64}, {"kmalloc-128", 128}, + {"kmalloc-256", 256}, {"kmalloc-512", 512}, + {"kmalloc-1024", 1024}, {"kmalloc-2048", 2048}, + {"kmalloc-4096", 4096}, {"kmalloc-8192", 8192}, + {"kmalloc-16384", 16384}, {"kmalloc-32768", 32768}, + {"kmalloc-65536", 65536}, {"kmalloc-131072", 131072}, + {"kmalloc-262144", 262144}, {"kmalloc-524288", 524288}, + {"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152}, + {"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608}, + {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432}, + {"kmalloc-67108864", 67108864} +}; + +/* + * Patch up the size_index table if we have strange large alignment + * requirements for the kmalloc array. This is only the case for + * MIPS it seems. The standard arches will not generate any code here. + * + * Largest permitted alignment is 256 bytes due to the way we + * handle the index determination for the smaller caches. + * + * Make sure that nothing crazy happens if someone starts tinkering + * around with ARCH_KMALLOC_MINALIGN + */ +void __init setup_kmalloc_cache_index_table(void) { int i; - /* - * Patch up the size_index table if we have strange large alignment - * requirements for the kmalloc array. This is only the case for - * MIPS it seems. The standard arches will not generate any code here. - * - * Largest permitted alignment is 256 bytes due to the way we - * handle the index determination for the smaller caches. - * - * Make sure that nothing crazy happens if someone starts tinkering - * around with ARCH_KMALLOC_MINALIGN - */ BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); @@ -446,11 +947,26 @@ for (i = 128 + 8; i <= 192; i += 8) size_index[size_index_elem(i)] = 8; } +} + +static void __init new_kmalloc_cache(int idx, unsigned long flags) +{ + kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, + kmalloc_info[idx].size, flags); +} + +/* + * Create the kmalloc array. Some of the regular kmalloc arrays + * may already have been created because they were needed to + * enable allocations for slab creation. 
+ */ +void __init create_kmalloc_caches(unsigned long flags) +{ + int i; + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - if (!kmalloc_caches[i]) { - kmalloc_caches[i] = create_kmalloc_cache(NULL, - 1 << i, flags); - } + if (!kmalloc_caches[i]) + new_kmalloc_cache(i, flags); /* * Caches that are not of the two-to-the-power-of size. @@ -458,27 +974,14 @@ * earlier power of two caches */ if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) - kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags); - + new_kmalloc_cache(1, flags); if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) - kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags); + new_kmalloc_cache(2, flags); } /* Kmalloc array is now usable */ slab_state = UP; - for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { - struct kmem_cache *s = kmalloc_caches[i]; - char *n; - - if (s) { - n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); - - BUG_ON(!n); - s->name = n; - } - } - #ifdef CONFIG_ZONE_DMA for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { struct kmem_cache *s = kmalloc_caches[i]; @@ -497,9 +1000,48 @@ } #endif /* !CONFIG_SLOB */ +/* + * To avoid unnecessary overhead, we pass through large allocation requests + * directly to the page allocator. We use __GFP_COMP, because we will need to + * know the allocation order to free the pages properly in kfree. + */ +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) +{ + void *ret; + struct page *page; + + flags |= __GFP_COMP; + page = alloc_kmem_pages(flags, order); + ret = page ? page_address(page) : NULL; +#if defined(CONFIG_AVM_PAGE_TRACE) + if (likely(page)) + avm_set_page_current_pc(page, _RET_IP_); +#endif + kmemleak_alloc(ret, size, 1, flags); + kasan_kmalloc_large(ret, size); + return ret; +} +EXPORT_SYMBOL(kmalloc_order); + +#ifdef CONFIG_TRACING +void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +{ + void *ret = kmalloc_order(size, flags, order); + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); + return ret; +} +EXPORT_SYMBOL(kmalloc_order_trace); +#endif #ifdef CONFIG_SLABINFO -void print_slabinfo_header(struct seq_file *m) + +#ifdef CONFIG_SLAB +#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) +#else +#define SLABINFO_RIGHTS S_IRUSR +#endif + +static void print_slabinfo_header(struct seq_file *m) { /* * Output format version, so at least we can change it @@ -522,23 +1064,18 @@ seq_putc(m, '\n'); } -static void *s_start(struct seq_file *m, loff_t *pos) +void *slab_start(struct seq_file *m, loff_t *pos) { - loff_t n = *pos; - mutex_lock(&slab_mutex); - if (!n) - print_slabinfo_header(m); - return seq_list_start(&slab_caches, *pos); } -static void *s_next(struct seq_file *m, void *p, loff_t *pos) +void *slab_next(struct seq_file *m, void *p, loff_t *pos) { return seq_list_next(p, &slab_caches, pos); } -static void s_stop(struct seq_file *m, void *p) +void slab_stop(struct seq_file *m, void *p) { mutex_unlock(&slab_mutex); } @@ -548,16 +1085,11 @@ { struct kmem_cache *c; struct slabinfo sinfo; - int i; if (!is_root_cache(s)) return; - for_each_memcg_cache_index(i) { - c = cache_from_memcg(s, i); - if (!c) - continue; - + for_each_memcg_cache(c, s) { memset(&sinfo, 0, sizeof(sinfo)); get_slabinfo(c, &sinfo); @@ -569,7 +1101,7 @@ } } -int cache_show(struct kmem_cache *s, struct seq_file *m) +static void cache_show(struct kmem_cache *s, struct seq_file *m) { struct slabinfo sinfo; @@ -588,17 +1120,32 @@ sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail); slabinfo_show_stats(m, s); seq_putc(m, 
'\n'); +} + +static int slab_show(struct seq_file *m, void *p) +{ + struct kmem_cache *s = list_entry(p, struct kmem_cache, list); + + if (p == slab_caches.next) + print_slabinfo_header(m); + if (is_root_cache(s)) + cache_show(s, m); return 0; } -static int s_show(struct seq_file *m, void *p) +#ifdef CONFIG_MEMCG_KMEM +int memcg_slab_show(struct seq_file *m, void *p) { struct kmem_cache *s = list_entry(p, struct kmem_cache, list); + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - if (!is_root_cache(s)) - return 0; - return cache_show(s, m); + if (p == slab_caches.next) + print_slabinfo_header(m); + if (!is_root_cache(s) && s->memcg_params.memcg == memcg) + cache_show(s, m); + return 0; } +#endif /* * slabinfo_op - iterator that generates /proc/slabinfo @@ -614,10 +1161,10 @@ * + further values on SMP and with statistics enabled */ static const struct seq_operations slabinfo_op = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, + .start = slab_start, + .next = slab_next, + .stop = slab_stop, + .show = slab_show, }; static int slabinfo_open(struct inode *inode, struct file *file) @@ -635,8 +1182,339 @@ static int __init slab_proc_init(void) { - proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations); + proc_create("slabinfo", SLABINFO_RIGHTS, NULL, + &proc_slabinfo_operations); return 0; } module_init(slab_proc_init); #endif /* CONFIG_SLABINFO */ + +static __always_inline void *__do_krealloc(const void *p, size_t new_size, + gfp_t flags) +{ + void *ret; + size_t ks = 0; + + if (p) + ks = ksize(p); + + if (ks >= new_size) { + kasan_krealloc((void *)p, new_size); + return (void *)p; + } + + ret = kmalloc_track_caller(new_size, flags); + if (ret && p) + memcpy(ret, p, ks); + + return ret; +} + +/** + * __krealloc - like krealloc() but don't free @p. + * @p: object to reallocate memory for. + * @new_size: how many bytes of memory are required. + * @flags: the type of memory to allocate. + * + * This function is like krealloc() except it never frees the originally + * allocated buffer. Use this if you don't want to free the buffer immediately + * like, for example, with RCU. + */ +void *__krealloc(const void *p, size_t new_size, gfp_t flags) +{ + if (unlikely(!new_size)) + return ZERO_SIZE_PTR; + + return __do_krealloc(p, new_size, flags); + +} +EXPORT_SYMBOL(__krealloc); + +/** + * krealloc - reallocate memory. The contents will remain unchanged. + * @p: object to reallocate memory for. + * @new_size: how many bytes of memory are required. + * @flags: the type of memory to allocate. + * + * The contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. If @p is %NULL, krealloc() + * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a + * %NULL pointer, the object pointed to is freed. + */ +void *krealloc(const void *p, size_t new_size, gfp_t flags) +{ + void *ret; + + if (unlikely(!new_size)) { + kfree(p); + return ZERO_SIZE_PTR; + } + + ret = __do_krealloc(p, new_size, flags); + if (ret && p != ret) + kfree(p); + + return ret; +} +EXPORT_SYMBOL(krealloc); + +/** + * kzfree - like kfree but zero memory + * @p: object to free memory of + * + * The memory of the object @p points to is zeroed before freed. + * If @p is %NULL, kzfree() does nothing. + * + * Note: this function zeroes the whole allocated buffer which can be a good + * deal bigger than the requested buffer size passed to kmalloc(). So be + * careful when using this function in performance sensitive code. 
+ */ +void kzfree(const void *p) +{ + size_t ks; + void *mem = (void *)p; + + if (unlikely(ZERO_OR_NULL_PTR(mem))) + return; + ks = ksize(mem); + memset(mem, 0, ks); + kfree(mem); +} +EXPORT_SYMBOL(kzfree); + +/* Tracepoints definitions. */ +EXPORT_TRACEPOINT_SYMBOL(kmalloc); +EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); +EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); +EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); +EXPORT_TRACEPOINT_SYMBOL(kfree); +EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); + +#if defined(CONFIG_SLUB_AVM_ALLOC_LIST) +/** + */ +static char *human_time(char *buf, int len, unsigned long secs) +{ + unsigned long seconds, minutes, hours; + + seconds = secs % 60; + secs /= 60; + minutes = secs % 60; + secs /= 60; + hours = secs % 24; + if (hours) { + snprintf(buf, len, "%lu h %2lu min %2lu s", hours, minutes, + seconds); + } else if (minutes) { + snprintf(buf, len, "%2lu min %2lu s", minutes, seconds); + } else { + snprintf(buf, len, "%2lu s", seconds); + } + return buf; +} + +/** + * @brief show memory-usage-caller for cachepool + * @param cachep cachepool + * @param m seq-pointer + * @param threshsize only cache-pool-memory-usage greater this + * return void + */ +#define local_print(seq, args...) \ + do { \ + if (seq) { \ + seq_printf(seq, args); \ + } else { \ + pr_err(args); \ + } \ + } while (0) + +static void show_cache_toplist(struct kmem_cache *cachep, struct seq_file *m, + struct _slab_avm_topalloclist *toplist, + unsigned long threshsize) +{ + unsigned int i; + char tmp[128]; + + cache_avm_topalloc_list(toplist, cachep, 0); + if ((toplist->sum_count == 0) || + ((toplist->sum_count * cachep->object_size) < threshsize)) { + return; + } + for (i = 0; i < ARRAY_SIZE(toplist->entry); i++) { + struct _slab_avm_alloc_entry *p = &toplist->entry[i]; + unsigned long long avg = p->sum_time; + + if ((i == 0) || + (p->count * cachep->object_size) > threshsize / 4) { + if (i == 0) { + local_print(m, "%s: %5lu KiB\n", cachep->name, + (cachep->object_size * toplist->sum_count) >> 10); + } + do_div(avg, (p->count * HZ)); + local_print(m, " \t%6u entries (%5u KiB - avg-time %s) %pS\n", + p->count, + (cachep->object_size * p->count) >> 10, + human_time(tmp, sizeof(tmp), (unsigned long)avg), + (void *)p->caller); + } else { + break; + } + } + if (toplist->ignored) { + if (i) { + local_print(m, "... 
but %d callers ignored (too much different callers)\n", + toplist->ignored); + } + } +} +/** + * @brief show all memory-usage-caller + * @param m seq-pointer + * @param threshsize only cachep greater this + * return void + */ +static void proc_show_slab_allocator(struct seq_file *m, + struct _slab_avm_topalloclist *ptoplist, + unsigned long threshsize) +{ + struct kmem_cache *cachep = NULL; + struct slabinfo sinfo; + unsigned long sum = 0; + unsigned int init = 0; + + list_for_each_entry(cachep, &slab_caches, list) { + + if (threshsize && (init == 0)) { + local_print(m, "show all cache-pools greater %lu KiB:\n", threshsize >> 10); + init = 1; + } + memset(&sinfo, 0, sizeof(sinfo)); + get_slabinfo(cachep, &sinfo); + memcg_accumulate_slabinfo(cachep, &sinfo); + if ((sinfo.active_objs * cachep->object_size) >= threshsize) { + show_cache_toplist(cachep, m, ptoplist, threshsize); + } + sum += sinfo.active_objs * cachep->object_size; + } + local_print(m, "slab-pools use %lu MiB (netto)\n", sum >> 20); +} + +static struct _slab_avm_topalloclist local_toplist; +unsigned int slab_track_time; + +#ifdef CONFIG_SLABINFO + +#define SKIP_SPACES(p) { while ((*p == ' ') || (*p == '\t')) p++; } +#define SKIP_NONSPACES(p) { while (*p && (*p != ' ') && (*p != '\t')) p++; } + +/** + * print out /proc/slabinfo per printk + */ +static void __show_slab(void) +{ + unsigned int active_objs; + char *ptxt; + void *p; + loff_t pos; + char buf[512 + 1]; + struct seq_file seq; + + memset(&seq, 0, sizeof(seq)); + seq.size = sizeof(buf) - 1; + seq.buf = buf; + pos = 0; + + print_slabinfo_header(&seq); + p = seq_list_start(&slab_caches, pos); + + seq.buf[seq.count] = 0; + pr_err("%s", seq.buf), seq.count = 0; + for (;;) { + struct kmem_cache *s; + + if (!p || IS_ERR(p)) { + break; + } + s = list_entry(p, struct kmem_cache, list); + if (is_root_cache(s)) { + cache_show(s, &seq); + seq.buf[seq.count] = 0; + /*--- only if active_objs exist: ---*/ + ptxt = seq.buf; + SKIP_NONSPACES(ptxt); + SKIP_SPACES(ptxt); + sscanf(ptxt, "%u", &active_objs); + if (active_objs) { + pr_err("%s", seq.buf); + } + } + seq.count = 0; + p = seq_list_next(p, &slab_caches, &pos); + } +} +#endif /* CONFIG_SLABINFO */ + +/** + * @brief show slabinfo and all heavy memory-usage-caller + * use kernel-printk + * used in oom-notifier + */ +void show_slab(void) +{ + if (!mutex_trylock(&slab_mutex)) { + return; + } +#ifdef CONFIG_SLABINFO + __show_slab(); +#endif /* CONFIG_SLABINFO */ + proc_show_slab_allocator(NULL, &local_toplist, SZ_1M); + mutex_unlock(&slab_mutex); +} + +/** + * @brief show allocator-statistic + * @param m seq-pointer + * @param priv + * return void + */ +static void lproc_slab_allocators(struct seq_file *m, void *priv __maybe_unused) +{ + struct _slab_avm_topalloclist *ptoplist; + + ptoplist = kzalloc(sizeof(struct _slab_avm_topalloclist), GFP_KERNEL); + if (ptoplist == NULL) { + return; + } + mutex_lock(&slab_mutex); + proc_show_slab_allocator(m, ptoplist, 0); + mutex_unlock(&slab_mutex); + kfree(ptoplist); + } + + +/** + * @brief delayed slab_allocator-trace on timer-context + */ +static void slab_allocator_on(unsigned long data __maybe_unused) +{ + pr_err("start slab_allocator-trace now (use cat /proc/slab_allocators)\n"); + slab_track_time = 1; +} + +static DEFINE_TIMER(slab_allocator_timer, slab_allocator_on, 0, 0); +/** + * @brief install /proc/slab_allocators + * return 0 + */ +int __init avm_proc_slaballocator(void) +{ + /*--- pr_err("%s()\n", __func__); ---*/ + add_simple_proc_file("slab_allocators", NULL, + 
lproc_slab_allocators, NULL); + + mod_timer(&slab_allocator_timer, jiffies + 45 * HZ); + return 0; +} +late_initcall(avm_proc_slaballocator); +#endif/*--- #if defined(CONFIG_SLUB_AVM_ALLOC_LIST) ---*/
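
The hunks above rework cache creation (kmem_cache_create() now funnels through create_cache() and find_mergeable()), cache destruction (shutdown_cache()/release_caches()), and export kmem_cache_size(), kmem_cache_shrink(), krealloc() and kzfree() from this file. What follows is a minimal, hypothetical consumer of that exported API, appended here purely for illustration; it is not part of the patch, and every identifier prefixed with "demo" is invented for this sketch.

/*
 * Illustrative sketch (not part of the patch): a module that exercises the
 * slab API implemented/exported by slab_common.c. All "demo"-prefixed names
 * are hypothetical.
 */
#include <linux/module.h>
#include <linux/slab.h>

struct demo_obj {			/* made-up object type */
	unsigned long cookie;
	char payload[48];
};

static struct kmem_cache *demo_cache;

static int __init demo_slab_init(void)
{
	struct demo_obj *obj;
	char *buf, *tmp;

	/*
	 * No ctor and no debug flags, so find_mergeable() may alias this
	 * cache to an existing one of compatible size and alignment;
	 * SLAB_HWCACHE_ALIGN feeds into calculate_alignment().
	 */
	demo_cache = kmem_cache_create("demo_obj", sizeof(struct demo_obj),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	if (!demo_cache)
		return -ENOMEM;

	pr_info("demo_obj cache created, object size %u\n",
		kmem_cache_size(demo_cache));

	obj = kmem_cache_zalloc(demo_cache, GFP_KERNEL);
	if (obj) {
		obj->cookie = 0xdeadbeef;
		kmem_cache_free(demo_cache, obj);
	}

	/* krealloc() preserves contents; kzfree() zeroes ksize() bytes. */
	buf = kmalloc(32, GFP_KERNEL);
	if (buf) {
		tmp = krealloc(buf, 128, GFP_KERNEL);
		if (tmp)
			buf = tmp;	/* old buffer already freed by krealloc() */
		kzfree(buf);
	}

	/* Give unused slabs back to the page allocator. */
	kmem_cache_shrink(demo_cache);
	return 0;
}

static void __exit demo_slab_exit(void)
{
	/* All objects must be freed before the cache can be destroyed. */
	kmem_cache_destroy(demo_cache);
}

module_init(demo_slab_init);
module_exit(demo_slab_exit);
MODULE_LICENSE("GPL");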