--- zzzz-none-000/linux-5.4.213/mm/slub.c	2022-09-15 10:04:56.000000000 +0000
+++ miami-7690-761/linux-5.4.213/mm/slub.c	2024-05-29 11:20:02.000000000 +0000
@@ -117,6 +117,15 @@
  *			the fast path and disables lockless freelists.
  */

+static inline int kmem_cache_fastdebug(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_DEBUG
+	return (s->flags & SLAB_STORE_USER_LITE) && !(s->flags & (SLAB_RED_ZONE | SLAB_POISON | SLAB_TRACE));
+#else
+	return 0;
+#endif
+}
+
 static inline int kmem_cache_debug(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_DEBUG
@@ -137,7 +146,8 @@
 static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
-	return !kmem_cache_debug(s);
+	/* we want the per-cpu partial pools in lite debug mode as well! */
+	return kmem_cache_fastdebug(s) || !kmem_cache_debug(s);
 #else
 	return false;
 #endif
@@ -170,8 +180,7 @@
  */
 #define MAX_PARTIAL 10

-#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
-				SLAB_POISON | SLAB_STORE_USER)
+#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER_LITE)

 /*
  * These debug flags cannot use CMPXCHG because there might be consistency
@@ -186,7 +195,8 @@
  * disabled when slub_debug=O is used and a cache's min order increases with
  * metadata.
  */
-#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | \
+				SLAB_STORE_USER | SLAB_STORE_USER_LITE)

 #define OO_SHIFT	16
 #define OO_MASK		((1 << OO_SHIFT) - 1)
@@ -201,20 +211,38 @@
 /*
  * Tracking user of a slab.
  */
-#define TRACK_ADDRS_COUNT 16
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+#define TRACK_ADDRS_COUNT 4
+#else
+#define TRACK_ADDRS_COUNT 8
+#endif
 struct track {
+	int cpu;		/* Was running on cpu */
+	int pid;		/* Pid context */
+	unsigned long when;	/* When did the operation occur */
 	unsigned long addr;	/* Called from address */
 #ifdef CONFIG_STACKTRACE
 	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
 #endif
-	int cpu;		/* Was running on cpu */
-	int pid;		/* Pid context */
-	unsigned long when;	/* When did the operation occur */
 };

-enum track_item { TRACK_ALLOC, TRACK_FREE };
+static inline size_t track_size(unsigned int slab_flag)
+{
+	size_t len = sizeof(struct track);
+
+#ifdef CONFIG_STACKTRACE
+	if (slab_flag & SLAB_STORE_USER) {
+		return len;
+	}
+	/* unused stack-unwind data for SLAB_STORE_USER_LITE */
+	len = offsetof(struct track, addrs[0]);
+#endif
+	return len;
+}
+
+enum track_item { TRACK_ALLOC = 0, TRACK_FREE = 1};

-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void memcg_propagate_slab_attrs(struct kmem_cache *s);
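The reordering of struct track together with the new track_size() helper is what makes SLAB_STORE_USER_LITE cheap: the stack-trace array now sits at the end of the record, so a lite-tracked cache stores only the cpu/pid/when/addr header per alloc and per free slot. A minimal sketch of the saving, assuming it is compiled inside slub.c (where struct track is visible) and CONFIG_STACKTRACE is set; slub_demo_track_overhead() is a hypothetical helper, not part of the patch:

static void slub_demo_track_overhead(struct kmem_cache *s)
{
	/* full record: header plus TRACK_ADDRS_COUNT stack slots */
	size_t full = sizeof(struct track);
	/* lite record: everything up to, but excluding, addrs[] */
	size_t lite = offsetof(struct track, addrs[0]);

	/* each object carries two records: one for alloc, one for free */
	pr_info("%s: per-object debug metadata %zu (full) vs %zu (lite) bytes\n",
		s->name, 2 * full, 2 * lite);
}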
@@ -442,6 +470,16 @@
 }

 #ifdef CONFIG_SLUB_DEBUG
+
+static void delayed_slub_corruption_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(delayed_bug, delayed_slub_corruption_handler);
+
+static void delayed_slub_corruption_handler(struct work_struct *work)
+{
+	pr_err("slub corruption: intentional crash in workqueue\n");
+	BUG();
+}
+
 /*
  * Determine a map of object in use on a page.
  *
@@ -473,13 +511,19 @@
 	return p;
 }

+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+#define DEBUG_MIN_FLAGS (SLAB_STORE_USER_LITE | SLAB_CONSISTENCY_CHECKS)
+#else
+#define DEBUG_MIN_FLAGS 0x0
+#endif
+
 /*
  * Debug settings:
  */
 #if defined(CONFIG_SLUB_DEBUG_ON)
 static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
 #else
-static slab_flags_t slub_debug;
+static slab_flags_t slub_debug = DEBUG_MIN_FLAGS;
 #endif

 static char *slub_debug_slabs;
@@ -561,16 +605,67 @@
 	p = object + get_info_end(s);

-	return p + alloc;
+	return (void *)p + (alloc ? track_size(s->flags) : 0);
+}
+
+/**
+ * Return: the old addr value
+ */
+static inline unsigned long reset_track(struct kmem_cache *s, void *object,
+					enum track_item alloc)
+{
+	unsigned long old_addr;
+	struct track *p = get_track(s, object, alloc);
+	/*
+	 * Tracks for free calls must be initialized with a known value for the
+	 * address so that objects for which alloc was called but which were
+	 * never freed can be recognized.
+	 */
+	/*--- todo use: cmpxchg() ---*/
+	old_addr = xchg(&p->addr, 0L);
+#ifdef CONFIG_STACKTRACE
+	if (unlikely(s->flags & SLAB_STORE_USER))
+		p->addrs[0] = 0L;
+#endif
+	return old_addr;
 }

-static void set_track(struct kmem_cache *s, void *object,
+/**
+ * special case for TRACK_FREE!
+ *
+ * if alloc == TRACK_FREE:
+ * (a) clear the alloc addr
+ * (b) set the free addr ... - but only if an old alloc addr exists
+ * (c) deliver the old alloc addr
+ *
+ */
+static unsigned long set_track(struct kmem_cache *s, void *object,
 			enum track_item alloc, unsigned long addr)
 {
+	unsigned long alloc_addr = 0;
 	struct track *p = get_track(s, object, alloc);

-	if (addr) {
+	if (alloc == TRACK_FREE) {
+		/* use the alloc addr as indicator for an allocated object */
+		alloc_addr = reset_track(s, object, TRACK_ALLOC);
+		if (alloc_addr == 0)
+			/* looks like a double free */
+			return alloc_addr;
+	}
+	p->addr = addr;
+	p->cpu = smp_processor_id();
+	p->pid = current->pid;
+
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+	if (likely(!slab_track_time))
+		/* do not track the initial slab_allocator setup (too many entries) */
+		p->when = 0;
+	else
+#endif
+		p->when = jiffies;
+
 #ifdef CONFIG_STACKTRACE
+	if (unlikely(s->flags & SLAB_STORE_USER)) {
 		unsigned int nr_entries;

 		metadata_access_enable();
@@ -579,26 +674,23 @@
 		if (nr_entries < TRACK_ADDRS_COUNT)
 			p->addrs[nr_entries] = 0;
-#endif
-		p->addr = addr;
-		p->cpu = smp_processor_id();
-		p->pid = current->pid;
 		p->when = jiffies;
-	} else {
-		memset(p, 0, sizeof(struct track));
 	}
+#endif
+	return alloc_addr;
 }

 static void init_tracking(struct kmem_cache *s, void *object)
 {
-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)))
 		return;

-	set_track(s, object, TRACK_FREE, 0UL);
-	set_track(s, object, TRACK_ALLOC, 0UL);
+	reset_track(s, object, TRACK_FREE);
+	reset_track(s, object, TRACK_ALLOC);
 }

-static void print_track(const char *s, struct track *t, unsigned long pr_time)
+static void print_track(const char *s, struct track *t, unsigned long pr_time,
+			unsigned int flags)
 {
 	if (!t->addr)
 		return;
@@ -608,6 +700,9 @@
 #ifdef CONFIG_STACKTRACE
 	{
 		int i;
+
+		if (!(flags & SLAB_STORE_USER))
+			return;
 		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
 			if (t->addrs[i])
 				pr_err("\t%pS\n", (void *)t->addrs[i]);
@@ -620,11 +715,13 @@
 static void print_tracking(struct kmem_cache *s, void *object)
 {
 	unsigned long pr_time = jiffies;
-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)))
 		return;

-	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
-	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
+	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time,
+		    s->flags);
+	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time,
+		    s->flags);
 }

 static void print_page_info(struct page *page)
@@ -693,7 +790,6 @@
 			      s->red_left_pad);
 	else if (p > addr + 16)
 		print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
-
 	print_section(KERN_ERR, "Object ", p,
 		      min_t(unsigned int, s->object_size, PAGE_SIZE));
 	if (s->flags & SLAB_RED_ZONE)
@@ -702,11 +798,10 @@

 	off = get_info_end(s);

-	if (s->flags & SLAB_STORE_USER)
-		off += 2 * sizeof(struct track);
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
+		off += 2 * track_size(s->flags);

 	off += kasan_metadata_size(s);
-
 	if (off != size_from_object(s))
 		/* Beginning of the filler is the free pointer */
 		print_section(KERN_ERR, "Padding ", p + off,
@@ -782,6 +877,11 @@
 	print_trailer(s, page, object);

 	restore_bytes(s, what, value, fault, end);
+
+	if (s->flags & SLAB_PANIC_CORRUPTION) {
+		pr_err("slub corruption: schedule delayed BUG()\n");
+		schedule_delayed_work(&delayed_bug, 1000);
+	}
 	return 0;
 }
@@ -827,9 +927,9 @@
 {
 	unsigned long off = get_info_end(s);	/* The end of info */

-	if (s->flags & SLAB_STORE_USER)
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
 		/* We also have user information there */
-		off += 2 * sizeof(struct track);
+		off += 2 * track_size(s->flags);

 	off += kasan_metadata_size(s);
@@ -1012,7 +1112,7 @@
 	return search == NULL;
 }

-static void trace(struct kmem_cache *s, struct page *page, void *object,
+static noinline void trace(struct kmem_cache *s, struct page *page, void *object,
 								int alloc)
 {
 	if (s->flags & SLAB_TRACE) {
@@ -1092,7 +1192,7 @@
 static void setup_object_debug(struct kmem_cache *s, struct page *page,
 								void *object)
 {
-	if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
+	if (!(s->flags & (SLAB_STORE_USER|SLAB_STORE_USER_LITE|SLAB_RED_ZONE|__OBJECT_POISON)))
 		return;

 	init_object(s, object, SLUB_RED_INACTIVE);
@@ -1135,10 +1235,6 @@
 		if (!alloc_consistency_checks(s, page, object))
 			goto bad;
 	}
-
-	/* Success perform special debug activities for allocs */
-	if (s->flags & SLAB_STORE_USER)
-		set_track(s, object, TRACK_ALLOC, addr);
 	trace(s, page, object, 1);
 	init_object(s, object, SLUB_RED_ACTIVE);
 	return 1;
@@ -1217,8 +1313,12 @@
 		goto out;
 	}

-	if (s->flags & SLAB_STORE_USER)
-		set_track(s, object, TRACK_FREE, addr);
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) {
+		if (unlikely(set_track(s, object, TRACK_FREE, addr) == 0)) {
+			object_err(s, page, object, "Object already free");
+			goto out;
+		}
+	}
 	trace(s, page, object, 0);
 	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
 	init_object(s, object, SLUB_RED_INACTIVE);
@@ -1242,6 +1342,26 @@
 	return ret;
 }

+static int free_debug_processing_fast(struct kmem_cache *s, struct page *page,
+				      void *object,
+				      unsigned long addr)
+{
+	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+
+		if (unlikely((s != page->slab_cache) || !check_valid_pointer(s, page, object))) {
+			/* force a complete check with locks */
+			if (!free_debug_processing(s, page, object, object, addr, 1))
+				return 0;
+		}
+	}
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) {
+		if (unlikely(set_track(s, object, TRACK_FREE, addr) == 0)) {
+			object_err(s, page, object, "Object already free");
+			return 0;
+		}
+	}
+	return 1;
+}
 static int __init setup_slub_debug(char *str)
 {
 	slub_debug = DEBUG_DEFAULT_FLAGS;
@@ -1288,6 +1408,9 @@
 		case 'a':
 			slub_debug |= SLAB_FAILSLAB;
 			break;
+		case 'l':
+			slub_debug |= SLAB_STORE_USER_LITE;
+			break;
 		case 'o':
 			/*
 			 * Avoid enabling debugging on caches if its minimum
@@ -1295,12 +1418,14 @@
 			 */
 			disable_higher_order_debug = 1;
 			break;
+		case 'c':
+			slub_debug |= SLAB_PANIC_CORRUPTION;
+			break;
 		default:
 			pr_err("slub_debug option '%c' unknown. skipped\n",
 			       *str);
 		}
 	}
-
 check_slabs:
 	if (*str == ',')
 		slub_debug_slabs = str + 1;
@@ -1309,6 +1434,15 @@
 	    static_branch_unlikely(&init_on_free)) &&
 	    (slub_debug & SLAB_POISON))
 		pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
+
+	pr_err("%s: slub_debug =%s%s%s%s%s%s%s\n", __func__,
+	       (slub_debug & SLAB_CONSISTENCY_CHECKS) ? " CONSISTENCY_CHECKS" : "",
+	       (slub_debug & SLAB_RED_ZONE) ? " RED_ZONE" : "",
+	       (slub_debug & SLAB_POISON) ? " POISON" : "",
+	       (slub_debug & SLAB_STORE_USER) ? " STORE_USER" : "",
+	       (slub_debug & SLAB_TRACE) ? " TRACE" : "",
+	       (slub_debug & SLAB_FAILSLAB) ? " FAILSLAB" : "",
+	       (slub_debug & SLAB_STORE_USER_LITE) ? " STORE_USER_LITE" : "");
 	return 1;
 }
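The two new option letters parsed above extend the stock slub_debug=<flags>[,<cache>] boot syntax, so lite tracking and the delayed-BUG-on-corruption behaviour can presumably be requested with something like slub_debug=l or slub_debug=lc,kmalloc-192. A small sketch of how the resulting mode of a cache could be reported at run time; demo_report_debug_mode() is a hypothetical helper and only builds inside slub.c, where the kmem_cache_*debug() helpers are visible:

static void demo_report_debug_mode(struct kmem_cache *s)
{
	if (kmem_cache_fastdebug(s))
		/* SLAB_STORE_USER_LITE only: tracking, but the fast paths stay enabled */
		pr_info("%s: lite user tracking (fast path kept)\n", s->name);
	else if (kmem_cache_debug(s))
		/* red zoning, poisoning, full tracking, ...: slow debug paths */
		pr_info("%s: full SLUB debugging (slow path)\n", s->name);
	else
		pr_info("%s: no debug flags\n", s->name);
}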
@@ -1360,7 +1494,14 @@
 				break;
 		iter = end + 1;
 	}
-
+	pr_debug("%s: flags =%s%s%s%s%s%s%s\n", __func__,
+		 (flags & SLAB_CONSISTENCY_CHECKS) ? " CONSISTENCY_CHECKS" : "",
+		 (flags & SLAB_RED_ZONE) ? " RED_ZONE" : "",
+		 (flags & SLAB_POISON) ? " POISON" : "",
+		 (flags & SLAB_STORE_USER) ? " STORE_USER" : "",
+		 (flags & SLAB_TRACE) ? " TRACE" : "",
+		 (flags & SLAB_FAILSLAB) ? " FAILSLAB" : "",
+		 (flags & SLAB_STORE_USER_LITE) ? " STORE_USER_LITE" : "");
 	return flags;
 }
 #else /* !CONFIG_SLUB_DEBUG */
@@ -1376,11 +1517,16 @@
 	struct kmem_cache *s, struct page *page, void *head, void *tail,
 	int bulk_cnt, unsigned long addr) { return 0; }
-
+static int free_debug_processing_fast(struct kmem_cache *s, struct page *page,
+				      void *object,
+				      unsigned long addr)
+	{ return 1; }
 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
 			{ return 1; }
 static inline int check_object(struct kmem_cache *s, struct page *page,
 			void *object, u8 val) { return 1; }
+static inline unsigned long set_track(struct kmem_cache *s, void *object,
+			enum track_item alloc, unsigned long addr) { return 1; }
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 					struct page *page) {}
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -2429,7 +2575,7 @@
 }
 #endif /* CONFIG_SLUB_DEBUG */

-#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
+#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
 static unsigned long count_partial(struct kmem_cache_node *n,
 					int (*get_count)(struct page *))
 {
@@ -2443,7 +2589,7 @@
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	return x;
 }
-#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
+#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */

 static noinline void
 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
@@ -2666,13 +2812,16 @@
 	}

 	page = c->page;
-	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
-		goto load_freelist;
-
-	/* Only entered in the debug case */
-	if (kmem_cache_debug(s) &&
-			!alloc_debug_processing(s, page, freelist, addr))
-		goto new_slab;	/* Slab failed checks. Next slab needed */
+	if (pfmemalloc_match(page, gfpflags)) {
+		if (likely(!kmem_cache_debug(s)))
+			goto load_freelist;
+		/* Only entered in the debug case */
+		if (!alloc_debug_processing(s, page, freelist, addr))
+			goto new_slab;	/* Slab failed checks. Next slab needed */
+		if (kmem_cache_fastdebug(s))
+			/* deactivate_slab() is very expensive, keep the fastpath! */
+			goto load_freelist;
+	}

 	deactivate_slab(s, page, get_freepointer(s, freelist), c);
 	return freelist;
@@ -2803,8 +2952,11 @@
 		stat(s, ALLOC_FASTPATH);
 	}

+#ifdef CONFIG_SLUB_DEBUG
+	if ((s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) && object)
+		set_track(s, object, TRACK_ALLOC, addr);
+#endif
 	maybe_wipe_obj_freeptr(s, object);
-
 	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
 		memset(object, 0, s->object_size);
@@ -2834,6 +2986,7 @@
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+
 	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
@@ -2892,10 +3045,11 @@

 	stat(s, FREE_SLOWPATH);

-	if (kmem_cache_debug(s) &&
-	    !free_debug_processing(s, page, head, tail, cnt, addr))
-		return;
-
+	if (kmem_cache_debug(s)) {
+		if (!kmem_cache_fastdebug(s) || (cnt > 1))
+			if (!free_debug_processing(s, page, head, tail, cnt, addr))
+				return;
+	}
 	do {
 		if (unlikely(n)) {
 			spin_unlock_irqrestore(&n->list_lock, flags);
@@ -3013,6 +3167,11 @@
 	void *tail_obj = tail ? : head;
 	struct kmem_cache_cpu *c;
 	unsigned long tid;
+
+	if ((kmem_cache_fastdebug(s)) && (cnt == 1)) {
+		if (!free_debug_processing_fast(s, page, tail_obj, addr))
+			return;
+	}
 redo:
 	/*
 	 * Determine the currently cpus per cpu slab.
@@ -3227,13 +3386,16 @@
 			if (unlikely(!p[i]))
 				goto error;

+			if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
+				set_track(s, p[i], TRACK_ALLOC, _RET_IP_);
 			c = this_cpu_ptr(s->cpu_slab);
 			maybe_wipe_obj_freeptr(s, p[i]);
-
 			continue; /* goto for-loop */
 		}
 		c->freelist = get_freepointer(s, object);
 		p[i] = object;
+		if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
+			set_track(s, p[i], TRACK_ALLOC, _RET_IP_);
 		maybe_wipe_obj_freeptr(s, p[i]);
 	}
 	c->tid = next_tid(c->tid);
@@ -3616,12 +3778,12 @@
 	}

 #ifdef CONFIG_SLUB_DEBUG
-	if (flags & SLAB_STORE_USER)
+	if (flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
 		/*
 		 * Need to store information about allocs and frees after
 		 * the object.
 		 */
-		size += 2 * sizeof(struct track);
+		size += 2 * track_size(flags);
 #endif

 	kasan_cache_create(s, &size, &s->flags);
@@ -3687,7 +3849,6 @@
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
 	s->random = get_random_long();
 #endif
-
 	if (!calculate_sizes(s, -1))
 		goto error;
 	if (disable_higher_order_debug) {
@@ -4441,7 +4602,7 @@
 }
 #endif

-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 static int count_inuse(struct page *page)
 {
 	return page->inuse;
@@ -4506,7 +4667,7 @@
 		pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
 		       s->name, count, n->nr_partial);

-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)))
 		goto out;

 	list_for_each_entry(page, &n->full, slab_list) {
@@ -4670,13 +4831,19 @@
 {
 	void *addr = page_address(page);
 	void *p;
+	bool use_freelist = (alloc == TRACK_FREE);

 	bitmap_zero(map, page->objects);
 	get_map(s, page, map);

 	for_each_object(p, s, addr, page->objects)
-		if (!test_bit(slab_index(p, s, addr), map))
-			add_location(t, s, get_track(s, p, alloc));
+		if (test_bit(slab_index(p, s, addr), map) == use_freelist) {
+			struct track *track = get_track(s, p, alloc);
+
+			if (!track->addr)
+				continue;
+			add_location(t, s, track);
+		}
 }

 static int list_locations(struct kmem_cache *s, char *buf,
@@ -4819,12 +4986,12 @@
 	validate_slab_cache(kmalloc_caches[type][9]);
 }
 #else
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 static void resiliency_test(void) {};
 #endif
 #endif	/* SLUB_RESILIENCY_TEST */

-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 enum slab_stat_type {
 	SL_ALL,			/* All slabs */
 	SL_PARTIAL,		/* Only partially allocated slabs */
@@ -5312,6 +5479,29 @@
 }
 SLAB_ATTR(store_user);

+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+static ssize_t store_user_lite_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER_LITE));
+}
+
+static ssize_t store_user_lite_store(struct kmem_cache *s,
+				     const char *buf, size_t length)
+{
+	if (any_slab_objects(s))
+		return -EBUSY;
+
+	s->flags &= ~SLAB_STORE_USER_LITE;
+	if (buf[0] == '1') {
+		s->flags &= ~__CMPXCHG_DOUBLE;
+		s->flags |= SLAB_STORE_USER_LITE;
+	}
+	calculate_sizes(s, -1);
+	return length;
+}
+SLAB_ATTR(store_user_lite);
+#endif /* #if defined(CONFIG_SLUB_AVM_ALLOC_LIST) */
+
 static ssize_t validate_show(struct kmem_cache *s, char *buf)
 {
 	return 0;
@@ -5517,6 +5707,9 @@
 	&red_zone_attr.attr,
 	&poison_attr.attr,
 	&store_user_attr.attr,
+#ifdef CONFIG_SLUB_AVM_ALLOC_LIST
+	&store_user_lite_attr.attr,
+#endif
 	&validate_attr.attr,
 	&alloc_calls_attr.attr,
 	&free_calls_attr.attr,
@@ -5743,7 +5936,8 @@
 	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
 	char *p = name;

-	BUG_ON(!name);
+	if (!name)
+		return ERR_PTR(-ENOMEM);

 	*p++ = ':';
 	/*
@@ -5825,6 +6019,8 @@
 		 * for the symlinks.
 		 */
 		name = create_unique_id(s);
+		if (IS_ERR(name))
+			return PTR_ERR(name);
 	}

 	s->kobj.kset = kset;
@@ -5960,7 +6156,7 @@
 }

 __initcall(slab_sysfs_init);
-#endif /* CONFIG_SYSFS */
+#endif /* SLAB_SUPPORTS_SYSFS */

 /*
  * The /proc/slabinfo ABI
@@ -5998,3 +6194,399 @@
 	return -EIO;
 }
 #endif /* CONFIG_SLUB_DEBUG */
+
+#if defined(CONFIG_AVM_ENHANCED)
+/**
+ * @brief get the kmemalloc area if addr is in range
+ * attention! not locked against concurrent cachep changes - the zone page spinlock would be necessary
+ * @return start of the object (zero if it does not exist)
+ */
+unsigned long get_kmemalloc_area(unsigned long addr,
+				 unsigned long *caller,
+				 const char **cache_name,
+				 unsigned long *size, int *freed)
+{
+	void *object;
+	unsigned long alloc_caller = 0, free_caller = 0;
+	unsigned int obj_idx;
+	void *base;
+	void *kstart;
+	struct page *page;
+	struct kmem_cache *s;
+
+	object = (void *)addr;
+	page = virt_to_head_page(object);
+	if (!virt_addr_valid(page))
+		return 0;
+	if (unlikely(!PageSlab(page)))
+		return 0;
+
+	s = page->slab_cache;
+	if (!virt_addr_valid(s))
+		return 0;
+
+	base = page_address(page);
+	if ((object < base) || (object >= base + page->objects * s->size))
+		return 0;
+
+	obj_idx = slab_index(object, s, base);
+	kstart = base + (s->size * obj_idx);
+	if (cache_name)
+		*cache_name = s->name;
+	if (size)
+		*size = s->size;
+#ifdef CONFIG_SLUB_DEBUG
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) {
+		alloc_caller = get_track(s, kstart, TRACK_ALLOC)->addr;
+		free_caller = get_track(s, kstart, TRACK_FREE)->addr;
+	}
+#endif
+	if (freed)
+		*freed = alloc_caller ? 0 : 1;
+	if (caller)
+		*caller = alloc_caller ? : free_caller;
+	return (unsigned long)kstart;
+}
+#endif /*--- #if defined(CONFIG_AVM_ENHANCED) ---*/
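For context, get_kmemalloc_area() above is meant to be called from AVM-specific debug code (CONFIG_AVM_ENHANCED) to map an arbitrary kernel address back to its slab object and to the recorded alloc/free caller. A hedged usage sketch; demo_annotate_addr() is a made-up helper and the message layout is illustrative only:

static void demo_annotate_addr(unsigned long addr)
{
	unsigned long caller, size, start;
	const char *name;
	int freed;

	start = get_kmemalloc_area(addr, &caller, &name, &size, &freed);
	if (!start)
		return;		/* not a slab object */

	pr_info("%#lx: object %#lx..%#lx in cache %s, %s by %pS\n",
		addr, start, start + size - 1, name,
		freed ? "freed" : "allocated", (void *)caller);
}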
+
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+
+/*--- #define DBG_TRC(args...) pr_info(args) ---*/
+#define DBG_TRC(args...) no_printk(args)
+
+/**
+ * @brief
+ * Allocated slabs are kept on list->full and list->partial for debugging purposes only,
+ * so depending on the SLUB debug options in use these lists may be incomplete.
+ * This function therefore brute-force scans all existing pages for slab pages.
+ * Whenever a page belonging to the given kmem_cache pool is found, the callback is invoked.
+ *
+ * @param s kmem_cache pool
+ * @param ref reference passed to the callback
+ * @param page_cb callback - called if the page belongs to the kmem_cache pool
+ * @return slab_count
+ */
+static unsigned int kmem_cache_parse_all_pages(struct kmem_cache *s, void *ref,
+		int (*page_cb)(void *ref, struct kmem_cache *s, struct page *page))
+{
+	unsigned int slab_count = 0;
+	unsigned int order;
+	unsigned long flags;
+	unsigned int _count;
+	struct page *page;
+	struct zone *zone;
+	pg_data_t *pgdat;
+	unsigned int page_count;
+
+	for (pgdat = first_online_pgdat(); pgdat; pgdat = next_online_pgdat(pgdat)) {
+
+		if (pgdat_is_empty(pgdat)) {
+			DBG_TRC("[%s] pgdat 0x%lx is empty\n", __func__, (unsigned long)pgdat);
+			continue;
+		}
+		DBG_TRC("[%s] scan pgdat: start 0x%lx(mem_map=0x%p) present 0x%lx spanned 0x%lx\n",
+			__func__, pgdat->node_start_pfn,
+			pgdat_page_nr(pgdat, 0), pgdat->node_present_pages,
+			pgdat->node_spanned_pages);
+
+		for (page_count = 0; page_count < pgdat->node_spanned_pages; page_count++) {
+
+			if (!pfn_valid(pgdat->node_start_pfn + page_count)) {
+				/* there can be gaps in the page struct table
+				 * because the memory itself can contain gaps!
+				 */
+				continue;
+			}
+			page = pgdat_page_nr(pgdat, page_count);
+			zone = page_zone(page);
+			spin_lock_irqsave(&zone->lock, flags);
+
+			if (PageBuddy(page)) {
+				order = page_private(page); /*--- only valid while PageBuddy() holds! ---*/
+				spin_unlock_irqrestore(&zone->lock, flags);
+				if (likely(order <= MAX_ORDER)) {
+					DBG_TRC("Buddy: page=0x%p pfn=%lu-%lu (addr=0x%p) order %2u\n",
+						page, page_to_pfn(page),
+						page_to_pfn(page) + (1 << order) - 1,
+						page_address(page), order);
+					page_count += (1 << order) - 1;
+				} else {
+					pr_warn_ratelimited("obscure Buddy: page=0x%p pfn=%lu (addr=0x%p) order %2u\n",
+							    page, page_to_pfn(page),
+							    page_address(page), order);
+				}
+				continue;
+			}
+			order = 0;
+			if (PageHead(page)) {
+				order = compound_order(page);
+				if (unlikely(order > MAX_ORDER)) {
+					spin_unlock_irqrestore(&zone->lock, flags);
+					pr_warn_ratelimited("obscure Head: page=0x%p pfn=%lu (addr=0x%p) invalid order %2u\n",
+							    page, page_to_pfn(page),
+							    page_address(page), order);
+					continue;
+				}
+				_count = page_ref_count(page);
+				if (unlikely(_count == 0)) {
+					pr_warn_ratelimited("obscure Head: page=0x%p pfn=%lu (addr=0x%p) order %2u invalid _count=%u\n",
+							    page, page_to_pfn(page),
+							    page_address(page),
+							    order, _count);
+				} else {
+					DBG_TRC("Head: page=0x%p pfn=%lu (addr=0x%p) order=%2u\n",
+						page, page_to_pfn(page),
+						page_address(page), order);
+				}
+			}
+			if (PageSlab(page) && (page->slab_cache == s)) {
+				slab_count++;
+				if (page_cb(ref, s, page)) {
+					spin_unlock_irqrestore(&zone->lock, flags);
+					/* callback asked to stop - the lock is already dropped */
+					return slab_count;
+				}
+			} else if (PageReserved(page)) {
+				order = compound_order(page);
+				DBG_TRC("Reserved: page=0x%p pfn=%lu (addr=0x%p) order=%2u\n",
+					page, page_to_pfn(page),
+					page_address(page), order);
+			}
+			page_count += (1 << order) - 1;
+			spin_unlock_irqrestore(&zone->lock, flags);
+		}
+	}
+	return slab_count;
+}
+
+/**
+ * @brief insert sorted by caller
+ * divide and conquer to build the caller history (like the add_caller() function, but for this struct ;-)
+ * @param ptoplist pointer to the toplist to fill
+ * @param caller caller
+ * @param act_diff age of the allocation in jiffies
+ * @return != 0 if there is no room left in the toplist
+ */
+#define TOP_TOIDX(p) ((p) - (&ptoplist->entry[0]))
+static int mark_in_toplist(struct _slab_avm_topalloclist *ptoplist, unsigned long caller, unsigned long act_diff)
+{
+	unsigned int i, elements, idx;
+	struct _slab_avm_alloc_entry *q, *p;
+
+	elements = ptoplist->entries;
+	p = &ptoplist->entry[0];
+	while (elements) {
+		i = elements / 2;
+		q = &p[i];
+		if (q->caller == caller) {
+			q->count++;
+			q->sum_time += (unsigned long long)act_diff;
+			return 0;
+		}
+		if (q->caller > caller) {
+			elements = i;
+		} else {
+			p = q + 1;
+			elements -= i + 1;
+		}
+	}
+	if (ptoplist->entries >= ARRAY_SIZE(ptoplist->entry)) {
+		ptoplist->ignored++;
+		return 1;
+	}
+	idx = TOP_TOIDX(p);
+	memmove(&p[1], p, (ptoplist->entries - idx) * sizeof(ptoplist->entry[0]));
+	ptoplist->entries++;
+	ptoplist->entry[idx].caller = caller;
+	ptoplist->entry[idx].sum_time = act_diff;
+	ptoplist->entry[idx].count = 1;
+	return 0;
+}
+
+/**
+ * @brief sum up the caller-toplist entries
+ * @param ptoplist pointer to the toplist
+ * @return summed count of all entries
+ */
+static unsigned long sum_toplist_entries(struct _slab_avm_topalloclist *ptoplist)
+{
+	unsigned long sum_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ptoplist->entries; i++) {
+		sum_count += ptoplist->entry[i].count;
+	}
+	return sum_count;
+}
+
+/**
+ * @brief sort the caller-toplist (greatest count first)
+ * @param ptoplist pointer to the toplist to sort
+ * @return void
+ */
+static void sort_topalloclist(struct _slab_avm_topalloclist *ptoplist)
+{
+	unsigned int i, max_count, max_idx, idx = 0;
+
+	for (;;) {
+		struct _slab_avm_alloc_entry tmp;
+
+		max_count = 0;
+		for (i = idx; i < ptoplist->entries; i++) {
+			if (ptoplist->entry[i].count > max_count) {
+				max_count = ptoplist->entry[i].count;
+				max_idx = i;
+			}
+		}
+		if (max_count == 0) {
+			break;
+		}
+		/*--- swap ---*/
+		memcpy(&tmp, &ptoplist->entry[idx], sizeof(tmp));
+		memcpy(&ptoplist->entry[idx], &ptoplist->entry[max_idx], sizeof(tmp));
+		memcpy(&ptoplist->entry[max_idx], &tmp, sizeof(tmp));
+		idx++;
+	}
+}
+
+struct _avm_topalloc_list_ref {
+	signed long tresh_jiffiesdiff;
+	struct _slab_avm_topalloclist *ptoplist;
+	unsigned long act_jiffies;
+};
+
+/**
+ * callback for kmem_cache_parse_all_pages() - called from cache_avm_topalloc_list()
+ */
+static int avm_topalloc_list_cb(void *ref, struct kmem_cache *s, struct page *page)
+{
+	struct _avm_topalloc_list_ref *ptref = (struct _avm_topalloc_list_ref *)ref;
+	signed long act_diff;
+	struct track *pt;
+	void *addr, *p;
+
+	slab_lock(page);
+	addr = page_address(page);
+
+	for_each_object(p, s, addr, page->objects) {
+		if (on_freelist(s, page, p))
+			continue;
+
+		if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))) {
+			/* no track info exists */
+			ptref->ptoplist->ignored++;
+			continue;
+		}
+		pt = get_track(s, p, TRACK_ALLOC);
+		if ((pt->addr == 0) || (pt->when == 0)) {
+			/* pt->when == 0: initial allocations are not tracked (see set_track) */
+			continue;
+		}
+		act_diff = ptref->act_jiffies - pt->when;
+		if (act_diff < ptref->tresh_jiffiesdiff) {
+			/*--- too young ---*/
+			continue;
+		}
+		mark_in_toplist(ptref->ptoplist, pt->addr, act_diff);
+	}
+	slab_unlock(page);
+	return 0;
+}
+
+/**
+ * @brief fill the allocator toplist for a cache pool
+ * @param ptoplist pointer to the toplist to fill
+ * @param s cache pool
+ * @param tresh_jiffiesdiff only count allocations older than this (in jiffies)
+ * @return void
+ */
+void cache_avm_topalloc_list(struct _slab_avm_topalloclist *ptoplist,
+			     struct kmem_cache *s,
+			     unsigned long tresh_jiffiesdiff)
+{
+	struct _avm_topalloc_list_ref tref;
+	int node;
+	struct kmem_cache_node *n;
+	unsigned int nr_slabs = 0;
+
+	memset(ptoplist, 0, sizeof(*ptoplist));
+	tref.act_jiffies = jiffies;
+	tref.ptoplist = ptoplist;
+	tref.tresh_jiffiesdiff = (signed long)tresh_jiffiesdiff > 0 ? (signed long)tresh_jiffiesdiff : 0;
+
+	flush_all(s); /* make sure all objects are back on the lists (flush per-cpu freelists) */
+	for_each_kmem_cache_node(s, node, n)
+		nr_slabs += node_nr_slabs(n);
+
+	if (nr_slabs) {
+		unsigned int slab_count __maybe_unused;
+
+		slab_count = kmem_cache_parse_all_pages(s, &tref, avm_topalloc_list_cb);
+		if (nr_slabs != slab_count) {
+			pr_debug("%s: %s slb_nr=%u versus counted slabs=%u\n", __func__, s->name, nr_slabs, slab_count);
+		}
+		sort_topalloclist(ptoplist);
+		ptoplist->sum_count = sum_toplist_entries(ptoplist) + ptoplist->ignored;
+	}
+}
+
+struct _all_object_cb_ref {
+	void *ref;
+	int (*object_pointer)(void *ref, void *p);
+};
+
+/**
+ * callback for kmem_cache_parse_all_pages() - called from kmem_cache_list_all_objects()
+ */
+static int all_object_cb(void *ref, struct kmem_cache *s, struct page *page)
+{
+	struct _all_object_cb_ref *pcref = (struct _all_object_cb_ref *)ref;
+	void *addr, *p;
+	int ret = 0;
+
+	slab_lock(page);
+	addr = page_address(page);
+
+	for_each_object(p, s, addr, page->objects) {
+		if (on_freelist(s, page, p))
+			continue;
+
+		if (pcref->object_pointer(pcref->ref, p)) {
+			ret = 1;
+			break;
+		}
+	}
+	slab_unlock(page);
+	return ret;
+}
+
+/**
+ * @brief
+ * deliver all active object pointers of the kmem_cache pool s
+ *
+ * if the callback returns non-zero: stop listing
+ */
+void kmem_cache_list_all_objects(struct kmem_cache *s,
+				 void *ref,
+				 int (*object_pointer)(void *ref, void *p))
+{
+	struct _all_object_cb_ref cref;
+	int node;
+	unsigned int nr_slabs = 0;
+	struct kmem_cache_node *n = NULL;
+
+	cref.ref = ref;
+	cref.object_pointer = object_pointer;
+
+	flush_all(s); /* make sure all objects are back on the lists (flush per-cpu freelists) */
+	for_each_kmem_cache_node(s, node, n) {
+		nr_slabs += node_nr_slabs(n);
+	}
+	if (nr_slabs) {
+		unsigned int slab_count __maybe_unused;
+
+		slab_count = kmem_cache_parse_all_pages(s, &cref, all_object_cb);
+		if (nr_slabs != slab_count) {
+			pr_debug("%s: %s slb_nr=%u versus counted slabs=%u\n", __func__, s->name, nr_slabs, slab_count);
+		}
+	}
+}
+#endif /*--- #if defined(CONFIG_SLUB_AVM_ALLOC_LIST) ---*/
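The two CONFIG_SLUB_AVM_ALLOC_LIST entry points are easiest to understand from the caller's side. A minimal sketch, assuming declarations of both functions and of struct _slab_avm_topalloclist are available to the caller (presumably via an AVM-specific header that this patch does not show); demo_count_objects() and demo_dump_top_allocs() are hypothetical helpers:

static int demo_count_cb(void *ref, void *obj)
{
	unsigned int *count = ref;

	(*count)++;		/* obj is a live object of the inspected cache */
	return 0;		/* non-zero would stop the walk early */
}

static unsigned int demo_count_objects(struct kmem_cache *s)
{
	unsigned int count = 0;

	kmem_cache_list_all_objects(s, &count, demo_count_cb);
	return count;
}

static void demo_dump_top_allocs(struct kmem_cache *s)
{
	static struct _slab_avm_topalloclist toplist;	/* static: the entry array may be large */
	unsigned int i;

	/* only report allocations older than roughly 10 seconds */
	cache_avm_topalloc_list(&toplist, s, 10 * HZ);
	for (i = 0; i < toplist.entries; i++)
		pr_info("%pS: %lu allocation(s)\n",
			(void *)toplist.entry[i].caller,
			(unsigned long)toplist.entry[i].count);
}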