--- zzzz-none-000/linux-4.4.271/mm/slub.c	2021-06-03 06:22:09.000000000 +0000
+++ hawkeye-5590-750/linux-4.4.271/mm/slub.c	2023-04-19 10:22:30.000000000 +0000
@@ -115,6 +115,15 @@
  *			the fast path and disables lockless freelists.
  */
 
+static inline int kmem_cache_fastdebug(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_DEBUG
+	return (s->flags & SLAB_STORE_USER_LITE) && !(s->flags & (SLAB_RED_ZONE | SLAB_POISON | SLAB_TRACE));
+#else
+	return 0;
+#endif
+}
+
 static inline int kmem_cache_debug(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_DEBUG
@@ -127,7 +136,8 @@
 static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
-	return !kmem_cache_debug(s);
+	/* we want the cpu partial pools also in lite debug mode! */
+	return kmem_cache_fastdebug(s) || !kmem_cache_debug(s);
 #else
 	return false;
 #endif
@@ -160,15 +170,15 @@
  */
 #define MAX_PARTIAL 10
 
-#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
-				SLAB_POISON | SLAB_STORE_USER)
+#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_STORE_USER_LITE)
 
 /*
  * Debugging flags that require metadata to be stored in the slab. These get
  * disabled when slub_debug=O is used and a cache's min order increases with
  * metadata.
  */
-#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | \
+			      SLAB_STORE_USER | SLAB_STORE_USER_LITE)
 
 #define OO_SHIFT	16
 #define OO_MASK		((1 << OO_SHIFT) - 1)
@@ -185,20 +195,38 @@
 /*
  * Tracking user of a slab.
  */
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+#define TRACK_ADDRS_COUNT 4
+#else
 #define TRACK_ADDRS_COUNT 16
+#endif
 struct track {
+	int cpu;		/* Was running on cpu */
+	int pid;		/* Pid context */
+	unsigned long when;	/* When did the operation occur */
 	unsigned long addr;	/* Called from address */
 #ifdef CONFIG_STACKTRACE
 	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
 #endif
-	int cpu;		/* Was running on cpu */
-	int pid;		/* Pid context */
-	unsigned long when;	/* When did the operation occur */
 };
 
-enum track_item { TRACK_ALLOC, TRACK_FREE };
+static inline size_t track_size(unsigned int slab_flag)
+{
+	size_t len = sizeof(struct track);
+
+#ifdef CONFIG_STACKTRACE
+	if (slab_flag & SLAB_STORE_USER) {
+		return len;
+	}
+	/* the stack-unwind data is unused for SLAB_STORE_USER_LITE */
+	len = offsetof(struct track, addrs[0]);
+#endif
+	return len;
+}
+
+enum track_item { TRACK_ALLOC = 0, TRACK_FREE = 1 };
 
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void memcg_propagate_slab_attrs(struct kmem_cache *s);
@@ -300,7 +328,7 @@
 	 * back there or track user information then we can
 	 * only use the space before that information.
 	 */
-	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER | SLAB_STORE_USER_LITE))
 		return s->inuse;
 	/*
 	 * Else we can use all the padding etc for the allocation
@@ -441,6 +469,16 @@
 }
 
 #ifdef CONFIG_SLUB_DEBUG
+
+static void delayed_slub_corruption_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(delayed_bug, delayed_slub_corruption_handler);
+
+static void delayed_slub_corruption_handler(struct work_struct *work)
+{
+	pr_err("slub corruption: intentional crash in workqueue\n");
+	BUG();
+}
+
 /*
  * Determine a map of object in use on a page.
  *
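The space saving of SLAB_STORE_USER_LITE comes entirely from track_size(): both track records still sit back to back behind the object, but without SLAB_STORE_USER the addrs[] stack array at the end of struct track (which the field reordering above pushes behind the scalar members) is simply not reserved. A stand-alone sketch of that size calculation, mirroring track_size() and its 2 * track_size() consumers; the flag values below are stand-ins, not the kernel's:

	#include <stddef.h>
	#include <stdio.h>

	#define SLAB_STORE_USER      0x00010000UL	/* stand-in value */
	#define SLAB_STORE_USER_LITE 0x00020000UL	/* stand-in value */
	#define TRACK_ADDRS_COUNT 16

	struct track {
		int cpu;
		int pid;
		unsigned long when;
		unsigned long addr;
		unsigned long addrs[TRACK_ADDRS_COUNT];	/* only used with SLAB_STORE_USER */
	};

	/* mirrors track_size(): the lite variant stops before addrs[] */
	static size_t track_size(unsigned long flags)
	{
		if (flags & SLAB_STORE_USER)
			return sizeof(struct track);
		return offsetof(struct track, addrs[0]);
	}

	int main(void)
	{
		/* two records (alloc + free) are stored behind every object */
		printf("full tracking: %zu bytes/object\n", 2 * track_size(SLAB_STORE_USER));
		printf("lite tracking: %zu bytes/object\n", 2 * track_size(SLAB_STORE_USER_LITE));
		return 0;
	}

With the AVM configuration the array also shrinks from 16 to 4 entries, so even full SLAB_STORE_USER caches pay less metadata per object.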
@@ -456,15 +494,20 @@
 		set_bit(slab_index(p, s, addr), map);
 }
 
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+#define DEBUG_MIN_FLAGS (SLAB_STORE_USER_LITE | SLAB_DEBUG_FREE)
+#else
+#define DEBUG_MIN_FLAGS 0x0
+#endif
 /*
  * Debug settings:
  */
 #if defined(CONFIG_SLUB_DEBUG_ON)
 static int slub_debug = DEBUG_DEFAULT_FLAGS;
 #elif defined(CONFIG_KASAN)
-static int slub_debug = SLAB_STORE_USER;
+static int slub_debug = SLAB_STORE_USER | DEBUG_MIN_FLAGS;
 #else
-static int slub_debug;
+static int slub_debug = DEBUG_MIN_FLAGS;
 #endif
 
 static char *slub_debug_slabs;
@@ -507,20 +550,73 @@
 	else
 		p = object + s->inuse;
 
-	return p + alloc;
+	return (void *)p + (alloc ? track_size(s->flags) : 0);
+}
+
+/**
+ * Return: the previous addr value
+ */
+static inline unsigned long reset_track(struct kmem_cache *s, void *object,
+					enum track_item alloc)
+{
+	unsigned long old_addr;
+	struct track *p = get_track(s, object, alloc);
+	/*
+	 * The free-call track must be initialized with a known address value
+	 * so that objects which were allocated but never freed can be
+	 * recognized.
+	 */
+	/*--- TODO: use cmpxchg() ---*/
+	old_addr = xchg(&p->addr, 0L);
+#ifdef CONFIG_STACKTRACE
+	if (unlikely(s->flags & SLAB_STORE_USER))
+		p->addrs[0] = 0L;
+#endif
+	return old_addr;
 }
 
-static void set_track(struct kmem_cache *s, void *object,
+/**
+ * special case for TRACK_FREE!
+ *
+ * if alloc == TRACK_FREE:
+ * (a) clear the alloc addr
+ * (b) set the free addr - but only if an old alloc addr exists
+ * (c) return the old alloc addr
+ *
+ */
+static unsigned long set_track(struct kmem_cache *s, void *object,
 			enum track_item alloc, unsigned long addr)
 {
+	unsigned long alloc_addr = 0;
 	struct track *p = get_track(s, object, alloc);
 
-	if (addr) {
+	if (alloc == TRACK_FREE) {
+		/* use the alloc addr as the indicator for an allocated object */
+		alloc_addr = reset_track(s, object, TRACK_ALLOC);
+		if (alloc_addr == 0)
+			/* an addr of zero means this is a double free */
+			return alloc_addr;
+	}
+	p->addr = addr;
+	p->cpu = get_cpu();
+	put_cpu();
+	p->pid = current->pid;
+
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+	if (likely(!slab_track_time))
+		/* do not track the slab allocator's initial allocations (too many entries) */
+		p->when = 0;
+	else
+#endif
+		p->when = jiffies;
+
 #ifdef CONFIG_STACKTRACE
+	if (unlikely(s->flags & SLAB_STORE_USER)) {
 		struct stack_trace trace;
 		int i;
 
 		trace.nr_entries = 0;
+		/*--- a complete stack trace is too slow! ---*/
 		trace.max_entries = TRACK_ADDRS_COUNT;
 		trace.entries = p->addrs;
 		trace.skip = 3;
@@ -532,28 +628,23 @@
 		if (trace.nr_entries != 0 &&
 		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
 			trace.nr_entries--;
-
 		for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
 			p->addrs[i] = 0;
+	}
 #endif
-	p->addr = addr;
-	p->cpu = smp_processor_id();
-	p->pid = current->pid;
-	p->when = jiffies;
-	} else
-		memset(p, 0, sizeof(struct track));
+	return alloc_addr;
 }
 
 static void init_tracking(struct kmem_cache *s, void *object)
 {
-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)))
 		return;
 
-	set_track(s, object, TRACK_FREE, 0UL);
-	set_track(s, object, TRACK_ALLOC, 0UL);
+	reset_track(s, object, TRACK_FREE);
+	reset_track(s, object, TRACK_ALLOC);
 }
 
-static void print_track(const char *s, struct track *t)
+static void print_track(const char *s, struct track *t, unsigned int flags)
 {
 	if (!t->addr)
 		return;
@@ -563,6 +654,9 @@
 #ifdef CONFIG_STACKTRACE
 	{
 		int i;
+
+		if (!(flags & SLAB_STORE_USER))
+			return;
 		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
 			if (t->addrs[i])
 				pr_err("\t%pS\n", (void *)t->addrs[i]);
@@ -574,11 +668,11 @@
 
 static void print_tracking(struct kmem_cache *s, void *object)
 {
-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)))
 		return;
 
-	print_track("Allocated", get_track(s, object, TRACK_ALLOC));
-	print_track("Freed", get_track(s, object, TRACK_FREE));
+	print_track("Allocated", get_track(s, object, TRACK_ALLOC), s->flags);
+	print_track("Freed", get_track(s, object, TRACK_FREE), s->flags);
 }
 
 static void print_page_info(struct page *page)
@@ -642,8 +736,8 @@
 	else
 		off = s->inuse;
 
-	if (s->flags & SLAB_STORE_USER)
-		off += 2 * sizeof(struct track);
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
+		off += 2 * track_size(s->flags);
 
 	if (off != s->size)
 		/* Beginning of the filler is the free pointer */
@@ -716,6 +810,11 @@
 	print_trailer(s, page, object);
 
 	restore_bytes(s, what, value, fault, end);
+
+	if (s->flags & SLAB_PANIC_CORRUPTION) {
+		pr_err("slub corruption: schedule delayed BUG()\n");
+		schedule_delayed_work(&delayed_bug, 1000);
+	}
 	return 0;
 }
 
@@ -765,9 +864,9 @@
 		/* Freepointer is placed after the object. */
 		off += sizeof(void *);
 
-	if (s->flags & SLAB_STORE_USER)
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
 		/* We also have user information there */
-		off += 2 * sizeof(struct track);
+		off += 2 * track_size(s->flags);
 
 	if (s->size == off)
 		return 1;
@@ -942,10 +1041,9 @@
 	return search == NULL;
 }
 
-static void trace(struct kmem_cache *s, struct page *page, void *object,
+static noinline void trace(struct kmem_cache *s, struct page *page, void *object,
 								int alloc)
 {
-	if (s->flags & SLAB_TRACE) {
 		pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
 			s->name,
 			alloc ? "alloc" : "free",
@@ -957,7 +1055,6 @@
 					s->object_size);
 
 		dump_stack();
-	}
 }
 
 /*
@@ -1022,7 +1119,7 @@
 static void setup_object_debug(struct kmem_cache *s, struct page *page,
 								void *object)
 {
-	if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
+	if (!(s->flags & (SLAB_STORE_USER|SLAB_STORE_USER_LITE|SLAB_RED_ZONE|__OBJECT_POISON)))
 		return;
 
 	init_object(s, object, SLUB_RED_INACTIVE);
@@ -1044,10 +1141,8 @@
 	if (!check_object(s, page, object, SLUB_RED_INACTIVE))
 		goto bad;
 
-	/* Success perform special debug activities for allocs */
-	if (s->flags & SLAB_STORE_USER)
-		set_track(s, object, TRACK_ALLOC, addr);
-	trace(s, page, object, 1);
+	if (unlikely(s->flags & SLAB_TRACE))
+		trace(s, page, object, 1);
 	init_object(s, object, SLUB_RED_ACTIVE);
 	return 1;
 
@@ -1111,9 +1206,14 @@
 		goto fail;
 	}
 
-	if (s->flags & SLAB_STORE_USER)
-		set_track(s, object, TRACK_FREE, addr);
-	trace(s, page, object, 0);
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) {
+		if (unlikely(set_track(s, object, TRACK_FREE, addr) == 0)) {
+			object_err(s, page, object, "Object already free");
+			goto out;
+		}
+	}
+	if (unlikely(s->flags & SLAB_TRACE))
+		trace(s, page, object, 0);
 	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
 	init_object(s, object, SLUB_RED_INACTIVE);
 
@@ -1141,6 +1241,32 @@
 	return NULL;
 }
 
+static int free_debug_processing_fast(struct kmem_cache *s, struct page *page,
+				      void *object,
+				      unsigned long addr)
+{
+	struct kmem_cache_node *n;
+	unsigned long flags;
+
+	if (s->flags & SLAB_DEBUG_FREE) {
+
+		if (unlikely((s != page->slab_cache) || !check_valid_pointer(s, page, object))) {
+			/* force a complete check with locks */
+			n = free_debug_processing(s, page, object, object, addr, 1, &flags);
+			if (n)
+				spin_unlock_irqrestore(&n->list_lock, flags);
+			else
+				return 0;
+		}
+	}
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) {
+		if (unlikely(set_track(s, object, TRACK_FREE, addr) == 0)) {
+			object_err(s, page, object, "Object already free");
+			return 0;
+		}
+	}
+	return 1;
+}
 static int __init setup_slub_debug(char *str)
 {
 	slub_debug = DEBUG_DEFAULT_FLAGS;
@@ -1187,6 +1313,9 @@
 		case 'a':
 			slub_debug |= SLAB_FAILSLAB;
 			break;
+		case 'l':
+			slub_debug |= SLAB_STORE_USER_LITE;
+			break;
 		case 'o':
 			/*
 			 * Avoid enabling debugging on caches if its minimum
@@ -1194,16 +1323,26 @@
 			 */
 			disable_higher_order_debug = 1;
 			break;
+		case 'c':
+			slub_debug |= SLAB_PANIC_CORRUPTION;
+			break;
 		default:
 			pr_err("slub_debug option '%c' unknown. skipped\n",
 			       *str);
 		}
 	}
-
check_slabs:
 	if (*str == ',')
 		slub_debug_slabs = str + 1;
out:
+	pr_err("%s: slub_debug =%s%s%s%s%s%s%s\n", __func__,
+	       (slub_debug & SLAB_DEBUG_FREE) ? " DEBUG_FREE" : "",
+	       (slub_debug & SLAB_RED_ZONE) ? " RED_ZONE" : "",
+	       (slub_debug & SLAB_POISON) ? " POISON" : "",
+	       (slub_debug & SLAB_STORE_USER) ? " STORE_USER" : "",
+	       (slub_debug & SLAB_TRACE) ? " TRACE" : "",
+	       (slub_debug & SLAB_FAILSLAB) ? " FAILSLAB" : "",
+	       (slub_debug & SLAB_STORE_USER_LITE) ? " STORE_USER_LITE" : "");
 	return 1;
 }
 
@@ -1220,6 +1359,14 @@
 		!strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
 		flags |= slub_debug;
 
+	pr_debug("%s: flags =%s%s%s%s%s%s%s\n", __func__,
+		 (flags & SLAB_DEBUG_FREE) ? " DEBUG_FREE" : "",
+		 (flags & SLAB_RED_ZONE) ? " RED_ZONE" : "",
+		 (flags & SLAB_POISON) ? " POISON" : "",
+		 (flags & SLAB_STORE_USER) ? " STORE_USER" : "",
+		 (flags & SLAB_TRACE) ? " TRACE" : "",
+		 (flags & SLAB_FAILSLAB) ? " FAILSLAB" : "",
+		 (flags & SLAB_STORE_USER_LITE) ? " STORE_USER_LITE" : "");
 	return flags;
 }
 #else /* !CONFIG_SLUB_DEBUG */
@@ -1233,11 +1380,16 @@
 	struct kmem_cache *s, struct page *page,
 	void *head, void *tail, int bulk_cnt,
 	unsigned long addr, unsigned long *flags) { return NULL; }
-
+static int free_debug_processing_fast(struct kmem_cache *s, struct page *page,
+				      void *object,
+				      unsigned long addr)
+	{ return 1; }
 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
 			{ return 1; }
 static inline int check_object(struct kmem_cache *s, struct page *page,
 			void *object, u8 val) { return 1; }
+static inline unsigned long set_track(struct kmem_cache *s, void *object,
 			enum track_item alloc, unsigned long addr) { return 1; }
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 					struct page *page) {}
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -2198,7 +2350,7 @@
 }
 #endif /* CONFIG_SLUB_DEBUG */
 
-#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
+#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
 static unsigned long count_partial(struct kmem_cache_node *n,
 					int (*get_count)(struct page *))
 {
@@ -2212,7 +2364,7 @@
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	return x;
 }
-#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
+#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
 
 static noinline void
 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
@@ -2438,14 +2590,16 @@
 	}
 
 	page = c->page;
-	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
-		goto load_freelist;
-
-	/* Only entered in the debug case */
-	if (kmem_cache_debug(s) &&
-			!alloc_debug_processing(s, page, freelist, addr))
-		goto new_slab;	/* Slab failed checks. Next slab needed */
-
+	if (pfmemalloc_match(page, gfpflags)) {
+		if (likely(!kmem_cache_debug(s)))
+			goto load_freelist;
+		/* Only entered in the debug case */
+		if (!alloc_debug_processing(s, page, freelist, addr))
+			goto new_slab;	/* Slab failed checks. Next slab needed */
+		if (kmem_cache_fastdebug(s))
+			/* deactivate_slab() is very expensive, so keep the freelist and stay on the fastpath! */
+			goto load_freelist;
+	}
 	deactivate_slab(s, page, get_freepointer(s, freelist));
 	c->page = NULL;
 	c->freelist = NULL;
@@ -2565,7 +2719,8 @@
 		prefetch_freepointer(s, next_object);
 		stat(s, ALLOC_FASTPATH);
 	}
-
+	if ((s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) && object)
+		set_track(s, object, TRACK_ALLOC, addr);
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, s->object_size);
 
@@ -2595,6 +2750,7 @@
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+
 	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
 	kasan_kmalloc(s, ret, size);
 	return ret;
@@ -2653,11 +2809,13 @@
 
 	stat(s, FREE_SLOWPATH);
 
-	if (kmem_cache_debug(s) &&
-		!(n = free_debug_processing(s, page, head, tail, cnt,
-					    addr, &flags)))
-		return;
-
+	if (kmem_cache_debug(s)) {
+		if (!kmem_cache_fastdebug(s) || (cnt > 1)) {
+			n = free_debug_processing(s, page, head, tail, cnt, addr, &flags);
+			if (!n)
+				return;
+		}
+	}
 	do {
 		if (unlikely(n)) {
 			spin_unlock_irqrestore(&n->list_lock, flags);
@@ -2778,7 +2936,10 @@
 	unsigned long tid;
 
 	slab_free_freelist_hook(s, head, tail);
-
+	if ((kmem_cache_fastdebug(s)) && (cnt == 1)) {
+		if (!free_debug_processing_fast(s, page, tail_obj, addr))
+			return;
+	}
redo:
 	/*
 	 * Determine the currently cpus per cpu slab.
@@ -2959,11 +3120,17 @@
 			if (unlikely(!p[i]))
 				goto error;
 
+			if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
+				set_track(s, p[i], TRACK_ALLOC, _RET_IP_);
+
 			c = this_cpu_ptr(s->cpu_slab);
 			continue; /* goto for-loop */
 		}
 		c->freelist = get_freepointer(s, object);
 		p[i] = object;
+		if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
+			set_track(s, p[i], TRACK_ALLOC, _RET_IP_);
+
 	}
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
@@ -3234,7 +3401,9 @@
 		min = MAX_PARTIAL;
 	s->min_partial = min;
 }
-
+#ifndef CONFIG_KASAN
+#define KASAN_SHADOW_SCALE_SHIFT 0
+#endif
 /*
  * calculate_sizes() determines the order and the distribution of data within
  * a slab object.
@@ -3251,6 +3420,7 @@
 	 * the possible location of the free pointer.
 	 */
 	size = ALIGN(size, sizeof(void *));
+	size = ALIGN(size, 1UL << KASAN_SHADOW_SCALE_SHIFT);
 
 #ifdef CONFIG_SLUB_DEBUG
 	/*
@@ -3295,12 +3465,12 @@
 	}
 
 #ifdef CONFIG_SLUB_DEBUG
-	if (flags & SLAB_STORE_USER)
+	if (flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))
 		/*
 		 * Need to store information about allocs and frees after
 		 * the object.
 		 */
-		size += 2 * sizeof(struct track);
+		size += 2 * track_size(flags);
 
 	if (flags & SLAB_RED_ZONE)
 		/*
@@ -4091,7 +4261,7 @@
 }
 #endif
 
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 static int count_inuse(struct page *page)
 {
 	return page->inuse;
@@ -4156,7 +4326,7 @@
 		pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
 		       s->name, count, n->nr_partial);
 
-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)))
 		goto out;
 
 	list_for_each_entry(page, &n->full, lru) {
@@ -4321,13 +4491,19 @@
 {
 	void *addr = page_address(page);
 	void *p;
+	bool use_freelist = (alloc == TRACK_FREE);
 
 	bitmap_zero(map, page->objects);
 	get_map(s, page, map);
 
 	for_each_object(p, s, addr, page->objects)
-		if (!test_bit(slab_index(p, s, addr), map))
-			add_location(t, s, get_track(s, p, alloc));
+		if (test_bit(slab_index(p, s, addr), map) == use_freelist) {
+			struct track *track = get_track(s, p, alloc);
+
+			if (!track->addr)
+				continue;
+			add_location(t, s, track);
+		}
 }
 
 static int list_locations(struct kmem_cache *s, char *buf,
@@ -4470,12 +4646,12 @@
 	validate_slab_cache(kmalloc_caches[9]);
 }
 #else
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 static void resiliency_test(void) {};
 #endif
 #endif
 
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
 enum slab_stat_type {
 	SL_ALL,			/* All slabs */
 	SL_PARTIAL,		/* Only partially allocated slabs */
@@ -4945,6 +5121,29 @@
 }
 SLAB_ATTR(store_user);
 
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+static ssize_t store_user_lite_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER_LITE));
+}
+
+static ssize_t store_user_lite_store(struct kmem_cache *s,
+				     const char *buf, size_t length)
+{
+	if (any_slab_objects(s))
+		return -EBUSY;
+
+	s->flags &= ~SLAB_STORE_USER_LITE;
+	if (buf[0] == '1') {
+		s->flags &= ~__CMPXCHG_DOUBLE;
+		s->flags |= SLAB_STORE_USER_LITE;
+	}
+	calculate_sizes(s, -1);
+	return length;
+}
+SLAB_ATTR(store_user_lite);
+#endif /*--- #if defined(CONFIG_SLUB_AVM_ALLOC_LIST) ---*/
+
 static ssize_t validate_show(struct kmem_cache *s, char *buf)
 {
 	return 0;
@@ -5150,6 +5349,9 @@
 	&red_zone_attr.attr,
 	&poison_attr.attr,
 	&store_user_attr.attr,
+#ifdef CONFIG_SLUB_AVM_ALLOC_LIST
+	&store_user_lite_attr.attr,
+#endif
 	&validate_attr.attr,
 	&alloc_calls_attr.attr,
 	&free_calls_attr.attr,
@@ -5548,7 +5750,7 @@
 }
 
 __initcall(slab_sysfs_init);
-#endif /* CONFIG_SYSFS */
+#endif /* SLAB_SUPPORTS_SYSFS */
 
 /*
  * The /proc/slabinfo ABI
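With the store_user_lite attribute registered above, the lite tracker can also be toggled per cache at runtime through the standard SLUB sysfs directory - but only while the cache holds no objects, as the -EBUSY check enforces. A hedged usage example (the cache name is illustrative):

	# enable lite tracking for one cache (fails with EBUSY once objects exist)
	echo 1 > /sys/kernel/slab/kmalloc-192/store_user_lite
	cat /sys/kernel/slab/kmalloc-192/store_user_lite

At boot time the same effect is available through the new setup_slub_debug() letters, e.g. slub_debug=l for lite user tracking on all caches, or slub_debug=lc to additionally arm the delayed BUG() on detected corruption.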
@@ -5586,3 +5788,399 @@
 	return -EIO;
 }
 #endif /* CONFIG_SLABINFO */
+
+#if defined(CONFIG_AVM_ENHANCED)
+/**
+ * @brief get the kmemalloc area if addr lies within its range
+ * attention! unprotected against concurrent changes to the cache - the zone page spinlock would be necessary
+ * @return start address (zero if it does not exist)
+ */
+unsigned long get_kmemalloc_area(unsigned long addr,
+				 unsigned long *caller,
+				 const char **cache_name,
+				 unsigned long *size, int *freed)
+{
+	void *object;
+	unsigned long alloc_caller = 0, free_caller = 0;
+	unsigned int obj_idx;
+	void *base;
+	void *kstart;
+	struct page *page;
+	struct kmem_cache *s;
+
+	object = (void *)addr;
+	page = virt_to_head_page(object);
+	if (!virt_addr_valid(page))
+		return 0;
+	if (unlikely(!PageSlab(page)))
+		return 0;
+
+	s = page->slab_cache;
+	if (!virt_addr_valid(s))
+		return 0;
+
+	base = page_address(page);
+	if ((object < base) || (object >= base + page->objects * s->size))
+		return 0;
+
+	obj_idx = slab_index(object, s, base);
+	kstart = base + (s->size * obj_idx);
+	if (cache_name)
+		*cache_name = s->name;
+	if (size)
+		*size = s->size;
+#ifdef CONFIG_SLUB_DEBUG
+	if (s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE)) {
+		alloc_caller = get_track(s, kstart, TRACK_ALLOC)->addr;
+		free_caller = get_track(s, kstart, TRACK_FREE)->addr;
+	}
+#endif
+	if (freed)
+		*freed = alloc_caller ? 0 : 1;
+	if (caller)
+		*caller = alloc_caller ? : free_caller;
+	return (unsigned long)kstart;
+}
+#endif /*--- #if defined(CONFIG_AVM_ENHANCED) ---*/
+
+#if defined(CONFIG_SLUB_AVM_ALLOC_LIST)
+
+/*--- #define DBG_TRC(args...) pr_info(args) ---*/
+#define DBG_TRC(args...) no_printk(args)
+
+/**
+ * @brief
+ * Allocated slabs are kept on list->full and list->partial for debugging purposes only,
+ * so depending on the SLUB debug options in use these lists may be incomplete.
+ * This function therefore brute-force scans all existing pages for slab pages.
+ * Whenever a page belonging to the given cache pool is found, the callback is invoked.
+ *
+ * @param s kmem_cache pool
+ * @param ref reference passed to the callback
+ * @param page_cb callback - called for every page of the kmem_cache pool
+ * @return slab_count
+ */
+static unsigned int kmem_cache_parse_all_pages(struct kmem_cache *s, void *ref,
+		int (*page_cb)(void *ref, struct kmem_cache *s, struct page *page))
+{
+	unsigned int slab_count = 0;
+	unsigned int order;
+	unsigned long flags;
+	unsigned int _count;
+	struct page *page;
+	struct zone *zone;
+	pg_data_t *pgdat;
+	unsigned int page_count;
+
+	for (pgdat = first_online_pgdat(); pgdat; pgdat = next_online_pgdat(pgdat)) {
+
+		if (pgdat_is_empty(pgdat)) {
+			DBG_TRC("[%s] pgdat 0x%lx is empty\n", __func__, (unsigned long)pgdat);
+			continue;
+		}
+		DBG_TRC("[%s] scan pgdat: start 0x%lx(mem_map=0x%p) present 0x%lx spanned 0x%lx\n",
+			__func__, pgdat->node_start_pfn,
+			pgdat_page_nr(pgdat, 0), pgdat->node_present_pages,
+			pgdat->node_spanned_pages);
+
+		for (page_count = 0; page_count < pgdat->node_spanned_pages; page_count++) {
+
+			if (!pfn_valid(pgdat->node_start_pfn + page_count)) {
+				/* the page struct table may contain gaps,
+				 * because the memory itself may contain gaps!
+				 */
+				continue;
+			}
+			page = pgdat_page_nr(pgdat, page_count);
+			zone = page_zone(page);
+			spin_lock_irqsave(&zone->lock, flags);
+
+			if (PageBuddy(page)) {
+				order = page_private(page); /*--- only valid while PageBuddy(page) is set! ---*/
+				spin_unlock_irqrestore(&zone->lock, flags);
+				if (likely(order <= MAX_ORDER)) {
+					DBG_TRC("Buddy: page=0x%p pfn=%lu-%lu (addr=0x%p) order %2u\n",
+						page, page_to_pfn(page),
+						page_to_pfn(page) + (1 << order) - 1,
+						page_address(page), order);
+					page_count += (1 << order) - 1;
+				} else {
+					pr_warn_ratelimited("obscure Buddy: page=0x%p pfn=%lu (addr=0x%p) order %2u\n",
+							    page, page_to_pfn(page),
+							    page_address(page), order);
+				}
+				continue;
+			}
+			order = 0;
+			if (PageHead(page)) {
+				order = compound_order(page);
+				if (unlikely(order > MAX_ORDER)) {
+					spin_unlock_irqrestore(&zone->lock, flags);
+					pr_warn_ratelimited("obscure Head: page=0x%p pfn=%lu (addr=0x%p) invalid order %2u\n",
+							    page, page_to_pfn(page),
+							    page_address(page), order);
+					continue;
+				}
+				_count = page_ref_count(page);
+				if (unlikely(_count == 0)) {
+					pr_warn_ratelimited("obscure Head: page=0x%p pfn=%lu (addr=0x%p) order %2u invalid _count=%u\n",
+							    page, page_to_pfn(page),
+							    page_address(page),
+							    order, _count);
+				} else {
+					DBG_TRC("Head: page=0x%p pfn=%lu (addr=0x%p) order=%2u\n",
+						page, page_to_pfn(page),
+						page_address(page), order);
+				}
+			}
+			if (PageSlab(page) && (page->slab_cache == s)) {
+				slab_count++;
+				if (page_cb(ref, s, page)) {
+					spin_unlock_irqrestore(&zone->lock, flags);
+					pgdat = NULL;
+					break;
+				}
+			} else if (PageReserved(page)) {
+				order = compound_order(page);
+				DBG_TRC("Reserved: page=0x%p pfn=%lu (addr=0x%p) order=%2u\n",
+					page, page_to_pfn(page),
+					page_address(page), order);
+			}
+			page_count += (1 << order) - 1;
+			spin_unlock_irqrestore(&zone->lock, flags);
+		}
+	}
+	return slab_count;
+}
+
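A possible consumer of get_kmemalloc_area(), e.g. from an exception or debug dump path; the helper below is hypothetical and only illustrates the out-parameters of the exported function:

	/* hypothetical debug helper, not part of the patch */
	static void describe_kernel_address(unsigned long addr)
	{
		const char *cache_name;
		unsigned long caller, size, start;
		int freed;

		start = get_kmemalloc_area(addr, &caller, &cache_name, &size, &freed);
		if (!start) {
			pr_info("0x%08lx: no slab object\n", addr);
			return;
		}
		pr_info("0x%08lx: %s object at 0x%08lx (size %lu, %s, last caller %pS)\n",
			addr, cache_name, start, size,
			freed ? "freed" : "allocated", (void *)caller);
	}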
+/**
+ * @brief insert a caller into the toplist, which is kept sorted by caller address
+ * (divide and conquer, like the add_caller() function but for this struct)
+ * @param ptoplist pointer to the toplist to fill
+ * @param caller caller address
+ * @return != 0 if there is no room left in the toplist
+ */
+#define TOP_TOIDX(p) ((p) - (&ptoplist->entry[0]))
+static int mark_in_toplist(struct _slab_avm_topalloclist *ptoplist, unsigned long caller, unsigned long act_diff)
+{
+	unsigned int i, elements, idx;
+	struct _slab_avm_alloc_entry *q, *p;
+
+	elements = ptoplist->entries;
+	p = &ptoplist->entry[0];
+	while (elements) {
+		i = elements / 2;
+		q = &p[i];
+		if (q->caller == caller) {
+			q->count++;
+			q->sum_time += (unsigned long long)act_diff;
+			return 0;
+		}
+		if (q->caller > caller) {
+			elements = i;
+		} else {
+			p = q + 1;
+			elements -= i + 1;
+		}
+	}
+	if (ptoplist->entries >= ARRAY_SIZE(ptoplist->entry)) {
+		ptoplist->ignored++;
+		return 1;
+	}
+	idx = TOP_TOIDX(p);
+	memmove(&p[1], p, (ptoplist->entries - idx) * sizeof(ptoplist->entry[0]));
+	ptoplist->entries++;
+	ptoplist->entry[idx].caller = caller;
+	ptoplist->entry[idx].sum_time = act_diff;
+	ptoplist->entry[idx].count = 1;
+	return 0;
+}
+
+/**
+ * @brief sum up the caller-toplist entries
+ * @param ptoplist pointer to the toplist
+ * @return sum of all entry counts
+ */
+static unsigned long sum_toplist_entries(struct _slab_avm_topalloclist *ptoplist)
+{
+	unsigned long sum_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ptoplist->entries; i++) {
+		sum_count += ptoplist->entry[i].count;
+	}
+	return sum_count;
+}
+
+/**
+ * @brief sort the caller-toplist (greatest count first)
+ * @param ptoplist pointer to the toplist to sort
+ * @return void
+ */
+static void sort_topalloclist(struct _slab_avm_topalloclist *ptoplist)
+{
+	unsigned int i, max_count, max_idx, idx = 0;
+
+	for (;;) {
+		struct _slab_avm_alloc_entry tmp;
+
+		max_count = 0;
+		for (i = idx; i < ptoplist->entries; i++) {
+			if (ptoplist->entry[i].count > max_count) {
+				max_count = ptoplist->entry[i].count;
+				max_idx = i;
+			}
+		}
+		if (max_count == 0) {
+			break;
+		}
+		/*--- swap ---*/
+		memcpy(&tmp, &ptoplist->entry[idx], sizeof(tmp));
+		memcpy(&ptoplist->entry[idx], &ptoplist->entry[max_idx], sizeof(tmp));
+		memcpy(&ptoplist->entry[max_idx], &tmp, sizeof(tmp));
+		idx++;
+	}
+}
+
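mark_in_toplist() above is a binary search for the insertion point followed by a memmove(); the same pattern in a self-contained form, simplified to a bare sorted array of caller addresses (plain C, not the patch's data structure):

	#include <stdio.h>
	#include <string.h>

	#define MAX_ENTRIES 8

	/* lower-bound insert into a sorted array, as mark_in_toplist() does for callers */
	static int insert_sorted(unsigned long *arr, unsigned int *nr, unsigned long key)
	{
		unsigned int lo = 0, hi = *nr;

		while (lo < hi) {		/* binary search for the insert position */
			unsigned int mid = lo + (hi - lo) / 2;

			if (arr[mid] == key)
				return 0;	/* already present: nothing to insert */
			if (arr[mid] < key)
				lo = mid + 1;
			else
				hi = mid;
		}
		if (*nr >= MAX_ENTRIES)
			return 1;		/* no room, the caller counts it as "ignored" */
		memmove(&arr[lo + 1], &arr[lo], (*nr - lo) * sizeof(arr[0]));
		arr[lo] = key;
		(*nr)++;
		return 0;
	}

	int main(void)
	{
		unsigned long callers[MAX_ENTRIES];
		unsigned int nr = 0, i;

		insert_sorted(callers, &nr, 0xc030);
		insert_sorted(callers, &nr, 0xc010);
		insert_sorted(callers, &nr, 0xc020);
		for (i = 0; i < nr; i++)
			printf("0x%lx\n", callers[i]);
		return 0;
	}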
+struct _avm_topalloc_list_ref {
+	signed long tresh_jiffiesdiff;
+	struct _slab_avm_topalloclist *ptoplist;
+	unsigned long act_jiffies;
+};
+
+/**
+ * callback for kmem_cache_parse_all_pages() - called from cache_avm_topalloc_list()
+ */
+static int avm_topalloc_list_cb(void *ref, struct kmem_cache *s, struct page *page)
+{
+	struct _avm_topalloc_list_ref *ptref = (struct _avm_topalloc_list_ref *)ref;
+	signed long act_diff;
+	struct track *pt;
+	void *addr, *p;
+
+	slab_lock(page);
+	addr = page_address(page);
+
+	for_each_object(p, s, addr, page->objects) {
+		if (on_freelist(s, page, p))
+			continue;
+
+		if (!(s->flags & (SLAB_STORE_USER | SLAB_STORE_USER_LITE))) {
+			/* no track info exists */
+			ptref->ptoplist->ignored++;
+			continue;
+		}
+		pt = get_track(s, p, TRACK_ALLOC);
+		if ((pt->addr == 0) || (pt->when == 0)) {
+			/* pt->when == 0: initial allocations are not traced (see set_track) */
+			continue;
+		}
+		act_diff = ptref->act_jiffies - pt->when;
+		if (act_diff < ptref->tresh_jiffiesdiff) {
+			/*--- too young ---*/
+			continue;
+		}
+		mark_in_toplist(ptref->ptoplist, pt->addr, act_diff);
+	}
+	slab_unlock(page);
+	return 0;
+}
+
+/**
+ * @brief fill the allocator toplist for a cache
+ * @param ptoplist pointer to the toplist to fill
+ * @param s cache pool
+ * @param tresh_jiffiesdiff only account callers older than this (in jiffies)
+ * @return void
+ */
+void cache_avm_topalloc_list(struct _slab_avm_topalloclist *ptoplist,
+			     struct kmem_cache *s,
+			     unsigned long tresh_jiffiesdiff)
+{
+	struct _avm_topalloc_list_ref tref;
+	int node;
+	struct kmem_cache_node *n;
+	unsigned int nr_slabs = 0;
+
+	memset(ptoplist, 0, sizeof(*ptoplist));
+	tref.act_jiffies = jiffies;
+	tref.ptoplist = ptoplist;
+	tref.tresh_jiffiesdiff = (signed long)tresh_jiffiesdiff > 0 ? (signed long)tresh_jiffiesdiff : 0;
+
+	flush_all(s); /* make sure all objects are on the lists (flush the per-cpu freelists) */
+	for_each_kmem_cache_node(s, node, n)
+		nr_slabs += node_nr_slabs(n);
+
+	if (nr_slabs) {
+		unsigned int slab_count __maybe_unused;
+
+		slab_count = kmem_cache_parse_all_pages(s, &tref, avm_topalloc_list_cb);
+		if (nr_slabs != slab_count) {
+			pr_debug("%s: %s slb_nr=%u versus counted slabs=%u\n", __func__, s->name, nr_slabs, slab_count);
+		}
+		sort_topalloclist(ptoplist);
+		ptoplist->sum_count = sum_toplist_entries(ptoplist) + ptoplist->ignored;
+	}
+}
+
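A possible consumer of cache_avm_topalloc_list(), e.g. from a proc or debugfs handler; dump_top_allocators() is hypothetical, the struct _slab_avm_topalloclist layout comes from the AVM header that declares it, and the casts below only guard against its exact field types:

	/* hypothetical debug helper, not part of the patch */
	static void dump_top_allocators(struct kmem_cache *s)
	{
		static struct _slab_avm_topalloclist toplist;	/* static: the entry[] array is large */
		unsigned int i;

		cache_avm_topalloc_list(&toplist, s, 0 /* no minimum age in jiffies */);

		pr_info("%s: %lu tracked allocations (%u ignored)\n",
			s->name, (unsigned long)toplist.sum_count,
			(unsigned int)toplist.ignored);
		for (i = 0; i < toplist.entries; i++)
			pr_info("  %6lu x %pS (accumulated age %llu jiffies)\n",
				(unsigned long)toplist.entry[i].count,
				(void *)toplist.entry[i].caller,
				(unsigned long long)toplist.entry[i].sum_time);
	}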
+struct _all_object_cb_ref {
+	void *ref;
+	int (*object_pointer)(void *ref, void *p);
+};
+
+/**
+ * callback for kmem_cache_parse_all_pages() - called from kmem_cache_list_all_objects()
+ */
+static int all_object_cb(void *ref, struct kmem_cache *s, struct page *page)
+{
+	struct _all_object_cb_ref *pcref = (struct _all_object_cb_ref *)ref;
+	void *addr, *p;
+	int ret = 0;
+
+	slab_lock(page);
+	addr = page_address(page);
+
+	for_each_object(p, s, addr, page->objects) {
+		if (on_freelist(s, page, p))
+			continue;
+
+		if (pcref->object_pointer(pcref->ref, p)) {
+			ret = 1;
+			break;
+		}
+	}
+	slab_unlock(page);
+	return ret;
+}
+
+/**
+ * @brief
+ * deliver all active object pointers of the kmem_cache pool s
+ *
+ * if the callback returns non-zero: stop listing
+ */
+void kmem_cache_list_all_objects(struct kmem_cache *s,
+				 void *ref,
+				 int (*object_pointer)(void *ref, void *p))
+{
+	struct _all_object_cb_ref cref;
+	int node;
+	unsigned int nr_slabs = 0;
+	struct kmem_cache_node *n = NULL;
+
+	cref.ref = ref;
+	cref.object_pointer = object_pointer;
+
+	flush_all(s); /* make sure all objects are on the lists (flush the per-cpu freelists) */
+	for_each_kmem_cache_node(s, node, n) {
+		nr_slabs += node_nr_slabs(n);
+	}
+	if (nr_slabs) {
+		unsigned int slab_count __maybe_unused;
+
+		slab_count = kmem_cache_parse_all_pages(s, &cref, all_object_cb);
+		if (nr_slabs != slab_count) {
+			pr_debug("%s: %s slb_nr=%u versus counted slabs=%u\n", __func__, s->name, nr_slabs, slab_count);
+		}
+	}
+}
+#endif /*--- #if defined(CONFIG_SLUB_AVM_ALLOC_LIST) ---*/
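Finally, a hedged usage sketch for kmem_cache_list_all_objects(); both helpers below are hypothetical and only show the callback contract (return non-zero to stop the walk):

	/* hypothetical example: count the live objects of a cache */
	static int count_object_cb(void *ref, void *p)
	{
		(*(unsigned long *)ref)++;
		return 0;		/* keep going; non-zero would stop the walk */
	}

	static unsigned long count_live_objects(struct kmem_cache *s)
	{
		unsigned long nr = 0;

		kmem_cache_list_all_objects(s, &nr, count_object_cb);
		return nr;
	}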