--- zzzz-none-000/linux-3.10.107/arch/x86/kernel/process.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/arch/x86/kernel/process.c	2021-02-04 17:41:59.000000000 +0000
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/random.h>
 #include <linux/user-return-notifier.h>
 #include <linux/dmi.h>
@@ -24,10 +24,13 @@
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/mwait.h>
+#include <asm/fpu/internal.h>
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
+#include <asm/tlbflush.h>
+#include <asm/mce.h>
+#include <asm/vm86.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -36,7 +39,26 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+	.x86_tss = {
+		.sp0 = TOP_OF_INIT_STACK,
+#ifdef CONFIG_X86_32
+		.ss0 = __KERNEL_DS,
+		.ss1 = __KERNEL_CS,
+		.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+#endif
+	},
+#ifdef CONFIG_X86_32
+	/*
+	 * Note that the .io_bitmap member must be extra-big. This is because
+	 * the CPU will access an additional byte beyond the end of the IO
+	 * permission bitmap. The extra byte must be all 1 bits, and must
+	 * be within the limit.
+	 */
+	.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
+#endif
+};
+EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -55,44 +77,18 @@
 EXPORT_SYMBOL_GPL(idle_notifier_unregister);
 #endif
 
-struct kmem_cache *task_xstate_cachep;
-EXPORT_SYMBOL_GPL(task_xstate_cachep);
-
 /*
  * this gets called so that we can store lazy state into memory and copy the
  * current task into the new thread.
 */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	int ret;
-
-	*dst = *src;
-	if (fpu_allocated(&src->thread.fpu)) {
-		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
-		ret = fpu_alloc(&dst->thread.fpu);
-		if (ret)
-			return ret;
-		fpu_copy(dst, src);
-	}
-	return 0;
-}
-
-void free_thread_xstate(struct task_struct *tsk)
-{
-	fpu_free(&tsk->thread.fpu);
-}
-
-void arch_release_task_struct(struct task_struct *tsk)
-{
-	free_thread_xstate(tsk);
-}
+	memcpy(dst, src, arch_task_struct_size);
+#ifdef CONFIG_VM86
+	dst->thread.vm86 = NULL;
+#endif
 
-void arch_task_cache_init(void)
-{
-	task_xstate_cachep =
-		kmem_cache_create("task_xstate", xstate_size,
-				  __alignof__(union thread_xstate),
-				  SLAB_PANIC | SLAB_NOTRACK, NULL);
+	return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 }
 
 /*
@@ -103,9 +99,10 @@
 	struct task_struct *me = current;
 	struct thread_struct *t = &me->thread;
 	unsigned long *bp = t->io_bitmap_ptr;
+	struct fpu *fpu = &t->fpu;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+		struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
 
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
@@ -118,7 +115,9 @@
 		kfree(bp);
 	}
 
-	drop_fpu(me);
+	free_vm86(t);
+
+	fpu__drop(fpu);
 }
 
 void flush_thread(void)
@@ -127,18 +126,13 @@
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_init_fpu(tsk);
-	/*
-	 * Free the FPU state for non xsave platforms. They get reallocated
-	 * lazily at the first use.
-	 */
-	if (!use_eager_fpu())
-		free_thread_xstate(tsk);
+
+	fpu__clear(&tsk->thread.fpu);
 }
 
 static void hard_disable_TSC(void)
 {
-	write_cr4(read_cr4() | X86_CR4_TSD);
+	cr4_set_bits(X86_CR4_TSD);
 }
 
 void disable_TSC(void)
@@ -155,7 +149,7 @@
 
 static void hard_enable_TSC(void)
 {
-	write_cr4(read_cr4() & ~X86_CR4_TSD);
+	cr4_clear_bits(X86_CR4_TSD);
 }
 
 static void enable_TSC(void)
@@ -298,10 +292,7 @@
  */
 void arch_cpu_idle(void)
 {
-	if (cpuidle_idle_call())
-		x86_idle();
-	else
-		local_irq_enable();
+	x86_idle();
 }
 
 /*
@@ -335,6 +326,7 @@
 	 */
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
 	for (;;)
 		halt();
@@ -376,14 +368,11 @@
 
 		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
 			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
-			/*
-			 * Force broadcast so ACPI can not interfere.
-			 */
-			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-					   &cpu);
+			/* Force broadcast so ACPI can not interfere. */
+			tick_broadcast_force();
 			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
 		}
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+		tick_broadcast_enter();
 
 		default_idle();
 
@@ -392,13 +381,61 @@
 		 * called with interrupts disabled.
 		 */
 		local_irq_disable();
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		tick_broadcast_exit();
 		local_irq_enable();
 	} else
 		default_idle();
 }
 
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT
+ * with interrupts enabled and no flags, which is backwards compatible with the
+ * original MWAIT implementation.
+ */
+static void mwait_idle(void)
+{
+	if (!current_set_polling_and_test()) {
+		trace_cpu_idle_rcuidle(1, smp_processor_id());
+		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+			smp_mb(); /* quirk */
+			clflush((void *)&current_thread_info()->flags);
+			smp_mb(); /* quirk */
+		}
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	} else {
+		local_irq_enable();
+	}
+	__current_clr_polling();
+}
+
+void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
@@ -411,6 +448,9 @@
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
 		x86_idle = amd_e400_idle;
+	} else if (prefer_mwait_c1_over_halt(c)) {
+		pr_info("using mwait in idle threads\n");
+		x86_idle = mwait_idle;
 	} else
 		x86_idle = default_idle;
 }
@@ -469,3 +509,58 @@
 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
 
+/*
+ * Called from fs/proc with a reference on @p to find the function
+ * which called into schedule(). This needs to be done carefully
+ * because the task might wake up and we might look at a stack
+ * changing under us.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long start, bottom, top, sp, fp, ip;
+	int count = 0;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	start = (unsigned long)task_stack_page(p);
+	if (!start)
+		return 0;
+
+	/*
+	 * Layout of the stack page:
+	 *
+	 * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
+	 * PADDING
+	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+	 * stack
+	 * ----------- bottom = start + sizeof(thread_info)
+	 * thread_info
+	 * ----------- start
+	 *
+	 * The tasks stack pointer points at the location where the
+	 * framepointer is stored. The data on the stack is:
+	 * ... IP FP ... IP FP
+	 *
+	 * We need to read FP and IP, so we need to adjust the upper
+	 * bound by another unsigned long.
+	 */
+	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+	top -= 2 * sizeof(unsigned long);
+	bottom = start + sizeof(struct thread_info);
+
+	sp = READ_ONCE(p->thread.sp);
+	if (sp < bottom || sp > top)
+		return 0;
+
+	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+	do {
+		if (fp < bottom || fp > top)
+			return 0;
+		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
+		if (!in_sched_functions(ip))
+			return ip;
+		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
+	} while (count++ < 16 && p->state != TASK_RUNNING);
+	return 0;
+}