--- zzzz-none-000/linux-3.10.107/arch/s390/kernel/smp.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/s390/kernel/smp.c 2021-02-04 17:41:59.000000000 +0000 @@ -31,7 +31,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -45,11 +47,11 @@ #include #include #include +#include #include "entry.h" enum { ec_schedule = 0, - ec_call_function, ec_call_function_single, ec_stop_cpu, }; @@ -59,21 +61,41 @@ CPU_STATE_CONFIGURED, }; +static DEFINE_PER_CPU(struct cpu *, cpu_device); + struct pcpu { - struct cpu cpu; struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ - unsigned long async_stack; /* async stack for the cpu */ - unsigned long panic_stack; /* panic stack for the cpu */ unsigned long ec_mask; /* bit mask for ec_xxx functions */ - int state; /* physical cpu state */ - int polarization; /* physical polarization */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ u16 address; /* physical cpu address */ }; -static u8 boot_cpu_type; -static u16 boot_cpu_address; +static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +unsigned int smp_cpu_mt_shift; +EXPORT_SYMBOL(smp_cpu_mt_shift); + +unsigned int smp_cpu_mtid; +EXPORT_SYMBOL(smp_cpu_mtid); + +static unsigned int smp_max_threads __initdata = -1U; + +static int __init early_nosmt(char *s) +{ + smp_max_threads = 1; + return 0; +} +early_param("nosmt", early_nosmt); + +static int __init early_smt(char *s) +{ + get_option(&s, &smp_max_threads); + return 0; +} +early_param("smt", early_smt); + /* * The smp_cpu_state_mutex must be held when changing the state or polarization * member of a pcpu data structure within the pcpu_devices arreay. @@ -83,22 +105,8 @@ /* * Signal processor helper functions. */ -static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) -{ - register unsigned int reg1 asm ("1") = parm; - int cc; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); - if (status && cc == 1) - *status = reg1; - return cc; -} - -static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm, + u32 *status) { int cc; @@ -146,7 +154,7 @@ /* * Find struct pcpu by cpu address. */ -static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) { int cpu; @@ -160,49 +168,50 @@ { int order; - set_bit(ec_bit, &pcpu->ec_mask); - order = pcpu_running(pcpu) ? - SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; pcpu_sigp_retry(pcpu, order, 0); } -static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) +#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) + +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { + unsigned long async_stack, panic_stack; struct _lowcore *lc; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct _lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - pcpu->panic_stack = __get_free_page(GFP_KERNEL); - if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + } else { + async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; + panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); - lc->async_stack = pcpu->async_stack + ASYNC_SIZE - - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->panic_stack = pcpu->panic_stack + PAGE_SIZE - - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; + lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; lc->cpu_nr = cpu; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); - if (!lc->extended_save_area_addr) - goto out; - } -#else + lc->spinlock_lockval = arch_spin_lockval(cpu); + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; -#endif lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; out: if (pcpu != &pcpu_devices[0]) { - free_page(pcpu->panic_stack); - free_pages(pcpu->async_stack, ASYNC_ORDER); + free_page(panic_stack); + free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } return -ENOMEM; @@ -214,21 +223,12 @@ { pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - struct _lowcore *lc = pcpu->lowcore; - - free_page((unsigned long) lc->extended_save_area_addr); - lc->extended_save_area_addr = 0; - } -#else vdso_free_per_cpu(pcpu->lowcore); -#endif - if (pcpu != &pcpu_devices[0]) { - free_page(pcpu->panic_stack); - free_pages(pcpu->async_stack, ASYNC_ORDER); - free_pages((unsigned long) pcpu->lowcore, LC_ORDER); - } + if (pcpu == &pcpu_devices[0]) + return; + free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); + free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -237,12 +237,15 @@ { struct _lowcore *lc = pcpu->lowcore; + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; - lc->ftrace_func = S390_lowcore.ftrace_func; lc->user_timer = lc->system_timer = lc->steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); save_access_regs((unsigned int *) lc->access_regs_save_area); @@ -259,6 +262,8 @@ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->thread_info = (unsigned long) task_thread_info(tsk); lc->current_task = (unsigned long) tsk; + lc->lpp = LPP_MAGIC; + lc->current_pid = tsk->pid; lc->user_timer = ti->user_timer; lc->system_timer = ti->system_timer; lc->steal_timer = 0; @@ -284,7 +289,7 @@ struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; unsigned long source_cpu = stap(); - __load_psw_mask(psw_kernel_bits); + __load_psw_mask(PSW_KERNEL_BITS); if (pcpu->address == source_cpu) func(data); /* should not return */ /* Stop target cpu (if func returns this stops the current cpu). */ @@ -306,6 +311,32 @@ } /* + * Enable additional logical cpus for multi-threading. + */ +static int pcpu_set_smt(unsigned int mtid) +{ + register unsigned long reg1 asm ("1") = (unsigned long) mtid; + int cc; + + if (smp_cpu_mtid == mtid) + return 0; + asm volatile( + " sigp %1,0,%2 # sigp set multi-threading\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) + : "cc"); + if (cc == 0) { + smp_cpu_mtid = mtid; + smp_cpu_mt_shift = 0; + while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) + smp_cpu_mt_shift++; + pcpu_devices[0].address = stap(); + } + return cc; +} + +/* * Call function on an online CPU. */ void smp_call_online_cpu(void (*func)(void *), void *data) @@ -326,7 +357,8 @@ void smp_call_ipl_cpu(void (*func)(void *), void *data) { pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->panic_stack + PAGE_SIZE); + pcpu_devices->lowcore->panic_stack - + PANIC_FRAME_OFFSET + PAGE_SIZE); } int smp_find_processor_id(u16 address) @@ -344,26 +376,23 @@ return pcpu_running(pcpu_devices + cpu); } -void smp_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - void smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) + if (MACHINE_HAS_DIAG9C) { + diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" : : "d" (pcpu_devices[cpu].address)); - else if (MACHINE_HAS_DIAG44) + } else if (MACHINE_HAS_DIAG44) { + diag_stat_inc_norecursion(DIAG_STAT_X044); asm volatile("diag 0,0,0x44"); + } } /* * Send cpus emergency shutdown signal. This gives the cpus the * opportunity to complete outstanding interrupts. */ -void smp_emergency_stop(cpumask_t *cpumask) +static void smp_emergency_stop(cpumask_t *cpumask) { u64 end; int cpu; @@ -396,7 +425,7 @@ int cpu; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); trace_hardirqs_off(); debug_set_critical(); @@ -416,15 +445,6 @@ } /* - * Stop the current cpu. - */ -void smp_stop_cpu(void) -{ - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); - for (;;) ; -} - -/* * This is the main routine where commands issued by other * cpus are handled. */ @@ -438,8 +458,6 @@ smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) scheduler_ipi(); - if (test_bit(ec_call_function, &bits)) - generic_smp_call_function_interrupt(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); } @@ -456,7 +474,7 @@ int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) @@ -464,22 +482,6 @@ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } -#ifndef CONFIG_64BIT -/* - * this function sends a 'purge tlb' signal to another CPU. - */ -static void smp_ptlb_callback(void *info) -{ - __tlb_flush_local(); -} - -void smp_ptlb_all(void) -{ - on_each_cpu(smp_ptlb_callback, NULL, 1); -} -EXPORT_SYMBOL(smp_ptlb_all); -#endif /* ! CONFIG_64BIT */ - /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing @@ -534,59 +536,127 @@ } EXPORT_SYMBOL(smp_ctl_clear_bit); -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) - -struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); +#ifdef CONFIG_CRASH_DUMP -static void __init smp_get_save_area(int cpu, u16 address) +static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext, + u16 address, int is_boot_cpu) { - void *lc = pcpu_devices[0].lowcore; - struct save_area *save_area; + void *lc = (void *)(unsigned long) store_prefix(); + unsigned long vx_sa; - if (is_kdump_kernel()) - return; - if (!OLDMEM_BASE && (address == boot_cpu_address || - ipl_info.type != IPL_TYPE_FCP_DUMP)) - return; - if (cpu >= NR_CPUS) { - pr_warning("CPU %i exceeds the maximum %i and is excluded " - "from the dump\n", cpu, NR_CPUS - 1); - return; - } - save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL); - if (!save_area) - panic("could not allocate memory for save area\n"); - zfcpdump_save_areas[cpu] = save_area; -#ifdef CONFIG_CRASH_DUMP - if (address == boot_cpu_address) { - /* Copy the registers of the boot cpu. */ - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + if (is_boot_cpu) { + /* Copy the registers of the boot CPU. */ + copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), SAVE_AREA_BASE - PAGE_SIZE, 0); + if (MACHINE_HAS_VX) + save_vx_regs_safe(sa_ext->vx_regs); return; } -#endif /* Get the registers of a non-boot cpu. */ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); + memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa)); + if (!MACHINE_HAS_VX) + return; + /* Get the VX registers */ + vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!vx_sa) + panic("could not allocate memory for VX save area\n"); + __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); + memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); + memblock_free(vx_sa, PAGE_SIZE); } int smp_store_status(int cpu) { + unsigned long vx_sa; struct pcpu *pcpu; pcpu = pcpu_devices + cpu; if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; + if (!MACHINE_HAS_VX) + return 0; + vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); + __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + vx_sa, NULL); return 0; } -#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ +#endif /* CONFIG_CRASH_DUMP */ -static inline void smp_get_save_area(int cpu, u16 address) { } +/* + * Collect CPU state of the previous, crashed system. + * There are four cases: + * 1) standard zfcp dump + * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The boot CPU state is located in + * the absolute lowcore of the memory stored in the HSA. The zcore code + * will allocate the save area and copy the boot CPU state from the HSA. + * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) + * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The firmware or the boot-loader + * stored the registers of the boot CPU in the absolute lowcore in the + * memory of the old system. + * 3) kdump and the old kernel did not store the CPU state, + * or stand-alone kdump for DASD + * condition: OLDMEM_BASE != NULL && !is_kdump_kernel() + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The kexec code or the boot-loader + * stored the registers of the boot CPU in the memory of the old system. + * 4) kdump and the old kernel stored the CPU state + * condition: OLDMEM_BASE != NULL && is_kdump_kernel() + * The state of all CPUs is stored in ELF sections in the memory of the + * old system. The ELF sections are picked up by the crash_dump code + * via elfcorehdr_addr. + */ +void __init smp_save_dump_cpus(void) +{ +#ifdef CONFIG_CRASH_DUMP + int addr, cpu, boot_cpu_addr, max_cpu_addr; + struct save_area_ext *sa_ext; + bool is_boot_cpu; -#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ + if (is_kdump_kernel()) + /* Previous system stored the CPU states. Nothing to do. */ + return; + if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) + /* No previous system present, normal boot. */ + return; + /* Set multi-threading state to the previous system. */ + pcpu_set_smt(sclp.mtid_prev); + max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; + for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) + continue; + cpu += 1; + } + dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8); + dump_save_areas.count = cpu; + boot_cpu_addr = stap(); + for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) + continue; + sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8); + dump_save_areas.areas[cpu] = sa_ext; + if (!sa_ext) + panic("could not allocate memory for save area\n"); + is_boot_cpu = (addr == boot_cpu_addr); + cpu += 1; + if (is_boot_cpu && !OLDMEM_BASE) + /* Skip boot CPU for standard zfcp dump. */ + continue; + /* Get state for this CPU. */ + __smp_store_cpu_state(sa_ext, addr, is_boot_cpu); + } + diag308_reset(); + pcpu_set_smt(0); +#endif /* CONFIG_CRASH_DUMP */ +} void smp_cpu_set_polarization(int cpu, int val) { @@ -598,20 +668,23 @@ return pcpu_devices[cpu].polarization; } -static struct sclp_cpu_info *smp_get_cpu_info(void) +static struct sclp_core_info *smp_get_core_info(void) { static int use_sigp_detection; - struct sclp_cpu_info *info; + struct sclp_core_info *info; int address; info = kzalloc(sizeof(*info), GFP_KERNEL); - if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + if (info && (use_sigp_detection || sclp_get_core_info(info))) { use_sigp_detection = 1; - for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + for (address = 0; + address < (SCLP_MAX_CORES << smp_cpu_mt_shift); + address += (1U << smp_cpu_mt_shift)) { if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == SIGP_CC_NOT_OPERATIONAL) continue; - info->cpu[info->configured].address = address; + info->core[info->configured].core_id = + address >> smp_cpu_mt_shift; info->configured++; } info->combined = info->configured; @@ -619,66 +692,87 @@ return info; } -static int __cpuinit smp_add_present_cpu(int cpu); +static int smp_add_present_cpu(int cpu); -static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info, - int sysfs_add) +static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) { struct pcpu *pcpu; cpumask_t avail; - int cpu, nr, i; + int cpu, nr, i, j; + u16 address; nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); cpu = cpumask_first(&avail); for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + if (sclp.has_core_type && info->core[i].type != boot_core_type) continue; - if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = info->cpu[i].address; - pcpu->state = (i >= info->configured) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); + address = info->core[i].core_id << smp_cpu_mt_shift; + for (j = 0; j <= smp_cpu_mtid; j++) { + if (pcpu_find_address(cpu_present_mask, address + j)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = address + j; + pcpu->state = + (cpu >= info->configured*(smp_cpu_mtid + 1)) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + if (cpu >= nr_cpu_ids) + break; + } } return nr; } static void __init smp_detect_cpus(void) { - unsigned int cpu, c_cpus, s_cpus; - struct sclp_cpu_info *info; + unsigned int cpu, mtid, c_cpus, s_cpus; + struct sclp_core_info *info; + u16 address; - info = smp_get_cpu_info(); + /* Get CPU information */ + info = smp_get_core_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); - if (info->has_cpu_type) { - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address != boot_cpu_address) - continue; - /* The boot cpu dictates the cpu type. */ - boot_cpu_type = info->cpu[cpu].type; - break; - } + + /* Find boot CPU type */ + if (sclp.has_core_type) { + address = stap(); + for (cpu = 0; cpu < info->combined; cpu++) + if (info->core[cpu].core_id == address) { + /* The boot cpu dictates the cpu type. */ + boot_core_type = info->core[cpu].type; + break; + } + if (cpu >= info->combined) + panic("Could not find boot CPU type"); } + + /* Set multi-threading state for the current system */ + mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; + mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; + pcpu_set_smt(mtid); + + /* Print number of CPUs */ c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) + if (sclp.has_core_type && + info->core[cpu].type != boot_core_type) continue; - if (cpu < info->configured) { - smp_get_save_area(c_cpus, info->cpu[cpu].address); - c_cpus++; - } else - s_cpus++; + if (cpu < info->configured) + c_cpus += smp_cpu_mtid + 1; + else + s_cpus += smp_cpu_mtid + 1; } pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); + + /* Add CPUs present at boot */ get_online_cpus(); __smp_rescan_cpus(info, 0); put_online_cpus(); @@ -688,7 +782,7 @@ /* * Activate a secondary processor. */ -static void __cpuinit smp_start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; @@ -697,11 +791,11 @@ S390_lowcore.restart_source = -1UL; restore_access_regs(S390_lowcore.access_regs_save_area); __ctl_load(S390_lowcore.cregs_save_area, 0, 15); - __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); cpu_init(); preempt_disable(); init_cpu_timer(); - init_cpu_vtimer(); + vtime_init(); pfault_init(); notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), true); @@ -711,15 +805,26 @@ } /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { struct pcpu *pcpu; - int rc; + int base, i, rc; pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + base = cpu - (cpu % (smp_cpu_mtid + 1)); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (base + i < nr_cpu_ids) + if (cpu_online(base + i)) + break; + } + /* + * If this is the first CPU of the core to get online + * do an initial CPU reset. + */ + if (i > smp_cpu_mtid && + pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -729,23 +834,20 @@ pcpu_prepare_secondary(pcpu, cpu); pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); - while (!cpu_online(cpu)) + /* Wait until cpu puts itself in the online & active maps */ + while (!cpu_online(cpu) || !cpu_active(cpu)) cpu_relax(); return 0; } -static int __init setup_possible_cpus(char *s) -{ - int max, cpu; +static unsigned int setup_possible_cpus __initdata; - if (kstrtoint(s, 0, &max) < 0) - return 0; - init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) - set_cpu_possible(cpu, true); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); return 0; } -early_param("possible_cpus", setup_possible_cpus); +early_param("possible_cpus", _setup_possible_cpus); #ifdef CONFIG_HOTPLUG_CPU @@ -764,6 +866,7 @@ cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ __ctl_load(cregs, 0, 15); + clear_cpu_flag(CIF_NOHZ_DELAY); return 0; } @@ -777,6 +880,9 @@ cpu_relax(); pcpu_free_lowcore(pcpu); atomic_dec(&init_mm.context.attach_count); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } void __noreturn cpu_die(void) @@ -788,13 +894,26 @@ #endif /* CONFIG_HOTPLUG_CPU */ +void __init smp_fill_possible_mask(void) +{ + unsigned int possible, sclp_max, cpu; + + sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; + sclp_max = min(smp_max_threads, sclp_max); + sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp_max); + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { /* request the 0x1201 emergency signal external interrupt */ - if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) + if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ - if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); smp_detect_cpus(); } @@ -803,14 +922,9 @@ { struct pcpu *pcpu = pcpu_devices; - boot_cpu_address = stap(); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = boot_cpu_address; + pcpu->address = stap(); pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); - pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE - + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE - + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); @@ -824,6 +938,7 @@ void __init smp_setup_processor_id(void) { S390_lowcore.cpu_nr = 0; + S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } /* @@ -854,7 +969,7 @@ const char *buf, size_t count) { struct pcpu *pcpu; - int cpu, val, rc; + int cpu, val, rc, i; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) @@ -866,29 +981,43 @@ rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ cpu = dev->id; - if (cpu_online(cpu) || cpu == 0) + cpu -= cpu % (smp_cpu_mtid + 1); + if (cpu == 0) goto out; + for (i = 0; i <= smp_cpu_mtid; i++) + if (cpu_online(cpu + i)) + goto out; pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: if (pcpu->state != CPU_STATE_CONFIGURED) break; - rc = sclp_cpu_deconfigure(pcpu->address); + rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_STANDBY; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; case 1: if (pcpu->state != CPU_STATE_STANDBY) break; - rc = sclp_cpu_configure(pcpu->address); + rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; default: @@ -921,42 +1050,6 @@ .attrs = cpu_common_attrs, }; -static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; - unsigned int sequence; - - do { - sequence = ACCESS_ONCE(idle->sequence); - idle_count = ACCESS_ONCE(idle->idle_count); - if (ACCESS_ONCE(idle->clock_idle_enter)) - idle_count++; - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return sprintf(buf, "%llu\n", idle_count); -} -static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); - -static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_time = ACCESS_ONCE(idle->idle_time); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; - return sprintf(buf, "%llu\n", idle_time >> 12); -} -static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); - static struct attribute *cpu_online_attrs[] = { &dev_attr_idle_count.attr, &dev_attr_idle_time_us.attr, @@ -967,12 +1060,11 @@ .attrs = cpu_online_attrs, }; -static int __cpuinit smp_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int smp_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &pcpu_devices[cpu].cpu; - struct device *s = &c->dev; + struct device *s = &per_cpu(cpu_device, cpu)->dev; int err = 0; switch (action & ~CPU_TASKS_FROZEN) { @@ -986,12 +1078,17 @@ return notifier_from_errno(err); } -static int __cpuinit smp_add_present_cpu(int cpu) +static int smp_add_present_cpu(int cpu) { - struct cpu *c = &pcpu_devices[cpu].cpu; - struct device *s = &c->dev; + struct device *s; + struct cpu *c; int rc; + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + per_cpu(cpu_device, cpu) = c; + s = &c->dev; c->hotpluggable = 1; rc = register_cpu(c, cpu); if (rc) @@ -1026,10 +1123,10 @@ int __ref smp_rescan_cpus(void) { - struct sclp_cpu_info *info; + struct sclp_core_info *info; int nr; - info = smp_get_cpu_info(); + info = smp_get_core_info(); if (!info) return -ENOMEM; get_online_cpus(); @@ -1058,19 +1155,24 @@ static int __init s390_smp_init(void) { - int cpu, rc; + int cpu, rc = 0; - hotcpu_notifier(smp_cpu_notify, 0); #ifdef CONFIG_HOTPLUG_CPU rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; #endif + cpu_notifier_register_begin(); for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) - return rc; + goto out; } - return 0; + + __hotcpu_notifier(smp_cpu_notify, 0); + +out: + cpu_notifier_register_done(); + return rc; } subsys_initcall(s390_smp_init);