--- zzzz-none-000/linux-3.10.107/arch/powerpc/platforms/powernv/setup.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/powerpc/platforms/powernv/setup.c 2021-02-04 17:41:59.000000000 +0000 @@ -23,19 +23,25 @@ #include #include #include +#include #include #include +#include +#include #include #include #include -#include #include +#include +#include #include "powernv.h" static void __init pnv_setup_arch(void) { + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* Initialize SMP */ pnv_smp_init(); @@ -54,6 +60,12 @@ static void __init pnv_init_early(void) { + /* + * Initialize the LPC bus now so that legacy serial + * ports can be found on it + */ + opal_lpc_init(); + #ifdef CONFIG_HVC_OPAL if (firmware_has_feature(FW_FEATURE_OPAL)) hvc_opal_init_early(); @@ -89,10 +101,33 @@ of_node_put(root); } +static void pnv_prepare_going_down(void) +{ + /* + * Disable all notifiers from OPAL, we can't + * service interrupts anymore anyway + */ + opal_event_shutdown(); + + /* Soft disable interrupts */ + local_irq_disable(); + + /* + * Return secondary CPUs to firwmare if a flash update + * is pending otherwise we will get all sort of error + * messages about CPU being stuck etc.. This will also + * have the side effect of hard disabling interrupts so + * past this point, the kernel is effectively dead. + */ + opal_flash_term_callback(); +} + static void __noreturn pnv_restart(char *cmd) { long rc = OPAL_BUSY; + pnv_prepare_going_down(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_reboot(); if (rc == OPAL_BUSY_EVENT) @@ -108,6 +143,8 @@ { long rc = OPAL_BUSY; + pnv_prepare_going_down(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_power_down(0); if (rc == OPAL_BUSY_EVENT) @@ -133,44 +170,106 @@ /* Let the PCI code clear up IODA tables */ pnv_pci_shutdown(); - /* And unregister all OPAL interrupts so they don't fire - * up while we kexec + /* + * Stop OPAL activity: Unregister all OPAL interrupts so they + * don't fire up while we kexec and make sure all potentially + * DMA'ing ops are complete (such as dump retrieval). */ opal_shutdown(); } #ifdef CONFIG_KEXEC +static void pnv_kexec_wait_secondaries_down(void) +{ + int my_cpu, i, notified = -1; + + my_cpu = get_cpu(); + + for_each_online_cpu(i) { + uint8_t status; + int64_t rc, timeout = 1000; + + if (i == my_cpu) + continue; + + for (;;) { + rc = opal_query_cpu_status(get_hard_smp_processor_id(i), + &status); + if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) + break; + barrier(); + if (i != notified) { + printk(KERN_INFO "kexec: waiting for cpu %d " + "(physical %d) to enter OPAL\n", + i, paca[i].hw_cpu_id); + notified = i; + } + + /* + * On crash secondaries might be unreachable or hung, + * so timeout if we've waited too long + * */ + mdelay(1); + if (timeout-- == 0) { + printk(KERN_ERR "kexec: timed out waiting for " + "cpu %d (physical %d) to enter OPAL\n", + i, paca[i].hw_cpu_id); + break; + } + } + } +} + static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { xics_kexec_teardown_cpu(secondary); + + /* On OPAL v3, we return all CPUs to firmware */ + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + return; + + if (secondary) { + /* Return secondary CPUs to firmware on OPAL v3 */ + mb(); + get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; + mb(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); + } else { + /* Primary waits for the secondaries to have reached OPAL */ + pnv_kexec_wait_secondaries_down(); + + /* + * We might be running as little-endian - now that interrupts + * are disabled, reset the HILE bit to big-endian so we don't + * take interrupts in the wrong endian later + */ + opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); + } } #endif /* CONFIG_KEXEC */ +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +static unsigned long pnv_memory_block_size(void) +{ + return 256UL * 1024 * 1024; +} +#endif + static void __init pnv_setup_machdep_opal(void) { ppc_md.get_boot_time = opal_get_boot_time; - ppc_md.get_rtc_time = opal_get_rtc_time; - ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; - ppc_md.power_off = pnv_power_off; + pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; + ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; + ppc_md.hmi_exception_early = opal_hmi_exception_early; + ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } -#ifdef CONFIG_PPC_POWERNV_RTAS -static void __init pnv_setup_machdep_rtas(void) -{ - if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { - ppc_md.get_boot_time = rtas_get_boot_time; - ppc_md.get_rtc_time = rtas_get_rtc_time; - ppc_md.set_rtc_time = rtas_set_rtc_time; - } - ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; - ppc_md.halt = rtas_halt; -} -#endif /* CONFIG_PPC_POWERNV_RTAS */ - static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); @@ -182,16 +281,31 @@ if (firmware_has_feature(FW_FEATURE_OPAL)) pnv_setup_machdep_opal(); -#ifdef CONFIG_PPC_POWERNV_RTAS - else if (rtas.base) - pnv_setup_machdep_rtas(); -#endif /* CONFIG_PPC_POWERNV_RTAS */ pr_debug("PowerNV detected !\n"); return 1; } +/* + * Returns the cpu frequency for 'cpu' in Hz. This is used by + * /proc/cpuinfo + */ +static unsigned long pnv_get_proc_freq(unsigned int cpu) +{ + unsigned long ret_freq; + + ret_freq = cpufreq_quick_get(cpu) * 1000ul; + + /* + * If the backend cpufreq driver does not exist, + * then fallback to old way of reporting the clockrate. + */ + if (!ret_freq) + ret_freq = ppc_proc_freq; + return ret_freq; +} + define_machine(powernv) { .name = "PowerNV", .probe = pnv_probe, @@ -199,6 +313,7 @@ .setup_arch = pnv_setup_arch, .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, + .get_proc_freq = pnv_get_proc_freq, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, .power_save = power7_idle, @@ -206,4 +321,7 @@ #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE + .memory_block_size = pnv_memory_block_size, +#endif };