--- zzzz-none-000/linux-3.10.107/arch/x86/xen/enlighten.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/x86/xen/enlighten.c 2021-02-04 17:41:59.000000000 +0000 @@ -33,7 +33,7 @@ #include #include -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #include #endif @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -70,9 +71,10 @@ #include #include #include +#include #include #include -#include +#include #ifdef CONFIG_ACPI #include @@ -86,6 +88,7 @@ #include "mmu.h" #include "smp.h" #include "multicalls.h" +#include "pmu.h" EXPORT_SYMBOL_GPL(hypercall_page); @@ -266,8 +269,9 @@ struct xen_extraversion extra; HYPERVISOR_xen_version(XENVER_extraversion, &extra); - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - pv_info.name); + pr_info("Booting paravirtualized kernel %son %s\n", + xen_feature(XENFEAT_auto_translated_physmap) ? + "with PVH extensions " : "", pv_info.name); printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); @@ -431,14 +435,13 @@ if (!xen_initial_domain()) cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); ax = 1; cx = 0; - xen_cpuid(&ax, &bx, &cx, &dx); + cpuid(1, &ax, &bx, &cx, &dx); xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) | @@ -779,8 +782,7 @@ addr = (unsigned long)xen_int3; else if (addr == (unsigned long)stack_segment) addr = (unsigned long)xen_stack_segment; - else if (addr == (unsigned long)double_fault || - addr == (unsigned long)nmi) { + else if (addr == (unsigned long)double_fault) { /* Don't need to handle these */ return 0; #ifdef CONFIG_X86_MCE @@ -791,7 +793,12 @@ */ ; #endif - } else { + } else if (addr == (unsigned long)nmi) + /* + * Use the native version as well. + */ + ; + else { /* Some other trap using IST? */ if (WARN_ON(val->ist != 0)) return 0; @@ -861,7 +868,7 @@ void xen_copy_trap_info(struct trap_info *traps) { - const struct desc_ptr *desc = &__get_cpu_var(idt_desc); + const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); xen_convert_trap_info(desc, traps); } @@ -878,7 +885,7 @@ spin_lock(&lock); - __get_cpu_var(idt_desc) = *desc; + memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); xen_convert_trap_info(desc, traps); @@ -950,6 +957,7 @@ mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); + tss->x86_tss.sp0 = thread->sp0; } void xen_set_iopl_mask(unsigned mask) @@ -965,92 +973,6 @@ { } -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_set_apic_id(unsigned int x) -{ - WARN_ON(1); - return x; -} -static unsigned int xen_get_apic_id(unsigned long x) -{ - return ((x)>>24) & 0xFFu; -} -static u32 xen_apic_read(u32 reg) -{ - struct xen_platform_op op = { - .cmd = XENPF_get_cpuinfo, - .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, - }; - int ret = 0; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. */ - if (!xen_initial_domain() || smp_processor_id()) - return 0; - - if (reg == APIC_LVR) - return 0x10; - - if (reg != APIC_ID) - return 0; - - ret = HYPERVISOR_dom0_op(&op); - if (ret) - return 0; - - return op.u.pcpu_info.apic_id << 24; -} - -static void xen_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 xen_apic_icr_read(void) -{ - return 0; -} - -static void xen_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void xen_apic_wait_icr_idle(void) -{ - return; -} - -static u32 xen_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static void set_xen_basic_apic_ops(void) -{ - apic->read = xen_apic_read; - apic->write = xen_apic_write; - apic->icr_read = xen_apic_icr_read; - apic->icr_write = xen_apic_icr_write; - apic->wait_icr_idle = xen_apic_wait_icr_idle; - apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; - apic->set_apic_id = xen_set_apic_id; - apic->get_apic_id = xen_get_apic_id; - -#ifdef CONFIG_SMP - apic->send_IPI_allbutself = xen_send_IPI_allbutself; - apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; - apic->send_IPI_mask = xen_send_IPI_mask; - apic->send_IPI_all = xen_send_IPI_all; - apic->send_IPI_self = xen_send_IPI_self; -#endif -} - -#endif - static void xen_clts(void) { struct multicall_space mcs; @@ -1093,8 +1015,7 @@ static void xen_write_cr4(unsigned long cr4) { - cr4 &= ~X86_CR4_PGE; - cr4 &= ~X86_CR4_PSE; + cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE); native_write_cr4(cr4); } @@ -1108,6 +1029,26 @@ BUG_ON(val); } #endif + +static u64 xen_read_msr_safe(unsigned int msr, int *err) +{ + u64 val; + + if (pmu_msr_read(msr, &val, err)) + return val; + + val = native_read_msr_safe(msr, err); + switch (msr) { + case MSR_IA32_APICBASE: +#ifdef CONFIG_X86_X2APIC + if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31)))) +#endif + val &= ~X2APIC_ENABLE; + break; + } + return val; +} + static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -1142,13 +1083,9 @@ Xen console noise. */ break; - case MSR_IA32_CR_PAT: - if (smp_processor_id() == 0) - xen_set_pat(((u64)high << 32) | low); - break; - default: - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, &ret)) + ret = native_write_msr_safe(msr, low, high); } return ret; @@ -1183,8 +1120,9 @@ xen_vcpu_setup(cpu); /* xen_vcpu_setup managed to place the vcpu_info within the - percpu area for all cpus, so make use of it */ - if (have_vcpu_info_placement) { + * percpu area for all cpus, so make use of it. Note that for + * PVH we want to use native IRQ mechanism. */ + if (have_vcpu_info_placement && !xen_pvh_domain()) { pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); @@ -1253,7 +1191,7 @@ #ifdef CONFIG_X86_64 .extra_user_64bit_cs = FLAT_USER_CS64, #endif - + .features = 0, .name = "Xen", }; @@ -1283,19 +1221,17 @@ .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, + .read_msr = xen_read_msr_safe, .write_msr = xen_write_msr_safe, - .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, - - .read_tscp = native_read_tscp, + .read_pmc = xen_read_pmc, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, @@ -1337,6 +1273,10 @@ static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; + int cpu; + + for_each_online_cpu(cpu) + xen_pmu_finish(cpu); if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); @@ -1378,6 +1318,7 @@ static struct notifier_block xen_panic_block = { .notifier_call= xen_panic_event, + .priority = INT_MIN }; int xen_panic_handler_init(void) @@ -1395,6 +1336,21 @@ .emergency_restart = xen_emergency_restart, }; +static unsigned char xen_get_nmi_reason(void) +{ + unsigned char reason = 0; + + /* Construct a value which looks like it came from port 0x61. */ + if (test_bit(_XEN_NMIREASON_io_error, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_IOCHK; + if (test_bit(_XEN_NMIREASON_pci_serr, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_SERR; + + return reason; +} + static void __init xen_boot_params_init_edd(void) { #if IS_ENABLED(CONFIG_EDD) @@ -1448,9 +1404,49 @@ * Set up the GDT and segment registers for -fstack-protector. Until * we do this, we have to be careful not to call any stack-protected * function, which is most of the kernel. + * + * Note, that it is __ref because the only caller of this after init + * is PVH which is not going to use xen_load_gdt_boot or other + * __init functions. */ -static void __init xen_setup_stackprotector(void) +static void __ref xen_setup_gdt(int cpu) { + if (xen_feature(XENFEAT_auto_translated_physmap)) { +#ifdef CONFIG_X86_64 + unsigned long dummy; + + load_percpu_segment(cpu); /* We need to access per-cpu area */ + switch_to_new_gdt(cpu); /* GDT and GS set */ + + /* We are switching of the Xen provided GDT to our HVM mode + * GDT. The new GDT has __KERNEL_CS with CS.L = 1 + * and we are jumping to reload it. + */ + asm volatile ("pushq %0\n" + "leaq 1f(%%rip),%0\n" + "pushq %0\n" + "lretq\n" + "1:\n" + : "=&r" (dummy) : "0" (__KERNEL_CS)); + + /* + * While not needed, we also set the %es, %ds, and %fs + * to zero. We don't care about %ss as it is NULL. + * Strictly speaking this is not needed as Xen zeros those + * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE) + * + * Linux zeros them in cpu_init() and in secondary_startup_64 + * (for BSP). + */ + loadsegment(es, 0); + loadsegment(ds, 0); + loadsegment(fs, 0); +#else + /* PVH: TODO Implement. */ + BUG(); +#endif + return; /* PVH does not need any PV GDT ops. */ + } pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; pv_cpu_ops.load_gdt = xen_load_gdt_boot; @@ -1461,10 +1457,67 @@ pv_cpu_ops.load_gdt = xen_load_gdt; } +#ifdef CONFIG_XEN_PVH +/* + * A PV guest starts with default flags that are not set for PVH, set them + * here asap. + */ +static void xen_pvh_set_cr_flags(int cpu) +{ + + /* Some of these are setup in 'secondary_startup_64'. The others: + * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests + * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ + write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); + + if (!cpu) + return; + /* + * For BSP, PSE PGE are set in probe_page_size_mask(), for APs + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). + */ + if (cpu_has_pse) + cr4_set_bits_and_update_boot(X86_CR4_PSE); + + if (cpu_has_pge) + cr4_set_bits_and_update_boot(X86_CR4_PGE); +} + +/* + * Note, that it is ref - because the only caller of this after init + * is PVH which is not going to use xen_load_gdt_boot or other + * __init functions. + */ +void __ref xen_pvh_secondary_vcpu_init(int cpu) +{ + xen_setup_gdt(cpu); + xen_pvh_set_cr_flags(cpu); +} + +static void __init xen_pvh_early_guest_init(void) +{ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + return; + + if (!xen_feature(XENFEAT_hvm_callback_vector)) + return; + + xen_have_vector_callback = 1; + + xen_pvh_early_cpu_init(0, false); + xen_pvh_set_cr_flags(0); + +#ifdef CONFIG_X86_32 + BUG(); /* PVH: Implement proper support. */ +#endif +} +#endif /* CONFIG_XEN_PVH */ + /* First C function to be called on Xen boot */ -asmlinkage void __init xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; + unsigned long initrd_start = 0; int rc; if (!xen_start_info) @@ -1472,15 +1525,28 @@ xen_domain_type = XEN_PV_DOMAIN; + xen_setup_features(); +#ifdef CONFIG_XEN_PVH + xen_pvh_early_guest_init(); +#endif xen_setup_machphys_mapping(); /* Install Xen paravirt ops */ pv_info = xen_info; + if (xen_initial_domain()) + pv_info.features |= PV_SUPPORTED_RTC; pv_init_ops = xen_init_ops; - pv_cpu_ops = xen_cpu_ops; pv_apic_ops = xen_apic_ops; + if (!xen_pvh_domain()) { + pv_cpu_ops = xen_cpu_ops; + + x86_platform.get_nmi_reason = xen_get_nmi_reason; + } - x86_init.resources.memory_setup = xen_memory_setup; + if (xen_feature(XENFEAT_auto_translated_physmap)) + x86_init.resources.memory_setup = xen_auto_xlated_memory_setup; + else + x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; @@ -1494,12 +1560,6 @@ /* Prevent unwanted bits from being set in PTEs. */ __supported_pte_mask &= ~_PAGE_GLOBAL; -#if 0 - if (!xen_initial_domain()) -#endif - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - - __supported_pte_mask |= _PAGE_IOMAP; /* * Prevent page tables from being allocated in highmem, even @@ -1510,17 +1570,14 @@ /* Work out if we support NX */ x86_configure_nx(); - xen_setup_features(); - /* Get mfn list */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - xen_build_dynamic_phys_to_machine(); + xen_build_dynamic_phys_to_machine(); /* * Set up kernel GDT and segment registers, mainly so that * -fstack-protector code can be executed. */ - xen_setup_stackprotector(); + xen_setup_gdt(0); xen_init_irq_ops(); xen_init_cpuid_mask(); @@ -1529,7 +1586,7 @@ /* * set up the basic apic ops. */ - set_xen_basic_apic_ops(); + xen_init_apic(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { @@ -1556,14 +1613,6 @@ */ acpi_numa = -1; #endif -#ifdef CONFIG_X86_PAT - /* - * For right now disable the PAT. We should remove this once - * git commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1 - * (xen/pat: Disable PAT support for now) is reverted. - */ - pat_enabled = 0; -#endif /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1572,10 +1621,9 @@ early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); - - /* Allocate and initialize top and mid mfn levels for p2m structure */ - xen_build_mfn_list_list(); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, + xen_start_info->nr_pages); + xen_reserve_special_pages(); /* keep using Xen gdt for now; no urgent need to change it */ @@ -1589,27 +1637,37 @@ /* set the limit of our address space */ xen_reserve_top(); - /* We used to do this in xen_arch_setup, but that is too late on AMD - * were early_cpu_init (run before ->arch_setup()) calls early_amd_init - * which pokes 0xcf8 port. - */ - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - xen_raw_printk("physdev_op failed %d\n", rc); + /* PVH: runs at default kernel iopl of 0 */ + if (!xen_pvh_domain()) { + /* + * We used to do this in xen_arch_setup, but that is too late + * on AMD were early_cpu_init (run before ->arch_setup()) calls + * early_amd_init which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); + } #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); - new_cpu_data.hard_math = 1; + set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); new_cpu_data.wp_works_ok = 1; new_cpu_data.x86_capability[0] = cpuid_edx(1); #endif + if (xen_start_info->mod_start) { + if (xen_start_info->flags & SIF_MOD_START_PFN) + initrd_start = PFN_PHYS(xen_start_info->mod_start); + else + initrd_start = __pa(xen_start_info->mod_start); + } + /* Poke various useful things into boot_params */ boot_params.hdr.type_of_loader = (9 << 4) | 0; - boot_params.hdr.ramdisk_image = xen_start_info->mod_start - ? __pa(xen_start_info->mod_start) : 0; + boot_params.hdr.ramdisk_image = initrd_start; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); @@ -1636,8 +1694,6 @@ if (HYPERVISOR_dom0_op(&op) == 0) boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; - xen_init_apic(); - /* Make sure ACS will be enabled */ pci_request_acs(); @@ -1657,10 +1713,13 @@ xen_setup_runstate_info(0); + xen_efi_init(); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); #else + cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ x86_64_start_reservations((char *)__pa_symbol(&boot_params)); #endif } @@ -1725,15 +1784,14 @@ xen_domain_type = XEN_HVM_DOMAIN; } -static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: xen_vcpu_setup(cpu); if (xen_have_vector_callback) { - xen_init_lock_cpu(cpu); if (xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); } @@ -1744,11 +1802,11 @@ return NOTIFY_OK; } -static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { +static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void xen_hvm_shutdown(void) { native_machine_shutdown(); @@ -1765,10 +1823,15 @@ static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); + xen_panic_handler_init(); + if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); @@ -1777,25 +1840,33 @@ x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE machine_ops.shutdown = xen_hvm_shutdown; machine_ops.crash_shutdown = xen_hvm_crash_shutdown; #endif } +#endif -static bool __init xen_hvm_platform(void) +static bool xen_nopv = false; +static __init int xen_parse_nopv(char *arg) { - if (xen_pv_domain()) - return false; + xen_nopv = true; + return 0; +} +early_param("xen_nopv", xen_parse_nopv); - if (!xen_cpuid_base()) - return false; +static uint32_t __init xen_platform(void) +{ + if (xen_nopv) + return 0; - return true; + return xen_cpuid_base(); } bool xen_hvm_need_lapic(void) { + if (xen_nopv) + return false; if (xen_pv_domain()) return false; if (!xen_hvm_domain()) @@ -1806,11 +1877,35 @@ } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) { + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + set_cpu_cap(c, X86_FEATURE_XENPV); + } +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, +#endif .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); +EXPORT_SYMBOL(x86_hyper_xen); + +#ifdef CONFIG_HOTPLUG_CPU +void xen_arch_register_cpu(int num) +{ + arch_register_cpu(num); +} +EXPORT_SYMBOL(xen_arch_register_cpu); + +void xen_arch_unregister_cpu(int num) +{ + arch_unregister_cpu(num); +} +EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif