--- zzzz-none-000/linux-3.10.107/arch/arc/kernel/smp.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/arch/arc/kernel/smp.c	2021-02-04 17:41:59.000000000 +0000
@@ -12,32 +12,26 @@
  *    -- Initial Write (Borrowed heavily from ARM)
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/profile.h>
-#include <linux/errno.h>
-#include <linux/err.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
-#include <linux/smp.h>
 #include <linux/irq.h>
-#include <linux/delay.h>
 #include <linux/atomic.h>
-#include <linux/percpu.h>
 #include <linux/cpumask.h>
-#include <linux/spinlock_types.h>
 #include <linux/reboot.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
 
+#ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+#endif
 
-struct plat_smp_ops  plat_smp_ops;
+struct plat_smp_ops  __weak plat_smp_ops;
 
 /* XXX: per cpu ? Only needed once in early seconday boot */
 struct task_struct *secondary_idle_tsk;
@@ -48,8 +42,13 @@
 }
 
 /*
- * Initialise the CPU possible map early - this describes the CPUs
- * which may be present or become present in the system.
+ * Called from setup_arch() before calling setup_processor()
+ *
+ * - Initialise the CPU possible map early - this describes the CPUs
+ *   which may be present or become present in the system.
+ * - Call early smp init hook. This can initialize a specific multi-core
+ *   IP which is, say, common to several platforms (hence not part of
+ *   platform specific init_early() hook)
  */
 void __init smp_init_cpus(void)
 {
@@ -57,6 +56,9 @@
 
 	for (i = 0; i < NR_CPUS; i++)
 		set_cpu_possible(i, true);
+
+	if (plat_smp_ops.init_early_smp)
+		plat_smp_ops.init_early_smp();
 }
 
 /* called from init ( ) =>  process 1 */
@@ -78,38 +80,32 @@
 }
 
 /*
- * After power-up, a non Master CPU needs to wait for Master to kick start it
- *
- * The default implementation halts
- *
- * This relies on platform specific support allowing Master to directly set
- * this CPU's PC (to be @first_lines_of_secondary() and kick start it.
- *
- * In lack of such h/w assist, platforms can override this function
- *   - make this function busy-spin on a token, eventually set by Master
- *     (from arc_platform_smp_wakeup_cpu())
- *   - Once token is available, jump to @first_lines_of_secondary
- *     (using inline asm).
- *
- * Alert: can NOT use stack here as it has not been determined/setup for CPU.
- *        If it turns out to be elaborate, it's better to code it in assembly
- *
+ * Default smp boot helper for Run-on-reset case where all cores start off
+ * together. Non-masters need to wait for Master to start running.
+ * This is implemented using a flag in memory, which Non-masters spin-wait on.
+ * Master sets it to cpu-id of core to "ungate" it.
  */
-void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu)
+static volatile int wake_flag;
+
+static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
 {
-	/*
-	 * As a hack for debugging - since debugger will single-step over the
-	 * FLAG insn - wrap the halt itself it in a self loop
-	 */
-	__asm__ __volatile__(
-	"1:		\n"
-	"	flag 1	\n"
-	"	b 1b	\n");
+	BUG_ON(cpu == 0);
+	wake_flag = cpu;
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+	while (wake_flag != cpu)
+		;
+
+	wake_flag = 0;
+	__asm__ __volatile__("j @first_lines_of_secondary	\n");
 }
 
+
 const char *arc_platform_smp_cpuinfo(void)
 {
-	return plat_smp_ops.info;
+	return plat_smp_ops.info ? : "";
 }
 
 /*
@@ -117,7 +113,7 @@
  * Called from asm stub in head.S
  * "current"/R25 already setup by low level boot code
  */
-void __cpuinit start_kernel_secondary(void)
+void start_kernel_secondary(void)
 {
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu = smp_processor_id();
@@ -128,16 +124,21 @@
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 
 	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
-	if (machine_desc->init_smp)
-		machine_desc->init_smp(smp_processor_id());
+	/* Some SMP H/w setup - for each cpu */
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(cpu);
+
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(cpu);
 
-	arc_local_timer_setup(cpu);
+	arc_local_timer_setup();
 
 	local_irq_enable();
 	preempt_disable();
@@ -154,7 +155,7 @@
  *
  * Essential requirements being where to run from (PC) and stack (SP)
 */
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
+int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	unsigned long wait_till;
 
@@ -166,6 +167,8 @@
 	if (plat_smp_ops.cpu_kick)
 		plat_smp_ops.cpu_kick(cpu,
 				(unsigned long)first_lines_of_secondary);
+	else
+		arc_default_smp_cpu_kick(cpu, (unsigned long)NULL);
 
 	/* wait for 1 sec after kicking the secondary */
 	wait_till = jiffies + HZ;
@@ -187,7 +190,7 @@
 /*
  * not supported here
  */
-int __init setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
 {
 	return -EINVAL;
 }
@@ -196,52 +199,65 @@
 /*              Inter Processor Interrupt Handling                           */
 /*****************************************************************************/
 
-/*
- * structures for inter-processor calls
- * A Collection of single bit ipi messages
- *
- */
-
-/*
- * TODO_rajesh investigate tlb message types.
- * IPI Timer not needed because each ARC has an individual Interrupting Timer
- */
 enum ipi_msg_type {
-	IPI_NOP = 0,
+	IPI_EMPTY = 0,
 	IPI_RESCHEDULE = 1,
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
-	IPI_CPU_STOP
+	IPI_CPU_STOP,
 };
 
-struct ipi_data {
-	unsigned long bits;
-};
+/*
+ * In arches with an IRQ for each msg type (above), receiver can use IRQ-id
+ * to figure out what msg was sent. For those which don't (ARC has dedicated
+ * IPI IRQ), the msg-type needs to be conveyed via per-cpu data
+ */
 
-static DEFINE_PER_CPU(struct ipi_data, ipi_data);
+static DEFINE_PER_CPU(unsigned long, ipi_data);
 
-static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg)
+static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
 {
+	unsigned long __percpu *ipi_data_ptr = per_cpu_ptr(&ipi_data, cpu);
+	unsigned long old, new;
 	unsigned long flags;
-	unsigned int cpu;
+
+	pr_debug("%d Sending msg [%d] to %d\n", smp_processor_id(), msg, cpu);
 
 	local_irq_save(flags);
 
-	for_each_cpu(cpu, callmap) {
-		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
-		set_bit(msg, &ipi->bits);
-	}
+	/*
+	 * Atomically write new msg bit (in case others are writing too),
+	 * and read back old value
+	 */
+	do {
+		new = old = ACCESS_ONCE(*ipi_data_ptr);
+		new |= 1U << msg;
+	} while (cmpxchg(ipi_data_ptr, old, new) != old);
 
-	/* Call the platform specific cross-CPU call function  */
-	if (plat_smp_ops.ipi_send)
-		plat_smp_ops.ipi_send((void *)callmap);
+	/*
+	 * Call the platform specific IPI kick function, but avoid if possible:
+	 * Only do so if there's no pending msg from other concurrent sender(s).
+	 * Otherwise, receiver will see this msg as well when it takes the
+	 * IPI corresponding to that msg. This is true, even if it is already in
+	 * IPI handler, because non-zero @old means it has not yet dequeued the
+	 * msg(s), so @new msg can be a free-loader
+	 */
+	if (plat_smp_ops.ipi_send && !old)
+		plat_smp_ops.ipi_send(cpu);
 
 	local_irq_restore(flags);
 }
 
+static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, callmap)
+		ipi_send_msg_one(cpu, msg);
+}
+
 void smp_send_reschedule(int cpu)
 {
-	ipi_send_msg(cpumask_of(cpu), IPI_RESCHEDULE);
+	ipi_send_msg_one(cpu, IPI_RESCHEDULE);
 }
 
 void smp_send_stop(void)
@@ -254,7 +270,7 @@
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	ipi_send_msg_one(cpu, IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -265,37 +281,33 @@
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(void)
 {
 	machine_halt();
 }
 
-static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu)
+static inline int __do_IPI(unsigned long msg)
 {
-	unsigned long msg = 0;
+	int rc = 0;
 
-	do {
-		msg = find_next_bit(ops, BITS_PER_LONG, msg+1);
+	switch (msg) {
+	case IPI_RESCHEDULE:
+		scheduler_ipi();
+		break;
 
-		switch (msg) {
-		case IPI_RESCHEDULE:
-			scheduler_ipi();
-			break;
+	case IPI_CALL_FUNC:
+		generic_smp_call_function_interrupt();
+		break;
 
-		case IPI_CALL_FUNC:
-			generic_smp_call_function_interrupt();
-			break;
-
-		case IPI_CALL_FUNC_SINGLE:
-			generic_smp_call_function_single_interrupt();
-			break;
+	case IPI_CPU_STOP:
+		ipi_cpu_stop();
+		break;
 
-		case IPI_CPU_STOP:
-			ipi_cpu_stop(cpu);
-			break;
-		}
-	} while (msg < BITS_PER_LONG);
+	default:
+		rc = 1;
+	}
 
+	return rc;
 }
 
 /*
@@ -304,19 +316,33 @@
  */
 irqreturn_t do_IPI(int irq, void *dev_id)
 {
-	int cpu = smp_processor_id();
-	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
-	unsigned long ops;
+	unsigned long pending;
+	unsigned long __maybe_unused copy;
+
+	pr_debug("IPI [%ld] received on cpu %d\n",
+		 *this_cpu_ptr(&ipi_data), smp_processor_id());
 
 	if (plat_smp_ops.ipi_clear)
-		plat_smp_ops.ipi_clear(cpu, irq);
+		plat_smp_ops.ipi_clear(irq);
 
 	/*
-	 * XXX: is this loop really needed
-	 * And do we need to move ipi_clean inside
+	 * "dequeue" the msg for this IPI (and any msg piggybacked from elided
+	 * IPIs: see ipi_send_msg_one() above). Nothing pending => IPI_EMPTY
 	 */
-	while ((ops = xchg(&ipi->bits, 0)) != 0)
-		__do_IPI(&ops, ipi, cpu);
+	copy = pending = xchg(this_cpu_ptr(&ipi_data), 0);
+
+	do {
+		unsigned long msg = pending ? __ffs(pending) : IPI_EMPTY;
+		int rc;
+
+		rc = __do_IPI(msg);
+#ifdef CONFIG_ARC_IPI_DBG
+		/* IPI received but no valid @msg (or nothing was pending) */
+		if (rc)
+			pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
+#endif
+		pending &= ~(1UL << msg);
+	} while (pending);
 
 	return IRQ_HANDLED;
 }
@@ -325,8 +351,12 @@
  * API called by platform code to hookup arch-common ISR to their IPI IRQ
  */
 static DEFINE_PER_CPU(int, ipi_dev);
+
 int smp_ipi_irq_setup(int cpu, int irq)
 {
-	int *dev_id = &per_cpu(ipi_dev, smp_processor_id());
-	return request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev_id);
+	int *dev = per_cpu_ptr(&ipi_dev, cpu);
+
+	arc_request_percpu_irq(irq, cpu, do_IPI, "IPI Interrupt", dev);
+
+	return 0;
 }