--- zzzz-none-000/linux-2.4.17/arch/mips/kernel/smp.c	2001-12-21 17:41:53.000000000 +0000
+++ sangam-fb-322/linux-2.4.17/arch/mips/kernel/smp.c	2004-11-24 13:22:35.000000000 +0000
@@ -1,11 +1,4 @@
 /*
- *
- * arch/mips/kernel/smp.c
- *
- * Copyright (C) 2000 Sibyte
- *
- * Written by Justin Carlson (carlson@sibyte.com)
- *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
@@ -20,20 +13,24 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
+ * Copyright (C) 2000, 2001 Kanoj Sarcar
+ * Copyright (C) 2000, 2001 Ralf Baechle
+ * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
+ * Copyright (C) 2000, 2001 Broadcom Corporation
  */
-
-
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 
+#include
 #include
 #include
 #include
@@ -42,38 +39,27 @@
 #include
 #include
 
-/*
- * This was written with the BRCM12500 MP SOC in mind, but tries to
- * be generic. It's modelled on the mips64 smp.c code, which is
- * derived from Sparc, I'm guessing, which is derived from...
- *
- * It's probably horribly designed for very large ccNUMA systems
- * as it doesn't take any node clustering into account.
-*/
-
-
 /* Ze Big Kernel Lock! */
 spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
-int smp_threads_ready;	/* Not used */
-int smp_num_cpus;
+int smp_threads_ready;
+int smp_num_cpus = 1;			/* Number that came online.  */
+cpumask_t cpu_online_map;		/* Bitmask of currently online CPUs */
 int global_irq_holder = NO_PROC_ID;
 spinlock_t global_irq_lock = SPIN_LOCK_UNLOCKED;
-struct mips_cpuinfo cpu_data[NR_CPUS];
-
-struct smp_fn_call_struct smp_fn_call =
-	{ SPIN_LOCK_UNLOCKED, ATOMIC_INIT(0), NULL, NULL};
+struct cpuinfo_mips cpu_data[NR_CPUS];
+void (*volatile smp_cpu0_finalize)(void) = NULL;
 
 static atomic_t cpus_booted = ATOMIC_INIT(0);
 
 /* These are defined by the board-specific code. */
 
-/* Cause the function described by smp_fn_call
-   to be executed on the passed cpu.  When the function
-   has finished, increment the finished field of
-   smp_fn_call. */
-
-void core_call_function(int cpu);
+/*
+ * Cause the function described by call_data to be executed on the passed
+ * cpu.  When the function has finished, increment the finished field of
+ * call_data.
+ */
+void core_send_ipi(int cpu, unsigned int action);
 
 /*
  * Clear all undefined state in the cpu, set up sp and gp to the passed
@@ -95,11 +81,27 @@
    the number of cpus in the system, including this one */
 int prom_setup_smp(void);
 
+/*
+ * Hook for doing final board-specific setup after the generic smp setup
+ * is done
+ */
 int start_secondary(void *unused)
 {
 	prom_init_secondary();
+
+	/* Do stuff that trap_init() did for the first processor */
+	clear_cp0_status(ST0_BEV);
+	if (mips_cpu.options & MIPS_CPU_DIVEC) {
+		set_cp0_cause(CAUSEF_IV);
+	}
+	/* XXX parity protection should be folded in here when it's converted to
+	   an option instead of something based on .cputype */
+
 	write_32bit_cp0_register(CP0_CONTEXT, smp_processor_id()<<23);
-	current_pgd[smp_processor_id()] = init_mm.pgd;
+	pgd_current[smp_processor_id()] = init_mm.pgd;
+	cpu_data[smp_processor_id()].udelay_val = loops_per_jiffy;
+	cpu_data[smp_processor_id()].asid_cache = ASID_FIRST_VERSION;
+	prom_smp_finish();
 	printk("Slave cpu booted successfully\n");
 	atomic_inc(&cpus_booted);
 	cpu_idle();
@@ -113,6 +115,8 @@
 	smp_num_cpus = prom_setup_smp();
 	init_new_context(current, &init_mm);
 	current->processor = 0;
+	cpu_data[0].udelay_val = loops_per_jiffy;
+	cpu_data[0].asid_cache = ASID_FIRST_VERSION;
 	atomic_set(&cpus_booted, 1);  /* Master CPU is already booted... */
 	init_idle();
 	for (i = 1; i < smp_num_cpus; i++) {
@@ -170,6 +174,7 @@
 
 	/* Wait for everyone to come up */
 	while (atomic_read(&cpus_booted) != smp_num_cpus);
+	smp_threads_ready = 1;
 }
 
 void __init smp_commence(void)
@@ -182,11 +187,14 @@
 	current->need_resched = 1;
 }
 
-void FASTCALL(smp_send_reschedule(int cpu))
+void smp_send_reschedule(int cpu)
 {
 	smp_call_function(reschedule_this_cpu, NULL, 0, 0);
 }
 
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+struct call_data_struct *call_data;
 
 /*
  * The caller of this wants the passed function to run on every cpu.  If wait
@@ -196,43 +204,61 @@
 int smp_call_function (void (*func) (void *info), void *info, int retry,
 								int wait)
 {
+	struct call_data_struct data;
 	int cpus = smp_num_cpus - 1;
+	int cpu = smp_processor_id();
 	int i;
 
-	if (smp_num_cpus < 2) {
+	if (!cpus)
 		return 0;
-	}
 
-	spin_lock_bh(&smp_fn_call.lock);
-
-	atomic_set(&smp_fn_call.finished, 0);
-	smp_fn_call.fn = func;
-	smp_fn_call.data = info;
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+	spin_lock_bh(&call_lock);
+	call_data = &data;
 
 	for (i = 0; i < smp_num_cpus; i++) {
-		if (i != smp_processor_id()) {
+		if (i != cpu) {
 			/* Call the board specific routine */
-			core_call_function(i);
+			core_send_ipi(i, SMP_CALL_FUNCTION);
 		}
 	}
 
-	if (wait) {
-		while(atomic_read(&smp_fn_call.finished) != cpus) {}
-	}
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		barrier();
+
+	if (wait)
+		while(atomic_read(&data.finished) != cpus)
+			barrier();
+	spin_unlock_bh(&call_lock);
 
-	spin_unlock_bh(&smp_fn_call.lock);
 	return 0;
 }
 
 void synchronize_irq(void)
 {
-	panic("synchronize_irq");
+	if (irqs_running()) {
+		/* Stupid approach */
+		cli();
+		sti();
+	}
 }
 
 static void stop_this_cpu(void *dummy)
 {
-	printk("Cpu stopping\n");
-	for (;;);
+	int cpu = smp_processor_id();
+	if (cpu)
+		for (;;);	/* XXX Use halt like i386 */
+	else {
+		/* XXXKW this isn't quite there yet */
+		while (!smp_cpu0_finalize) ;
+		smp_cpu0_finalize();
+	}
 }
 
 void smp_send_stop(void)
@@ -401,3 +427,110 @@
 		printk("global_restore_flags: %08lx\n", flags);
 	}
 }
+
+static void flush_tlb_all_ipi(void *info)
+{
+	local_flush_tlb_all();
+}
+
+void flush_tlb_all(void)
+{
+	smp_call_function(flush_tlb_all_ipi, 0, 1, 1);
+	local_flush_tlb_all();
+}
+
+static void flush_tlb_mm_ipi(void *mm)
+{
+	local_flush_tlb_mm((struct mm_struct *)mm);
+}
+
+/*
+ * The following tlb flush calls are invoked when old translations are
+ * being torn down, or pte attributes are changing. For single threaded
+ * address spaces, a new context is obtained on the current cpu, and tlb
+ * context on other cpus are invalidated to force a new context allocation
+ * at switch_mm time, should the mm ever be used on other cpus. For
+ * multithreaded address spaces, intercpu interrupts have to be sent.
+ * Another case where intercpu interrupts are required is when the target
+ * mm might be active on another cpu (eg debuggers doing the flushes on
+ * behalf of debugees, kswapd stealing pages from another process etc).
+ * Kanoj 07/00.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
+		smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1);
+	} else {
+		int i;
+		for (i = 0; i < smp_num_cpus; i++)
+			if (smp_processor_id() != i)
+				CPU_CONTEXT(i, mm) = 0;
+	}
+	local_flush_tlb_mm(mm);
+}
+
+struct flush_tlb_data {
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	unsigned long addr1;
+	unsigned long addr2;
+};
+
+static void flush_tlb_range_ipi(void *info)
+{
+	struct flush_tlb_data *fd = (struct flush_tlb_data *)info;
+
+	local_flush_tlb_range(fd->mm, fd->addr1, fd->addr2);
+}
+
+void flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
+		struct flush_tlb_data fd;
+
+		fd.mm = mm;
+		fd.addr1 = start;
+		fd.addr2 = end;
+		smp_call_function(flush_tlb_range_ipi, (void *)&fd, 1, 1);
+	} else {
+		int i;
+		for (i = 0; i < smp_num_cpus; i++)
+			if (smp_processor_id() != i)
+				CPU_CONTEXT(i, mm) = 0;
+	}
+	local_flush_tlb_range(mm, start, end);
+}
+
+static void flush_tlb_page_ipi(void *info)
+{
+	struct flush_tlb_data *fd = (struct flush_tlb_data *)info;
+
+	local_flush_tlb_page(fd->vma, fd->addr1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) {
+		struct flush_tlb_data fd;
+
+		fd.vma = vma;
+		fd.addr1 = page;
+		smp_call_function(flush_tlb_page_ipi, (void *)&fd, 1, 1);
+	} else {
+		int i;
+		for (i = 0; i < smp_num_cpus; i++)
+			if (smp_processor_id() != i)
+				CPU_CONTEXT(i, vma->vm_mm) = 0;
+	}
+	local_flush_tlb_page(vma, page);
+}
+
+EXPORT_SYMBOL(flush_tlb_page);
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(synchronize_irq);
+EXPORT_SYMBOL(kernel_flag);
+EXPORT_SYMBOL(__global_sti);
+EXPORT_SYMBOL(__global_cli);
+EXPORT_SYMBOL(__global_save_flags);
+EXPORT_SYMBOL(__global_restore_flags);
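
Note: the patch above only shows the sending side of the cross-call mechanism. smp_call_function() publishes a stack-allocated call_data_struct through the global call_data pointer (serialized by call_lock) and asks the board code, via core_send_ipi(), to raise an SMP_CALL_FUNCTION IPI on every other cpu. For reference, the cpu that receives the IPI is expected to run a handler roughly like the sketch below. This sketch is not part of the patch; the handler name and the exact call_data_struct layout are assumptions inferred from how smp_call_function() spins on the started/finished counters above.

/*
 * Hypothetical receive-side handler, assuming a call_data_struct of the
 * form { void (*func)(void *); void *info; atomic_t started;
 * atomic_t finished; int wait; }.  The real handler lives in the
 * board-specific IPI/interrupt code, not in this file.
 */
void smp_call_function_interrupt(void)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	/*
	 * The sender spins on 'started' before returning or reusing the
	 * request, so bump it first to acknowledge that this cpu has seen
	 * the request.
	 */
	atomic_inc(&call_data->started);

	/* Run the requested function on this cpu. */
	(*func)(info);

	/*
	 * Only touch 'finished' when the sender asked to wait; once every
	 * other cpu has incremented it, the sender releases call_lock and
	 * the stack-allocated call_data_struct may go out of scope.
	 */
	if (wait)
		atomic_inc(&call_data->finished);
}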