From dc90e95f310f4f821c905b2aec8e9449bb3270fa Mon Sep 17 00:00:00 2001
From: Peter Chubb <peterc@gelato.unsw.edu.au>
Date: Wed, 24 Aug 2005 17:13:00 -0700
Subject: [IA64] Add PAL_VM_SUMMARY/PAL_MEM_ATTRIB to bootloader for SKI

This patch implements PAL_VM_SUMMARY (and PAL_MEM_ATTRIB for good
measure) and pretends that the simulated machine is a McKinley.
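
For reference, the hard-wired return values decode as follows (bit
positions are a sketch, worked out from the field comments in the
patch, low bits first):

	u64 r9 = 0x2044040020F1865UL, r10 = 0x183C;

	int vw            =  r9        & 0x01;	/* 1: VHPT walker present */
	int phys_add_size = (r9 >>  1) & 0x7f;	/* 50 physical address bits */
	int key_size      = (r9 >>  8) & 0xff;	/* 24-bit protection keys */
	int max_pkr       = (r9 >> 16) & 0xff;	/* 15, i.e. 16 PKRs */
	int hash_tag_id   = (r9 >> 24) & 0xff;	/* 2 */
	int max_dtr       = (r9 >> 32) & 0xff;	/* 64 DTR entries */
	int max_itr       = (r9 >> 40) & 0xff;	/* 64 ITR entries */
	int unique_tcs    = (r9 >> 48) & 0xff;	/* 4 unique TCs */
	int tc_levels     = (r9 >> 56) & 0xff;	/* 2 TC levels */

	int impl_va_msb   =  r10       & 0xff;	/* 60, i.e. 61 VA bits */
	int rid_size      = (r10 >> 8) & 0xff;	/* 24-bit region IDs */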

Some extra comments and clean-up by Tony Luck.

Signed-off-by: Peter Chubb <peterc@gelato.unsw.edu.au>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/hp/sim/boot/boot_head.S | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S
index 1c8c7e6a9a5e..a9bd71ac78e2 100644
--- a/arch/ia64/hp/sim/boot/boot_head.S
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -4,6 +4,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/pal.h>
 
 	.bss
 	.align 16
@@ -49,7 +50,11 @@ GLOBAL_ENTRY(jmp_to_kernel)
 	br.sptk.few b7
 END(jmp_to_kernel)
 
-
+/*
+ * r28 contains the index of the PAL function
+ * r29--31 the args
+ * Return values in ret0--3 (r8--11)
+ */
 GLOBAL_ENTRY(pal_emulator_static)
 	mov r8=-1
 	mov r9=256
@@ -62,7 +67,7 @@ GLOBAL_ENTRY(pal_emulator_static)
 	cmp.gtu p6,p7=r9,r28
 (p6)	br.cond.sptk.few stacked
 	;;
-static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
+static:	cmp.eq p6,p7=PAL_PTCE_INFO,r28
 (p7)	br.cond.sptk.few 1f
 	;;
 	mov r8=0			/* status = 0 */
@@ -70,21 +75,21 @@ static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
 	movl r10=0x0000000200000003	/* count[0], count[1] */
 	movl r11=0x1000000000002000	/* stride[0], stride[1] */
 	br.cond.sptk.few rp
-1:	cmp.eq p6,p7=14,r28		/* PAL_FREQ_RATIOS */
+1:	cmp.eq p6,p7=PAL_FREQ_RATIOS,r28
 (p7)	br.cond.sptk.few 1f
 	mov r8=0			/* status = 0 */
 	movl r9 =0x100000064		/* proc_ratio (1/100) */
 	movl r10=0x100000100		/* bus_ratio<<32 (1/256) */
 	movl r11=0x100000064		/* itc_ratio<<32 (1/100) */
 	;;
-1:	cmp.eq p6,p7=19,r28		/* PAL_RSE_INFO */
+1:	cmp.eq p6,p7=PAL_RSE_INFO,r28
 (p7)	br.cond.sptk.few 1f
 	mov r8=0			/* status = 0 */
 	mov r9=96			/* num phys stacked */
 	mov r10=0			/* hints */
 	mov r11=0
 	br.cond.sptk.few rp
-1:	cmp.eq p6,p7=1,r28		/* PAL_CACHE_FLUSH */
+1:	cmp.eq p6,p7=PAL_CACHE_FLUSH,r28
 (p7)	br.cond.sptk.few 1f
 	mov r9=ar.lc
 	movl r8=524288			/* flush 512k million cache lines (16MB) */
@@ -102,7 +107,7 @@ static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
 	mov ar.lc=r9
 	mov r8=r0
 	;;
-1:	cmp.eq p6,p7=15,r28		/* PAL_PERF_MON_INFO */
+1:	cmp.eq p6,p7=PAL_PERF_MON_INFO,r28
 (p7)	br.cond.sptk.few 1f
 	mov r8=0			/* status = 0 */
 	movl r9 =0x08122f04		/* generic=4 width=47 retired=8 cycles=18 */
@@ -138,6 +143,20 @@ static:	cmp.eq p6,p7=6,r28		/* PAL_PTCE_INFO */
 	st8 [r29]=r0,16			/* clear remaining bits  */
 	st8 [r18]=r0,16			/* clear remaining bits  */
 	;;
+1:	cmp.eq p6,p7=PAL_VM_SUMMARY,r28
+(p7)	br.cond.sptk.few 1f
+	mov	r8=0			/* status = 0  */
+	movl	r9=0x2044040020F1865	/* num_tc_levels=2, num_unique_tcs=4 */
+					/* max_itr_entry=64, max_dtr_entry=64 */
+					/* hash_tag_id=2, max_pkr=15 */
+					/* key_size=24, phys_add_size=50, vw=1 */
+	movl	r10=0x183C		/* rid_size=24, impl_va_msb=60 */
+	;;
+1:	cmp.eq p6,p7=PAL_MEM_ATTRIB,r28
+(p7)	br.cond.sptk.few 1f
+	mov	r8=0			/* status = 0 */
+	mov	r9=0x80|0x01		/* NatPage|WB */
+	;;
 1:	br.cond.sptk.few rp
 stacked:
 	br.ret.sptk.few rp
-- 


From 714d2dc14914f0f7bb008effe830c99eb47c75df Mon Sep 17 00:00:00 2001
From: Peter Chubb <peterc@gelato.unsw.edu.au>
Date: Thu, 25 Aug 2005 17:39:00 -0700
Subject: [IA64] Allow /proc/pal/cpu0/vm_info under the simulator

Not all of the PAL VM calls are implemented for the SKI simulator.
Don't just give up if one fails; print information from the calls
that succeed.
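
Schematically, each PAL query in vm_info() changes from bail-out to
report-what-works (pal_query() stands in for the real calls):

	/* before: one failure hides everything that follows */
	if ((status = pal_query(...)) != 0) {
		printk(KERN_ERR "...=%ld\n", status);
		return 0;
	}
	p += sprintf(p, ...);

	/* after: note the failure and fall through to the next call */
	if ((status = pal_query(...)) != 0)
		printk(KERN_ERR "...=%ld\n", status);
	else
		p += sprintf(p, ...);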

Signed-off-by: Peter Chubb <peterc@gelato.unsw.edu.au>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/palinfo.c | 115 +++++++++++++++++++++++----------------------
 1 file changed, 60 insertions(+), 55 deletions(-)

diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 25e7c8344564..89faa603c6be 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -307,11 +307,9 @@ vm_info(char *page)
 
 	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
-		return 0;
-	}
+	} else {
 
-
-	p += sprintf(p,
+		p += sprintf(p,
 		     "Physical Address Space         : %d bits\n"
 		     "Virtual Address Space          : %d bits\n"
 		     "Protection Key Registers(PKR)  : %d\n"
@@ -319,92 +317,99 @@ vm_info(char *page)
 		     "Hash Tag ID                    : 0x%x\n"
 		     "Size of RR.rid                 : %d\n",
 		     vm_info_1.pal_vm_info_1_s.phys_add_size,
-		     vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1,
-		     vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id,
+		     vm_info_2.pal_vm_info_2_s.impl_va_msb+1,
+		     vm_info_1.pal_vm_info_1_s.max_pkr+1,
+		     vm_info_1.pal_vm_info_1_s.key_size,
+		     vm_info_1.pal_vm_info_1_s.hash_tag_id,
 		     vm_info_2.pal_vm_info_2_s.rid_size);
+	}
 
-	if (ia64_pal_mem_attrib(&attrib) != 0)
-		return 0;
-
-	p += sprintf(p, "Supported memory attributes    : ");
-	sep = "";
-	for (i = 0; i < 8; i++) {
-		if (attrib & (1 << i)) {
-			p += sprintf(p, "%s%s", sep, mem_attrib[i]);
-			sep = ", ";
+	if (ia64_pal_mem_attrib(&attrib) == 0) {
+		p += sprintf(p, "Supported memory attributes    : ");
+		sep = "";
+		for (i = 0; i < 8; i++) {
+			if (attrib & (1 << i)) {
+				p += sprintf(p, "%s%s", sep, mem_attrib[i]);
+				sep = ", ";
+			}
 		}
+		p += sprintf(p, "\n");
 	}
-	p += sprintf(p, "\n");
 
 	if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
-		return 0;
-	}
-
-	p += sprintf(p,
-		     "\nTLB walker                     : %simplemented\n"
-		     "Number of DTR                  : %d\n"
-		     "Number of ITR                  : %d\n"
-		     "TLB insertable page sizes      : ",
-		     vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
-		     vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
-		     vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
+	} else {
 
+		p += sprintf(p,
+			     "\nTLB walker                     : %simplemented\n"
+			     "Number of DTR                  : %d\n"
+			     "Number of ITR                  : %d\n"
+			     "TLB insertable page sizes      : ",
+			     vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
+			     vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
+			     vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
 
-	p = bitvector_process(p, tr_pages);
 
-	p += sprintf(p, "\nTLB purgeable page sizes       : ");
+		p = bitvector_process(p, tr_pages);
 
-	p = bitvector_process(p, vw_pages);
+		p += sprintf(p, "\nTLB purgeable page sizes       : ");
 
+		p = bitvector_process(p, vw_pages);
+	}
 	if ((status=ia64_get_ptce(&ptce)) != 0) {
 		printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
-		return 0;
-	}
-
-	p += sprintf(p,
+	} else {
+		p += sprintf(p,
 		     "\nPurge base address             : 0x%016lx\n"
 		     "Purge outer loop count         : %d\n"
 		     "Purge inner loop count         : %d\n"
 		     "Purge outer loop stride        : %d\n"
 		     "Purge inner loop stride        : %d\n",
-		     ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]);
+		     ptce.base, ptce.count[0], ptce.count[1],
+		     ptce.stride[0], ptce.stride[1]);
 
-	p += sprintf(p,
+		p += sprintf(p,
 		     "TC Levels                      : %d\n"
 		     "Unique TC(s)                   : %d\n",
 		     vm_info_1.pal_vm_info_1_s.num_tc_levels,
 		     vm_info_1.pal_vm_info_1_s.max_unique_tcs);
 
-	for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
-		for (j=2; j>0 ; j--) {
-			tc_pages = 0; /* just in case */
+		for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
+			for (j=2; j>0 ; j--) {
+				tc_pages = 0; /* just in case */
 
 
-			/* even without unification, some levels may not be present */
-			if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
-				continue;
-			}
+				/* even without unification, some levels may not be present */
+				if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
+					continue;
+				}
 
-			p += sprintf(p,
+				p += sprintf(p,
 				     "\n%s Translation Cache Level %d:\n"
 				     "\tHash sets           : %d\n"
 				     "\tAssociativity       : %d\n"
 				     "\tNumber of entries   : %d\n"
 				     "\tFlags               : ",
-				     cache_types[j+tc_info.tc_unified], i+1, tc_info.tc_num_sets,
-				     tc_info.tc_associativity, tc_info.tc_num_entries);
+				     cache_types[j+tc_info.tc_unified], i+1,
+				     tc_info.tc_num_sets,
+				     tc_info.tc_associativity,
+				     tc_info.tc_num_entries);
 
-			if (tc_info.tc_pf) p += sprintf(p, "PreferredPageSizeOptimized ");
-			if (tc_info.tc_unified) p += sprintf(p, "Unified ");
-			if (tc_info.tc_reduce_tr) p += sprintf(p, "TCReduction");
+				if (tc_info.tc_pf)
+					p += sprintf(p, "PreferredPageSizeOptimized ");
+				if (tc_info.tc_unified)
+					p += sprintf(p, "Unified ");
+				if (tc_info.tc_reduce_tr)
+					p += sprintf(p, "TCReduction");
 
-			p += sprintf(p, "\n\tSupported page sizes: ");
+				p += sprintf(p, "\n\tSupported page sizes: ");
 
-			p = bitvector_process(p, tc_pages);
+				p = bitvector_process(p, tc_pages);
 
-			/* when unified date (j=2) is enough */
-			if (tc_info.tc_unified) break;
+				/* when unified, data (j=2) is enough */
+				if (tc_info.tc_unified)
+					break;
+			}
 		}
 	}
 	p += sprintf(p, "\n");
@@ -440,14 +445,14 @@ register_info(char *page)
 		p += sprintf(p, "\n");
 	}
 
-	if (ia64_pal_rse_info(&phys_stacked, &hints) != 0) return 0;
+	if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) {
 
 	p += sprintf(p,
 		     "RSE stacked physical registers   : %ld\n"
 		     "RSE load/store hints             : %ld (%s)\n",
 		     phys_stacked, hints.ph_data,
 		     hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
-
+	}
 	if (ia64_pal_debug_info(&iregs, &dregs))
 		return 0;
 
-- 


From 6cf07a8cc86a0b471466c7fe45892f7ef434015b Mon Sep 17 00:00:00 2001
From: Peter Chubb <peterc@gelato.unsw.edu.au>
Date: Tue, 23 Aug 2005 20:07:00 -0700
Subject: [IA64] Fix nasty VMLPT problem...

I've solved the problem I was having with Debian failing to boot
under the simulator.

The problem is that the number of bits for the virtually mapped
linear page table (the short-format VHPT, VMLPT for short) is being
tested incorrectly.

There are two problems:
      1. The PAL call that should tell the kernel the size of the
      virtual address space isn't implemented for the simulator, so
      the kernel uses the default of 50 bits.  This is addressed
      separately in dc90e95f310f4f821c905b2aec8e9449bb3270fa

      2. In arch/ia64/mm/init.c there's code to calculate the size
      of the VMLPT based on the number of implemented virtual address
      bits and the page size.  It checks to see if the VMLPT base
      address overlaps the top of the mapped region, but this check
      doesn't allow for the address-space hole and in fact will
      never trigger.

Here's an alternative test and panic that I think is more accurate.
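
For concreteness, with the definitions in ia64_mmu_init() (quoting
from memory: pte_bits = 3, mapped_space_bits = 3*(PAGE_SHIFT -
pte_bits) + 3, vmlpt_bits = impl_va_bits - PAGE_SHIFT + pte_bits), a
16KB-page kernel on a CPU with 50 implemented VA bits works out as:

	mapped_space_bits = 3*(14 - 3) + 3;	/* = 36 */
	vmlpt_bits        = 50 - 14 + 3;	/* = 39 */
	/* test 1: 36 - 14 = 22 <= 39 - 3 = 36 -> enough pte slots     */
	/* test 2: 36 <= 50 - 1 = 49           -> clear of the VA hole */

The old check compared POW2(mapped_space_bits) (at most about 2^42,
with 64KB pages) against pta = POW2(61) - POW2(vmlpt_bits) (just below
2^61), which is why it could never trigger.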

Signed-off-by: Peter Chubb <peterc@gelato.unsw.edu.au>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/init.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 65f9958db9f0..1281c609ee98 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -382,13 +382,22 @@ ia64_mmu_init (void *my_cpu_data)
 
 	if (impl_va_bits < 51 || impl_va_bits > 61)
 		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+	/*
+	 * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
+	 * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
+	 * the test makes sure that our mapped space doesn't overlap the
+	 * unimplemented hole in the middle of the region.
+	 */
+	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
+	    (mapped_space_bits > impl_va_bits - 1))
+		panic("Cannot build a big enough virtual-linear page table"
+		      " to cover mapped address space.\n"
+		      " Try using a smaller page size.\n");
+
 
 	/* place the VMLPT at the end of each page-table mapped region: */
 	pta = POW2(61) - POW2(vmlpt_bits);
 
-	if (POW2(mapped_space_bits) >= pta)
-		panic("mm/init: overlap between virtually mapped linear page table and "
-		      "mapped kernel space!");
 	/*
 	 * Set the (virtually mapped linear) page table address.  Bit
 	 * 8 selects between the short and long format, bits 2-7 the
-- 


From a1cddb88920b915eaba10712ecfd0fc698b00a22 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Wed, 31 Aug 2005 08:05:00 -0700
Subject: [IA64-SGI] Add new vendor-specific SAL calls for:

- notifying the PROM of specific features that are supported by the OS.
  This is used to enable a PROM feature if and only if the corresponding
  feature is implemented in the OS.

- fetching feature sets that are supported by the current PROM. This
  allows the OS to selectively enable features when the PROM support is
  available.
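
Usage sketch (the interfaces and feature bits are the ones added
below):

	/* OS -> PROM: announce optional OS features during boot */
	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);

	/* PROM -> OS: use a PROM feature only when it is present;
	 * down-rev PROMs always test false */
	if (sn_prom_feature_available(PRF_FEATURE_XXX)) {
		/* ... enable the dependent OS code ... */
	}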

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/setup.c           | 30 +++++++++++-------
 include/asm-ia64/sn/sn_feature_sets.h | 57 +++++++++++++++++++++++++++++++++++
 include/asm-ia64/sn/sn_sal.h          | 36 ++++++++++++++++------
 3 files changed, 103 insertions(+), 20 deletions(-)
 create mode 100644 include/asm-ia64/sn/sn_feature_sets.h

diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 7c7fe441d623..981928f35a8c 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -49,6 +49,7 @@
 #include <asm/sn/clksupport.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/geo.h>
+#include <asm/sn/sn_feature_sets.h>
 #include "xtalk/xwidgetdev.h"
 #include "xtalk/hubdev.h"
 #include <asm/sn/klconfig.h>
@@ -99,6 +100,7 @@ EXPORT_SYMBOL(sn_region_size);
 int sn_prom_type;	/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
 
 short physical_node_map[MAX_PHYSNODE_ID];
+static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
 
 EXPORT_SYMBOL(physical_node_map);
 
@@ -273,7 +275,10 @@ void __init sn_setup(char **cmdline_p)
 	u32 version = sn_sal_rev();
 	extern void sn_cpu_init(void);
 
-	ia64_sn_plat_set_error_handling_features();
+	ia64_sn_plat_set_error_handling_features();	// obsolete
+	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
+	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
+
 
 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
 	/*
@@ -316,16 +321,6 @@ void __init sn_setup(char **cmdline_p)
 
 	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 
-	/*
-	 * Confirm the SAL we're running on is recent enough...
-	 */
-	if (version < SN_SAL_MIN_VERSION) {
-		printk(KERN_ERR "This kernel needs SGI SAL version >= "
-		       "%x.%02x\n", SN_SAL_MIN_VERSION >> 8,
-		        SN_SAL_MIN_VERSION & 0x00FF);
-		panic("PROM version too old\n");
-	}
-
 	master_nasid = boot_get_nasid();
 
 	status =
@@ -481,6 +476,10 @@ void __init sn_cpu_init(void)
 	if (nodepdaindr[0] == NULL)
 		return;
 
+	for (i = 0; i < MAX_PROM_FEATURE_SETS; i++)
+		if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
+			break;
+
 	cpuid = smp_processor_id();
 	cpuphyid = get_sapicid();
 
@@ -652,3 +651,12 @@ nasid_slice_to_cpuid(int nasid, int slice)
 
 	return -1;
 }
+
+int sn_prom_feature_available(int id)
+{
+	if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS)
+		return 0;
+	return test_bit(id, sn_prom_features);
+}
+EXPORT_SYMBOL(sn_prom_feature_available);
+
diff --git a/include/asm-ia64/sn/sn_feature_sets.h b/include/asm-ia64/sn/sn_feature_sets.h
new file mode 100644
index 000000000000..e68a80853d5d
--- /dev/null
+++ b/include/asm-ia64/sn/sn_feature_sets.h
@@ -0,0 +1,57 @@
+#ifndef _ASM_IA64_SN_FEATURE_SETS_H
+#define _ASM_IA64_SN_FEATURE_SETS_H
+
+/*
+ * SN PROM Features
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+
+#include <asm/types.h>
+#include <asm/bitops.h>
+
+/* --------------------- PROM Features -----------------------------*/
+extern int sn_prom_feature_available(int id);
+
+#define MAX_PROM_FEATURE_SETS			2
+
+/*
+ * The following defines features that may or may not be supported by the
+ * current PROM. The OS uses sn_prom_feature_available(feature) to test for
+ * the presence of a PROM feature. Down rev (old) PROMs will always test
+ * "false" for new features.
+ *
+ * Use:
+ * 		if (sn_prom_feature_available(PRF_FEATURE_XXX))
+ * 			...
+ */
+
+/*
+ * Example: feature XXX
+ */
+#define PRF_FEATURE_XXX		0
+
+
+
+/* --------------------- OS Features -------------------------------*/
+
+/*
+ * The following defines OS features that are optionally present in
+ * the operating system.
+ * During boot, PROM is notified of these features via a series of calls:
+ *
+ * 		ia64_sn_set_os_feature(feature1);
+ *
+ * Once enabled, a feature cannot be disabled.
+ *
+ * By default, features are disabled unless explicitly enabled.
+ */
+#define  OSF_MCA_SLV_TO_OS_INIT_SLV		0
+#define  OSF_FEAT_LOG_SBES			1
+
+#endif /* _ASM_IA64_SN_FEATURE_SETS_H */
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 27976d223186..231d0d04c3e8 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -80,6 +80,9 @@
 #define SN_SAL_BTE_RECOVER			   0x02000061
 #define SN_SAL_IOIF_GET_PCI_TOPOLOGY	           0x02000062
 
+#define  SN_SAL_GET_PROM_FEATURE_SET		   0x02000065
+#define  SN_SAL_SET_OS_FEATURE_SET		   0x02000066
+
 /*
  * Service-specific constants
  */
@@ -118,8 +121,8 @@
 /*
  * Error Handling Features
  */
-#define SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV	0x1
-#define SAL_ERR_FEAT_LOG_SBES			0x2
+#define SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV	0x1	// obsolete
+#define SAL_ERR_FEAT_LOG_SBES			0x2	// obsolete
 #define SAL_ERR_FEAT_MFR_OVERRIDE		0x4
 #define SAL_ERR_FEAT_SBE_THRESHOLD		0xffff0000
 
@@ -151,12 +154,6 @@ sn_sal_rev(void)
 	return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor);
 }
 
-/*
- * Specify the minimum PROM revsion required for this kernel.
- * Note that they're stored in hex format...
- */
-#define SN_SAL_MIN_VERSION	0x0404
-
 /*
  * Returns the master console nasid, if the call fails, return an illegal
  * value.
@@ -336,7 +333,7 @@ ia64_sn_plat_cpei_handler(void)
 }
 
 /*
- * Set Error Handling Features
+ * Set Error Handling Features	(Obsolete)
  */
 static inline u64
 ia64_sn_plat_set_error_handling_features(void)
@@ -1100,4 +1097,25 @@ ia64_sn_is_fake_prom(void)
 	return (rv.status == 0);
 }
 
+static inline int
+ia64_sn_get_prom_feature_set(int set, unsigned long *feature_set)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_GET_PROM_FEATURE_SET, set, 0, 0, 0, 0, 0, 0);
+	if (rv.status != 0)
+		return rv.status;
+	*feature_set = rv.v0;
+	return 0;
+}
+
+static inline int
+ia64_sn_set_os_feature(int feature)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SET_OS_FEATURE_SET, feature, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+
 #endif /* _ASM_IA64_SN_SN_SAL_H */
-- 


From 289d773ee89ea80dcc364ef97d1be7ad1817387e Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Sun, 11 Sep 2005 17:21:46 +1000
Subject: [IA64] MCA/INIT: avoid reading INIT record during INIT event

Reading the INIT record from SAL during the INIT event has proved to be
unreliable, and a source of hangs during INIT processing.  The new
MCA/INIT handlers remove the need to get the INIT record from SAL.
Change salinfo.c so mca.c can just flag that a new record is available,
without having to read the record during INIT processing.  This patch
can be applied without the new MCA/INIT handlers.

Also clean up some usage of NR_CPUS which should have been using
cpu_online().
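
With a NULL buffer the INIT path reduces to a single call, as the
follow-on MCA/INIT patch does:

	/* flag that SAL holds a new INIT record; salinfo hands it to
	 * user space later, outside INIT context */
	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);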

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/salinfo.c | 62 +++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 6f0cc7a6634e..ca68e6e44a72 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -22,6 +22,11 @@
  *
  * Dec  5 2004	kaos@sgi.com
  *   Standardize which records are cleared automatically.
+ *
+ * Aug 18 2005	kaos@sgi.com
+ *   mca.c may not pass a buffer; a NULL buffer just indicates that a new
+ *   record is available in SAL.
+ *   Replace some uses of NR_CPUS with cpu_online(), for cpu hotplug.
  */
 
 #include <linux/types.h>
@@ -193,7 +198,7 @@ shift1_data_saved (struct salinfo_data *data, int shift)
  * The buffer passed from mca.c points to the output from ia64_log_get. This is
  * a persistent buffer but its contents can change between the interrupt and
  * when user space processes the record.  Save the record id to identify
- * changes.
+ * changes.  If the buffer is NULL then just update the bitmap.
  */
 void
 salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
@@ -206,27 +211,29 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 
 	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
 
-	if (irqsafe)
-		spin_lock_irqsave(&data_saved_lock, flags);
-	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
-		if (!data_saved->buffer)
-			break;
-	}
-	if (i == saved_size) {
-		if (!data->saved_num) {
-			shift1_data_saved(data, 0);
-			data_saved = data->data_saved + saved_size - 1;
-		} else
-			data_saved = NULL;
-	}
-	if (data_saved) {
-		data_saved->cpu = smp_processor_id();
-		data_saved->id = ((sal_log_record_header_t *)buffer)->id;
-		data_saved->size = size;
-		data_saved->buffer = buffer;
+	if (buffer) {
+		if (irqsafe)
+			spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+			if (!data_saved->buffer)
+				break;
+		}
+		if (i == saved_size) {
+			if (!data->saved_num) {
+				shift1_data_saved(data, 0);
+				data_saved = data->data_saved + saved_size - 1;
+			} else
+				data_saved = NULL;
+		}
+		if (data_saved) {
+			data_saved->cpu = smp_processor_id();
+			data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+			data_saved->size = size;
+			data_saved->buffer = buffer;
+		}
+		if (irqsafe)
+			spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
-	if (irqsafe)
-		spin_unlock_irqrestore(&data_saved_lock, flags);
 
 	if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
 		if (irqsafe)
@@ -244,7 +251,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 	int i;
 	if (!data->open)
 		return;
-	for (i = 0; i < NR_CPUS; ++i) {
+	for_each_online_cpu(i) {
 		if (test_bit(i, &data->cpu_event)) {
 			/* double up() is not a problem, user space will see no
 			 * records for the additional "events".
@@ -291,7 +298,7 @@ retry:
 
 	n = data->cpu_check;
 	for (i = 0; i < NR_CPUS; i++) {
-		if (test_bit(n, &data->cpu_event)) {
+		if (test_bit(n, &data->cpu_event) && cpu_online(n)) {
 			cpu = n;
 			break;
 		}
@@ -585,11 +592,10 @@ salinfo_init(void)
 
 		/* we missed any events before now */
 		online = 0;
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j)) {
-				set_bit(j, &data->cpu_event);
-				++online;
-			}
+		for_each_online_cpu(j) {
+			set_bit(j, &data->cpu_event);
+			++online;
+		}
 		sema_init(&data->sem, online);
 
 		*sdir++ = dir;
-- 


From 7f613c7d2203ae137d98fc1c38abc30fd7048637 Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Sun, 11 Sep 2005 17:22:53 +1000
Subject: [PATCH] MCA/INIT: use per cpu stacks

The bulk of the change.  Use per cpu MCA/INIT stacks.  Change the SAL
to OS state (sos) to be per process.  Do all the assembler work on the
MCA/INIT stacks, leaving the original stack alone.  Pass per cpu state
data to the C handlers for MCA and INIT, which also means changing the
mca_drv interfaces slightly.  Lots of verification on whether the
original stack is usable before converting it to a sleeping process.
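
One visible interface change: an MCA recovery extension now receives
the per-event SAL/OS state instead of the two global handoff
structures.  Sketch (my_recovery_check is hypothetical; the
registration call is the one mca.c exports):

	/* new-style extension: nonzero return means "recovered" */
	static int
	my_recovery_check(void *record, struct ia64_sal_os_state *sos)
	{
		/* inspect the SAL record and sos->proc_state_param ... */
		return 0;
	}

	ia64_reg_MCA_extension(my_recovery_check);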

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/asm-offsets.c |   40 +-
 arch/ia64/kernel/mca.c         |  821 ++++++++++++++----------
 arch/ia64/kernel/mca_asm.S     | 1358 +++++++++++++++++++++-------------------
 arch/ia64/kernel/mca_drv.c     |   37 +-
 include/asm-ia64/mca.h         |  102 +--
 include/asm-ia64/mca_asm.h     |  125 +---
 6 files changed, 1363 insertions(+), 1120 deletions(-)

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 7d1ae2982c53..f6a234289341 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -211,17 +211,41 @@ void foo(void)
 #endif
 
 	BLANK();
-	DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET,
-	       offsetof (struct ia64_mca_cpu, proc_state_dump));
-	DEFINE(IA64_MCA_CPU_STACK_OFFSET,
-	       offsetof (struct ia64_mca_cpu, stack));
-	DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET,
-	       offsetof (struct ia64_mca_cpu, stackframe));
-	DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET,
-	       offsetof (struct ia64_mca_cpu, rbstore));
+	DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, mca_stack));
 	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
 	       offsetof (struct ia64_mca_cpu, init_stack));
 	BLANK();
+	DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_ra));
+	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_gp));
+	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+	       offsetof (struct ia64_sal_os_state, pal_min_state));
+	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
+	       offsetof (struct ia64_sal_os_state, proc_state_param));
+	DEFINE(IA64_SAL_OS_STATE_SIZE,
+	       sizeof (struct ia64_sal_os_state));
+	DEFINE(IA64_PMSA_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_gr));
+	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
+	DEFINE(IA64_PMSA_PR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_pr));
+	DEFINE(IA64_PMSA_BR0_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_br0));
+	DEFINE(IA64_PMSA_RSC_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_rsc));
+	DEFINE(IA64_PMSA_IIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_iip));
+	DEFINE(IA64_PMSA_IPSR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ipsr));
+	DEFINE(IA64_PMSA_IFS_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ifs));
+	DEFINE(IA64_PMSA_XIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_xip));
+	BLANK();
+
 	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
 	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
 	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 8d484204a3ff..6dc726ad7137 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -48,6 +48,9 @@
  *            Delete dead variables and functions.
  *            Reorder to remove the need for forward declarations and to consolidate
  *            related code.
+ *
+ * 2005-08-12 Keith Owens <kaos@sgi.com>
+ *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -77,6 +80,8 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
+#include "entry.h"
+
 #if defined(IA64_MCA_DEBUG_INFO)
 # define IA64_MCA_DEBUG(fmt...)	printk(fmt)
 #else
@@ -84,9 +89,7 @@
 #endif
 
 /* Used by mca_asm.S */
-ia64_mca_sal_to_os_state_t	ia64_sal_to_os_handoff_state;
-ia64_mca_os_to_sal_state_t	ia64_os_to_sal_handoff_state;
-u64				ia64_mca_serialize;
+u32				ia64_mca_serialize;
 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
@@ -95,8 +98,10 @@ DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
 unsigned long __per_cpu_mca[NR_CPUS];
 
 /* In mca_asm.S */
-extern void			ia64_monarch_init_handler (void);
-extern void			ia64_slave_init_handler (void);
+extern void			ia64_os_init_dispatch_monarch (void);
+extern void			ia64_os_init_dispatch_slave (void);
+
+static int monarch_cpu = -1;
 
 static ia64_mc_info_t		ia64_mc_info;
 
@@ -234,7 +239,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
  *  This function retrieves a specified error record type from SAL
  *  and wakes up any processes waiting for error records.
  *
- *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
+ *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE)
+ *              FIXME: remove MCA and irq_safe.
  */
 static void
 ia64_mca_log_sal_error_record(int sal_info_type)
@@ -242,7 +248,7 @@ ia64_mca_log_sal_error_record(int sal_info_type)
 	u8 *buffer;
 	sal_log_record_header_t *rh;
 	u64 size;
-	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT;
+	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
 #ifdef IA64_MCA_DEBUG_INFO
 	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
 #endif
@@ -330,182 +336,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 
 #endif /* CONFIG_ACPI */
 
-static void
-show_min_state (pal_min_state_area_t *minstate)
-{
-	u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri;
-	u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri;
-
-	printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits);
-	printk("pr\t\t%016lx\n", minstate->pmsa_pr);
-	printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0);
-	printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc);
-	printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip);
-	printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr);
-	printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs);
-	printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip);
-	printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr);
-	printk("xfs\t\t%016lx\n", minstate->pmsa_xfs);
-	printk("b1\t\t%016lx ", minstate->pmsa_br1);
-	print_symbol("%s\n", minstate->pmsa_br1);
-
-	printk("\nstatic registers r0-r15:\n");
-	printk(" r0- 3 %016lx %016lx %016lx %016lx\n",
-	       0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]);
-	printk(" r4- 7 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[3], minstate->pmsa_gr[4],
-	       minstate->pmsa_gr[5], minstate->pmsa_gr[6]);
-	printk(" r8-11 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[7], minstate->pmsa_gr[8],
-	       minstate->pmsa_gr[9], minstate->pmsa_gr[10]);
-	printk("r12-15 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[11], minstate->pmsa_gr[12],
-	       minstate->pmsa_gr[13], minstate->pmsa_gr[14]);
-
-	printk("\nbank 0:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1],
-	       minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5],
-	       minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9],
-	       minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13],
-	       minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]);
-
-	printk("\nbank 1:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1],
-	       minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5],
-	       minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9],
-	       minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13],
-	       minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]);
-}
-
-static void
-fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw)
-{
-	u64 *dst_banked, *src_banked, bit, shift, nat_bits;
-	int i;
-
-	/*
-	 * First, update the pt-regs and switch-stack structures with the contents stored
-	 * in the min-state area:
-	 */
-	if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) {
-		pt->cr_ipsr = ms->pmsa_xpsr;
-		pt->cr_iip = ms->pmsa_xip;
-		pt->cr_ifs = ms->pmsa_xfs;
-	} else {
-		pt->cr_ipsr = ms->pmsa_ipsr;
-		pt->cr_iip = ms->pmsa_iip;
-		pt->cr_ifs = ms->pmsa_ifs;
-	}
-	pt->ar_rsc = ms->pmsa_rsc;
-	pt->pr = ms->pmsa_pr;
-	pt->r1 = ms->pmsa_gr[0];
-	pt->r2 = ms->pmsa_gr[1];
-	pt->r3 = ms->pmsa_gr[2];
-	sw->r4 = ms->pmsa_gr[3];
-	sw->r5 = ms->pmsa_gr[4];
-	sw->r6 = ms->pmsa_gr[5];
-	sw->r7 = ms->pmsa_gr[6];
-	pt->r8 = ms->pmsa_gr[7];
-	pt->r9 = ms->pmsa_gr[8];
-	pt->r10 = ms->pmsa_gr[9];
-	pt->r11 = ms->pmsa_gr[10];
-	pt->r12 = ms->pmsa_gr[11];
-	pt->r13 = ms->pmsa_gr[12];
-	pt->r14 = ms->pmsa_gr[13];
-	pt->r15 = ms->pmsa_gr[14];
-	dst_banked = &pt->r16;		/* r16-r31 are contiguous in struct pt_regs */
-	src_banked = ms->pmsa_bank1_gr;
-	for (i = 0; i < 16; ++i)
-		dst_banked[i] = src_banked[i];
-	pt->b0 = ms->pmsa_br0;
-	sw->b1 = ms->pmsa_br1;
-
-	/* construct the NaT bits for the pt-regs structure: */
-#	define PUT_NAT_BIT(dst, addr)					\
-	do {								\
-		bit = nat_bits & 1; nat_bits >>= 1;			\
-		shift = ((unsigned long) addr >> 3) & 0x3f;		\
-		dst = ((dst) & ~(1UL << shift)) | (bit << shift);	\
-	} while (0)
-
-	/* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */
-	shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f;
-	nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift));
-
-	PUT_NAT_BIT(sw->caller_unat, &pt->r1);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r2);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r3);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r4);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r5);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r6);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r7);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r8);	PUT_NAT_BIT(sw->caller_unat, &pt->r9);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r10);	PUT_NAT_BIT(sw->caller_unat, &pt->r11);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r12);	PUT_NAT_BIT(sw->caller_unat, &pt->r13);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r14);	PUT_NAT_BIT(sw->caller_unat, &pt->r15);
-	nat_bits >>= 16;	/* skip over bank0 NaT bits */
-	PUT_NAT_BIT(sw->caller_unat, &pt->r16);	PUT_NAT_BIT(sw->caller_unat, &pt->r17);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r18);	PUT_NAT_BIT(sw->caller_unat, &pt->r19);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r20);	PUT_NAT_BIT(sw->caller_unat, &pt->r21);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r22);	PUT_NAT_BIT(sw->caller_unat, &pt->r23);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r24);	PUT_NAT_BIT(sw->caller_unat, &pt->r25);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r26);	PUT_NAT_BIT(sw->caller_unat, &pt->r27);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r28);	PUT_NAT_BIT(sw->caller_unat, &pt->r29);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r30);	PUT_NAT_BIT(sw->caller_unat, &pt->r31);
-}
-
-static void
-init_handler_platform (pal_min_state_area_t *ms,
-		       struct pt_regs *pt, struct switch_stack *sw)
-{
-	struct unw_frame_info info;
-
-	/* if a kernel debugger is available call it here else just dump the registers */
-
-	/*
-	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
-	 * generated via the BMC's command-line interface, but since the console is on the
-	 * same serial line, the user will need some time to switch out of the BMC before
-	 * the dump begins.
-	 */
-	printk("Delaying for 5 seconds...\n");
-	udelay(5*1000000);
-	show_min_state(ms);
-
-	printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm);
-	fetch_min_state(ms, pt, sw);
-	unw_init_from_interruption(&info, current, pt, sw);
-	ia64_do_show_stack(&info, NULL);
-
-	if (read_trylock(&tasklist_lock)) {
-		struct task_struct *g, *t;
-		do_each_thread (g, t) {
-			if (t == current)
-				continue;
-
-			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
-			show_stack(t, NULL);
-		} while_each_thread (g, t);
-	}
-
-	printk("\nINIT dump complete.  Please reboot now.\n");
-	while (1);			/* hang city if no debugger */
-}
-
 #ifdef CONFIG_ACPI
 /*
  * ia64_mca_register_cpev
@@ -647,42 +477,6 @@ ia64_mca_cmc_vector_enable_keventd(void *unused)
 	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0);
 }
 
-/*
- * ia64_mca_wakeup_ipi_wait
- *
- *	Wait for the inter-cpu interrupt to be sent by the
- *	monarch processor once it is done with handling the
- *	MCA.
- *
- *  Inputs  :   None
- *  Outputs :   None
- */
-static void
-ia64_mca_wakeup_ipi_wait(void)
-{
-	int	irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6);
-	int	irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f);
-	u64	irr = 0;
-
-	do {
-		switch(irr_num) {
-		      case 0:
-			irr = ia64_getreg(_IA64_REG_CR_IRR0);
-			break;
-		      case 1:
-			irr = ia64_getreg(_IA64_REG_CR_IRR1);
-			break;
-		      case 2:
-			irr = ia64_getreg(_IA64_REG_CR_IRR2);
-			break;
-		      case 3:
-			irr = ia64_getreg(_IA64_REG_CR_IRR3);
-			break;
-		}
-		cpu_relax();
-	} while (!(irr & (1UL << irr_bit))) ;
-}
-
 /*
  * ia64_mca_wakeup
  *
@@ -748,11 +542,9 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
 	 */
 	ia64_sal_mc_rendez();
 
-	/* Wait for the wakeup IPI from the monarch
-	 * This waiting is done by polling on the wakeup-interrupt
-	 * vector bit in the processor's IRRs
-	 */
-	ia64_mca_wakeup_ipi_wait();
+	/* Wait for the monarch cpu to exit. */
+	while (monarch_cpu != -1)
+	       cpu_relax();	/* spin until monarch leaves */
 
 	/* Enable all interrupts */
 	local_irq_restore(flags);
@@ -780,53 +572,13 @@ ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
 	return IRQ_HANDLED;
 }
 
-/*
- * ia64_return_to_sal_check
- *
- *	This is function called before going back from the OS_MCA handler
- *	to the OS_MCA dispatch code which finally takes the control back
- *	to the SAL.
- *	The main purpose of this routine is to setup the OS_MCA to SAL
- *	return state which can be used by the OS_MCA dispatch code
- *	just before going back to SAL.
- *
- *  Inputs  :   None
- *  Outputs :   None
- */
-
-static void
-ia64_return_to_sal_check(int recover)
-{
-
-	/* Copy over some relevant stuff from the sal_to_os_mca_handoff
-	 * so that it can be used at the time of os_mca_to_sal_handoff
-	 */
-	ia64_os_to_sal_handoff_state.imots_sal_gp =
-		ia64_sal_to_os_handoff_state.imsto_sal_gp;
-
-	ia64_os_to_sal_handoff_state.imots_sal_check_ra =
-		ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
-
-	if (recover)
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
-	else
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
-
-	/* Default = tell SAL to return to same context */
-	ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
-
-	ia64_os_to_sal_handoff_state.imots_new_min_state =
-		(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
-
-}
-
 /* Function pointer for extra MCA recovery */
 int (*ia64_mca_ucmc_extension)
-	(void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
+	(void*,struct ia64_sal_os_state*)
 	= NULL;
 
 int
-ia64_reg_MCA_extension(void *fn)
+ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *))
 {
 	if (ia64_mca_ucmc_extension)
 		return 1;
@@ -845,8 +597,321 @@ ia64_unreg_MCA_extension(void)
 EXPORT_SYMBOL(ia64_reg_MCA_extension);
 EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 
+
+static inline void
+copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+{
+	u64 fslot, tslot, nat;
+	*tr = *fr;
+	fslot = ((unsigned long)fr >> 3) & 63;
+	tslot = ((unsigned long)tr >> 3) & 63;
+	*tnat &= ~(1UL << tslot);
+	nat = (fnat >> fslot) & 1;
+	*tnat |= (nat << tslot);
+}
+
+/* On entry to this routine, we are running on the per cpu stack, see
+ * mca_asm.h.  The original stack has not been touched by this event.  Some of
+ * the original stack's registers will be in the RBS on this stack.  This stack
+ * also contains a partial pt_regs and switch_stack, the rest of the data is in
+ * PAL minstate.
+ *
+ * The first thing to do is modify the original stack to look like a blocked
+ * task so we can run backtrace on the original task.  Also mark the per cpu
+ * stack as current to ensure that we use the correct task state, it also means
+ * that we can do backtrace on the MCA/INIT handler code itself.
+ */
+
+static task_t *
+ia64_mca_modify_original_stack(struct pt_regs *regs,
+		const struct switch_stack *sw,
+		struct ia64_sal_os_state *sos,
+		const char *type)
+{
+	char *p, comm[sizeof(current->comm)];
+	ia64_va va;
+	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	task_t *previous_current;
+	struct pt_regs *old_regs;
+	struct switch_stack *old_sw;
+	unsigned size = sizeof(struct pt_regs) +
+			sizeof(struct switch_stack) + 16;
+	u64 *old_bspstore, *old_bsp;
+	u64 *new_bspstore, *new_bsp;
+	u64 old_unat, old_rnat, new_rnat, nat;
+	u64 slots, loadrs = regs->loadrs;
+	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
+	u64 ar_bspstore = regs->ar_bspstore;
+	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
+	const u64 *bank;
+	const char *msg;
+	int cpu = smp_processor_id();
+
+	previous_current = curr_task(cpu);
+	set_curr_task(cpu, current);
+	if ((p = strchr(current->comm, ' ')))
+		*p = '\0';
+
+	/* Best effort attempt to cope with MCA/INIT delivered while in
+	 * physical mode.
+	 */
+	regs->cr_ipsr = ms->pmsa_ipsr;
+	if (ia64_psr(regs)->dt == 0) {
+		va.l = r12;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r12 = va.l;
+		}
+		va.l = r13;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r13 = va.l;
+		}
+	}
+	if (ia64_psr(regs)->rt == 0) {
+		va.l = ar_bspstore;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bspstore = va.l;
+		}
+		va.l = ar_bsp;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bsp = va.l;
+		}
+	}
+
+	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
+	 * have been copied to the old stack, the old stack may fail the
+	 * validation tests below.  So ia64_old_stack() must restore the dirty
+	 * registers from the new stack.  The old and new bspstore probably
+	 * have different alignments, so loadrs calculated on the old bsp
+	 * cannot be used to restore from the new bsp.  Calculate a suitable
+	 * loadrs for the new stack and save it in the new pt_regs, where
+	 * ia64_old_stack() can get it.
+	 */
+	old_bspstore = (u64 *)ar_bspstore;
+	old_bsp = (u64 *)ar_bsp;
+	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
+	new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
+	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
+
+	/* Verify the previous stack state before we change it */
+	if (user_mode(regs)) {
+		msg = "occurred in user space";
+		goto no_mod;
+	}
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+	if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent r12 and r13";
+		goto no_mod;
+	}
+	if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bspstore and r13";
+		goto no_mod;
+	}
+	va.p = old_bspstore;
+	if (va.f.reg < 5) {
+		msg = "old_bspstore is in the wrong region";
+		goto no_mod;
+	}
+	if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bsp and r13";
+		goto no_mod;
+	}
+	size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+	if (ar_bspstore + size > r12) {
+		msg = "no room for blocked state";
+		goto no_mod;
+	}
+
+	/* Change the comm field on the MCA/INIT task to include the pid that
+	 * was interrupted, it makes for easier debugging.  If that pid was 0
+	 * (swapper or nested MCA/INIT) then use the start of the previous comm
+	 * field suffixed with its cpu.
+	 */
+	if (previous_current->pid)
+		snprintf(comm, sizeof(comm), "%s %d",
+			current->comm, previous_current->pid);
+	else {
+		int l;
+		if ((p = strchr(previous_current->comm, ' ')))
+			l = p - previous_current->comm;
+		else
+			l = strlen(previous_current->comm);
+		snprintf(comm, sizeof(comm), "%s %*s %d",
+			current->comm, l, previous_current->comm,
+			previous_current->thread_info->cpu);
+	}
+	memcpy(current->comm, comm, sizeof(current->comm));
+
+	/* Make the original task look blocked.  First stack a struct pt_regs,
+	 * describing the state at the time of interrupt.  mca_asm.S built a
+	 * partial pt_regs, copy it and fill in the blanks using minstate.
+	 */
+	p = (char *)r12 - sizeof(*regs);
+	old_regs = (struct pt_regs *)p;
+	memcpy(old_regs, regs, sizeof(*regs));
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		old_regs->cr_iip = ms->pmsa_iip;
+		old_regs->cr_ipsr = ms->pmsa_ipsr;
+		old_regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		old_regs->cr_iip = ms->pmsa_xip;
+		old_regs->cr_ipsr = ms->pmsa_xpsr;
+		old_regs->cr_ifs = ms->pmsa_xfs;
+	}
+	old_regs->pr = ms->pmsa_pr;
+	old_regs->b0 = ms->pmsa_br0;
+	old_regs->loadrs = loadrs;
+	old_regs->ar_rsc = ms->pmsa_rsc;
+	old_unat = old_regs->ar_unat;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
+	if (ia64_psr(old_regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+
+	/* Next stack a struct switch_stack.  mca_asm.S built a partial
+	 * switch_stack, copy it and fill in the blanks using pt_regs and
+	 * minstate.
+	 *
+	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
+	 * ar.pfs is set to 0.
+	 *
+	 * unwind.c::unw_unwind() does special processing for interrupt frames.
+	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
+	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs.  Not
+	 * that this is documented, of course.  Set PRED_NON_SYSCALL in the
+	 * switch_stack on the original stack so it will unwind correctly when
+	 * unwind.c reads pt_regs.
+	 *
+	 * thread.ksp is updated to point to the synthesized switch_stack.
+	 */
+	p -= sizeof(struct switch_stack);
+	old_sw = (struct switch_stack *)p;
+	memcpy(old_sw, sw, sizeof(*sw));
+	old_sw->caller_unat = old_unat;
+	old_sw->ar_fpsr = old_regs->ar_fpsr;
+	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
+	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
+	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
+	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
+	old_sw->b0 = (u64)ia64_leave_kernel;
+	old_sw->b1 = ms->pmsa_br1;
+	old_sw->ar_pfs = 0;
+	old_sw->ar_unat = old_unat;
+	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
+	previous_current->thread.ksp = (u64)p - 16;
+
+	/* Finally copy the original stack's registers back to its RBS.
+	 * Registers from ar.bspstore through ar.bsp at the time of the event
+	 * are in the current RBS, copy them back to the original stack.  The
+	 * copy must be done register by register because the original bspstore
+	 * and the current one have different alignments, so the saved RNAT
+	 * data occurs at different places.
+	 *
+	 * mca_asm does cover, so the old_bsp already includes all registers at
+	 * the time of MCA/INIT.  It also does flushrs, so all registers before
+	 * this function have been written to backing store on the MCA/INIT
+	 * stack.
+	 */
+	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
+	old_rnat = regs->ar_rnat;
+	while (slots--) {
+		if (ia64_rse_is_rnat_slot(new_bspstore)) {
+			new_rnat = ia64_get_rnat(new_bspstore++);
+		}
+		if (ia64_rse_is_rnat_slot(old_bspstore)) {
+			*old_bspstore++ = old_rnat;
+			old_rnat = 0;
+		}
+		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
+		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
+		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
+		*old_bspstore++ = *new_bspstore++;
+	}
+	old_sw->ar_bspstore = (unsigned long)old_bspstore;
+	old_sw->ar_rnat = old_rnat;
+
+	sos->prev_task = previous_current;
+	return previous_current;
+
+no_mod:
+	printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+			smp_processor_id(), type, msg);
+	return previous_current;
+}
+
+/* The monarch/slave interaction is based on monarch_cpu and requires that all
+ * slaves have entered rendezvous before the monarch leaves.  If any cpu has
+ * not entered rendezvous yet then wait a bit.  The assumption is that any
+ * slave that has not rendezvoused after a reasonable time is never going to do
+ * so.  In this context, slave includes cpus that respond to the MCA rendezvous
+ * interrupt, as well as cpus that receive the INIT slave event.
+ */
+
+static void
+ia64_wait_for_slaves(int monarch)
+{
+	int c, wait = 0;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(1000);		/* short wait first */
+			wait = 1;
+			break;
+		}
+	}
+	if (!wait)
+		return;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
+			break;
+		}
+	}
+}
+
 /*
- * ia64_mca_ucmc_handler
+ * ia64_mca_handler
  *
  *	This is uncorrectable machine check handler called from OS_MCA
  *	dispatch code which is in turn called from SAL_CHECK().
@@ -857,16 +922,28 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
  *	further MCA logging is enabled by clearing logs.
  *	Monarch also has the duty of sending wakeup-IPIs to pull the
  *	slave processors out of rendezvous spinloop.
- *
- *  Inputs  :   None
- *  Outputs :   None
  */
 void
-ia64_mca_ucmc_handler(void)
+ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
+		 struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
-		&ia64_sal_to_os_handoff_state.proc_state_param;
-	int recover; 
+		&sos->proc_state_param;
+	int recover, cpu = smp_processor_id();
+	task_t *previous_current;
+
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
+	monarch_cpu = cpu;
+	ia64_wait_for_slaves(cpu);
+
+	/* Wakeup all the processors which are spinning in the rendezvous loop.
+	 * They will leave SAL, then spin in the OS with interrupts disabled
+	 * until this monarch cpu leaves the MCA handler.  That gets control
+	 * back to the OS so we can backtrace the other cpus, backtrace when
+	 * spinning in SAL does not work.
+	 */
+	ia64_mca_wakeup_all();
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -874,25 +951,20 @@ ia64_mca_ucmc_handler(void)
 	/* TLB error is only exist in this SAL error record */
 	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
 	/* other error recovery */
-	   || (ia64_mca_ucmc_extension 
+	   || (ia64_mca_ucmc_extension
 		&& ia64_mca_ucmc_extension(
 			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
-			&ia64_sal_to_os_handoff_state,
-			&ia64_os_to_sal_handoff_state)); 
+			sos));
 
 	if (recover) {
 		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
 		rh->severity = sal_log_severity_corrected;
 		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+		sos->os_status = IA64_MCA_CORRECTED;
 	}
-	/*
-	 *  Wakeup all the processors which are spinning in the rendezvous
-	 *  loop.
-	 */
-	ia64_mca_wakeup_all();
 
-	/* Return to SAL */
-	ia64_return_to_sal_check(recover);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
 }
 
 static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
@@ -1116,34 +1188,114 @@ ia64_mca_cpe_poll (unsigned long dummy)
 /*
  * C portion of the OS INIT handler
  *
- * Called from ia64_monarch_init_handler
- *
- * Inputs: pointer to pt_regs where processor info was saved.
+ * Called from ia64_os_init_dispatch
  *
- * Returns:
- *   0 if SAL must warm boot the System
- *   1 if SAL must return to interrupted context using PAL_MC_RESUME
+ * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
+ * this event.  This code is used for both monarch and slave INIT events, see
+ * sos->monarch.
  *
+ * All INIT events switch to the INIT stack and change the previous process to
+ * blocked status.  If one of the INIT events is the monarch then we are
+ * probably processing the nmi button/command.  Use the monarch cpu to dump all
+ * the processes.  The slave INIT events all spin until the monarch cpu
+ * returns.  We can also get INIT slave events for MCA, in which case the MCA
+ * process is the monarch.
  */
+
 void
-ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw)
+ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
+		  struct ia64_sal_os_state *sos)
 {
-	pal_min_state_area_t *ms;
+	static atomic_t slaves;
+	static atomic_t monarchs;
+	task_t *previous_current;
+	int cpu = smp_processor_id(), c;
+	struct task_struct *g, *t;
 
-	oops_in_progress = 1;	/* avoid deadlock in printk, but it makes recovery dodgy */
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
 	console_loglevel = 15;	/* make sure printks make it to console */
 
-	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n",
-		ia64_sal_to_os_handoff_state.proc_state_param);
+	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
-	/*
-	 * Address of minstate area provided by PAL is physical,
-	 * uncacheable (bit 63 set). Convert to Linux virtual
-	 * address in region 6.
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
+	sos->os_status = IA64_INIT_RESUME;
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * slaves.  The last slave that enters is promoted to be a monarch.
+	 * Remove this code in September 2006; that gives platforms a year to
+	 * fix their proms and get their customers updated.
 	 */
-	ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61));
+	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
+		printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		       __FUNCTION__, cpu);
+		atomic_dec(&slaves);
+		sos->monarch = 1;
+	}
 
-	init_handler_platform(ms, pt, sw);	/* call platform specific routines */
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * monarchs.  Second and subsequent monarchs are demoted to slaves.
+	 * Remove this code in September 2006; that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
+		printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+			       __FUNCTION__, cpu);
+		atomic_dec(&monarchs);
+		sos->monarch = 0;
+	}
+
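+	/* Monarch/slave handshake: each slave checks in, spins first for the
+	 * monarch to arrive and then for it to leave; the monarch waits for
+	 * all slaves, dumps state, then clears monarch_cpu to release them.
+	 */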
+	if (!sos->monarch) {
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+		while (monarch_cpu == -1)
+		       cpu_relax();	/* spin until monarch enters */
+		while (monarch_cpu != -1)
+		       cpu_relax();	/* spin until monarch leaves */
+		printk("Slave on cpu %d returning to normal service.\n", cpu);
+		set_curr_task(cpu, previous_current);
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+		atomic_dec(&slaves);
+		return;
+	}
+
+	monarch_cpu = cpu;
+
+	/*
+	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000), INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	ia64_wait_for_slaves(cpu);
+	printk(KERN_ERR "Processes interrupted by INIT -");
+	for_each_online_cpu(c) {
+		struct ia64_sal_os_state *s;
+		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
+		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
+		g = s->prev_task;
+		if (g) {
+			if (g->pid)
+				printk(" %d", g->pid);
+			else
+				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
+		}
+	}
+	printk("\n\n");
+	if (read_trylock(&tasklist_lock)) {
+		do_each_thread (g, t) {
+			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+			show_stack(t, NULL);
+		} while_each_thread (g, t);
+		read_unlock(&tasklist_lock);
+	}
+	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	atomic_dec(&monarchs);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
+	return;
 }
 
 static int __init
@@ -1193,6 +1345,34 @@ static struct irqaction mca_cpep_irqaction = {
 };
 #endif /* CONFIG_ACPI */
 
+/* Minimal format of the MCA/INIT stacks.  The pseudo processes that run on
+ * these stacks can never sleep, cannot return from the kernel to user space,
+ * and do not appear in a normal ps listing.  So there is no need to format
+ * most of the fields.
+ */
+
+static void
+format_mca_init_stack(void *mca_data, unsigned long offset,
+		const char *type, int cpu)
+{
+	struct task_struct *p = (struct task_struct *)((char *)mca_data + offset);
+	struct thread_info *ti;
+	memset(p, 0, KERNEL_STACK_SIZE);
+	ti = (struct thread_info *)((char *)p + IA64_TASK_SIZE);
+	ti->flags = _TIF_MCA_INIT;
+	ti->preempt_count = 1;
+	ti->task = p;
+	ti->cpu = cpu;
+	p->thread_info = ti;
+	p->state = TASK_UNINTERRUPTIBLE;
+	__set_bit(cpu, &p->cpus_allowed);
+	INIT_LIST_HEAD(&p->tasks);
+	p->parent = p->real_parent = p->group_leader = p;
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
+	strncpy(p->comm, type, sizeof(p->comm)-1);
+}
+
 /* Do per-CPU MCA-related initialization.  */
 
 void __devinit
@@ -1205,19 +1385,28 @@ ia64_mca_cpu_init(void *cpu_data)
 		int cpu;
 
 		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
-					 * NR_CPUS);
+					 * NR_CPUS + KERNEL_STACK_SIZE);
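+		/* Round the bootmem block up to a KERNEL_STACK_SIZE boundary;
+		 * for a power-of-two size, x & -size == x & ~(size - 1). */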
+		mca_data = (void *)(((unsigned long)mca_data +
+					KERNEL_STACK_SIZE - 1) &
+				(-KERNEL_STACK_SIZE));
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, mca_stack),
+					"MCA", cpu);
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, init_stack),
+					"INIT", cpu);
 			__per_cpu_mca[cpu] = __pa(mca_data);
 			mca_data += sizeof(struct ia64_mca_cpu);
 		}
 	}
 
-        /*
-         * The MCA info structure was allocated earlier and its
-         * physical address saved in __per_cpu_mca[cpu].  Copy that
-         * address * to ia64_mca_data so we can access it as a per-CPU
-         * variable.
-         */
+	/*
+	 * The MCA info structure was allocated earlier and its
+	 * physical address saved in __per_cpu_mca[cpu].  Copy that
+	 * address to ia64_mca_data so we can access it as a per-CPU
+	 * variable.
+	 */
 	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
 
 	/*
@@ -1227,11 +1416,11 @@ ia64_mca_cpu_init(void *cpu_data)
 	__get_cpu_var(ia64_mca_per_cpu_pte) =
 		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
 
-        /*
-         * Also, stash away a copy of the PAL address and the PTE
-         * needed to map it.
-         */
-        pal_vaddr = efi_get_pal_addr();
+	/*
+	 * Also, stash away a copy of the PAL address and the PTE
+	 * needed to map it.
+	 */
+	pal_vaddr = efi_get_pal_addr();
 	if (!pal_vaddr)
 		return;
 	__get_cpu_var(ia64_mca_pal_base) =
@@ -1263,8 +1452,8 @@ ia64_mca_cpu_init(void *cpu_data)
 void __init
 ia64_mca_init(void)
 {
-	ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler;
-	ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler;
+	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
+	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
 	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
 	int i;
 	s64 rc;
@@ -1342,9 +1531,9 @@ ia64_mca_init(void)
 	 * XXX - disable SAL checksum by setting size to 0, should be
 	 * size of the actual init handler in mca_asm.S.
 	 */
-	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(mon_init_ptr->fp);
+	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(init_hldlr_ptr_monarch->fp);
 	ia64_mc_info.imi_monarch_init_handler_size	= 0;
-	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(slave_init_ptr->fp);
+	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(init_hldlr_ptr_slave->fp);
 	ia64_mc_info.imi_slave_init_handler_size	= 0;
 
 	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index ef3fd7265b67..499a065f4e60 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -16,6 +16,9 @@
 // 04/11/12 Russ Anderson <rja@sgi.com>
 //		   Added per cpu MCA/INIT stack save areas.
 //
+// 12/08/05 Keith Owens <kaos@sgi.com>
+//		   Use per cpu MCA/INIT stacks for all data.
+//
 #include <linux/config.h>
 #include <linux/threads.h>
 
@@ -25,96 +28,23 @@
 #include <asm/mca_asm.h>
 #include <asm/mca.h>
 
-/*
- * When we get a machine check, the kernel stack pointer is no longer
- * valid, so we need to set a new stack pointer.
- */
-#define	MINSTATE_PHYS	/* Make sure stack access is physical for MINSTATE */
-
-/*
- * Needed for return context to SAL
- */
-#define IA64_MCA_SAME_CONTEXT	0
-#define IA64_MCA_COLD_BOOT	-2
-
-#include "minstate.h"
-
-/*
- * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
- *		1. GR1 = OS GP
- *		2. GR8 = PAL_PROC physical address
- *		3. GR9 = SAL_PROC physical address
- *		4. GR10 = SAL GP (physical)
- *		5. GR11 = Rendez state
- *		6. GR12 = Return address to location within SAL_CHECK
- */
-#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp)		\
-	LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \
-	st8	[_tmp]=r1,0x08;;			\
-	st8	[_tmp]=r8,0x08;;			\
-	st8	[_tmp]=r9,0x08;;			\
-	st8	[_tmp]=r10,0x08;;			\
-	st8	[_tmp]=r11,0x08;;			\
-	st8	[_tmp]=r12,0x08;;			\
-	st8	[_tmp]=r17,0x08;;			\
-	st8	[_tmp]=r18,0x08
-
-/*
- * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
- * (p6) is executed if we never entered virtual mode (TLB error)
- * (p7) is executed if we entered virtual mode as expected (normal case)
- *	1. GR8 = OS_MCA return status
- *	2. GR9 = SAL GP (physical)
- *	3. GR10 = 0/1 returning same/new context
- *	4. GR22 = New min state save area pointer
- *	returns ptr to SAL rtn save loc in _tmp
- */
-#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp)	\
-	movl	_tmp=ia64_os_to_sal_handoff_state;;	\
-	DATA_VA_TO_PA(_tmp);;				\
-	ld8	r8=[_tmp],0x08;;			\
-	ld8	r9=[_tmp],0x08;;			\
-	ld8	r10=[_tmp],0x08;;			\
-	ld8	r22=[_tmp],0x08;;
-	// now _tmp is pointing to SAL rtn save location
-
-/*
- * COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state
- *	imots_os_status=IA64_MCA_COLD_BOOT
- *	imots_sal_gp=SAL GP
- *	imots_context=IA64_MCA_SAME_CONTEXT
- *	imots_new_min_state=Min state save area pointer
- *	imots_sal_check_ra=Return address to location within SAL_CHECK
- *
- */
-#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\
-	movl	tmp=IA64_MCA_COLD_BOOT;					\
-	movl	sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state);	\
-	movl	os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);;	\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff],48;;				\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	movl	tmp=IA64_MCA_SAME_CONTEXT;;				\
-	st8	[os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff],-8;;				\
-	st8     [os_to_sal_handoff]=tmp,8;;				\
-	ld8	tmp=[sal_to_os_handoff];;				\
-	st8     [os_to_sal_handoff]=tmp;;
+#include "entry.h"
 
 #define GET_IA64_MCA_DATA(reg)						\
 	GET_THIS_PADDR(reg, ia64_mca_data)				\
 	;;								\
 	ld8 reg=[reg]
 
-	.global ia64_os_mca_dispatch
-	.global ia64_os_mca_dispatch_end
-	.global ia64_sal_to_os_handoff_state
-	.global	ia64_os_to_sal_handoff_state
 	.global ia64_do_tlb_purge
+	.global ia64_os_mca_dispatch
+	.global ia64_os_init_dispatch_monarch
+	.global ia64_os_init_dispatch_slave
 
 	.text
 	.align 16
 
+//StartMain////////////////////////////////////////////////////////////////////
+
 /*
  * Just the TLB purge part is moved to a separate function
  * so we can re-use the code for cpu hotplug code as well
@@ -207,34 +137,31 @@ ia64_do_tlb_purge:
 	br.sptk.many b1
 	;;
 
-ia64_os_mca_dispatch:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
 
+ia64_os_mca_dispatch:
 	// Serialize all MCA processing
 	mov	r3=1;;
 	LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
 ia64_os_mca_spin:
-	xchg8	r4=[r2],r3;;
+	xchg4	r4=[r2],r3;;
 	cmp.ne	p6,p0=r4,r0
 (p6)	br ia64_os_mca_spin
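+	// (ia64_mca_serialize is a word-sized spinlock: xchg4 acquires it
+	//  here and st4.rel releases it just before the return to SAL)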
 
-	// Save the SAL to OS MCA handoff state as defined
-	// by SAL SPEC 3.0
-	// NOTE : The order in which the state gets saved
-	//	  is dependent on the way the C-structure
-	//	  for ia64_mca_sal_to_os_state_t has been
-	//	  defined in include/asm/mca.h
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
-	;;
-
-	// LOG PROCESSOR STATE INFO FROM HERE ON..
-begin_os_mca_dump:
-	br	ia64_os_mca_proc_state_dump;;
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	mov r19=1				// All MCA events are treated as monarch (for now)
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
 
-ia64_os_mca_done_dump:
-
-	LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
+	GET_IA64_MCA_DATA(r2)
+	// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
+	;;
+	add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, r2
 	;;
-	ld8 r18=[r16]		// Get processor state parameter on existing PALE_CHECK.
+	ld8 r18=[r3]				// Get processor state parameter on existing PALE_CHECK.
 	;;
 	tbit.nz p6,p7=r18,60
 (p7)	br.spnt done_tlb_purge_and_reload
@@ -323,624 +250,775 @@ ia64_reload_tr:
 	itr.d dtr[r20]=r16
 	;;
 	srlz.d
-	;;
-	br.sptk.many done_tlb_purge_and_reload
-err:
-	COLD_BOOT_HANDOFF_STATE(r20,r21,r22)
-	br.sptk.many ia64_os_mca_done_restore
 
 done_tlb_purge_and_reload:
 
-	// Setup new stack frame for OS_MCA handling
-	GET_IA64_MCA_DATA(r2)
-	;;
-	add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
-	add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2
-	;;
-	rse_switch_context(r6,r3,r2);;	// RSC management in this new context
+	// switch to per cpu MCA stack
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
 
+	// This must be done in physical mode
 	GET_IA64_MCA_DATA(r2)
 	;;
-	add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2
-	;;
-	mov r12=r2		// establish new stack-pointer
+	mov r7=r2
 
         // Enter virtual mode from physical mode
 	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
-ia64_os_mca_virtual_begin:
+
+	// This code returns to SAL via SOS r2; in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_mca_virtual_begin be a global function.
+ENTRY(ia64_os_mca_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
+	;;
 
 	// Call virtual mode handler
-	movl		r2=ia64_mca_ucmc_handler;;
-	mov		b6=r2;;
-	br.call.sptk.many    b0=b6;;
-.ret0:
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
+	;;
+	add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_mca_handler
+
 	// Revert back to physical mode before going back to SAL
 	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
 ia64_os_mca_virtual_end:
 
-	// restore the original stack frame here
+END(ia64_os_mca_virtual_begin)
+
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the MCA handler frame
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
+
+	// release lock
+	LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
+	st4.rel		[r3]=r0
+
+	br		b0
+
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
+
+//
+// SAL to OS entry point for INIT on all processors.  This has been defined for
+// registration purposes with SAL as a part of ia64_mca_init.  Monarch and
+// slave INIT have identical processing, except for the value of the
+// sos->monarch flag in r19.
+//
+
+ia64_os_init_dispatch_monarch:
+	mov r19=1				// Bow, bow, ye lower middle classes!
+	br.sptk ia64_os_init_dispatch
+
+ia64_os_init_dispatch_slave:
+	mov r19=0				// <igor>yeth, mathter</igor>
+
+ia64_os_init_dispatch:
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
+
+	// switch to per cpu INIT stack
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
+
+	// This must be done in physical mode
 	GET_IA64_MCA_DATA(r2)
 	;;
-	add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
-	;;
-	movl    r4=IA64_PSR_MC
+	mov r7=r2
+
+	// Enter virtual mode from physical mode
+	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4)
+
+	// This code returns to SAL via SOS r2; in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_init_virtual_begin be a global function.
+ENTRY(ia64_os_init_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
 	;;
-	rse_return_context(r4,r3,r2)	// switch from interrupt context for RSE
 
-	// let us restore all the registers from our PSI structure
-	mov	r8=gp
+	// Call virtual mode handler
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
 	;;
-begin_os_mca_restore:
-	br	ia64_os_mca_proc_state_restore;;
+	add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_init_handler
 
-ia64_os_mca_done_restore:
-	OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
-	// branch back to SALE_CHECK
-	ld8		r3=[r2];;
-	mov		b0=r3;;		// SAL_CHECK return address
+	// Revert back to physical mode before going back to SAL
+	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4)
+ia64_os_init_virtual_end:
 
-	// release lock
-	movl		r3=ia64_mca_serialize;;
-	DATA_VA_TO_PA(r3);;
-	st8.rel		[r3]=r0
+END(ia64_os_init_virtual_begin)
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
 
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the INIT handler frame
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
 	br		b0
-	;;
-ia64_os_mca_dispatch_end:
+
 //EndMain//////////////////////////////////////////////////////////////////////
 
+// common defines for the stubs
+#define	ms		r4
+#define	regs		r5
+#define	temp1		r2	/* careful, it overlaps with input registers */
+#define	temp2		r3	/* careful, it overlaps with input registers */
+#define	temp3		r7
+#define	temp4		r14
+
 
 //++
 // Name:
-//      ia64_os_mca_proc_state_dump()
+//	ia64_state_save()
 //
 // Stub Description:
 //
-//       This stub dumps the processor state during MCHK to a data area
+//	Save the state that is not in minstate.  This is sensitive to the layout of
+//	struct ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	The OS to SAL section of struct ia64_sal_os_state is set to a default
+//	value of cold boot (MCA) or warm boot (INIT), returning to the same
+//	context.  ia64_sal_os_state is also used to hold some registers that
+//	need to be saved and restored across the stack switches.
+//
+//	Most input registers to this stub come from PAL/SAL
+//	r1  os gp, physical
+//	r8  pal_proc entry point
+//	r9  sal_proc entry point
+//	r10 sal gp
+//	r11 MCA - rendezvous state, INIT - reason code
+//	r12 sal return address
+//	r17 pal min_state
+//	r18 processor state parameter
+//	r19 monarch flag, set by the caller of this routine
+//
+//	In addition to the SAL to OS state, this routine saves all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those that are already in the PAL minstate area.  This
+//	results in a partial pt_regs and switch_stack, the C code copies the
+//	remaining registers from PAL minstate to pt_regs and switch_stack.  The
+//	resulting structures contain all the state of the original process when
+//	MCA/INIT occurred.
 //
 //--
 
-ia64_os_mca_proc_state_dump:
-// Save bank 1 GRs 16-31 which will be used by c-language code when we switch
-//  to virtual addressing mode.
-	GET_IA64_MCA_DATA(r2)
+ia64_state_save:
+	add regs=MCA_SOS_OFFSET, r3
+	add ms=MCA_SOS_OFFSET+8, r3
+	mov b0=r2		// save return address
+	cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add temp1=temp2, regs	// struct ia64_sal_os_state on MCA or INIT stack
+	add temp2=temp2, ms	// struct ia64_sal_os_state+8 on MCA or INIT stack
+	;;
+	mov regs=temp1		// save the start of sos
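+	// temp1 and temp2 walk the structure in parallel, 16 bytes apart;
+	// the ,16 post-increments interleave two 8-byte stores per
+	// instruction group across consecutive fields.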
+	st8 [temp1]=r1,16	// os_gp
+	st8 [temp2]=r8,16	// pal_proc
+	;;
+	st8 [temp1]=r9,16	// sal_proc
+	st8 [temp2]=r11,16	// rv_rc
+	mov r11=cr.iipa
+	;;
+	st8 [temp1]=r18,16	// proc_state_param
+	st8 [temp2]=r19,16	// monarch
+	mov r6=IA64_KR(CURRENT)
+	;;
+	st8 [temp1]=r12,16	// sal_ra
+	st8 [temp2]=r10,16	// sal_gp
+	mov r12=cr.isr
+	;;
+	st8 [temp1]=r17,16	// pal_min_state
+	st8 [temp2]=r6,16	// prev_IA64_KR_CURRENT
+	mov r6=cr.ifa
+	;;
+	st8 [temp1]=r0,16	// prev_task, starts off as NULL
+	st8 [temp2]=r12,16	// cr.isr
+	mov r12=cr.itir
+	;;
+	st8 [temp1]=r6,16	// cr.ifa
+	st8 [temp2]=r12,16	// cr.itir
+	mov r12=cr.iim
+	;;
+	st8 [temp1]=r11,16	// cr.iipa
+	st8 [temp2]=r12,16	// cr.iim
+	mov r6=cr.iha
+(p1)	mov r12=IA64_MCA_COLD_BOOT
+(p2)	mov r12=IA64_INIT_WARM_BOOT
+	;;
+	st8 [temp1]=r6,16	// cr.iha
+	st8 [temp2]=r12		// os_status, default is cold boot
+	mov r6=IA64_MCA_SAME_CONTEXT
+	;;
+	st8 [temp1]=r6		// context, default is same context
+
+	// Save the pt_regs data that is not in minstate.  The previous code
+	// left regs at sos.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	mov temp3=b6
+	mov temp4=b7
+	add temp2=PT(B7), regs
+	;;
+	st8 [temp1]=temp3,PT(AR_CSD)-PT(B6)		// save b6
+	st8 [temp2]=temp4,PT(AR_SSD)-PT(B7)		// save b7
+	mov temp3=ar.csd
+	mov temp4=ar.ssd
+	cover						// must be last in group
 	;;
-	add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
-	;;
-// save ar.NaT
-	mov		r5=ar.unat                  // ar.unat
-
-// save banked GRs 16-31 along with NaT bits
-	bsw.1;;
-	st8.spill	[r2]=r16,8;;
-	st8.spill	[r2]=r17,8;;
-	st8.spill	[r2]=r18,8;;
-	st8.spill	[r2]=r19,8;;
-	st8.spill	[r2]=r20,8;;
-	st8.spill	[r2]=r21,8;;
-	st8.spill	[r2]=r22,8;;
-	st8.spill	[r2]=r23,8;;
-	st8.spill	[r2]=r24,8;;
-	st8.spill	[r2]=r25,8;;
-	st8.spill	[r2]=r26,8;;
-	st8.spill	[r2]=r27,8;;
-	st8.spill	[r2]=r28,8;;
-	st8.spill	[r2]=r29,8;;
-	st8.spill	[r2]=r30,8;;
-	st8.spill	[r2]=r31,8;;
-
-	mov		r4=ar.unat;;
-	st8		[r2]=r4,8                // save User NaT bits for r16-r31
-	mov		ar.unat=r5                  // restore original unat
-	bsw.0;;
-
-//save BRs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r4
-
-	mov		r3=b0
-	mov		r5=b1
-	mov		r7=b2;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=b3
-	mov		r5=b4
-	mov		r7=b5;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=b6
-	mov		r5=b7;;
-	st8		[r2]=r3,2*8
-	st8		[r4]=r5,2*8;;
-
-cSaveCRs:
-// save CRs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r4
-
-	mov		r3=cr.dcr
-	mov		r5=cr.itm
-	mov		r7=cr.iva;;
-
-	st8		[r2]=r3,8*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;            // 48 byte rements
-
-	mov		r3=cr.pta;;
-	st8		[r2]=r3,8*8;;            // 64 byte rements
-
-// if PSR.ic=0, reading interruption registers causes an illegal operation fault
-	mov		r3=psr;;
-	tbit.nz.unc	p6,p0=r3,PSR_IC;;           // PSI Valid Log bit pos. test
-(p6)    st8     [r2]=r0,9*8+160             // increment by 232 byte inc.
-begin_skip_intr_regs:
-(p6)	br		SkipIntrRegs;;
-
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r6
-
-	mov		r3=cr.ipsr
-	mov		r5=cr.isr
-	mov		r7=r0;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.iip
-	mov		r5=cr.ifa
-	mov		r7=cr.itir;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.iipa
-	mov		r5=cr.ifs
-	mov		r7=cr.iim;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr25;;                   // cr.iha
-	st8		[r2]=r3,160;;               // 160 byte rement
-
-SkipIntrRegs:
-	st8		[r2]=r0,152;;               // another 152 byte .
-
-	add		r4=8,r2                     // duplicate r2 in r4
-	add		r6=2*8,r2                   // duplicate r2 in r6
-
-	mov		r3=cr.lid
-//	mov		r5=cr.ivr                     // cr.ivr, don't read it
-	mov		r7=cr.tpr;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=r0                       // cr.eoi => cr67
-	mov		r5=r0                       // cr.irr0 => cr68
-	mov		r7=r0;;                     // cr.irr1 => cr69
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=r0                       // cr.irr2 => cr70
-	mov		r5=r0                       // cr.irr3 => cr71
-	mov		r7=cr.itv;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=cr.pmv
-	mov		r5=cr.cmcv;;
-	st8		[r2]=r3,7*8
-	st8		[r4]=r5,7*8;;
-
-	mov		r3=r0                       // cr.lrr0 => cr80
-	mov		r5=r0;;                     // cr.lrr1 => cr81
-	st8		[r2]=r3,23*8
-	st8		[r4]=r5,23*8;;
-
-	adds		r2=25*8,r2;;
-
-cSaveARs:
-// save ARs
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2                // duplicate r2 in r6
-
-	mov		r3=ar.k0
-	mov		r5=ar.k1
-	mov		r7=ar.k2;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.k3
-	mov		r5=ar.k4
-	mov		r7=ar.k5;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.k6
-	mov		r5=ar.k7
-	mov		r7=r0;;                     // ar.kr8
-	st8		[r2]=r3,10*8
-	st8		[r4]=r5,10*8
-	st8		[r6]=r7,10*8;;           // rement by 72 bytes
-
-	mov		r3=ar.rsc
-	mov		ar.rsc=r0			    // put RSE in enforced lazy mode
-	mov		r5=ar.bsp
-	;;
-	mov		r7=ar.bspstore;;
-	st8		[r2]=r3,3*8
-	st8		[r4]=r5,3*8
-	st8		[r6]=r7,3*8;;
-
-	mov		r3=ar.rnat;;
-	st8		[r2]=r3,8*13             // increment by 13x8 bytes
-
-	mov		r3=ar.ccv;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.unat;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.fpsr;;
-	st8		[r2]=r3,8*4
-
-	mov		r3=ar.itc;;
-	st8		[r2]=r3,160                 // 160
-
-	mov		r3=ar.pfs;;
-	st8		[r2]=r3,8
-
-	mov		r3=ar.lc;;
-	st8		[r2]=r3,8
-
-	mov		r3=ar.ec;;
-	st8		[r2]=r3
-	add		r2=8*62,r2               //padding
-
-// save RRs
-	mov		ar.lc=0x08-1
-	movl		r4=0x00;;
-
-cStRR:
-	dep.z		r5=r4,61,3;;
-	mov		r3=rr[r5];;
-	st8		[r2]=r3,8
-	add		r4=1,r4
-	br.cloop.sptk.few	cStRR
-	;;
-end_os_mca_dump:
-	br	ia64_os_mca_done_dump;;
+	st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD)	// save ar.csd
+	st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD)		// save ar.ssd
+	mov temp3=ar.unat
+	mov temp4=ar.pfs
+	;;
+	st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT)	// save ar.unat
+	st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS)	// save ar.pfs
+	mov temp3=ar.rnat
+	mov temp4=ar.bspstore
+	;;
+	st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT)	// save ar.rnat
+	st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE)	// save ar.bspstore
+	mov temp3=ar.bsp
+	;;
+	sub temp3=temp3, temp4	// ar.bsp - ar.bspstore
+	mov temp4=ar.fpsr
+	;;
+	shl temp3=temp3,16	// compute ar.rsc to be used for "loadrs"
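+	// (ar.rsc.loadrs occupies bits 29:16, hence the shift; ia64_old_stack
+	//  writes this value to ar.rsc before issuing loadrs)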
+	;;
+	st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS)		// save loadrs
+	st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR)		// save ar.fpsr
+	mov temp3=ar.ccv
+	;;
+	st8 [temp1]=temp3,PT(F7)-PT(AR_CCV)		// save ar.ccv
+	stf.spill [temp2]=f6,PT(F8)-PT(F6)
+	;;
+	stf.spill [temp1]=f7,PT(F9)-PT(F7)
+	stf.spill [temp2]=f8,PT(F10)-PT(F8)
+	;;
+	stf.spill [temp1]=f9,PT(F11)-PT(F9)
+	stf.spill [temp2]=f10
+	;;
+	stf.spill [temp1]=f11
+
+	// Save the switch_stack data that is not in minstate nor pt_regs.  The
+	// previous code left regs at pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	stf.spill [temp1]=f2,32
+	stf.spill [temp2]=f3,32
+	;;
+	stf.spill [temp1]=f4,32
+	stf.spill [temp2]=f5,32
+	;;
+	stf.spill [temp1]=f12,32
+	stf.spill [temp2]=f13,32
+	;;
+	stf.spill [temp1]=f14,32
+	stf.spill [temp2]=f15,32
+	;;
+	stf.spill [temp1]=f16,32
+	stf.spill [temp2]=f17,32
+	;;
+	stf.spill [temp1]=f18,32
+	stf.spill [temp2]=f19,32
+	;;
+	stf.spill [temp1]=f20,32
+	stf.spill [temp2]=f21,32
+	;;
+	stf.spill [temp1]=f22,32
+	stf.spill [temp2]=f23,32
+	;;
+	stf.spill [temp1]=f24,32
+	stf.spill [temp2]=f25,32
+	;;
+	stf.spill [temp1]=f26,32
+	stf.spill [temp2]=f27,32
+	;;
+	stf.spill [temp1]=f28,32
+	stf.spill [temp2]=f29,32
+	;;
+	stf.spill [temp1]=f30,SW(B2)-SW(F30)
+	stf.spill [temp2]=f31,SW(B3)-SW(F31)
+	mov temp3=b2
+	mov temp4=b3
+	;;
+	st8 [temp1]=temp3,16	// save b2
+	st8 [temp2]=temp4,16	// save b3
+	mov temp3=b4
+	mov temp4=b5
+	;;
+	st8 [temp1]=temp3,SW(AR_LC)-SW(B4)	// save b4
+	st8 [temp2]=temp4	// save b5
+	mov temp3=ar.lc
+	;;
+	st8 [temp1]=temp3	// save ar.lc
+
+	// FIXME: Some proms are incorrectly accessing the minstate area as
+	// cached data.  The C code uses region 6, uncached virtual.  Ensure
+	// that there is no cache data lying around for the first 1K of the
+	// minstate area.
+	// Remove this code in September 2006; that gives platforms a year to
+	// fix their proms and get their customers updated.
+
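+	// (four passes of eight "fc" instructions, 32 bytes apart, flush
+	//  4*8*32 = 1K starting at the minstate address in r17)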
+	add r1=32*1,r17
+	add r2=32*2,r17
+	add r3=32*3,r17
+	add r4=32*4,r17
+	add r5=32*5,r17
+	add r6=32*6,r17
+	add r7=32*7,r17
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+
+	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
 
 
 //++
 // Name:
-//       ia64_os_mca_proc_state_restore()
+//	ia64_state_restore()
 //
 // Stub Description:
 //
-//       This is a stub to restore the saved processor state during MCHK
+//	Restore the SAL/OS state.  This is sensitive to the layout of struct
+//	ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	In addition to the SAL to OS state, this routine restores all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those in the PAL minstate area.
 //
 //--
 
-ia64_os_mca_proc_state_restore:
+ia64_state_restore:
+	// Restore the switch_stack data that is not in minstate nor pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET, r3
+	mov b0=r2		// save return address
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add regs=temp2, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	ldf.fill f2=[temp1],32
+	ldf.fill f3=[temp2],32
+	;;
+	ldf.fill f4=[temp1],32
+	ldf.fill f5=[temp2],32
+	;;
+	ldf.fill f12=[temp1],32
+	ldf.fill f13=[temp2],32
+	;;
+	ldf.fill f14=[temp1],32
+	ldf.fill f15=[temp2],32
+	;;
+	ldf.fill f16=[temp1],32
+	ldf.fill f17=[temp2],32
+	;;
+	ldf.fill f18=[temp1],32
+	ldf.fill f19=[temp2],32
+	;;
+	ldf.fill f20=[temp1],32
+	ldf.fill f21=[temp2],32
+	;;
+	ldf.fill f22=[temp1],32
+	ldf.fill f23=[temp2],32
+	;;
+	ldf.fill f24=[temp1],32
+	ldf.fill f25=[temp2],32
+	;;
+	ldf.fill f26=[temp1],32
+	ldf.fill f27=[temp2],32
+	;;
+	ldf.fill f28=[temp1],32
+	ldf.fill f29=[temp2],32
+	;;
+	ldf.fill f30=[temp1],SW(B2)-SW(F30)
+	ldf.fill f31=[temp2],SW(B3)-SW(F31)
+	;;
+	ld8 temp3=[temp1],16	// restore b2
+	ld8 temp4=[temp2],16	// restore b3
+	;;
+	mov b2=temp3
+	mov b3=temp4
+	ld8 temp3=[temp1],SW(AR_LC)-SW(B4)	// restore b4
+	ld8 temp4=[temp2]	// restore b5
+	;;
+	mov b4=temp3
+	mov b5=temp4
+	ld8 temp3=[temp1]	// restore ar.lc
+	;;
+	mov ar.lc=temp3
 
-// Restore bank1 GR16-31
-	GET_IA64_MCA_DATA(r2)
+	// Restore the pt_regs data that is not in minstate.  The previous code
+	// left regs at switch_stack.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	add temp2=PT(B7), regs
+	;;
+	ld8 temp3=[temp1],PT(AR_CSD)-PT(B6)		// restore b6
+	ld8 temp4=[temp2],PT(AR_SSD)-PT(B7)		// restore b7
+	;;
+	mov b6=temp3
+	mov b7=temp4
+	ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD)	// restore ar.csd
+	ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD)		// restore ar.ssd
+	;;
+	mov ar.csd=temp3
+	mov ar.ssd=temp4
+	ld8 temp3=[temp1]				// restore ar.unat
+	add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1
+	ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS)	// restore ar.pfs
+	;;
+	mov ar.unat=temp3
+	mov ar.pfs=temp4
+	// ar.rnat, ar.bspstore, loadrs are restored in ia64_old_stack.
+	ld8 temp3=[temp1],PT(F6)-PT(AR_CCV)		// restore ar.ccv
+	ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR)		// restore ar.fpsr
+	;;
+	mov ar.ccv=temp3
+	mov ar.fpsr=temp4
+	ldf.fill f6=[temp1],PT(F8)-PT(F6)
+	ldf.fill f7=[temp2],PT(F9)-PT(F7)
+	;;
+	ldf.fill f8=[temp1],PT(F10)-PT(F8)
+	ldf.fill f9=[temp2],PT(F11)-PT(F9)
+	;;
+	ldf.fill f10=[temp1]
+	ldf.fill f11=[temp2]
+
+	// Restore the SAL to OS state. The previous code left regs at pt_regs.
+	add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
 	;;
-	add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
-
-restore_GRs:                                    // restore bank-1 GRs 16-31
-	bsw.1;;
-	add		r3=16*8,r2;;                // to get to NaT of GR 16-31
-	ld8		r3=[r3];;
-	mov		ar.unat=r3;;                // first restore NaT
-
-	ld8.fill	r16=[r2],8;;
-	ld8.fill	r17=[r2],8;;
-	ld8.fill	r18=[r2],8;;
-	ld8.fill	r19=[r2],8;;
-	ld8.fill	r20=[r2],8;;
-	ld8.fill	r21=[r2],8;;
-	ld8.fill	r22=[r2],8;;
-	ld8.fill	r23=[r2],8;;
-	ld8.fill	r24=[r2],8;;
-	ld8.fill	r25=[r2],8;;
-	ld8.fill	r26=[r2],8;;
-	ld8.fill	r27=[r2],8;;
-	ld8.fill	r28=[r2],8;;
-	ld8.fill	r29=[r2],8;;
-	ld8.fill	r30=[r2],8;;
-	ld8.fill	r31=[r2],8;;
-
-	ld8		r3=[r2],8;;              // increment to skip NaT
-	bsw.0;;
-
-restore_BRs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		b0=r3
-	mov		b1=r5
-	mov		b2=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		b3=r3
-	mov		b4=r5
-	mov		b5=r7;;
-
-	ld8		r3=[r2],2*8
-	ld8		r5=[r4],2*8;;
-	mov		b6=r3
-	mov		b7=r5;;
-
-restore_CRs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],8*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;            // 48 byte increments
-	mov		cr.dcr=r3
-	mov		cr.itm=r5
-	mov		cr.iva=r7;;
-
-	ld8		r3=[r2],8*8;;            // 64 byte increments
-//      mov		cr.pta=r3
-
-
-// if PSR.ic=1, reading interruption registers causes an illegal operation fault
-	mov		r3=psr;;
-	tbit.nz.unc	p6,p0=r3,PSR_IC;;           // PSI Valid Log bit pos. test
-(p6)    st8     [r2]=r0,9*8+160             // increment by 232 byte inc.
-
-begin_rskip_intr_regs:
-(p6)	br		rSkipIntrRegs;;
-
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.ipsr=r3
-//	mov		cr.isr=r5                   // cr.isr is read only
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.iip=r3
-	mov		cr.ifa=r5
-	mov		cr.itir=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		cr.iipa=r3
-	mov		cr.ifs=r5
-	mov		cr.iim=r7
-
-	ld8		r3=[r2],160;;               // 160 byte increment
-	mov		cr.iha=r3
-
-rSkipIntrRegs:
-	ld8		r3=[r2],152;;               // another 152 byte inc.
-
-	add		r4=8,r2                     // duplicate r2 in r4
-	add		r6=2*8,r2;;                 // duplicate r2 in r6
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-	mov		cr.lid=r3
-//	mov		cr.ivr=r5                   // cr.ivr is read only
-	mov		cr.tpr=r7;;
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-//	mov		cr.eoi=r3
-//	mov		cr.irr0=r5                  // cr.irr0 is read only
-//	mov		cr.irr1=r7;;                // cr.irr1 is read only
-
-	ld8		r3=[r2],8*3
-	ld8		r5=[r4],8*3
-	ld8		r7=[r6],8*3;;
-//	mov		cr.irr2=r3                  // cr.irr2 is read only
-//	mov		cr.irr3=r5                  // cr.irr3 is read only
-	mov		cr.itv=r7;;
-
-	ld8		r3=[r2],8*7
-	ld8		r5=[r4],8*7;;
-	mov		cr.pmv=r3
-	mov		cr.cmcv=r5;;
-
-	ld8		r3=[r2],8*23
-	ld8		r5=[r4],8*23;;
-	adds		r2=8*23,r2
-	adds		r4=8*23,r4;;
-//	mov		cr.lrr0=r3
-//	mov		cr.lrr1=r5
-
-	adds		r2=8*2,r2;;
-
-restore_ARs:
-	add		r4=8,r2                  // duplicate r2 in r4
-	add		r6=2*8,r2;;              // duplicate r2 in r4
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		ar.k0=r3
-	mov		ar.k1=r5
-	mov		ar.k2=r7;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-	mov		ar.k3=r3
-	mov		ar.k4=r5
-	mov		ar.k5=r7;;
-
-	ld8		r3=[r2],10*8
-	ld8		r5=[r4],10*8
-	ld8		r7=[r6],10*8;;
-	mov		ar.k6=r3
-	mov		ar.k7=r5
-	;;
-
-	ld8		r3=[r2],3*8
-	ld8		r5=[r4],3*8
-	ld8		r7=[r6],3*8;;
-//	mov		ar.rsc=r3
-//	mov		ar.bsp=r5                   // ar.bsp is read only
-	mov		ar.rsc=r0			    // make sure that RSE is in enforced lazy mode
-	;;
-	mov		ar.bspstore=r7;;
-
-	ld8		r9=[r2],8*13;;
-	mov		ar.rnat=r9
-
-	mov		ar.rsc=r3
-	ld8		r3=[r2],8*4;;
-	mov		ar.ccv=r3
-
-	ld8		r3=[r2],8*4;;
-	mov		ar.unat=r3
-
-	ld8		r3=[r2],8*4;;
-	mov		ar.fpsr=r3
-
-	ld8		r3=[r2],160;;               // 160
-//      mov		ar.itc=r3
-
-	ld8		r3=[r2],8;;
-	mov		ar.pfs=r3
-
-	ld8		r3=[r2],8;;
-	mov		ar.lc=r3
-
-	ld8		r3=[r2];;
-	mov		ar.ec=r3
-	add		r2=8*62,r2;;             // padding
-
-restore_RRs:
-	mov		r5=ar.lc
-	mov		ar.lc=0x08-1
-	movl		r4=0x00;;
-cStRRr:
-	dep.z		r7=r4,61,3
-	ld8		r3=[r2],8;;
-	mov		rr[r7]=r3                   // what are its access previledges?
-	add		r4=1,r4
-	br.cloop.sptk.few	cStRRr
-	;;
-	mov		ar.lc=r5
-	;;
-end_os_mca_restore:
-	br	ia64_os_mca_done_restore;;
+	add temp1=IA64_SAL_OS_STATE_COMMON_OFFSET, regs
+	add temp2=IA64_SAL_OS_STATE_COMMON_OFFSET+8, regs
+	;;
+	ld8 r12=[temp1],16	// sal_ra
+	ld8 r9=[temp2],16	// sal_gp
+	;;
+	ld8 r22=[temp1],24	// pal_min_state, virtual.  skip prev_task
+	ld8 r21=[temp2],16	// prev_IA64_KR_CURRENT
+	;;
+	ld8 temp3=[temp1],16	// cr.isr
+	ld8 temp4=[temp2],16	// cr.ifa
+	;;
+	mov cr.isr=temp3
+	mov cr.ifa=temp4
+	ld8 temp3=[temp1],16	// cr.itir
+	ld8 temp4=[temp2],16	// cr.iipa
+	;;
+	mov cr.itir=temp3
+	mov cr.iipa=temp4
+	ld8 temp3=[temp1],16	// cr.iim
+	ld8 temp4=[temp2],16	// cr.iha
+	;;
+	mov cr.iim=temp3
+	mov cr.iha=temp4
+	dep r22=0,r22,62,2	// pal_min_state, physical, uncached
+	mov IA64_KR(CURRENT)=r21
+	ld8 r8=[temp1]		// os_status
+	ld8 r10=[temp2]		// context
+
+	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
 
 
-// ok, the issue here is that we need to save state information so
-// it can be useable by the kernel debugger and show regs routines.
-// In order to do this, our best bet is save the current state (plus
-// the state information obtain from the MIN_STATE_AREA) into a pt_regs
-// format.  This way we can pass it on in a useable format.
+//++
+// Name:
+//	ia64_new_stack()
 //
-
+// Stub Description:
 //
-// SAL to OS entry point for INIT on the monarch processor
-// This has been defined for registration purposes with SAL
-// as a part of ia64_mca_init.
+//	Switch to the MCA/INIT stack.
 //
-// When we get here, the following registers have been
-// set by the SAL for our use
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
 //
-//		1. GR1 = OS INIT GP
-//		2. GR8 = PAL_PROC physical address
-//		3. GR9 = SAL_PROC physical address
-//		4. GR10 = SAL GP (physical)
-//		5. GR11 = Init Reason
-//			0 = Received INIT for event other than crash dump switch
-//			1 = Received wakeup at the end of an OS_MCA corrected machine check
-//			2 = Received INIT dude to CrashDump switch assertion
+//	On entry RBS is still on the original stack, this routine switches RBS
+//	to use the MCA/INIT stack.
 //
-//		6. GR12 = Return address to location within SAL_INIT procedure
-
+//	On entry, sos->pal_min_state is physical, on exit it is virtual.
+//
+//--
 
-GLOBAL_ENTRY(ia64_monarch_init_handler)
-	.prologue
-	// stash the information the SAL passed to os
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ia64_new_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	add temp2=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp1)
+	invala
 	;;
-	SAVE_MIN_WITH_COVER
+	add temp2=temp2, temp1		// struct ia64_sal_os_state.pal_min_state on MCA or INIT stack
+	add regs=regs, temp1		// struct pt_regs on MCA or INIT stack
 	;;
-	mov r8=cr.ifa
-	mov r9=cr.isr
-	adds r3=8,r2				// set up second base pointer
+	// Address of minstate area provided by PAL is physical, uncacheable.
+	// Convert to Linux virtual address in region 6 for C code.
+	ld8 ms=[temp2]			// pal_min_state, physical
 	;;
-	SAVE_REST
-
-// ok, enough should be saved at this point to be dangerous, and supply
-// information for a dump
-// We need to switch to Virtual mode before hitting the C functions.
+	dep temp1=-1,ms,62,2		// set region 6
+	mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET
+	;;
+	st8 [temp2]=temp1		// pal_min_state, virtual
 
-	movl	r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
-	mov	r3=psr	// get the current psr, minimum enabled at this point
+	add temp4=temp3, regs		// start of bspstore on new stack
 	;;
-	or	r2=r2,r3
+	mov ar.bspstore=temp4		// switch RBS to MCA/INIT stack
 	;;
-	movl	r3=IVirtual_Switch
+	flushrs				// must be first in group
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_old_stack()
+//
+// Stub Description:
+//
+//	Switch to the old stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	On entry, pal_min_state is virtual, on exit it is physical.
+//
+//	On entry RBS is on the MCA/INIT stack, this routine switches RBS
+//	back to the previous stack.
+//
+//	The psr is set to all zeroes.  SAL return requires either all zeroes or
+//	just psr.mc set.  Leaving psr.mc off allows INIT to be issued if this
+//	code does not perform correctly.
+//
+//	The dirty registers at the time of the event were flushed to the
+//	MCA/INIT stack in ia64_pt_regs_save().  Restore the dirty registers
+//	before reverting to the previous bspstore.
+//--
+
+ia64_old_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp2)
+	LOAD_PHYSICAL(p0,temp1,1f)
 	;;
-	mov	cr.iip=r3	// short return to set the appropriate bits
-	mov	cr.ipsr=r2	// need to do an rfi to set appropriate bits
+	mov cr.ipsr=r0
+	mov cr.ifs=r0
+	mov cr.iip=temp1
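+	// rfi with psr=0 resumes at the 1: label below, now in physical mode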
 	;;
+	invala
 	rfi
+1:
+
+	add regs=regs, temp2		// struct pt_regs on MCA or INIT stack
 	;;
-IVirtual_Switch:
-	//
-	// We should now be running virtual
-	//
-	// Let's call the C handler to get the rest of the state info
-	//
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	add temp1=PT(LOADRS), regs
 	;;
-	adds out0=16,sp				// out0 = pointer to pt_regs
+	ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS)	// restore loadrs
 	;;
-	DO_SAVE_SWITCH_STACK
-	.body
-	adds out1=16,sp				// out0 = pointer to switch_stack
+	ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE)	// restore ar.bspstore
+	mov ar.rsc=temp2
+	;;
+	loadrs
+	ld8 temp4=[temp1]		// restore ar.rnat
+	;;
+	mov ar.bspstore=temp3		// back to old stack
+	;;
+	mov ar.rnat=temp4
+	;;
+
+	br.sptk b0
 
-	br.call.sptk.many rp=ia64_init_handler
-.ret1:
+//EndStub//////////////////////////////////////////////////////////////////////
 
-return_from_init:
-	br.sptk return_from_init
-END(ia64_monarch_init_handler)
 
+//++
+// Name:
+//	ia64_set_kernel_registers()
 //
-// SAL to OS entry point for INIT on the slave processor
-// This has been defined for registration purposes with SAL
-// as a part of ia64_mca_init.
+// Stub Description:
+//
+//	Set the registers that are required by the C code in order to run on an
+//	MCA/INIT stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
 //
+//--
+
+ia64_set_kernel_registers:
+	add temp3=MCA_SP_OFFSET, r3
+	add temp4=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_OS_GP_OFFSET, r3
+	mov b0=r2		// save return address
+	GET_IA64_MCA_DATA(temp1)
+	;;
+	add temp4=temp4, temp1	// &struct ia64_sal_os_state.os_gp
+	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
+	add r13=temp1, r3	// set current to start of MCA/INIT stack
+	;;
+	ld8 r1=[temp4]		// OS GP from SAL OS state
+	;;
+	DATA_PA_TO_VA(r1,temp1)
+	DATA_PA_TO_VA(r12,temp2)
+	DATA_PA_TO_VA(r13,temp3)
+	;;
+	mov IA64_KR(CURRENT)=r13
+
+	// FIXME: do I need to wire IA64_KR_CURRENT_STACK and IA64_TR_CURRENT_STACK?
+
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+#undef	ms
+#undef	regs
+#undef	temp1
+#undef	temp2
+#undef	temp3
+#undef	temp4
+
 
-GLOBAL_ENTRY(ia64_slave_init_handler)
-1:	br.sptk 1b
-END(ia64_slave_init_handler)
+// Support function for mca.c; it is here to avoid using inline asm.  Given the
+// address of an rnat slot, if that address is below the current ar.bspstore
+// then return the contents of that slot, otherwise return the contents of
+// ar.rnat.
+GLOBAL_ENTRY(ia64_get_rnat)
+	alloc r14=ar.pfs,1,0,0,0
+	mov ar.rsc=0
+	;;
+	mov r14=ar.bspstore
+	;;
+	cmp.lt p6,p7=in0,r14
+	;;
+(p6)	ld8 r8=[in0]
+(p7)	mov r8=ar.rnat
+	mov ar.rsc=3
+	br.ret.sptk.many rp
+END(ia64_get_rnat)
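
For reference, a hedged sketch of the C-side call: given the address of an
RBS slot, ia64_rse_rnat_addr() from asm/rse.h locates the covering RNaT
collection slot, and ia64_get_rnat() then yields the NaT bits whether or not
that slot has been written back to memory (the variable names here are
illustrative, not part of this patch):

	#include <asm/rse.h>

	unsigned long *rnat_slot = ia64_rse_rnat_addr(slot_addr);
	u64 nat_bits = ia64_get_rnat((u64 *)rnat_slot);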
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index abc0113a821d..6e683745af49 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -4,6 +4,8 @@
  *
  * Copyright (C) 2004 FUJITSU LIMITED
  * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -38,10 +40,6 @@
 /* max size of SAL error record (default) */
 static int sal_rec_max = 10000;
 
-/* from mca.c */
-static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state;
-static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state;
-
 /* from mca_drv_asm.S */
 extern void *mca_handler_bhhook(void);
 
@@ -316,7 +314,8 @@ init_record_index_pools(void)
  */
 
 static mca_type_t
-is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+	      struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
 
@@ -327,7 +326,7 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
 	 * Therefore it is local MCA when rendezvous has not been requested.
 	 * Failed to rendezvous, the system must be down.
 	 */
-	switch (sal_to_os_handoff_state->imsto_rendez_state) {
+	switch (sos->rv_rc) {
 		case -1: /* SAL rendezvous unsuccessful */
 			return MCA_IS_GLOBAL;
 		case  0: /* SAL rendezvous not required */
@@ -388,7 +387,8 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
  */
 
 static int
-recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			struct ia64_sal_os_state *sos)
 {
 	sal_log_mod_error_info_t *smei;
 	pal_min_state_area_t *pmsa;
@@ -426,7 +426,7 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
 			 *  setup for resume to bottom half of MCA,
 			 * "mca_handler_bhhook"
 			 */
-			pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61));
+			pmsa = sos->pal_min_state;
 			/* pass to bhhook as 1st argument (gr8) */
 			pmsa->pmsa_gr[8-1] = smei->target_identifier;
 			/* set interrupted return address (but no use) */
@@ -459,7 +459,8 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
  */
 
 static int
-recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			    struct ia64_sal_os_state *sos)
 {
 	int status = 0;
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
@@ -469,7 +470,7 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
 		case 1: /* partial read */
 		case 3: /* full line(cpu) read */
 		case 9: /* I/O space read */
-			status = recover_from_read_error(slidx, peidx, pbci);
+			status = recover_from_read_error(slidx, peidx, pbci, sos);
 			break;
 		case 0: /* unknown */
 		case 2: /* partial write */
@@ -508,7 +509,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
  */
 
 static int
-recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			     struct ia64_sal_os_state *sos)
 {
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
 
@@ -545,7 +547,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
 	 * This means "there are some platform errors".
 	 */
 	if (platform) 
-		return recover_from_platform_error(slidx, peidx, pbci);
+		return recover_from_platform_error(slidx, peidx, pbci, sos);
 	/* 
 	 * On account of strange SAL error record, we cannot recover. 
 	 */
@@ -562,8 +564,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
 
 static int
 mca_try_to_recover(void *rec, 
-	ia64_mca_sal_to_os_state_t *sal_to_os_state,
-	ia64_mca_os_to_sal_state_t *os_to_sal_state)
+	struct ia64_sal_os_state *sos)
 {
 	int platform_err;
 	int n_proc_err;
@@ -571,10 +572,6 @@ mca_try_to_recover(void *rec,
 	peidx_table_t peidx;
 	pal_bus_check_info_t pbci;
 
-	/* handoff state from/to mca.c */
-	sal_to_os_handoff_state = sal_to_os_state;
-	os_to_sal_handoff_state = os_to_sal_state;
-
 	/* Make index of SAL error record */
 	platform_err = mca_make_slidx(rec, &slidx);
 
@@ -597,11 +594,11 @@ mca_try_to_recover(void *rec,
 	*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
 
 	/* Check whether MCA is global or not */
-	if (is_mca_global(&peidx, &pbci))
+	if (is_mca_global(&peidx, &pbci, sos))
 		return 0;
 	
 	/* Try to recover a processor error */
-	return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci);
+	return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci, sos);
 }
 
 /*
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h
index 149ad0118455..97a28b8b2ddd 100644
--- a/include/asm-ia64/mca.h
+++ b/include/asm-ia64/mca.h
@@ -11,8 +11,6 @@
 #ifndef _ASM_IA64_MCA_H
 #define _ASM_IA64_MCA_H
 
-#define IA64_MCA_STACK_SIZE	8192
-
 #if !defined(__ASSEMBLY__)
 
 #include <linux/interrupt.h>
@@ -48,7 +46,8 @@ typedef union cmcv_reg_u {
 
 enum {
 	IA64_MCA_RENDEZ_CHECKIN_NOTDONE	=	0x0,
-	IA64_MCA_RENDEZ_CHECKIN_DONE	=	0x1
+	IA64_MCA_RENDEZ_CHECKIN_DONE	=	0x1,
+	IA64_MCA_RENDEZ_CHECKIN_INIT	=	0x2,
 };
 
 /* Information maintained by the MC infrastructure */
@@ -63,18 +62,42 @@ typedef struct ia64_mc_info_s {
 
 } ia64_mc_info_t;
 
-typedef struct ia64_mca_sal_to_os_state_s {
-	u64		imsto_os_gp;		/* GP of the os registered with the SAL */
-	u64		imsto_pal_proc;		/* PAL_PROC entry point - physical addr */
-	u64		imsto_sal_proc;		/* SAL_PROC entry point - physical addr */
-	u64		imsto_sal_gp;		/* GP of the SAL - physical */
-	u64		imsto_rendez_state;	/* Rendez state information */
-	u64		imsto_sal_check_ra;	/* Return address in SAL_CHECK while going
-						 * back to SAL from OS after MCA handling.
-						 */
-	u64		pal_min_state;		/* from PAL in r17 */
-	u64		proc_state_param;	/* from PAL in r18. See SDV 2:268 11.3.2.1 */
-} ia64_mca_sal_to_os_state_t;
+/* Handover state from SAL to OS and vice versa, for both MCA and INIT events.
+ * Besides the handover state, it also contains some saved registers from the
+ * time of the event.
+ * Note: mca_asm.S depends on the precise layout of this structure.
+ */
+
+struct ia64_sal_os_state {
+	/* SAL to OS, must be at offset 0 */
+	u64			os_gp;			/* GP of the os registered with the SAL, physical */
+	u64			pal_proc;		/* PAL_PROC entry point, physical */
+	u64			sal_proc;		/* SAL_PROC entry point, physical */
+	u64			rv_rc;			/* MCA - Rendezvous state, INIT - reason code */
+	u64			proc_state_param;	/* from R18 */
+	u64			monarch;		/* 1 for a monarch event, 0 for a slave */
+	/* common, must follow SAL to OS */
+	u64			sal_ra;			/* Return address in SAL, physical */
+	u64			sal_gp;			/* GP of the SAL - physical */
+	pal_min_state_area_t	*pal_min_state;		/* from R17.  physical in asm, virtual in C */
+	u64			prev_IA64_KR_CURRENT;	/* previous value of IA64_KR(CURRENT) */
+	struct task_struct	*prev_task;		/* previous task, NULL if it is not useful */
+	/* Some interrupt registers are not saved in minstate, pt_regs or
+	 * switch_stack.  Because MCA/INIT can occur when interrupts are
+	 * disabled, we need to save the additional interrupt registers over
+	 * MCA/INIT and resume.
+	 */
+	u64			isr;
+	u64			ifa;
+	u64			itir;
+	u64			iipa;
+	u64			iim;
+	u64			iha;
+	/* OS to SAL, must follow common */
+	u64			os_status;		/* OS status to SAL, enum below */
+	u64			context;		/* 0 if return to same context
+							   1 if return to new context */
+};
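+
+/* Illustrative only: consumers such as the mca_drv.c recovery code read the
+ * handoff through this struct, e.g. sos->rv_rc == -1 means the SAL
+ * rendezvous failed (see is_mca_global()), and sos->pal_min_state is
+ * already a virtual pointer by the time the C handlers run.
+ */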
 
 enum {
 	IA64_MCA_CORRECTED	=	0x0,	/* Error has been corrected by OS_MCA */
@@ -83,36 +106,22 @@ enum {
 	IA64_MCA_HALT		=	-3	/* System to be halted by SAL */
 };
 
+enum {
+	IA64_INIT_RESUME	=	0x0,	/* Resume after return from INIT */
+	IA64_INIT_WARM_BOOT	=	-1,	/* Warm boot of the system needed from SAL */
+};
+
 enum {
 	IA64_MCA_SAME_CONTEXT	=	0x0,	/* SAL to return to same context */
 	IA64_MCA_NEW_CONTEXT	=	-1	/* SAL to return to new context */
 };
 
-typedef struct ia64_mca_os_to_sal_state_s {
-	u64		imots_os_status;	/*   OS status to SAL as to what happened
-						 *   with the MCA handling.
-						 */
-	u64		imots_sal_gp;		/* GP of the SAL - physical */
-	u64		imots_context;		/* 0 if return to same context
-						   1 if return to new context */
-	u64		*imots_new_min_state;	/* Pointer to structure containing
-						 * new values of registers in the min state
-						 * save area.
-						 */
-	u64		imots_sal_check_ra;	/* Return address in SAL_CHECK while going
-						 * back to SAL from OS after MCA handling.
-						 */
-} ia64_mca_os_to_sal_state_t;
-
 /* Per-CPU MCA state that is too big for normal per-CPU variables.  */
 
 struct ia64_mca_cpu {
-	u64 stack[IA64_MCA_STACK_SIZE/8];	/* MCA memory-stack */
-	u64 proc_state_dump[512];
-	u64 stackframe[32];
-	u64 rbstore[IA64_MCA_STACK_SIZE/8];	/* MCA reg.-backing store */
+	u64 mca_stack[KERNEL_STACK_SIZE/8];
 	u64 init_stack[KERNEL_STACK_SIZE/8];
-} __attribute__ ((aligned(16)));
+};
 
 /* Array of physical addresses of each CPU's MCA area.  */
 extern unsigned long __per_cpu_mca[NR_CPUS];
@@ -121,12 +130,29 @@ extern void ia64_mca_init(void);
 extern void ia64_mca_cpu_init(void *);
 extern void ia64_os_mca_dispatch(void);
 extern void ia64_os_mca_dispatch_end(void);
-extern void ia64_mca_ucmc_handler(void);
+extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *);
+extern void ia64_init_handler(struct pt_regs *,
+			      struct switch_stack *,
+			      struct ia64_sal_os_state *);
 extern void ia64_monarch_init_handler(void);
 extern void ia64_slave_init_handler(void);
 extern void ia64_mca_cmc_vector_setup(void);
-extern int  ia64_reg_MCA_extension(void*);
+extern int  ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
 extern void ia64_unreg_MCA_extension(void);
+extern u64 ia64_get_rnat(u64 *);
+
+#else	/* __ASSEMBLY__ */
+
+#define IA64_MCA_CORRECTED	0x0	/* Error has been corrected by OS_MCA */
+#define IA64_MCA_WARM_BOOT	-1	/* Warm boot of the system needed from SAL */
+#define IA64_MCA_COLD_BOOT	-2	/* Cold boot of the system needed from SAL */
+#define IA64_MCA_HALT		-3	/* System to be halted by SAL */
+
+#define IA64_INIT_RESUME	0x0	/* Resume after return from INIT */
+#define IA64_INIT_WARM_BOOT	-1	/* Warm boot of the system needed from SAL */
+
+#define IA64_MCA_SAME_CONTEXT	0x0	/* SAL to return to same context */
+#define IA64_MCA_NEW_CONTEXT	-1	/* SAL to return to new context */
 
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_IA64_MCA_H */
diff --git a/include/asm-ia64/mca_asm.h b/include/asm-ia64/mca_asm.h
index 836953e0f91f..27c9203d8ce3 100644
--- a/include/asm-ia64/mca_asm.h
+++ b/include/asm-ia64/mca_asm.h
@@ -8,6 +8,8 @@
  * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 2002 Intel Corp.
  * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
  */
 #ifndef _ASM_IA64_MCA_ASM_H
 #define _ASM_IA64_MCA_ASM_H
@@ -207,106 +209,33 @@
 	;;
 
 /*
- * The following offsets capture the order in which the
- * RSE related registers from the old context are
- * saved onto the new stack frame.
+ * The MCA and INIT stacks in struct ia64_mca_cpu look like normal kernel
+ * stacks, except that the SAL/OS state and a switch_stack are stored near the
+ * top of the MCA/INIT stack.  To support concurrent entry to MCA or INIT, as
+ * well as MCA over INIT, each event needs its own SAL/OS state.  All entries
+ * are 16 byte aligned.
  *
- *	+-----------------------+
- *	|NDIRTY [BSP - BSPSTORE]|
- *	+-----------------------+
- *	|	RNAT		|
- *	+-----------------------+
- *	|	BSPSTORE	|
- *	+-----------------------+
- *	|	IFS		|
- *	+-----------------------+
- *	|	PFS		|
- *	+-----------------------+
- *	|	RSC		|
- *	+-----------------------+ <-------- Bottom of new stack frame
+ *      +---------------------------+
+ *      |          pt_regs          |
+ *      +---------------------------+
+ *      |        switch_stack       |
+ *      +---------------------------+
+ *      |        SAL/OS state       |
+ *      +---------------------------+
+ *      |    16 byte scratch area   |
+ *      +---------------------------+ <-------- SP at start of C MCA handler
+ *      |           .....           |
+ *      +---------------------------+
+ *      | RBS for MCA/INIT handler  |
+ *      +---------------------------+
+ *      | struct task for MCA/INIT  |
+ *      +---------------------------+ <-------- Bottom of MCA/INIT stack
  */
-#define  rse_rsc_offset		0
-#define  rse_pfs_offset		(rse_rsc_offset+0x08)
-#define  rse_ifs_offset		(rse_pfs_offset+0x08)
-#define  rse_bspstore_offset	(rse_ifs_offset+0x08)
-#define  rse_rnat_offset	(rse_bspstore_offset+0x08)
-#define  rse_ndirty_offset	(rse_rnat_offset+0x08)
 
-/*
- * rse_switch_context
- *
- *	1. Save old RSC onto the new stack frame
- *	2. Save PFS onto new stack frame
- *	3. Cover the old frame and start a new frame.
- *	4. Save IFS onto new stack frame
- *	5. Save the old BSPSTORE on the new stack frame
- *	6. Save the old RNAT on the new stack frame
- *	7. Write BSPSTORE with the new backing store pointer
- *	8. Read and save the new BSP to calculate the #dirty registers
- * NOTE: Look at pages 11-10, 11-11 in PRM Vol 2
- */
-#define rse_switch_context(temp,p_stackframe,p_bspstore)			\
-	;;									\
-	mov     temp=ar.rsc;;							\
-	st8     [p_stackframe]=temp,8;;					\
-	mov     temp=ar.pfs;;							\
-	st8     [p_stackframe]=temp,8;						\
-	cover ;;								\
-	mov     temp=cr.ifs;;							\
-	st8     [p_stackframe]=temp,8;;						\
-	mov     temp=ar.bspstore;;						\
-	st8     [p_stackframe]=temp,8;;					\
-	mov     temp=ar.rnat;;							\
-	st8     [p_stackframe]=temp,8;						\
-	mov     ar.bspstore=p_bspstore;;					\
-	mov     temp=ar.bsp;;							\
-	sub     temp=temp,p_bspstore;;						\
-	st8     [p_stackframe]=temp,8;;
-
-/*
- * rse_return_context
- *	1. Allocate a zero-sized frame
- *	2. Store the number of dirty registers RSC.loadrs field
- *	3. Issue a loadrs to insure that any registers from the interrupted
- *	   context which were saved on the new stack frame have been loaded
- *	   back into the stacked registers
- *	4. Restore BSPSTORE
- *	5. Restore RNAT
- *	6. Restore PFS
- *	7. Restore IFS
- *	8. Restore RSC
- *	9. Issue an RFI
- */
-#define rse_return_context(psr_mask_reg,temp,p_stackframe)			\
-	;;									\
-	alloc   temp=ar.pfs,0,0,0,0;						\
-	add     p_stackframe=rse_ndirty_offset,p_stackframe;;			\
-	ld8     temp=[p_stackframe];;						\
-	shl     temp=temp,16;;							\
-	mov     ar.rsc=temp;;							\
-	loadrs;;								\
-	add     p_stackframe=-rse_ndirty_offset+rse_bspstore_offset,p_stackframe;;\
-	ld8     temp=[p_stackframe];;						\
-	mov     ar.bspstore=temp;;						\
-	add     p_stackframe=-rse_bspstore_offset+rse_rnat_offset,p_stackframe;;\
-	ld8     temp=[p_stackframe];;						\
-	mov     ar.rnat=temp;;							\
-	add     p_stackframe=-rse_rnat_offset+rse_pfs_offset,p_stackframe;;	\
-	ld8     temp=[p_stackframe];;						\
-	mov     ar.pfs=temp;;							\
-	add     p_stackframe=-rse_pfs_offset+rse_ifs_offset,p_stackframe;;	\
-	ld8     temp=[p_stackframe];;						\
-	mov     cr.ifs=temp;;							\
-	add     p_stackframe=-rse_ifs_offset+rse_rsc_offset,p_stackframe;;	\
-	ld8     temp=[p_stackframe];;						\
-	mov     ar.rsc=temp ;							\
-	mov     temp=psr;;							\
-	or      temp=temp,psr_mask_reg;;					\
-	mov     cr.ipsr=temp;;							\
-	mov     temp=ip;;							\
-	add     temp=0x30,temp;;						\
-	mov     cr.iip=temp;;							\
-	srlz.i;;								\
-	rfi;;
+#define ALIGN16(x)			((x)&~15)
+#define MCA_PT_REGS_OFFSET		ALIGN16(KERNEL_STACK_SIZE-IA64_PT_REGS_SIZE)
+#define MCA_SWITCH_STACK_OFFSET		ALIGN16(MCA_PT_REGS_OFFSET-IA64_SWITCH_STACK_SIZE)
+#define MCA_SOS_OFFSET			ALIGN16(MCA_SWITCH_STACK_OFFSET-IA64_SAL_OS_STATE_SIZE)
+#define MCA_SP_OFFSET			ALIGN16(MCA_SOS_OFFSET-16)
 
 #endif /* _ASM_IA64_MCA_ASM_H */
-- 
cgit v1.2.3
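
The new offset macros are simple enough to sanity-check in user space. The
sketch below recomputes the MCA/INIT stack layout top-down, using assumed
sizes for pt_regs, switch_stack and the SAL/OS state (the real values come
from the generated asm-offsets at build time), so the printed offsets are
illustrative only:

	#include <stdio.h>

	#define KERNEL_STACK_SIZE	(1UL << 14)	/* assumed: 16KB kernel stack */
	#define IA64_PT_REGS_SIZE	400UL		/* assumed, for illustration */
	#define IA64_SWITCH_STACK_SIZE	560UL		/* assumed, for illustration */
	#define IA64_SAL_OS_STATE_SIZE	160UL		/* assumed, for illustration */

	#define ALIGN16(x)		((x) & ~15UL)
	#define MCA_PT_REGS_OFFSET	ALIGN16(KERNEL_STACK_SIZE - IA64_PT_REGS_SIZE)
	#define MCA_SWITCH_STACK_OFFSET	ALIGN16(MCA_PT_REGS_OFFSET - IA64_SWITCH_STACK_SIZE)
	#define MCA_SOS_OFFSET		ALIGN16(MCA_SWITCH_STACK_OFFSET - IA64_SAL_OS_STATE_SIZE)
	#define MCA_SP_OFFSET		ALIGN16(MCA_SOS_OFFSET - 16)

	int main(void)
	{
		/* each area sits 16-byte aligned below the previous one */
		printf("pt_regs      at +0x%lx\n", MCA_PT_REGS_OFFSET);
		printf("switch_stack at +0x%lx\n", MCA_SWITCH_STACK_OFFSET);
		printf("SAL/OS state at +0x%lx\n", MCA_SOS_OFFSET);
		printf("initial sp   at +0x%lx\n", MCA_SP_OFFSET);
		return 0;
	}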


From 05f335ea04881ecb912b572c83e281a016149169 Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Sun, 11 Sep 2005 17:23:42 +1000
Subject: [IA64] MCA/INIT: remove the physical mode path from minstate.h

Remove the physical mode path from minstate.h.

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/ivt.S      |  1 -
 arch/ia64/kernel/minstate.h | 88 ++++++++++-----------------------------------
 2 files changed, 18 insertions(+), 71 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 3ba8384cb43d..c13ca0d49c4a 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -69,7 +69,6 @@
 # define DBG_FAULT(i)
 #endif
 
-#define MINSTATE_VIRT	/* needed by minstate.h */
 #include "minstate.h"
 
 #define FAULT(n)									\
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index f6d8a010d99b..85ed54179afa 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -4,73 +4,6 @@
 
 #include "entry.h"
 
-/*
- * For ivt.s we want to access the stack virtually so we don't have to disable translation
- * on interrupts.
- *
- *  On entry:
- *	r1:	pointer to current task (ar.k6)
- */
-#define MINSTATE_START_SAVE_MIN_VIRT								\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-	;;											\
-(pUStk)	mov.m r24=ar.rnat;									\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
-(pKStk) mov r1=sp;					/* get sp  */				\
-	;;											\
-(pUStk) lfetch.fault.excl.nt1 [r22];								\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */
-
-#define MINSTATE_END_SAVE_MIN_VIRT								\
-	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
-	;;
-
-/*
- * For mca_asm.S we want to access the stack physically since the state is saved before we
- * go virtual and don't want to destroy the iip or ipsr.
- */
-#define MINSTATE_START_SAVE_MIN_PHYS								\
-(pKStk) mov r3=IA64_KR(PER_CPU_DATA);;								\
-(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;							\
-(pKStk) ld8 r3 = [r3];;										\
-(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;						\
-(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;						\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;		/* compute base of register backing store */	\
-	;;											\
-(pUStk)	mov r24=ar.rnat;									\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-(pUStk)	dep r22=-1,r22,61,3;			/* compute kernel virtual addr of RBS */	\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;			/* switch to kernel RBS */			\
-	;;											\
-(pUStk)	mov r18=ar.bsp;										\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
-
-#define MINSTATE_END_SAVE_MIN_PHYS								\
-	dep r12=-1,r12,61,3;		/* make sp a kernel virtual address */			\
-	;;
-
-#ifdef MINSTATE_VIRT
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT)
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_VIRT
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_VIRT
-#endif
-
-#ifdef MINSTATE_PHYS
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT);; tpa reg=reg
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_PHYS
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_PHYS
-#endif
-
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -97,7 +30,7 @@
  * we can pass interruption state as arguments to a handler.
  */
 #define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
-	MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
 	mov r27=ar.rsc;			/* M */							\
 	mov r20=r1;			/* A */							\
 	mov r25=ar.unat;		/* M */							\
@@ -118,7 +51,21 @@
 	SAVE_IFS;										\
 	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
 	;;											\
-	MINSTATE_START_SAVE_MIN									\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
 	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
 	adds r16=PT(CR_IPSR),r1;								\
 	;;											\
@@ -181,7 +128,8 @@
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
-	MINSTATE_END_SAVE_MIN
+	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
+	;;
 
 /*
  * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
-- 
cgit v1.2.3
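
With the physical-mode path gone, DO_SAVE_MIN always takes the former
MINSTATE_VIRT route: if the interruption arrived from kernel mode (pKStk)
it just opens a pt_regs frame below the current sp, otherwise (pUStk) it
switches over to the task's kernel stack. A minimal C sketch of that
choice, with assumed values for IA64_STK_OFFSET and IA64_PT_REGS_SIZE and
a hypothetical helper name:

	#define IA64_STK_OFFSET		(1UL << 14)	/* assumed: top of a 16KB kernel stack */
	#define IA64_PT_REGS_SIZE	400UL		/* assumed size of struct pt_regs */

	/* hypothetical helper mirroring the pKStk/pUStk choice inlined into DO_SAVE_MIN */
	unsigned long pt_regs_base(unsigned long sp, unsigned long task, int from_kernel)
	{
		if (from_kernel)
			/* (pKStk) already on the kernel stack: make room below sp */
			return sp - IA64_PT_REGS_SIZE;
		/* (pUStk) coming from user mode: switch to the task's kernel stack */
		return task + IA64_STK_OFFSET - IA64_PT_REGS_SIZE;
	}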


From 49a28cc8fd26f5317c47a9aeb2bdd1c33e21738e Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Sun, 11 Sep 2005 17:24:42 +1000
Subject: [IA64] MCA/INIT: remove obsolete unwind code

Delete the special case unwind code that was only used by the old
MCA/INIT handler.

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/unwind.c | 22 ----------------------
 include/asm-ia64/unwind.h |  7 -------
 2 files changed, 29 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 3288be47bc75..93d5a3b41f69 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -2019,28 +2019,6 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 	STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
 }
 
-void
-unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t,
-			    struct pt_regs *pt, struct switch_stack *sw)
-{
-	unsigned long sof;
-
-	init_frame_info(info, t, sw, pt->r12);
-	info->cfm_loc = &pt->cr_ifs;
-	info->unat_loc = &pt->ar_unat;
-	info->pfs_loc = &pt->ar_pfs;
-	sof = *info->cfm_loc & 0x7f;
-	info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof);
-	info->ip = pt->cr_iip + ia64_psr(pt)->ri;
-	info->pt = (unsigned long) pt;
-	UNW_DPRINT(3, "unwind.%s:\n"
-		   "  bsp    0x%lx\n"
-		   "  sof    0x%lx\n"
-		   "  ip     0x%lx\n",
-		   __FUNCTION__, info->bsp, sof, info->ip);
-	find_save_locs(info);
-}
-
 void
 unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
 {
diff --git a/include/asm-ia64/unwind.h b/include/asm-ia64/unwind.h
index 61426ad3ecdb..5df0276b0493 100644
--- a/include/asm-ia64/unwind.h
+++ b/include/asm-ia64/unwind.h
@@ -114,13 +114,6 @@ extern void unw_remove_unwind_table (void *handle);
  */
 extern void unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t);
 
-/*
- * Prepare to unwind from interruption.  The pt-regs and switch-stack structures must have
- * be "adjacent" (no state modifications between pt-regs and switch-stack).
- */
-extern void unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t,
-					struct pt_regs *pt, struct switch_stack *sw);
-
 extern void unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 				 struct switch_stack *sw);
 
-- 
cgit v1.2.3
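
With unw_init_from_interruption() removed, callers are left with the
blocked-task entry points that remain declared in unwind.h. A sketch of
walking a stack via unw_init_frame_info() and unw_unwind(); the helper
name dump_backtrace() is hypothetical and error handling is minimal:

	#include <linux/kernel.h>
	#include <linux/sched.h>
	#include <asm/unwind.h>

	static void dump_backtrace(struct task_struct *t, struct switch_stack *sw)
	{
		struct unw_frame_info info;
		unsigned long ip;

		unw_init_frame_info(&info, t, sw);	/* start from the saved switch_stack */
		do {
			unw_get_ip(&info, &ip);		/* current frame's instruction pointer */
			if (!ip)
				break;
			printk(KERN_DEBUG "  ip=0x%lx\n", ip);
		} while (unw_unwind(&info) >= 0);	/* step to the caller's frame */
	}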