aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64')
-rw-r--r--arch/ia64/Kconfig28
-rw-r--r--arch/ia64/Makefile5
-rw-r--r--arch/ia64/configs/gensparse_defconfig1
-rw-r--r--arch/ia64/configs/sn2_defconfig1
-rw-r--r--arch/ia64/configs/tiger_defconfig2
-rw-r--r--arch/ia64/defconfig1
-rw-r--r--arch/ia64/dig/setup.c5
-rw-r--r--arch/ia64/hp/sim/simserial.c7
-rw-r--r--arch/ia64/ia32/sys_ia32.c14
-rw-r--r--arch/ia64/kernel/acpi.c49
-rw-r--r--arch/ia64/kernel/entry.S14
-rw-r--r--arch/ia64/kernel/iosapic.c6
-rw-r--r--arch/ia64/kernel/irq.c13
-rw-r--r--arch/ia64/kernel/ivt.S1
-rw-r--r--arch/ia64/kernel/machvec.c19
-rw-r--r--arch/ia64/kernel/mca.c194
-rw-r--r--arch/ia64/kernel/mca_drv.c22
-rw-r--r--arch/ia64/kernel/mca_drv.h7
-rw-r--r--arch/ia64/kernel/mca_drv_asm.S13
-rw-r--r--arch/ia64/kernel/numa.c2
-rw-r--r--arch/ia64/kernel/patch.c8
-rw-r--r--arch/ia64/kernel/perfmon.c5
-rw-r--r--arch/ia64/kernel/setup.c61
-rw-r--r--arch/ia64/kernel/signal.c101
-rw-r--r--arch/ia64/kernel/smpboot.c221
-rw-r--r--arch/ia64/kernel/time.c9
-rw-r--r--arch/ia64/kernel/topology.c2
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S60
-rw-r--r--arch/ia64/mm/contig.c12
-rw-r--r--arch/ia64/mm/discontig.c11
-rw-r--r--arch/ia64/mm/hugetlbpage.c12
-rw-r--r--arch/ia64/mm/init.c22
-rw-r--r--arch/ia64/sn/kernel/Makefile3
-rw-r--r--arch/ia64/sn/kernel/bte.c2
-rw-r--r--arch/ia64/sn/kernel/io_init.c29
-rw-r--r--arch/ia64/sn/kernel/irq.c21
-rw-r--r--arch/ia64/sn/kernel/pio_phys.S71
-rw-r--r--arch/ia64/sn/kernel/setup.c6
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c21
-rw-r--r--arch/ia64/sn/kernel/tiocx.c10
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c102
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c1
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c28
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_provider.c17
-rw-r--r--arch/ia64/sn/pci/tioca_provider.c2
-rw-r--r--arch/ia64/sn/pci/tioce_provider.c326
46 files changed, 1038 insertions, 529 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a85ea9d37f05..10b6b9e7716b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -252,6 +252,15 @@ config NR_CPUS
than 64 will cause the use of a CPU mask array, causing a small
performance hit.
+config IA64_NR_NODES
+ int "Maximum number of NODEs (256-1024)" if (IA64_SGI_SN2 || IA64_GENERIC)
+ range 256 1024
+ depends on IA64_SGI_SN2 || IA64_GENERIC
+ default "256"
+ help
+ This option specifies the maximum number of nodes in your SSI system.
+ If in doubt, use the default.
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && EXPERIMENTAL
@@ -271,6 +280,25 @@ config SCHED_SMT
Intel IA64 chips with MultiThreading at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PERMIT_BSP_REMOVE
+ bool "Support removal of Bootstrap Processor"
+ depends on HOTPLUG_CPU
+ default n
+ ---help---
+ Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
+ support.
+
+config FORCE_CPEI_RETARGET
+ bool "Force assumption that CPEI can be re-targetted"
+ depends on PERMIT_BSP_REMOVE
+ default n
+ ---help---
+ Say Y if you need to force the assumption that CPEI can be re-targetted to
+ any cpu in the system. This hint is available via ACPI 3.0 specifications.
+ Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
+ This option it useful to enable this feature on older BIOS's as well.
+ You can also enable this by using boot command line option force_cpei=1.
+
config PREEMPT
bool "Preemptible Kernel"
help
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f722e1a25948..80ea7506fa1a 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -1,6 +1,9 @@
#
# ia64/Makefile
#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
@@ -62,7 +65,7 @@ drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot
-.PHONY: boot compressed check
+PHONY += boot compressed check
all: compressed unwcheck
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index 744fd2f79f61..0d29aa2066b3 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -116,6 +116,7 @@ CONFIG_IOSAPIC=y
CONFIG_FORCE_MAX_ZONEORDER=17
CONFIG_SMP=y
CONFIG_NR_CPUS=512
+CONFIG_IA64_NR_NODES=256
CONFIG_HOTPLUG_CPU=y
# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index 8206752161bb..a718034d68d0 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -116,6 +116,7 @@ CONFIG_IA64_SGI_SN_XP=m
CONFIG_FORCE_MAX_ZONEORDER=17
CONFIG_SMP=y
CONFIG_NR_CPUS=1024
+CONFIG_IA64_NR_NODES=256
# CONFIG_HOTPLUG_CPU is not set
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT=y
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 125568118b84..766bf4955432 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
CONFIG_SELECT_MEMORY_MODEL=y
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
index 3e767288a745..6cba55da572a 100644
--- a/arch/ia64/defconfig
+++ b/arch/ia64/defconfig
@@ -116,6 +116,7 @@ CONFIG_IOSAPIC=y
CONFIG_FORCE_MAX_ZONEORDER=17
CONFIG_SMP=y
CONFIG_NR_CPUS=512
+CONFIG_IA64_NR_NODES=256
CONFIG_HOTPLUG_CPU=y
# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
index c9104bfff667..38aa9c108857 100644
--- a/arch/ia64/dig/setup.c
+++ b/arch/ia64/dig/setup.c
@@ -69,8 +69,3 @@ dig_setup (char **cmdline_p)
screen_info.orig_video_isVGA = 1; /* XXX fake */
screen_info.orig_video_ega_bx = 3; /* XXX fake */
}
-
-void __init
-dig_irq_init (void)
-{
-}
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 626cdc83668b..0e5c6ae50228 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -46,11 +46,6 @@
#define KEYBOARD_INTR 3 /* must match with simulator! */
#define NR_PORTS 1 /* only one port for now */
-#define SERIAL_INLINE 1
-
-#ifdef SERIAL_INLINE
-#define _INLINE_ inline
-#endif
#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
@@ -237,7 +232,7 @@ static void rs_put_char(struct tty_struct *tty, unsigned char ch)
local_irq_restore(flags);
}
-static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
+static void transmit_chars(struct async_struct *info, int *intr_done)
{
int count;
unsigned long flags;
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 70dba1f0e2ee..13e739e4c84d 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1166,19 +1166,7 @@ put_tv32 (struct compat_timeval __user *o, struct timeval *i)
asmlinkage unsigned long
sys32_alarm (unsigned int seconds)
{
- struct itimerval it_new, it_old;
- unsigned int oldalarm;
-
- it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
- it_new.it_value.tv_sec = seconds;
- it_new.it_value.tv_usec = 0;
- do_setitimer(ITIMER_REAL, &it_new, &it_old);
- oldalarm = it_old.it_value.tv_sec;
- /* ehhh.. We can't return 0 if we have an alarm pending.. */
- /* And we'd better return too much than too little anyway */
- if (it_old.it_value.tv_usec)
- oldalarm++;
- return oldalarm;
+ return alarm_setitimer(seconds);
}
/* Translations due to time_t size differences. Which affects all
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ecd44bdc8394..a4e218ce2edb 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
unsigned int can_cpei_retarget(void)
{
extern int cpe_vector;
+ extern unsigned int force_cpei_retarget;
/*
* Only if CPEI is supported and the override flag
* is present, otherwise return that its re-targettable
* if we are in polling mode.
*/
- if (cpe_vector > 0 && !acpi_cpei_override)
- return 0;
- else
- return 1;
+ if (cpe_vector > 0) {
+ if (acpi_cpei_override || force_cpei_retarget)
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
}
unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu)
{
acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
}
+#endif
unsigned int get_cpei_target_cpu(void)
{
@@ -414,6 +420,26 @@ int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
int __initdata nid_to_pxm_map[MAX_NUMNODES];
static struct acpi_table_slit __initdata *slit_table;
+static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
+{
+ int pxm;
+
+ pxm = pa->proximity_domain;
+ if (ia64_platform_is("sn2"))
+ pxm += pa->reserved[0] << 8;
+ return pxm;
+}
+
+static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma)
+{
+ int pxm;
+
+ pxm = ma->proximity_domain;
+ if (ia64_platform_is("sn2"))
+ pxm += ma->reserved1[0] << 8;
+ return pxm;
+}
+
/*
* ACPI 2.0 SLIT (System Locality Information Table)
* http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
@@ -437,13 +463,20 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
void __init
acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
{
+ int pxm;
+
+ if (!pa->flags.enabled)
+ return;
+
+ pxm = get_processor_proximity_domain(pa);
+
/* record this node in proximity bitmap */
- pxm_bit_set(pa->proximity_domain);
+ pxm_bit_set(pxm);
node_cpuid[srat_num_cpus].phys_id =
(pa->apic_id << 8) | (pa->lsapic_eid);
/* nid should be overridden as logical node id later */
- node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+ node_cpuid[srat_num_cpus].nid = pxm;
srat_num_cpus++;
}
@@ -451,10 +484,10 @@ void __init
acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
{
unsigned long paddr, size;
- u8 pxm;
+ int pxm;
struct node_memblk_s *p, *q, *pend;
- pxm = ma->proximity_domain;
+ pxm = get_memory_proximity_domain(ma);
/* fill node memory chunk structure */
paddr = ma->base_addr_hi;
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 930fdfca6ddb..0e3eda99e549 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1102,9 +1102,6 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
-(p6) br.cond.sptk.few .sigdelayed
- ;;
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
@@ -1131,17 +1128,6 @@ skip_rbs_switch:
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered. Deliver it now. The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
- br.call.sptk.many rp=do_sigdelayed
- cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f343fa..8832c553230a 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
+ extern int cpe_vector;
/*
* In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector)
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
+#ifdef CONFIG_ACPI
+ if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+ return get_cpei_target_cpu();
+#endif
+
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c32759..5ce908ef9c95 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
+ extern void ia64_disable_timer(void);
+ extern volatile int time_keeper_id;
+
+ ia64_disable_timer();
+
+ /*
+ * Find a new timesync master
+ */
+ if (smp_processor_id() == time_keeper_id) {
+ time_keeper_id = first_cpu(cpu_online_map);
+ printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+ }
- ia64_set_itv(1<<16);
/*
* Phase 1: Locate irq's bound to this cpu and
* relocate them for cpu removal.
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index dcd906fe5749..829a43cab797 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -865,6 +865,7 @@ ENTRY(interrupt)
;;
SAVE_REST
;;
+ MCA_RECOVER_RANGE(interrupt)
alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
mov out0=cr.ivr // pass cr.ivr as first arg
add out1=16,sp // pass pointer to pt_regs as second arg
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index c3a04ee7f4f6..4b0b71d5aef4 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -14,7 +14,15 @@
struct ia64_machine_vector ia64_mv;
EXPORT_SYMBOL(ia64_mv);
-static struct ia64_machine_vector *
+static __initdata const char *mvec_name;
+static __init int setup_mvec(char *s)
+{
+ mvec_name = s;
+ return 0;
+}
+early_param("machvec", setup_mvec);
+
+static struct ia64_machine_vector * __init
lookup_machvec (const char *name)
{
extern struct ia64_machine_vector machvec_start[];
@@ -33,10 +41,13 @@ machvec_init (const char *name)
{
struct ia64_machine_vector *mv;
+ if (!name)
+ name = mvec_name ? mvec_name : acpi_get_sysname();
mv = lookup_machvec(name);
- if (!mv) {
- panic("generic kernel failed to find machine vector for platform %s!", name);
- }
+ if (!mv)
+ panic("generic kernel failed to find machine vector for"
+ " platform %s!", name);
+
ia64_mv = *mv;
printk(KERN_INFO "booting generic kernel on platform %s\n", name);
}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9ee576..87ff7fe33cfb 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -83,6 +83,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include "mca_drv.h"
#include "entry.h"
#if defined(IA64_MCA_DEBUG_INFO)
@@ -133,7 +134,7 @@ static int cpe_poll_enabled = 1;
extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
-static int mca_init;
+static int mca_init __initdata;
static void inline
@@ -184,7 +185,7 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
* Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
* Outputs : None
*/
-static void
+static void __init
ia64_log_init(int sal_info_type)
{
u64 max_size = 0;
@@ -282,13 +283,53 @@ ia64_mca_log_sal_error_record(int sal_info_type)
}
/*
- * platform dependent error handling
+ * search_mca_table
+ * See if the MCA surfaced in an instruction range
+ * that has been tagged as recoverable.
+ *
+ * Inputs
+ * first First address range to check
+ * last Last address range to check
+ * ip Instruction pointer, address we are looking for
+ *
+ * Return value:
+ * 1 on Success (in the table)/ 0 on Failure (not in the table)
*/
-#ifndef PLATFORM_MCA_HANDLERS
+int
+search_mca_table (const struct mca_table_entry *first,
+ const struct mca_table_entry *last,
+ unsigned long ip)
+{
+ const struct mca_table_entry *curr;
+ u64 curr_start, curr_end;
+
+ curr = first;
+ while (curr <= last) {
+ curr_start = (u64) &curr->start_addr + curr->start_addr;
+ curr_end = (u64) &curr->end_addr + curr->end_addr;
+
+ if ((ip >= curr_start) && (ip <= curr_end)) {
+ return 1;
+ }
+ curr++;
+ }
+ return 0;
+}
+
+/* Given an address, look for it in the mca tables. */
+int mca_recover_range(unsigned long addr)
+{
+ extern struct mca_table_entry __start___mca_table[];
+ extern struct mca_table_entry __stop___mca_table[];
+
+ return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
+}
+EXPORT_SYMBOL_GPL(mca_recover_range);
#ifdef CONFIG_ACPI
int cpe_vector = -1;
+int ia64_cpe_irq = -1;
static irqreturn_t
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -359,7 +400,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
* Outputs
* None
*/
-static void
+static void __init
ia64_mca_register_cpev (int cpev)
{
/* Register the CPE interrupt vector with SAL */
@@ -377,8 +418,6 @@ ia64_mca_register_cpev (int cpev)
}
#endif /* CONFIG_ACPI */
-#endif /* PLATFORM_MCA_HANDLERS */
-
/*
* ia64_mca_cmc_vector_setup
*
@@ -392,7 +431,7 @@ ia64_mca_register_cpev (int cpev)
* Outputs
* None
*/
-void
+void __cpuinit
ia64_mca_cmc_vector_setup (void)
{
cmcv_reg_t cmcv;
@@ -630,6 +669,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
*tnat |= (nat << tslot);
}
+/* Change the comm field on the MCA/INT task to include the pid that
+ * was interrupted, it makes for easier debugging. If that pid was 0
+ * (swapper or nested MCA/INIT) then use the start of the previous comm
+ * field suffixed with its cpu.
+ */
+
+static void
+ia64_mca_modify_comm(const task_t *previous_current)
+{
+ char *p, comm[sizeof(current->comm)];
+ if (previous_current->pid)
+ snprintf(comm, sizeof(comm), "%s %d",
+ current->comm, previous_current->pid);
+ else {
+ int l;
+ if ((p = strchr(previous_current->comm, ' ')))
+ l = p - previous_current->comm;
+ else
+ l = strlen(previous_current->comm);
+ snprintf(comm, sizeof(comm), "%s %*s %d",
+ current->comm, l, previous_current->comm,
+ task_thread_info(previous_current)->cpu);
+ }
+ memcpy(current->comm, comm, sizeof(current->comm));
+}
+
/* On entry to this routine, we are running on the per cpu stack, see
* mca_asm.h. The original stack has not been touched by this event. Some of
* the original stack's registers will be in the RBS on this stack. This stack
@@ -648,7 +713,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
struct ia64_sal_os_state *sos,
const char *type)
{
- char *p, comm[sizeof(current->comm)];
+ char *p;
ia64_va va;
extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
const pal_min_state_area_t *ms = sos->pal_min_state;
@@ -721,54 +786,43 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
/* Verify the previous stack state before we change it */
if (user_mode(regs)) {
msg = "occurred in user space";
- goto no_mod;
- }
- if (r13 != sos->prev_IA64_KR_CURRENT) {
- msg = "inconsistent previous current and r13";
- goto no_mod;
- }
- if ((r12 - r13) >= KERNEL_STACK_SIZE) {
- msg = "inconsistent r12 and r13";
- goto no_mod;
- }
- if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
- msg = "inconsistent ar.bspstore and r13";
- goto no_mod;
- }
- va.p = old_bspstore;
- if (va.f.reg < 5) {
- msg = "old_bspstore is in the wrong region";
- goto no_mod;
- }
- if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
- msg = "inconsistent ar.bsp and r13";
- goto no_mod;
- }
- size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
- if (ar_bspstore + size > r12) {
- msg = "no room for blocked state";
+ /* previous_current is guaranteed to be valid when the task was
+ * in user space, so ...
+ */
+ ia64_mca_modify_comm(previous_current);
goto no_mod;
}
- /* Change the comm field on the MCA/INT task to include the pid that
- * was interrupted, it makes for easier debugging. If that pid was 0
- * (swapper or nested MCA/INIT) then use the start of the previous comm
- * field suffixed with its cpu.
- */
- if (previous_current->pid)
- snprintf(comm, sizeof(comm), "%s %d",
- current->comm, previous_current->pid);
- else {
- int l;
- if ((p = strchr(previous_current->comm, ' ')))
- l = p - previous_current->comm;
- else
- l = strlen(previous_current->comm);
- snprintf(comm, sizeof(comm), "%s %*s %d",
- current->comm, l, previous_current->comm,
- task_thread_info(previous_current)->cpu);
+ if (!mca_recover_range(ms->pmsa_iip)) {
+ if (r13 != sos->prev_IA64_KR_CURRENT) {
+ msg = "inconsistent previous current and r13";
+ goto no_mod;
+ }
+ if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+ msg = "inconsistent r12 and r13";
+ goto no_mod;
+ }
+ if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+ msg = "inconsistent ar.bspstore and r13";
+ goto no_mod;
+ }
+ va.p = old_bspstore;
+ if (va.f.reg < 5) {
+ msg = "old_bspstore is in the wrong region";
+ goto no_mod;
+ }
+ if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+ msg = "inconsistent ar.bsp and r13";
+ goto no_mod;
+ }
+ size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+ if (ar_bspstore + size > r12) {
+ msg = "no room for blocked state";
+ goto no_mod;
+ }
}
- memcpy(current->comm, comm, sizeof(current->comm));
+
+ ia64_mca_modify_comm(previous_current);
/* Make the original task look blocked. First stack a struct pt_regs,
* describing the state at the time of interrupt. mca_asm.S built a
@@ -908,7 +962,7 @@ no_mod:
static void
ia64_wait_for_slaves(int monarch)
{
- int c, wait = 0;
+ int c, wait = 0, missing = 0;
for_each_online_cpu(c) {
if (c == monarch)
continue;
@@ -919,15 +973,32 @@ ia64_wait_for_slaves(int monarch)
}
}
if (!wait)
- return;
+ goto all_in;
for_each_online_cpu(c) {
if (c == monarch)
continue;
if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ missing = 1;
break;
}
}
+ if (!missing)
+ goto all_in;
+ printk(KERN_INFO "OS MCA slave did not rendezvous on cpu");
+ for_each_online_cpu(c) {
+ if (c == monarch)
+ continue;
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ printk(" %d", c);
+ }
+ printk("\n");
+ return;
+
+all_in:
+ printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n");
+ return;
}
/*
@@ -953,6 +1024,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
task_t *previous_current;
oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
+ console_loglevel = 15; /* make sure printks make it to console */
+ printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
+ sos->proc_state_param, cpu, sos->monarch);
+
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
monarch_cpu = cpu;
if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
@@ -1416,7 +1491,7 @@ static struct irqaction mca_cpep_irqaction = {
* format most of the fields.
*/
-static void
+static void __cpuinit
format_mca_init_stack(void *mca_data, unsigned long offset,
const char *type, int cpu)
{
@@ -1440,15 +1515,17 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
/* Do per-CPU MCA-related initialization. */
-void __devinit
+void __cpuinit
ia64_mca_cpu_init(void *cpu_data)
{
void *pal_vaddr;
+ static int first_time = 1;
- if (smp_processor_id() == 0) {
+ if (first_time) {
void *mca_data;
int cpu;
+ first_time = 0;
mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
* NR_CPUS + KERNEL_STACK_SIZE);
mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1781,7 @@ ia64_mca_late_init(void)
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
setup_irq(irq, &mca_cpe_irqaction);
+ ia64_cpe_irq = irq;
}
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index e883d85906db..37c88eb55873 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -6,6 +6,7 @@
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
* Copyright (C) 2005 Silicon Graphics, Inc
* Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
*/
#include <linux/config.h>
#include <linux/types.h>
@@ -121,11 +122,12 @@ mca_page_isolate(unsigned long paddr)
*/
void
-mca_handler_bh(unsigned long paddr)
+mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
{
- printk(KERN_ERR
- "OS_MCA: process [pid: %d](%s) encounters MCA (paddr=%lx)\n",
- current->pid, current->comm, paddr);
+ printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
+ "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
+ raw_smp_processor_id(), current->pid, current->uid,
+ iip, ipsr, paddr, current->comm);
spin_lock(&mca_bh_lock);
switch (mca_page_isolate(paddr)) {
@@ -442,21 +444,26 @@ recover_from_read_error(slidx_table_t *slidx,
if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
return 0;
psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+ psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
/*
* Check the privilege level of interrupted context.
* If it is user-mode, then terminate affected process.
*/
- if (psr1->cpl != 0) {
+
+ pmsa = sos->pal_min_state;
+ if (psr1->cpl != 0 ||
+ ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
smei = peidx_bus_check(peidx, 0);
if (smei->valid.target_identifier) {
/*
* setup for resume to bottom half of MCA,
* "mca_handler_bhhook"
*/
- pmsa = sos->pal_min_state;
- /* pass to bhhook as 1st argument (gr8) */
+ /* pass to bhhook as argument (gr8, ...) */
pmsa->pmsa_gr[8-1] = smei->target_identifier;
+ pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+ pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
/* set interrupted return address (but no use) */
pmsa->pmsa_br0 = pmsa->pmsa_iip;
/* change resume address to bottom half */
@@ -466,6 +473,7 @@ recover_from_read_error(slidx_table_t *slidx,
psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
psr2->cpl = 0;
psr2->ri = 0;
+ psr2->bn = 1;
psr2->i = 0;
return 1;
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index e2f6fa1e0ef6..31a2e52bb16f 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -111,3 +111,10 @@ typedef struct slidx_table {
slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
__count; })
+struct mca_table_entry {
+ int start_addr; /* location-relative starting address of MCA recoverable range */
+ int end_addr; /* location-relative ending address of MCA recoverable range */
+};
+
+extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
+extern int mca_recover_range(unsigned long);
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index 3f298ee4d00c..e6a580d354b9 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -14,15 +14,12 @@
GLOBAL_ENTRY(mca_handler_bhhook)
invala // clear RSE ?
- ;;
cover
;;
clrrrb
;;
- alloc r16=ar.pfs,0,2,1,0 // make a new frame
- ;;
+ alloc r16=ar.pfs,0,2,3,0 // make a new frame
mov ar.rsc=0
- ;;
mov r13=IA64_KR(CURRENT) // current task pointer
;;
mov r2=r13
@@ -30,7 +27,6 @@ GLOBAL_ENTRY(mca_handler_bhhook)
addl r22=IA64_RBS_OFFSET,r2
;;
mov ar.bspstore=r22
- ;;
addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
;;
adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
@@ -40,12 +36,12 @@ GLOBAL_ENTRY(mca_handler_bhhook)
movl loc1=mca_handler_bh // recovery C function
;;
mov out0=r8 // poisoned address
+ mov out1=r9 // iip
+ mov out2=r10 // psr
mov b6=loc1
;;
mov loc1=rp
- ;;
- ssm psr.i
- ;;
+ ssm psr.i | psr.ic
br.call.sptk.many rp=b6 // does not return ...
;;
mov ar.pfs=loc0
@@ -53,5 +49,4 @@ GLOBAL_ENTRY(mca_handler_bhhook)
;;
mov r8=r0
br.ret.sptk.many rp
- ;;
END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a68ce6678092..0766493d4d00 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -25,7 +25,7 @@
#include <asm/processor.h>
#include <asm/smp.h>
-u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_to_node_map);
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 6a4ac7d70b35..bc11bb096f58 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -115,7 +115,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-void
+void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
static int first_time = 1;
@@ -149,7 +149,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-static void
+static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
extern unsigned long fsyscall_table[NR_syscalls];
@@ -166,7 +166,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-static void
+static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
extern char fsys_bubble_down[];
@@ -184,7 +184,7 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-void
+void __init
ia64_patch_gate (void)
{
# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b385da..077f21216b65 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
void
pfm_init_percpu (void)
{
+ static int first_time=1;
/*
* make sure no measurement is active
* (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
*/
pfm_unfreeze_pmu();
- if (smp_processor_id() == 0)
+ if (first_time) {
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+ first_time=0;
+ }
ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
ia64_srlz_d();
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 3258e09278d0..eb388e271b2b 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -41,7 +41,6 @@
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
-#include <linux/platform.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
@@ -131,8 +130,8 @@ EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
/*
* We use a special marker for the end of memory and it uses the extra (+1) slot
*/
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
-int num_rsvd_regions;
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
+int num_rsvd_regions __initdata;
/*
@@ -141,7 +140,7 @@ int num_rsvd_regions;
* caller-specified function is called with the memory ranges that remain after filtering.
* This routine does not assume the incoming segments are sorted.
*/
-int
+int __init
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
@@ -177,7 +176,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
-static void
+static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
int j;
@@ -218,7 +217,7 @@ __initcall(register_memory);
* initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
* see include/asm-ia64/meminit.h if you need to define more.
*/
-void
+void __init
reserve_memory (void)
{
int n = 0;
@@ -270,7 +269,7 @@ reserve_memory (void)
* Grab the initrd start and end from the boot parameter struct given us by
* the boot loader.
*/
-void
+void __init
find_initrd (void)
{
#ifdef CONFIG_BLK_DEV_INITRD
@@ -362,7 +361,7 @@ mark_bsp_online (void)
}
#ifdef CONFIG_SMP
-static void
+static void __init
check_for_logical_procs (void)
{
pal_logical_to_physical_t info;
@@ -389,6 +388,14 @@ check_for_logical_procs (void)
}
#endif
+static __initdata int nomca;
+static __init int setup_nomca(char *s)
+{
+ nomca = 1;
+ return 0;
+}
+early_param("nomca", setup_nomca);
+
void __init
setup_arch (char **cmdline_p)
{
@@ -402,35 +409,15 @@ setup_arch (char **cmdline_p)
efi_init();
io_port_init();
+ parse_early_param();
+
#ifdef CONFIG_IA64_GENERIC
- {
- const char *mvec_name = strstr (*cmdline_p, "machvec=");
- char str[64];
-
- if (mvec_name) {
- const char *end;
- size_t len;
-
- mvec_name += 8;
- end = strchr (mvec_name, ' ');
- if (end)
- len = end - mvec_name;
- else
- len = strlen (mvec_name);
- len = min(len, sizeof (str) - 1);
- strncpy (str, mvec_name, len);
- str[len] = '\0';
- mvec_name = str;
- } else
- mvec_name = acpi_get_sysname();
- machvec_init(mvec_name);
- }
+ machvec_init(NULL);
#endif
if (early_console_setup(*cmdline_p) == 0)
mark_bsp_online();
- parse_early_param();
#ifdef CONFIG_ACPI
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
@@ -493,7 +480,7 @@ setup_arch (char **cmdline_p)
#endif
/* enable IA-64 Machine Check Abort Handling unless disabled */
- if (!strstr(saved_command_line, "nomca"))
+ if (!nomca)
ia64_mca_init();
platform_setup(cmdline_p);
@@ -623,7 +610,7 @@ struct seq_operations cpuinfo_op = {
.show = show_cpuinfo
};
-void
+static void __cpuinit
identify_cpu (struct cpuinfo_ia64 *c)
{
union {
@@ -700,7 +687,7 @@ setup_per_cpu_areas (void)
* In addition, the minimum of the i-cache stride sizes is calculated for
* "flush_icache_range()".
*/
-static void
+static void __cpuinit
get_max_cacheline_size (void)
{
unsigned long line_size, max = 1;
@@ -763,10 +750,10 @@ get_max_cacheline_size (void)
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
*/
-void
+void __cpuinit
cpu_init (void)
{
- extern void __devinit ia64_mmu_init (void *);
+ extern void __cpuinit ia64_mmu_init (void *);
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
@@ -894,7 +881,7 @@ void sched_cacheflush(void)
ia64_sal_cache_flush(3);
}
-void
+void __init
check_bugs (void)
{
ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb44d07..1d7903ee2126 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
}
return 0;
}
-
-/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
- * could not be delivered. It is important that the target process is not
- * allowed to do any more work in user space. Possible cases for the target
- * process:
- *
- * - It is sleeping and will wake up soon. Store the data in the current task,
- * the signal will be sent when the current task returns from the next
- * interrupt.
- *
- * - It is running in user context. Store the data in the current task, the
- * signal will be sent when the current task returns from the next interrupt.
- *
- * - It is running in kernel context on this or another cpu and will return to
- * user context. Store the data in the target task, the signal will be sent
- * to itself when the target task returns to user space.
- *
- * - It is running in kernel context on this cpu and will sleep before
- * returning to user context. Because this is also the current task, the
- * signal will not get delivered and the task could sleep indefinitely.
- * Store the data in the idle task for this cpu, the signal will be sent
- * after the idle task processes its next interrupt.
- *
- * To cover all cases, store the data in the target task, the current task and
- * the idle task on this cpu. Whatever happens, the signal will be delivered
- * to the target task before it can do any useful user space work. Multiple
- * deliveries have no unwanted side effects.
- *
- * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
- * disabled. It must not take any locks nor use kernel structures or services
- * that require locks.
- */
-
-/* To ensure that we get the right pid, check its start time. To avoid extra
- * include files in thread_info.h, convert the task start_time to unsigned long,
- * giving us a cycle time of > 580 years.
- */
-static inline unsigned long
-start_time_ul(const struct task_struct *t)
-{
- return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
-}
-
-void
-set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
-{
- struct task_struct *t;
- unsigned long start_time = 0;
- int i;
-
- for (i = 1; i <= 3; ++i) {
- switch (i) {
- case 1:
- t = find_task_by_pid(pid);
- if (t)
- start_time = start_time_ul(t);
- break;
- case 2:
- t = current;
- break;
- default:
- t = idle_task(smp_processor_id());
- break;
- }
-
- if (!t)
- return;
- task_thread_info(t)->sigdelayed.signo = signo;
- task_thread_info(t)->sigdelayed.code = code;
- task_thread_info(t)->sigdelayed.addr = addr;
- task_thread_info(t)->sigdelayed.start_time = start_time;
- task_thread_info(t)->sigdelayed.pid = pid;
- wmb();
- set_tsk_thread_flag(t, TIF_SIGDELAYED);
- }
-}
-
-/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
- * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
- */
-
-void
-do_sigdelayed(void)
-{
- struct siginfo siginfo;
- pid_t pid;
- struct task_struct *t;
-
- clear_thread_flag(TIF_SIGDELAYED);
- memset(&siginfo, 0, sizeof(siginfo));
- siginfo.si_signo = current_thread_info()->sigdelayed.signo;
- siginfo.si_code = current_thread_info()->sigdelayed.code;
- siginfo.si_addr = current_thread_info()->sigdelayed.addr;
- pid = current_thread_info()->sigdelayed.pid;
- t = find_task_by_pid(pid);
- if (!t)
- return;
- if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
- return;
- force_sig_info(siginfo.si_signo, &siginfo, t);
-}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index b681ef34a86e..44e9547878ac 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
#endif
#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok 1
+#else
+#define bsp_remove_ok 0
+#endif
+
/*
* Store all idle threads, this can be reused instead of creating
* a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
/*
* ITC synchronization related stuff:
*/
-#define MASTER 0
+#define MASTER (0)
#define SLAVE (SMP_CACHE_BYTES/8)
#define NUM_ROUNDS 64 /* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT (1)
+#else
+#define CPEI_OVERRIDE_DEFAULT (0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+ int value=0;
+
+ get_option (&str, &value);
+ force_cpei_retarget = value;
+
+ return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
static int __init
nointroute (char *str)
{
@@ -161,6 +188,27 @@ nointroute (char *str)
__setup("nointroute", nointroute);
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ int cpuid;
+ static int fix_bsp_b0 = 1;
+
+ cpuid = smp_processor_id();
+
+ /*
+ * Cache the b0 value on the first AP that comes up
+ */
+ if (!(fix_bsp_b0 && cpuid))
+ return;
+
+ sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+ printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+ fix_bsp_b0 = 0;
+#endif
+}
+
void
sync_master (void *arg)
{
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
static void __devinit
smp_callin (void)
{
- int cpuid, phys_id;
+ int cpuid, phys_id, itc_master;
extern void ia64_init_itm(void);
+ extern volatile int time_keeper_id;
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
+ itc_master = time_keeper_id;
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
BUG();
}
+ fix_b0_for_bsp();
+
lock_ipi_calllock();
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
* calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
* local_bh_enable(), which bugs out if irqs are not enabled...
*/
- Dprintk("Going to syncup ITC with BP.\n");
- ia64_sync_itc(0);
+ Dprintk("Going to syncup ITC with ITC Master.\n");
+ ia64_sync_itc(itc_master);
}
/*
@@ -572,32 +624,8 @@ void __devinit smp_prepare_boot_cpu(void)
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
-/*
- * mt_info[] is a temporary store for all info returned by
- * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the
- * specific cpu comes.
- */
-static struct {
- __u32 socket_id;
- __u16 core_id;
- __u16 thread_id;
- __u16 proc_fixed_addr;
- __u8 valid;
-} mt_info[NR_CPUS] __devinitdata;
-
#ifdef CONFIG_HOTPLUG_CPU
static inline void
-remove_from_mtinfo(int cpu)
-{
- int i;
-
- for_each_cpu(i)
- if (mt_info[i].valid && mt_info[i].socket_id ==
- cpu_data(cpu)->socket_id)
- mt_info[i].valid = 0;
-}
-
-static inline void
clear_cpu_sibling_map(int cpu)
{
int i;
@@ -626,15 +654,50 @@ remove_siblinginfo(int cpu)
/* remove it from all sibling map's */
clear_cpu_sibling_map(cpu);
+}
+
+extern void fixup_irqs(void);
- /* if this cpu is the last in the core group, remove all its info
- * from mt_info structure
+int migrate_platform_irqs(unsigned int cpu)
+{
+ int new_cpei_cpu;
+ irq_desc_t *desc = NULL;
+ cpumask_t mask;
+ int retval = 0;
+
+ /*
+ * dont permit CPEI target to removed.
*/
- if (last)
- remove_from_mtinfo(cpu);
+ if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+ printk ("CPU (%d) is CPEI Target\n", cpu);
+ if (can_cpei_retarget()) {
+ /*
+ * Now re-target the CPEI to a different processor
+ */
+ new_cpei_cpu = any_online_cpu(cpu_online_map);
+ mask = cpumask_of_cpu(new_cpei_cpu);
+ set_cpei_target_cpu(new_cpei_cpu);
+ desc = irq_descp(ia64_cpe_irq);
+ /*
+ * Switch for now, immediatly, we need to do fake intr
+ * as other interrupts, but need to study CPEI behaviour with
+ * polling before making changes.
+ */
+ if (desc) {
+ desc->handler->disable(ia64_cpe_irq);
+ desc->handler->set_affinity(ia64_cpe_irq, mask);
+ desc->handler->enable(ia64_cpe_irq);
+ printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+ }
+ }
+ if (!desc) {
+ printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+ retval = -EBUSY;
+ }
+ }
+ return retval;
}
-extern void fixup_irqs(void);
/* must be called with cpucontrol mutex held */
int __cpu_disable(void)
{
@@ -643,8 +706,17 @@ int __cpu_disable(void)
/*
* dont permit boot processor for now
*/
- if (cpu == 0)
- return -EBUSY;
+ if (cpu == 0 && !bsp_remove_ok) {
+ printk ("Your platform does not support removal of BSP\n");
+ return (-EBUSY);
+ }
+
+ cpu_clear(cpu, cpu_online_map);
+
+ if (migrate_platform_irqs(cpu)) {
+ cpu_set(cpu, cpu_online_map);
+ return (-EBUSY);
+ }
remove_siblinginfo(cpu);
cpu_clear(cpu, cpu_online_map);
@@ -776,40 +848,6 @@ init_smp_config(void)
ia64_sal_strerror(sal_ret));
}
-static inline int __devinit
-check_for_mtinfo_index(void)
-{
- int i;
-
- for_each_cpu(i)
- if (!mt_info[i].valid)
- return i;
-
- return -1;
-}
-
-/*
- * Search the mt_info to find out if this socket's cid/tid information is
- * cached or not. If the socket exists, fill in the core_id and thread_id
- * in cpuinfo
- */
-static int __devinit
-check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c)
-{
- int i;
- __u32 sid = c->socket_id;
-
- for_each_cpu(i) {
- if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address
- && mt_info[i].socket_id == sid) {
- c->core_id = mt_info[i].core_id;
- c->thread_id = mt_info[i].thread_id;
- return 1; /* not a new socket */
- }
- }
- return 0;
-}
-
/*
* identify_siblings(cpu) gets called from identify_cpu. This populates the
* information related to logical execution units in per_cpu_data structure.
@@ -819,14 +857,12 @@ identify_siblings(struct cpuinfo_ia64 *c)
{
s64 status;
u16 pltid;
- u64 proc_fixed_addr;
- int count, i;
pal_logical_to_physical_t info;
if (smp_num_cpucores == 1 && smp_num_siblings == 1)
return;
- if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) {
+ if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
status);
return;
@@ -835,47 +871,12 @@ identify_siblings(struct cpuinfo_ia64 *c)
printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
return;
}
- if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status);
- return;
- }
c->socket_id = (pltid << 8) | info.overview_ppid;
c->cores_per_socket = info.overview_cpp;
c->threads_per_core = info.overview_tpc;
- count = c->num_log = info.overview_num_log;
-
- /* If the thread and core id information is already cached, then
- * we will simply update cpu_info and return. Otherwise, we will
- * do the PAL calls and cache core and thread id's of all the siblings.
- */
- if (check_for_new_socket(proc_fixed_addr, c))
- return;
-
- for (i = 0; i < count; i++) {
- int index;
-
- if (i && (status = ia64_pal_logical_to_phys(i, &info))
- != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_pal_logical_to_phys failed"
- " with %ld\n", status);
- return;
- }
- if (info.log2_la == proc_fixed_addr) {
- c->core_id = info.log1_cid;
- c->thread_id = info.log1_tid;
- }
+ c->num_log = info.overview_num_log;
- index = check_for_mtinfo_index();
- /* We will not do the mt_info caching optimization in this case.
- */
- if (index < 0)
- continue;
-
- mt_info[index].valid = 1;
- mt_info[index].socket_id = c->socket_id;
- mt_info[index].core_id = info.log1_cid;
- mt_info[index].thread_id = info.log1_tid;
- mt_info[index].proc_fixed_addr = info.log2_la;
- }
+ c->core_id = info.log1_cid;
+ c->thread_id = info.log1_tid;
}
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 307d01e15b2e..ac167436e936 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
extern unsigned long wall_jiffies;
-#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == TIME_KEEPER_ID) {
+ if (smp_processor_id() == time_keeper_id) {
/*
* Here we are in the timer irq handler. We have irqs locally
* disabled, but we don't know if the timer_bh is running on
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
+void __devinit ia64_disable_timer(void)
+{
+ ia64_set_itv(1 << 16);
+}
+
void __init
time_init (void)
{
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 6e5eea19fa67..3b6fd798c4d6 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
parent = &sysfs_nodes[cpu_to_node(num)];
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_ACPI
+#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
/*
* If CPEI cannot be re-targetted, and this is
* CPEI target, then dont create the control file
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 73af6267d2ef..0b9e56dd7f05 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -70,34 +70,9 @@ SECTIONS
__stop___ex_table = .;
}
- .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
- {
- __start___vtop_patchlist = .;
- *(.data.patch.vtop)
- __end___vtop_patchlist = .;
- }
-
- .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
- {
- __start___mckinley_e9_bundles = .;
- *(.data.patch.mckinley_e9)
- __end___mckinley_e9_bundles = .;
- }
-
/* Global data */
_data = .;
-#if defined(CONFIG_IA64_GENERIC)
- /* Machine Vector */
- . = ALIGN(16);
- .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
- {
- machvec_start = .;
- *(.machvec)
- machvec_end = .;
- }
-#endif
-
/* Unwind info & table: */
. = ALIGN(8);
.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
@@ -154,6 +129,41 @@ SECTIONS
*(.initcall7.init)
__initcall_end = .;
}
+
+ /* MCA table */
+ . = ALIGN(16);
+ __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
+ {
+ __start___mca_table = .;
+ *(__mca_table)
+ __stop___mca_table = .;
+ }
+
+ .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
+ {
+ __start___vtop_patchlist = .;
+ *(.data.patch.vtop)
+ __end___vtop_patchlist = .;
+ }
+
+ .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+ {
+ __start___mckinley_e9_bundles = .;
+ *(.data.patch.mckinley_e9)
+ __end___mckinley_e9_bundles = .;
+ }
+
+#if defined(CONFIG_IA64_GENERIC)
+ /* Machine Vector */
+ . = ALIGN(16);
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
+ {
+ machvec_start = .;
+ *(.machvec)
+ machvec_end = .;
+ }
+#endif
+
__con_initcall_start = .;
.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
{ *(.con_initcall.init) }
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index acaaec4e4681..84fd1c14c8a9 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -97,7 +97,7 @@ find_max_pfn (unsigned long start, unsigned long end, void *arg)
* Find a place to put the bootmap and return its starting address in
* bootmap_start. This address must be page-aligned.
*/
-int
+static int __init
find_bootmap_location (unsigned long start, unsigned long end, void *arg)
{
unsigned long needed = *(unsigned long *)arg;
@@ -141,7 +141,7 @@ find_bootmap_location (unsigned long start, unsigned long end, void *arg)
* Walk the EFI memory map and find usable memory for the system, taking
* into account reserved areas.
*/
-void
+void __init
find_memory (void)
{
unsigned long bootmap_size;
@@ -176,18 +176,20 @@ find_memory (void)
*
* Allocate and setup per-cpu data areas.
*/
-void *
+void * __cpuinit
per_cpu_init (void)
{
void *cpu_data;
int cpu;
+ static int first_time=1;
/*
* get_free_pages() cannot be used before cpu_init() done. BSP
* allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
* get_zeroed_page().
*/
- if (smp_processor_id() == 0) {
+ if (first_time) {
+ first_time=0;
cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
for (cpu = 0; cpu < NR_CPUS; cpu++) {
@@ -226,7 +228,7 @@ count_dma_pages (u64 start, u64 end, void *arg)
* Set up the page tables.
*/
-void
+void __init
paging_init (void)
{
unsigned long max_dma;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c87d6d1d5813..2f5e44862e91 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -525,15 +525,20 @@ void __init find_memory(void)
* find_pernode_space() does most of this already, we just need to set
* local_per_cpu_offset
*/
-void *per_cpu_init(void)
+void __cpuinit *per_cpu_init(void)
{
int cpu;
+ static int first_time = 1;
+
if (smp_processor_id() != 0)
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ if (first_time) {
+ first_time = 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ }
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 2d13889d0a99..8d506710fdbd 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
/*
- * This function checks for proper alignment of input addr and len parameters.
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
*/
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -112,8 +113,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
unsigned long floor, unsigned long ceiling)
{
/*
- * This is called only when is_hugepage_only_range(addr,),
- * and it follows that is_hugepage_only_range(end,) also.
+ * This is called to free hugetlb page tables.
*
* The offset of these addresses from the base of the hugetlb
* region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
@@ -125,9 +125,9 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
addr = htlbpage_to_page(addr);
end = htlbpage_to_page(end);
- if (is_hugepage_only_range(tlb->mm, floor, HPAGE_SIZE))
+ if (REGION_NUMBER(floor) == RGN_HPAGE)
floor = htlbpage_to_page(floor);
- if (is_hugepage_only_range(tlb->mm, ceiling, HPAGE_SIZE))
+ if (REGION_NUMBER(ceiling) == RGN_HPAGE)
ceiling = htlbpage_to_page(ceiling);
free_pgd_range(tlb, addr, end, floor, ceiling);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b38b6d213c15..ff4f31fcd330 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -197,7 +197,7 @@ free_initmem (void)
eaddr = (unsigned long) ia64_imva(__init_end);
while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
++totalram_pages;
addr += PAGE_SIZE;
@@ -206,7 +206,7 @@ free_initmem (void)
(__init_end - __init_begin) >> 10);
}
-void
+void __init
free_initrd_mem (unsigned long start, unsigned long end)
{
struct page *page;
@@ -252,7 +252,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
continue;
page = virt_to_page(start);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(start);
++totalram_pages;
}
@@ -261,7 +261,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
/*
* This installs a clean page in the kernel's page table.
*/
-struct page *
+static struct page * __init
put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
{
pgd_t *pgd;
@@ -294,7 +294,7 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
return page;
}
-static void
+static void __init
setup_gate (void)
{
struct page *page;
@@ -411,7 +411,7 @@ ia64_mmu_init (void *my_cpu_data)
#ifdef CONFIG_VIRTUAL_MEM_MAP
-int
+int __init
create_mem_map_page_table (u64 start, u64 end, void *arg)
{
unsigned long address, start_page, end_page;
@@ -519,7 +519,7 @@ ia64_pfn_valid (unsigned long pfn)
}
EXPORT_SYMBOL(ia64_pfn_valid);
-int
+int __init
find_largest_hole (u64 start, u64 end, void *arg)
{
u64 *max_gap = arg;
@@ -535,7 +535,7 @@ find_largest_hole (u64 start, u64 end, void *arg)
}
#endif /* CONFIG_VIRTUAL_MEM_MAP */
-static int
+static int __init
count_reserved_pages (u64 start, u64 end, void *arg)
{
unsigned long num_reserved = 0;
@@ -556,7 +556,7 @@ count_reserved_pages (u64 start, u64 end, void *arg)
* purposes.
*/
-static int nolwsys;
+static int nolwsys __initdata;
static int __init
nolwsys_setup (char *s)
@@ -567,7 +567,7 @@ nolwsys_setup (char *s)
__setup("nolwsys", nolwsys_setup);
-void
+void __init
mem_init (void)
{
long reserved_pages, codesize, datasize, initsize;
@@ -640,7 +640,7 @@ mem_init (void)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 3e9b4eea7418..ab9c48c88012 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -10,7 +10,8 @@
CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
- huberror.o io_init.o iomv.o klconflib.o sn2/
+ huberror.o io_init.o iomv.o klconflib.o pio_phys.o \
+ sn2/
obj-$(CONFIG_IA64_GENERIC) += machvec.o
obj-$(CONFIG_SGI_TIOCX) += tiocx.o
obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 1f11db470d90..e952ef4f6d91 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -36,7 +36,7 @@ static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
nodepda_t *tmp_nodepda;
if (nasid_to_cnodeid(nasid) == -1)
- return (struct bteinfo_s *)NULL;;
+ return (struct bteinfo_s *)NULL;
tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
return &tmp_nodepda->bte_if[interface];
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index dfb3f2902379..5101ac462643 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -13,6 +13,8 @@
#include <asm/sn/sn_feature_sets.h>
#include <asm/sn/geo.h>
#include <asm/sn/io.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
#include <asm/sn/pcibr_provider.h>
#include <asm/sn/pcibus_provider_defs.h>
#include <asm/sn/pcidev.h>
@@ -710,9 +712,36 @@ cnodeid_get_geoid(cnodeid_t cnode)
return hubdev->hdi_geoid;
}
+void sn_generate_path(struct pci_bus *pci_bus, char *address)
+{
+ nasid_t nasid;
+ cnodeid_t cnode;
+ geoid_t geoid;
+ moduleid_t moduleid;
+ u16 bricktype;
+
+ nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+ cnode = nasid_to_cnodeid(nasid);
+ geoid = cnodeid_get_geoid(cnode);
+ moduleid = geo_module(geoid);
+
+ sprintf(address, "module_%c%c%c%c%.2d",
+ '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
+ '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
+ '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
+ MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
+
+ /* Tollhouse requires slot id to be displayed */
+ bricktype = MODULE_GET_BTYPE(moduleid);
+ if ((bricktype == L1_BRICKTYPE_191010) ||
+ (bricktype == L1_BRICKTYPE_1932))
+ sprintf(address, "%s^%d", address, geo_slot(geoid));
+}
+
subsys_initcall(sn_pci_init);
EXPORT_SYMBOL(sn_pci_fixup_slot);
EXPORT_SYMBOL(sn_pci_unfixup_slot);
EXPORT_SYMBOL(sn_pci_controller_fixup);
EXPORT_SYMBOL(sn_bus_store_sysdata);
EXPORT_SYMBOL(sn_bus_free_sysdata);
+EXPORT_SYMBOL(sn_generate_path);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index c373113d073a..c265e02f5036 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -350,9 +350,6 @@ static void force_interrupt(int irq)
static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
{
u64 regval;
- int irr_reg_num;
- int irr_bit;
- u64 irr_reg;
struct pcidev_info *pcidev_info;
struct pcibus_info *pcibus_info;
@@ -373,23 +370,7 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
pdi_pcibus_info;
regval = pcireg_intr_status_get(pcibus_info);
- irr_reg_num = irq_to_vector(irq) / 64;
- irr_bit = irq_to_vector(irq) % 64;
- switch (irr_reg_num) {
- case 0:
- irr_reg = ia64_getreg(_IA64_REG_CR_IRR0);
- break;
- case 1:
- irr_reg = ia64_getreg(_IA64_REG_CR_IRR1);
- break;
- case 2:
- irr_reg = ia64_getreg(_IA64_REG_CR_IRR2);
- break;
- case 3:
- irr_reg = ia64_getreg(_IA64_REG_CR_IRR3);
- break;
- }
- if (!test_bit(irr_bit, &irr_reg)) {
+ if (!ia64_get_irr(irq_to_vector(irq))) {
if (!test_bit(irq, pda->sn_in_service_ivecs)) {
regval &= 0xff;
if (sn_irq_info->irq_int_bit & regval &
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 000000000000..3c7d48d6ecb8
--- /dev/null
+++ b/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This file contains macros used to access MMR registers via
+ * uncached physical addresses.
+ * pio_phys_read_mmr - read an MMR
+ * pio_phys_write_mmr - write an MMR
+ * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ * Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * ie., 0x80000....
+ */
+
+
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+GLOBAL_ENTRY(pio_phys_read_mmr)
+ .prologue
+ .regstk 1,0,0,0
+ .body
+ mov r2=psr
+ rsm psr.i | psr.dt
+ ;;
+ srlz.d
+ ld8.acq r8=[r32]
+ ;;
+ mov psr.l=r2;;
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_phys_read_mmr)
+
+GLOBAL_ENTRY(pio_phys_write_mmr)
+ .prologue
+ .regstk 2,0,0,0
+ .body
+ mov r2=psr
+ rsm psr.i | psr.dt
+ ;;
+ srlz.d
+ st8.rel [r32]=r33
+ ;;
+ mov psr.l=r2;;
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_phys_write_mmr)
+
+GLOBAL_ENTRY(pio_atomic_phys_write_mmrs)
+ .prologue
+ .regstk 4,0,0,0
+ .body
+ mov r2=psr
+ cmp.ne p9,p0=r34,r0;
+ rsm psr.i | psr.dt | psr.ic
+ ;;
+ srlz.d
+ st8.rel [r32]=r33
+(p9) st8.rel [r34]=r35
+ ;;
+ mov psr.l=r2;;
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_atomic_phys_write_mmrs)
+
+
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 5b84836c2171..8b6d5c844708 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/config.h>
@@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p)
* for sn.
*/
pm_power_off = ia64_sn_power_down;
+ current->thread.flags |= IA64_THREAD_MIGRATION;
}
/**
@@ -660,7 +661,8 @@ void __init sn_cpu_init(void)
SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
u64 *pio;
pio = is_shub1() ? pio1 : pio2;
- pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+ pda->pio_write_status_addr =
+ (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b2e1e746b47f..d9d306c79f2d 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void)
return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
}
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+ pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+ volatile unsigned long *adr = last_pda->pio_write_status_addr;
+ unsigned long val = last_pda->pio_write_status_val;
+
+ /* Drain PIO writes from old CPU's Shub */
+ while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+ != val))
+ cpu_relax();
+}
+
void sn_tlb_migrate_finish(struct mm_struct *mm)
{
/* flush_tlb_mm is inefficient if more than 1 users of mm */
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 99cb28e74295..feaf1a6e8101 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -369,9 +369,15 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
static int is_fpga_tio(int nasid, int *bt)
{
- int ioboard_type;
+ u16 ioboard_type;
+ s64 rc;
- ioboard_type = ia64_sn_sysctl_ioboard_get(nasid);
+ rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type);
+ if (rc) {
+ printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n",
+ rc);
+ return 0;
+ }
switch (ioboard_type) {
case L1_BRICKTYPE_SA:
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index cdf6856ce089..d0abddd9ffe6 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -21,7 +21,6 @@
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <asm/sn/bte.h>
@@ -30,6 +29,31 @@
/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kzalloc will give us cachline aligned memory by default */
+ *base = kzalloc(size, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+ return *base;
+ }
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kzalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ return (void *) L1_CACHE_ALIGN((u64) *base);
+}
+
+
+/*
* Set up the initial values for the XPartition Communication channels.
*/
static void
@@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part)
* Allocate all of the channel structures as a contiguous chunk of
* memory.
*/
- part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+ part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
GFP_KERNEL);
if (part->channels == NULL) {
dev_err(xpc_chan, "can't get memory for channels\n");
return xpcNoMemory;
}
- memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
part->nchannels = XPC_NCHANNELS;
/* allocate all the required GET/PUT values */
- part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+ part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
GFP_KERNEL, &part->local_GPs_base);
if (part->local_GPs == NULL) {
kfree(part->channels);
@@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part)
"values\n");
return xpcNoMemory;
}
- memset(part->local_GPs, 0, XPC_GP_SIZE);
- part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+ part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
GFP_KERNEL, &part->remote_GPs_base);
if (part->remote_GPs == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
dev_err(xpc_chan, "can't get memory for remote get/put "
"values\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->remote_GPs, 0, XPC_GP_SIZE);
/* allocate all the required open and close args */
- part->local_openclose_args = xpc_kmalloc_cacheline_aligned(
+ part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
&part->local_openclose_args_base);
if (part->local_openclose_args == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
+ dev_err(xpc_chan, "can't get memory for local connect args\n");
kfree(part->remote_GPs_base);
part->remote_GPs = NULL;
- dev_err(xpc_chan, "can't get memory for local connect args\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
- part->remote_openclose_args = xpc_kmalloc_cacheline_aligned(
+ part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
&part->remote_openclose_args_base);
if (part->remote_openclose_args == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
+ dev_err(xpc_chan, "can't get memory for remote connect args\n");
kfree(part->local_openclose_args_base);
part->local_openclose_args = NULL;
- dev_err(xpc_chan, "can't get memory for remote connect args\n");
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
xpc_initialize_channels(part, partid);
@@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part)
ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
part->IPI_owner, (void *) (u64) partid);
if (ret != 0) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_openclose_args_base);
- part->local_openclose_args = NULL;
- kfree(part->remote_openclose_args_base);
- part->remote_openclose_args = NULL;
dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
"errno=%d\n", -ret);
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->local_openclose_args_base);
+ part->local_openclose_args = NULL;
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcLackOfResources;
}
@@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
for (nentries = ch->local_nentries; nentries > 0; nentries--) {
nbytes = nentries * ch->msg_size;
- ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+ ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
GFP_KERNEL,
&ch->local_msgqueue_base);
if (ch->local_msgqueue == NULL) {
continue;
}
- memset(ch->local_msgqueue, 0, nbytes);
nbytes = nentries * sizeof(struct xpc_notify);
- ch->notify_queue = kmalloc(nbytes, GFP_KERNEL);
+ ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
if (ch->notify_queue == NULL) {
kfree(ch->local_msgqueue_base);
ch->local_msgqueue = NULL;
continue;
}
- memset(ch->notify_queue, 0, nbytes);
spin_lock_irqsave(&ch->lock, irq_flags);
if (nentries < ch->local_nentries) {
@@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
nbytes = nentries * ch->msg_size;
- ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+ ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
GFP_KERNEL,
&ch->remote_msgqueue_base);
if (ch->remote_msgqueue == NULL) {
continue;
}
- memset(ch->remote_msgqueue, 0, nbytes);
spin_lock_irqsave(&ch->lock, irq_flags);
if (nentries < ch->remote_nentries) {
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 8cbf16432570..99b123a6421a 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -52,7 +52,6 @@
#include <linux/syscalls.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/completion.h>
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 88a730e6cfdb..94211429fd0c 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kmalloc will give us cachline aligned memory by default */
+ *base = kmalloc(size, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+ return *base;
+ }
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kmalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ return (void *) L1_CACHE_ALIGN((u64) *base);
+}
+
+
+/*
* Given a nasid, get the physical address of the partition's reserved page
* for that nasid. This function returns 0 on any error.
*/
@@ -1038,13 +1063,12 @@ xpc_discovery(void)
remote_vars = (struct xpc_vars *) remote_rp;
- discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
+ discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
return;
}
- memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 98f716bd92f0..ab1211ef0176 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -74,6 +74,22 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
return (int)ret_stuff.v0;
}
+u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus)
+{
+ s64 rc;
+ u16 ioboard;
+ nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+
+ rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard);
+ if (rc) {
+ printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n",
+ rc);
+ return 0;
+ }
+
+ return ioboard;
+}
+
/*
* PCI Bridge Error interrupt handler. Gets invoked whenever a PCI
* bridge sends an error interrupt.
@@ -255,3 +271,4 @@ pcibr_init_provider(void)
EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable);
EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable);
+EXPORT_SYMBOL_GPL(sn_ioboard_to_pci_bus);
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 7571a4025529..be0176912968 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -377,7 +377,7 @@ tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size)
struct tioca_dmamap *ca_dmamap;
void *map;
unsigned long flags;
- struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);;
+ struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e52831ed93eb..fa073cc4b565 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -15,6 +15,124 @@
#include <asm/sn/pcidev.h>
#include <asm/sn/pcibus_provider_defs.h>
#include <asm/sn/tioce_provider.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ * if(mmr address < 0x45000) {
+ * if(mmr address == 0 or 0x80)
+ * mmr wrt or read address 0xc0
+ * else if(mmr address == 0x148 or 0x200)
+ * mmr wrt or read address 0x28
+ * else
+ * mmr wrt or read address 0x158
+ *
+ * do desired mmr access (rd or wrt)
+ *
+ * if(mmr address == 0x100)
+ * mmr wrt or read address 0x38
+ * mmr wrt or read address 0xb050
+ * } else
+ * do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above addres
+ *
+ * Note this WAR can only to be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+static void inline
+tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
+{
+ u64 mmr_base;
+ u64 mmr_offset;
+
+ if (kern->ce_common->ce_rev != TIOCE_REV_A)
+ return;
+
+ mmr_base = kern->ce_common->ce_pcibus.bs_base;
+ mmr_offset = (u64)mmr_addr - mmr_base;
+
+ if (mmr_offset < 0x45000) {
+ u64 mmr_war_offset;
+
+ if (mmr_offset == 0 || mmr_offset == 0x80)
+ mmr_war_offset = 0xc0;
+ else if (mmr_offset == 0x148 || mmr_offset == 0x200)
+ mmr_war_offset = 0x28;
+ else
+ mmr_war_offset = 0x158;
+
+ readq_relaxed((void *)(mmr_base + mmr_war_offset));
+ }
+}
+
+static void inline
+tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
+{
+ u64 mmr_base;
+ u64 mmr_offset;
+
+ if (kern->ce_common->ce_rev != TIOCE_REV_A)
+ return;
+
+ mmr_base = kern->ce_common->ce_pcibus.bs_base;
+ mmr_offset = (u64)mmr_addr - mmr_base;
+
+ if (mmr_offset < 0x45000) {
+ if (mmr_offset == 0x100)
+ readq_relaxed((void *)(mmr_base + 0x38));
+ readq_relaxed((void *)(mmr_base + 0xb050));
+ }
+}
+
+/* load mmr contents into a variable */
+#define tioce_mmr_load(kern, mmrp, varp) do {\
+ tioce_mmr_war_pre(kern, mmrp); \
+ *(varp) = readq_relaxed(mmrp); \
+ tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store variable contents into mmr */
+#define tioce_mmr_store(kern, mmrp, varp) do {\
+ tioce_mmr_war_pre(kern, mmrp); \
+ writeq(*varp, mmrp); \
+ tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store immediate value into mmr */
+#define tioce_mmr_storei(kern, mmrp, val) do {\
+ tioce_mmr_war_pre(kern, mmrp); \
+ writeq(val, mmrp); \
+ tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* set bits (immediate value) into mmr */
+#define tioce_mmr_seti(kern, mmrp, bits) do {\
+ u64 tmp; \
+ tioce_mmr_load(kern, mmrp, &tmp); \
+ tmp |= (bits); \
+ tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
+
+/* clear bits (immediate value) into mmr */
+#define tioce_mmr_clri(kern, mmrp, bits) do { \
+ u64 tmp; \
+ tioce_mmr_load(kern, mmrp, &tmp); \
+ tmp &= ~(bits); \
+ tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
/**
* Bus address ranges for the 5 flavors of TIOCE DMA
@@ -62,9 +180,9 @@
#define TIOCE_ATE_M40 2
#define TIOCE_ATE_M40S 3
-#define KB(x) ((x) << 10)
-#define MB(x) ((x) << 20)
-#define GB(x) ((x) << 30)
+#define KB(x) ((u64)(x) << 10)
+#define MB(x) ((u64)(x) << 20)
+#define GB(x) ((u64)(x) << 30)
/**
* tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
int last;
int entries;
int nates;
- int pagesize;
+ u64 pagesize;
u64 *ate_shadow;
u64 *ate_reg;
u64 addr;
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
ate = ATE_MAKE(addr, pagesize);
ate_shadow[i + j] = ate;
- writeq(ate, &ate_reg[i + j]);
+ tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
addr += pagesize;
}
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr)
u64 tmp;
ce_kern->ce_port[port].dirmap_shadow = ct_upper;
- writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+ ct_upper);
tmp = ce_mmr->ce_ure_dir_map[port];
dma_ok = 1;
} else
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
if (TIOCE_D32_ADDR(bus_addr)) {
if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
ce_kern->ce_port[port].dirmap_shadow = 0;
- writeq(0, &ce_mmr->ce_ure_dir_map[port]);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+ 0);
}
} else {
struct tioce_dmamap *map;
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
} else if (--map->refcnt == 0) {
for (i = 0; i < map->ate_count; i++) {
map->ate_shadow[i] = 0;
- map->ate_hw[i] = 0;
+ tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
}
list_del(&map->ce_dmamap_list);
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
dma_map_done:
- if (mapaddr & barrier)
+ if (mapaddr && barrier)
mapaddr = tioce_dma_barrier(mapaddr, 1);
return mapaddr;
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
soft->ce_pcibus.bs_persist_segment,
soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
+ if (ret_stuff.v0)
+ panic("tioce_error_intr_handler: Fatal TIOCE error");
+
return IRQ_HANDLED;
}
/**
+ * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ate's for
+ * @base: starting bus address to reserve
+ * @limit: last bus address to reserve
+ *
+ * If base/limit falls within the range of bus space mapped through the
+ * M32 space, reserve the resources corresponding to the range.
+ */
+static void
+tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
+{
+ int ate_index, last_ate, ps;
+ struct tioce *ce_mmr;
+
+ if (!TIOCE_M32_ADDR(base))
+ return;
+
+ ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+ ps = ce_kern->ce_ate3240_pagesize;
+ ate_index = ATE_PAGE(base, ps);
+ last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
+
+ if (ate_index < 64)
+ ate_index = 64;
+
+ while (ate_index <= last_ate) {
+ u64 ate;
+
+ ate = ATE_MAKE(0xdeadbeef, ps);
+ ce_kern->ce_ate3240_shadow[ate_index] = ate;
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
+ ate);
+ ate_index++;
+ }
+}
+
+/**
* tioce_kern_init - init kernel structures related to a given TIOCE
* @tioce_common: ptr to a cached tioce_common struct that originated in prom
- */ static struct tioce_kernel *
+ */
+static struct tioce_kernel *
tioce_kern_init(struct tioce_common *tioce_common)
{
int i;
+ int ps;
+ int dev;
u32 tmp;
+ unsigned int seg, bus;
struct tioce *tioce_mmr;
struct tioce_kernel *tioce_kern;
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common)
* here to use pci_read_config_xxx() so use the raw_pci_ops vector.
*/
- raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment,
- tioce_common->ce_pcibus.bs_persist_busnum,
- PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp);
+ seg = tioce_common->ce_pcibus.bs_persist_segment;
+ bus = tioce_common->ce_pcibus.bs_persist_busnum;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
tioce_kern->ce_port1_secondary = (u8) tmp;
/*
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common)
*/
tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
- __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK);
- __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE);
- tioce_kern->ce_ate3240_pagesize = KB(256);
+ tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
+ CE_URE_PAGESIZE_MASK);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
+ CE_URE_256K_PAGESIZE);
+ ps = tioce_kern->ce_ate3240_pagesize = KB(256);
for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
tioce_kern->ce_ate40_shadow[i] = 0;
- writeq(0, &tioce_mmr->ce_ure_ate40[i]);
+ tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
}
for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
tioce_kern->ce_ate3240_shadow[i] = 0;
- writeq(0, &tioce_mmr->ce_ure_ate3240[i]);
+ tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
+ }
+
+ /*
+ * Reserve ATE's corresponding to reserved address ranges. These
+ * include:
+ *
+ * Memory space covered by each PPB mem base/limit register
+ * Memory space covered by each PPB prefetch base/limit register
+ *
+ * These bus ranges are for pio (downstream) traffic only, and so
+ * cannot be used for DMA.
+ */
+
+ for (dev = 1; dev <= 2; dev++) {
+ u64 base, limit;
+
+ /* mem base/limit */
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_MEMORY_BASE, 2, &tmp);
+ base = (u64)tmp << 16;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_MEMORY_LIMIT, 2, &tmp);
+ limit = (u64)tmp << 16;
+ limit |= 0xfffffUL;
+
+ if (base < limit)
+ tioce_reserve_m32(tioce_kern, base, limit);
+
+ /*
+ * prefetch mem base/limit. The tioce ppb's have 64-bit
+ * decoders, so read the upper portions w/o checking the
+ * attributes.
+ */
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_MEMORY_BASE, 2, &tmp);
+ base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_BASE_UPPER32, 4, &tmp);
+ base |= (u64)tmp << 32;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_MEMORY_LIMIT, 2, &tmp);
+
+ limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+ limit |= 0xfffffUL;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_LIMIT_UPPER32, 4, &tmp);
+ limit |= (u64)tmp << 32;
+
+ if ((base < limit) && TIOCE_M32_ADDR(base))
+ tioce_reserve_m32(tioce_kern, base, limit);
}
return tioce_kern;
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
{
struct pcidev_info *pcidev_info;
struct tioce_common *ce_common;
+ struct tioce_kernel *ce_kern;
struct tioce *ce_mmr;
u64 force_int_val;
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+ ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+ /*
+ * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
+ * the TIO_INTx register directly (1/26/2006)
+ */
+ if (ce_common->ce_rev == TIOCE_REV_A) {
+ u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
+ u64 status;
+
+ tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
+ if (status & int_bit_mask) {
+ u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
+ u64 ctalk = sn_irq_info->irq_xtalkaddr;
+ u64 nasid, offset;
+
+ nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
+ offset = (ctalk & CTALK_NODE_OFFSET);
+ HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
+ }
+
+ return;
+ }
/*
* irq_int_bit is originally set up by prom, and holds the interrupt
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
default:
return;
}
- writeq(force_int_val, &ce_mmr->ce_adm_force_int);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
}
/**
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
{
struct pcidev_info *pcidev_info;
struct tioce_common *ce_common;
+ struct tioce_kernel *ce_kern;
struct tioce *ce_mmr;
int bit;
u64 vector;
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+ ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
bit = sn_irq_info->irq_int_bit;
- __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+ tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
vector |= sn_irq_info->irq_xtalkaddr;
- writeq(vector, &ce_mmr->ce_adm_int_dest[bit]);
- __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
+ tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
tioce_force_interrupt(sn_irq_info);
}
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
static void *
tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
{
+ int my_nasid;
+ cnodeid_t my_cnode, mem_cnode;
struct tioce_common *tioce_common;
+ struct tioce_kernel *tioce_kern;
+ struct tioce *tioce_mmr;
/*
* Allocate kernel bus soft and copy from prom.
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
- if (tioce_kern_init(tioce_common) == NULL) {
+ tioce_kern = tioce_kern_init(tioce_common);
+ if (tioce_kern == NULL) {
kfree(tioce_common);
return NULL;
}
+ /*
+ * Clear out any transient errors before registering the error
+ * interrupt handler.
+ */
+
+ tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
+ ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+
if (request_irq(SGI_PCIASIC_ERROR,
tioce_error_intr_handler,
SA_SHIRQ, "TIOCE error", (void *)tioce_common))
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
tioce_common->ce_pcibus.bs_persist_segment,
tioce_common->ce_pcibus.bs_persist_busnum);
+ /*
+ * identify closest nasid for memory allocations
+ */
+
+ my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
+ my_cnode = nasid_to_cnodeid(my_nasid);
+
+ if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
+ printk(KERN_WARNING "tioce_bus_fixup: failed to find "
+ "closest node with MEM to TIO node %d\n", my_cnode);
+ mem_cnode = (cnodeid_t)-1; /* use any node */
+ }
+
+ controller->node = mem_cnode;
+
return tioce_common;
}