From a607c38971fd078865fa9bef39e6c1d4435680c8 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Thu, 1 Sep 2005 14:01:37 -0500 Subject: [IA64-SGI] get XPC to cleanly disengage from remote memory references When XPC is being shut down (e.g., rmmod, reboot), it doesn't ensure that other partitions with which it was connected have completely disengaged from any attempt at cross-partition memory references. This can lead to MCAs in any of these other partitions when the partition is reset. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc_partition.c | 304 ++++++++++++++++++++++++++++++------ 1 file changed, 254 insertions(+), 50 deletions(-) (limited to 'arch/ia64/sn/kernel/xpc_partition.c') diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 578265ea9e67..79a0fc4c860c 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -76,11 +76,6 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE]; -/* systune related variables */ -int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; -int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT; - - /* * Given a nasid, get the physical address of the partition's reserved page * for that nasid. This function returns 0 on any error. */ @@ -239,16 +234,21 @@ xpc_rsvd_page_init(void) xpc_vars->amos_page = amos_page; /* save for next load of XPC */ - /* - * Initialize the activation related AMO variables. - */ - xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS); - for (i = 1; i < XP_NASID_MASK_WORDS; i++) { - xpc_IPI_init(i + XP_MAX_PARTITIONS); + /* initialize the activate IRQ related AMO variables */ + for (i = 0; i < XP_NASID_MASK_WORDS; i++) { + (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); } + + /* initialize the engaged remote partitions related AMO variables */ + (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); + (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); + /* export AMO page's physical address to other partitions */ xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); + /* timestamp of when reserved page was initialized */ + rp->stamp = CURRENT_TIME; + /* * This signifies to the remote partition that our reserved * page is initialized.
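For reference: the two new AMO variables initialized above, XPC_ENGAGED_PARTITIONS_AMO and XPC_DISENGAGE_REQUEST_AMO, each act as a per-partition bitmask, where bit N records that partition N is engaged or has been asked to disengage. A minimal sketch of that bookkeeping, using plain C in place of the SHUB fetch-op atomics the real driver goes through (the names below are illustrative, not this patch's API):

    #include <stdint.h>

    static uint64_t engaged_mask;    /* stands in for XPC_ENGAGED_PARTITIONS_AMO */
    static uint64_t disengage_mask;  /* stands in for XPC_DISENGAGE_REQUEST_AMO */

    static void mark_engaged(int partid)          { engaged_mask |= (1UL << partid); }
    static void clear_engaged(int partid)         { engaged_mask &= ~(1UL << partid); }
    static int partition_engaged(uint64_t set)    { return (engaged_mask & set) != 0; }
    static void request_disengage(int partid)     { disengage_mask |= (1UL << partid); }
    static int disengage_requested(uint64_t set)  { return (disengage_mask & set) != 0; }

A partition shutting down asks its peers to disengage and then polls the engaged bits until they clear or a timeout expires, which is the behavior the xpc_partition_disengaged() function added later in this patch implements.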
@@ -387,6 +387,11 @@ xpc_check_remote_hb(void) remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + + if (xpc_exiting) { + break; + } + if (partid == sn_partition_id) { continue; } @@ -417,7 +422,7 @@ xpc_check_remote_hb(void) if (((remote_vars->heartbeat == part->last_heartbeat) && (remote_vars->kdb_status == 0)) || - !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { + !xpc_hb_allowed(sn_partition_id, remote_vars)) { XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); continue; @@ -436,23 +441,23 @@ xpc_check_remote_hb(void) */ static enum xpc_retval xpc_get_remote_rp(int nasid, u64 *discovered_nasids, - struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) + struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa) { int bres, i; /* get the reserved page's physical address */ - *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, + *remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, XPC_RSVD_PAGE_ALIGNED_SIZE); - if (*remote_rsvd_page_pa == 0) { + if (*remote_rp_pa == 0) { return xpcNoRsvdPageAddr; } /* pull over the reserved page structure */ - bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), + bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp), XPC_RSVD_PAGE_ALIGNED_SIZE, (BTE_NOTIFY | BTE_WACQUIRE), NULL); if (bres != BTE_SUCCESS) { @@ -523,6 +528,55 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) } +/* + * Update the remote partition's info. + */ +static void +xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, + struct timespec *remote_rp_stamp, u64 remote_rp_pa, + u64 remote_vars_pa, struct xpc_vars *remote_vars) +{ + part->remote_rp_version = remote_rp_version; + dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n", + part->remote_rp_version); + + part->remote_rp_stamp = *remote_rp_stamp; + dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n", + part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec); + + part->remote_rp_pa = remote_rp_pa; + dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); + + part->remote_vars_pa = remote_vars_pa; + dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", + part->remote_vars_pa); + + part->last_heartbeat = remote_vars->heartbeat; + dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", + part->last_heartbeat); + + part->remote_vars_part_pa = remote_vars->vars_part_pa; + dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", + part->remote_vars_part_pa); + + part->remote_act_nasid = remote_vars->act_nasid; + dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", + part->remote_act_nasid); + + part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; + dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", + part->remote_act_phys_cpuid); + + part->remote_amos_page_pa = remote_vars->amos_page_pa; + dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", + part->remote_amos_page_pa); + + part->remote_vars_version = remote_vars->version; + dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", + part->remote_vars_version); +} + + /* * Prior code has determine the nasid which generated an IPI. 
Inspect * that nasid to determine if its partition needs to be activated or @@ -542,8 +596,12 @@ xpc_identify_act_IRQ_req(int nasid) { struct xpc_rsvd_page *remote_rp; struct xpc_vars *remote_vars; - u64 remote_rsvd_page_pa; + u64 remote_rp_pa; u64 remote_vars_pa; + int remote_rp_version; + int reactivate = 0; + int stamp_diff; + struct timespec remote_rp_stamp = { 0, 0 }; partid_t partid; struct xpc_partition *part; enum xpc_retval ret; @@ -553,7 +611,7 @@ xpc_identify_act_IRQ_req(int nasid) remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; - ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); if (ret != xpcSuccess) { dev_warn(xpc_part, "unable to get reserved page from nasid %d, " "which sent interrupt, reason=%d\n", nasid, ret); @@ -561,6 +619,10 @@ xpc_identify_act_IRQ_req(int nasid) } remote_vars_pa = remote_rp->vars_pa; + remote_rp_version = remote_rp->version; + if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { + remote_rp_stamp = remote_rp->stamp; + } partid = remote_rp->partid; part = &xpc_partitions[partid]; @@ -586,44 +648,117 @@ xpc_identify_act_IRQ_req(int nasid) "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, remote_vars->heartbeat, remote_vars->heartbeating_to_mask); + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_INACTIVE) { - if (part->act_state == XPC_P_INACTIVE) { + xpc_update_partition_info(part, remote_rp_version, + &remote_rp_stamp, remote_rp_pa, + remote_vars_pa, remote_vars); - part->remote_rp_pa = remote_rsvd_page_pa; - dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", - part->remote_rp_pa); + if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { + if (xpc_partition_disengage_requested(1UL << partid)) { + /* + * Other side is waiting on us to disengage, + * even though we already have. + */ + return; + } + } else { + /* other side doesn't support disengage requests */ + xpc_clear_partition_disengage_request(1UL << partid); + } - part->remote_vars_pa = remote_vars_pa; - dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", - part->remote_vars_pa); + xpc_activate_partition(part); + return; + } - part->last_heartbeat = remote_vars->heartbeat; - dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", - part->last_heartbeat); + DBUG_ON(part->remote_rp_version == 0); + DBUG_ON(part->remote_vars_version == 0); + + if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) { + DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part-> + remote_vars_version)); + + if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { + DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> + version)); + /* see if the other side rebooted */ + if (part->remote_amos_page_pa == + remote_vars->amos_page_pa && + xpc_hb_allowed(sn_partition_id, + remote_vars)) { + /* doesn't look that way, so ignore the IPI */ + return; + } + } - part->remote_vars_part_pa = remote_vars->vars_part_pa; - dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", - part->remote_vars_part_pa); + /* + * Other side rebooted and previous XPC didn't support the + * disengage request, so we don't need to do anything special. 
+ */ - part->remote_act_nasid = remote_vars->act_nasid; - dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", - part->remote_act_nasid); + xpc_update_partition_info(part, remote_rp_version, + &remote_rp_stamp, remote_rp_pa, + remote_vars_pa, remote_vars); + part->reactivate_nasid = nasid; + XPC_DEACTIVATE_PARTITION(part, xpcReactivating); + return; + } - part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; - dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", - part->remote_act_phys_cpuid); + DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)); - part->remote_amos_page_pa = remote_vars->amos_page_pa; - dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", - part->remote_amos_page_pa); + if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { + DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - xpc_activate_partition(part); + /* + * Other side rebooted and previous XPC did support the + * disengage request, but the new one doesn't. + */ - } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || - !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { + xpc_clear_partition_engaged(1UL << partid); + xpc_clear_partition_disengage_request(1UL << partid); + xpc_update_partition_info(part, remote_rp_version, + &remote_rp_stamp, remote_rp_pa, + remote_vars_pa, remote_vars); + reactivate = 1; + + } else { + DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); + + stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp, + &remote_rp_stamp); + if (stamp_diff != 0) { + DBUG_ON(stamp_diff >= 0); + + /* + * Other side rebooted and the previous XPC did support + * the disengage request, as does the new one. + */ + + DBUG_ON(xpc_partition_engaged(1UL << partid)); + DBUG_ON(xpc_partition_disengage_requested(1UL << + partid)); + + xpc_update_partition_info(part, remote_rp_version, + &remote_rp_stamp, remote_rp_pa, + remote_vars_pa, remote_vars); + reactivate = 1; + } + } + + if (!xpc_partition_disengaged(part)) { + /* still waiting on other side to disengage from us */ + return; + } + + if (reactivate) { part->reactivate_nasid = nasid; XPC_DEACTIVATE_PARTITION(part, xpcReactivating); + + } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) && + xpc_partition_disengage_requested(1UL << partid)) { + XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown); } } @@ -646,12 +781,16 @@ xpc_identify_act_IRQ_sender(void) struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; - act_amos = xpc_vars->act_amos; + act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS; /* scan through act AMO variable looking for non-zero entries */ for (word = 0; word < XP_NASID_MASK_WORDS; word++) { + if (xpc_exiting) { + break; + } + nasid_mask = xpc_IPI_receive(&act_amos[word]); if (nasid_mask == 0) { /* no IRQs from nasids in this variable */ @@ -687,6 +826,55 @@ xpc_identify_act_IRQ_sender(void) } +/* + * See if the other side has responded to a partition disengage request + * from us. + */ +int +xpc_partition_disengaged(struct xpc_partition *part) +{ + partid_t partid = XPC_PARTID(part); + int disengaged; + + + disengaged = (xpc_partition_engaged(1UL << partid) == 0); + if (part->disengage_request_timeout) { + if (!disengaged) { + if (jiffies < part->disengage_request_timeout) { + /* timelimit hasn't been reached yet */ + return 0; + } + + /* + * Other side hasn't responded to our disengage + * request in a timely fashion, so assume it's dead. 
+ */ + + xpc_clear_partition_engaged(1UL << partid); + disengaged = 1; + } + part->disengage_request_timeout = 0; + + /* cancel the timer function, provided it's not us */ + if (!in_interrupt()) { + del_singleshot_timer_sync(&part-> + disengage_request_timer); + } + + DBUG_ON(part->act_state != XPC_P_DEACTIVATING && + part->act_state != XPC_P_INACTIVE); + if (part->act_state != XPC_P_INACTIVE) { + xpc_wakeup_channel_mgr(part); + } + + if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { + xpc_cancel_partition_disengage_request(part); + } + } + return disengaged; +} + + /* * Mark specified partition as active. */ @@ -721,7 +909,6 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, enum xpc_retval reason) { unsigned long irq_flags; - partid_t partid = XPC_PARTID(part); spin_lock_irqsave(&part->act_lock, irq_flags); @@ -749,17 +936,27 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, spin_unlock_irqrestore(&part->act_lock, irq_flags); - XPC_DISALLOW_HB(partid, xpc_vars); + if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { + xpc_request_partition_disengage(part); + xpc_IPI_send_disengage(part); + + /* set a timelimit on the disengage request */ + part->disengage_request_timeout = jiffies + + (XPC_DISENGAGE_REQUEST_TIMELIMIT * HZ); + part->disengage_request_timer.expires = + part->disengage_request_timeout; + add_timer(&part->disengage_request_timer); + } dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, reason); - xpc_partition_down(part, reason); + xpc_partition_going_down(part, reason); } /* - * Mark specified partition as active. + * Mark specified partition as inactive. */ void xpc_mark_partition_inactive(struct xpc_partition *part) @@ -792,7 +989,7 @@ xpc_discovery(void) void *remote_rp_base; struct xpc_rsvd_page *remote_rp; struct xpc_vars *remote_vars; - u64 remote_rsvd_page_pa; + u64 remote_rp_pa; u64 remote_vars_pa; int region; int max_regions; @@ -877,7 +1074,7 @@ xpc_discovery(void) /* pull over the reserved page structure */ ret = xpc_get_remote_rp(nasid, discovered_nasids, - remote_rp, &remote_rsvd_page_pa); + remote_rp, &remote_rp_pa); if (ret != xpcSuccess) { dev_dbg(xpc_part, "unable to get reserved page " "from nasid %d, reason=%d\n", nasid, @@ -948,6 +1145,13 @@ xpc_discovery(void) remote_vars->act_nasid, remote_vars->act_phys_cpuid); + if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> + version)) { + part->remote_amos_page_pa = + remote_vars->amos_page_pa; + xpc_mark_partition_disengaged(part); + xpc_cancel_partition_disengage_request(part); + } xpc_IPI_send_activate(remote_vars); } } -- cgit v1.2.3-59-g8ed1b From 24ee0a6d7b0a52b140c880aae24c255de3b4a9a1 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 12 Sep 2005 12:15:43 -0500 Subject: [IA64] Cleanup use of various #defines related to nodes Some of the SN code & #defines related to compact nodes & IO discovery have gotten stale over the years. This patch attempts to clean them up. Some of the various SN MAX_xxx #defines were also unclear & misused. The primary changes are: - use MAX_NUMNODES. This is the generic linux #define for the number of nodes that are known to the generic kernel. Arrays & loops for constructs that are 1:1 with linux-defined nodes should use the linux #define - not an SN equivalent. - use MAX_COMPACT_NODES for MAX_NUMNODES + NUM_TIOS. This is the number of nodes in the SSI system. Compact nodes are a hack to get around the IA64 architectural limit of 256 nodes. Large SGI systems have more than 256 nodes. 
When we upgrade to ACPI3.0, I _hope_ that all nodes will be real nodes that are known to the generic kernel. That will allow us to delete the notion of "compact nodes". - add MAX_NUMALINK_NODES for the total number of nodes that are in the numalink domain - all partitions. - simplified (understandable) scan_for_ionodes() - small amount of cleanup related to cnodes Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 4 +- arch/ia64/sn/kernel/setup.c | 160 ++++++++++++------------------------ arch/ia64/sn/kernel/sn2/sn_hwperf.c | 4 +- arch/ia64/sn/kernel/tiocx.c | 5 +- arch/ia64/sn/kernel/xpc_partition.c | 2 +- drivers/char/snsc.c | 4 +- include/asm-ia64/sn/arch.h | 36 +++++--- include/asm-ia64/sn/io.h | 2 +- include/asm-ia64/sn/klconfig.h | 34 +------- include/asm-ia64/sn/sn_cpuid.h | 3 - include/asm-ia64/sn/sn_sal.h | 12 +-- include/asm-ia64/sn/xp.h | 2 +- 12 files changed, 92 insertions(+), 176 deletions(-) (limited to 'arch/ia64/sn/kernel/xpc_partition.c') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 906622d9f933..b4f5053f5e1b 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -22,8 +22,6 @@ #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" -nasid_t master_nasid = INVALID_NASID; /* Partition Master */ - static struct list_head sn_sysdata_list; /* sysdata list struct */ @@ -165,7 +163,7 @@ static void sn_fixup_ionodes(void) * Get SGI Specific HUB chipset information. * Inform Prom that this kernel can support domain bus numbering. */ - for (i = 0; i < numionodes; i++) { + for (i = 0; i < num_cnodes; i++) { hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); nasid = cnodeid_to_nasid(i); hubdev->max_segment_number = 0xffffffff; diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 6f8c5883716b..0fb579ef18c2 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct pda_s, pda_percpu); #define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ -lboard_t *root_lboard[MAX_COMPACT_NODES]; - extern void bte_init_node(nodepda_t *, cnodeid_t); extern void sn_timer_init(void); @@ -97,15 +95,15 @@ u8 sn_region_size; EXPORT_SYMBOL(sn_region_size); int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ -short physical_node_map[MAX_PHYSNODE_ID]; +short physical_node_map[MAX_NUMALINK_NODES]; static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; EXPORT_SYMBOL(physical_node_map); -int numionodes; +int num_cnodes; static void sn_init_pdas(char **); -static void scan_for_ionodes(void); +static void build_cnode_tables(void); static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; @@ -139,19 +137,6 @@ extern char drive_info[4 * 16]; char drive_info[4 * 16]; #endif -/* - * Get nasid of current cpu early in boot before nodepda is initialized - */ -static int -boot_get_nasid(void) -{ - int nasid; - - if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL)) - BUG(); - return nasid; -} - /* * This routine can only be used during init, since * smp_boot_data is an init data structure. 
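The cnode bookkeeping this commit reworks reduces to two inverse lookup tables: physical_node_map[] maps a nasid to its cnode, and the per-cpu sn_cnodeid_to_nasid array maps back. A simplified sketch of the invariant build_cnode_tables() establishes, using the sizes the patch chooses (512 compact nodes and 2048 numalink nodes, per the arch.h hunk further down); the flat array and the tables_consistent() helper are illustrative stand-ins, not kernel code:

    #define MAX_COMPACT_NODES    512   /* C/M/TIO nodes in one SSI partition */
    #define MAX_NUMALINK_NODES  2048   /* all nodes on the numalink fabric */

    short physical_node_map[MAX_NUMALINK_NODES];  /* nasid -> cnode, -1 if absent */
    short cnode_to_nasid_tbl[MAX_COMPACT_NODES];  /* cnode -> nasid */

    /* over the cnodes actually discovered, the two tables must be inverses */
    static int tables_consistent(int num_cnodes)
    {
            int cnode;

            for (cnode = 0; cnode < num_cnodes; cnode++) {
                    short nasid = cnode_to_nasid_tbl[cnode];
                    if (nasid < 0 || physical_node_map[nasid] != cnode)
                            return 0;
            }
            return 1;
    }

C and M bricks are entered first (so cnode == node for them), then TIOs and headless/memless nodes are appended while incrementing num_cnodes, which is exactly the order the build_cnode_tables() code below follows.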
@@ -223,7 +208,6 @@ void __init early_sn_setup(void) } extern int platform_intr_list[]; -extern nasid_t master_nasid; static int __initdata shub_1_1_found = 0; /* @@ -269,7 +253,6 @@ static void __init sn_check_for_wars(void) void __init sn_setup(char **cmdline_p) { long status, ticks_per_sec, drift; - int pxm; u32 version = sn_sal_rev(); extern void sn_cpu_init(void); @@ -300,11 +283,10 @@ void __init sn_setup(char **cmdline_p) MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; - memset(physical_node_map, -1, sizeof(physical_node_map)); - for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++) - if (pxm_to_nid_map[pxm] != -1) - physical_node_map[pxm_to_nasid(pxm)] = - pxm_to_nid_map[pxm]; + /* + * Build the tables for managing cnodes. + */ + build_cnode_tables(); /* * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard @@ -319,8 +301,6 @@ void __init sn_setup(char **cmdline_p) printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); - master_nasid = boot_get_nasid(); - status = ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, &drift); @@ -378,15 +358,6 @@ static void __init sn_init_pdas(char **cmdline_p) { cnodeid_t cnode; - memset(sn_cnodeid_to_nasid, -1, - sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); - for_each_online_node(cnode) - sn_cnodeid_to_nasid[cnode] = - pxm_to_nasid(nid_to_pxm_map[cnode]); - - numionodes = num_online_nodes(); - scan_for_ionodes(); - /* * Allocate & initalize the nodepda for each node. */ @@ -402,7 +373,7 @@ static void __init sn_init_pdas(char **cmdline_p) /* * Allocate & initialize nodepda for TIOs. For now, put them on node 0. */ - for (cnode = num_online_nodes(); cnode < numionodes; cnode++) { + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) { nodepdaindr[cnode] = alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); @@ -411,7 +382,7 @@ static void __init sn_init_pdas(char **cmdline_p) /* * Now copy the array of nodepda pointers to each nodepda. */ - for (cnode = 0; cnode < numionodes; cnode++) + for (cnode = 0; cnode < num_cnodes; cnode++) memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, sizeof(nodepdaindr)); @@ -428,7 +399,7 @@ static void __init sn_init_pdas(char **cmdline_p) * Initialize the per node hubdev. This includes IO Nodes and * headless/memless nodes. */ - for (cnode = 0; cnode < numionodes; cnode++) { + for (cnode = 0; cnode < num_cnodes; cnode++) { hubdev_init_node(nodepdaindr[cnode], cnode); } } @@ -553,87 +524,58 @@ void __init sn_cpu_init(void) } /* - * Scan klconfig for ionodes. Add the nasids to the - * physical_node_map and the pda and increment numionodes. + * Build tables for converting between NASIDs and cnodes. */ +static inline int __init board_needs_cnode(int type) +{ + return (type == KLTYPE_SNIA || type == KLTYPE_TIO); +} -static void __init scan_for_ionodes(void) +void __init build_cnode_tables(void) { - int nasid = 0; + int nasid; + int node; lboard_t *brd; - /* fakeprom does not support klgraph */ - if (IS_RUNNING_ON_FAKE_PROM()) - return; - - /* Setup ionodes with memory */ - for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { - char *klgraph_header; - cnodeid_t cnodeid; - - if (physical_node_map[nasid] == -1) - continue; + memset(physical_node_map, -1, sizeof(physical_node_map)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); - cnodeid = -1; - klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid)); - if (!klgraph_header) { - BUG(); /* All nodes must have klconfig tables! 
*/ - } - cnodeid = nasid_to_cnodeid(nasid); - root_lboard[cnodeid] = (lboard_t *) - NODE_OFFSET_TO_LBOARD((nasid), - ((kl_config_hdr_t - *) (klgraph_header))-> - ch_board_info); + /* + * First populate the tables with C/M bricks. This ensures that + * cnode == node for all C & M bricks. + */ + for_each_online_node(node) { + nasid = pxm_to_nasid(nid_to_pxm_map[node]); + sn_cnodeid_to_nasid[node] = nasid; + physical_node_map[nasid] = node; } - /* Scan headless/memless IO Nodes. */ - for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { - /* if there's no nasid, don't try to read the klconfig on the node */ - if (physical_node_map[nasid] == -1) - continue; - brd = find_lboard_any((lboard_t *) - root_lboard[nasid_to_cnodeid(nasid)], - KLTYPE_SNIA); - if (brd) { - brd = KLCF_NEXT_ANY(brd); /* Skip this node's lboard */ - if (!brd) - continue; - } - - brd = find_lboard_any(brd, KLTYPE_SNIA); + /* + * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node + * limit on the number of nodes, we can't use the generic node numbers + * for this. Note that num_cnodes is incremented below as TIOs or + * headless/memoryless nodes are discovered. + */ + num_cnodes = num_online_nodes(); - while (brd) { - sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; - physical_node_map[brd->brd_nasid] = numionodes; - root_lboard[numionodes] = brd; - numionodes++; - brd = KLCF_NEXT_ANY(brd); - if (!brd) - break; - - brd = find_lboard_any(brd, KLTYPE_SNIA); - } - } + /* fakeprom does not support klgraph */ + if (IS_RUNNING_ON_FAKE_PROM()) + return; - /* Scan for TIO nodes. */ - for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { - /* if there's no nasid, don't try to read the klconfig on the node */ - if (physical_node_map[nasid] == -1) - continue; - brd = find_lboard_any((lboard_t *) - root_lboard[nasid_to_cnodeid(nasid)], - KLTYPE_TIO); + /* Find TIOs & headless/memoryless nodes and add them to the tables */ + for_each_online_node(node) { + kl_config_hdr_t *klgraph_header; + nasid = cnodeid_to_nasid(node); + if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL) + BUG(); + brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); while (brd) { - sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; - physical_node_map[brd->brd_nasid] = numionodes; - root_lboard[numionodes] = brd; - numionodes++; - brd = KLCF_NEXT_ANY(brd); - if (!brd) - break; - - brd = find_lboard_any(brd, KLTYPE_TIO); + if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { + sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid; + physical_node_map[brd->brd_nasid] = num_cnodes++; + } + brd = find_lboard_next(brd); } } } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 0513aacac8c1..6c6fbca3229c 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -476,8 +476,8 @@ static int sn_topology_show(struct seq_file *s, void *d) for_each_online_cpu(j) { seq_printf(s, j ? 
":%d" : ", dist %d", node_distance( - cpuid_to_cnodeid(i), - cpuid_to_cnodeid(j))); + cpu_to_node(i), + cpu_to_node(j))); } seq_putc(s, '\n'); } diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index b45db5133f55..7e9764a69dc8 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -486,11 +486,10 @@ static int __init tiocx_init(void) bus_register(&tiocx_bus_type); - for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) { + for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) { nasid_t nasid; - if ((nasid = cnodeid_to_nasid(cnodeid)) < 0) - break; /* No more nasids .. bail out of loop */ + nasid = cnodeid_to_nasid(cnodeid); if ((nasid & 0x1) && is_fpga_brick(nasid)) { struct hubdev_info *hubdev; diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 578265ea9e67..72ef330fb784 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -44,7 +44,7 @@ static u64 xpc_sh2_IPI_access3; /* original protection values for each node */ -u64 xpc_prot_vec[MAX_COMPACT_NODES]; +u64 xpc_prot_vec[MAX_NUMNODES]; /* this partition's reserved page */ diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c index 261a41bf6d02..a025a89ea70a 100644 --- a/drivers/char/snsc.c +++ b/drivers/char/snsc.c @@ -377,7 +377,7 @@ scdrv_init(void) dev_t first_dev, dev; nasid_t event_nasid = ia64_sn_get_console_nasid(); - if (alloc_chrdev_region(&first_dev, 0, numionodes, + if (alloc_chrdev_region(&first_dev, 0, num_cnodes, SYSCTL_BASENAME) < 0) { printk("%s: failed to register SN system controller device\n", __FUNCTION__); @@ -385,7 +385,7 @@ scdrv_init(void) } snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME); - for (cnode = 0; cnode < numionodes; cnode++) { + for (cnode = 0; cnode < num_cnodes; cnode++) { geoid = cnodeid_get_geoid(cnode); devnamep = devname; format_module_id(devnamep, geo_module(geoid), diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index ab827d298569..8fce5a6db951 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -17,6 +17,32 @@ #include #include +/* + * This is the maximum number of NUMALINK nodes that can be part of a single + * SSI kernel. This number includes C-brick, M-bricks, and TIOs. Nodes in + * remote partitions are NOT included in this number. + * The number of compact nodes cannot exceed size of a coherency domain. + * The purpose of this define is to specify a node count that includes + * all C/M/TIO nodes in an SSI system. + * + * SGI system can currently support up to 256 C/M nodes plus additional TIO nodes. + * + * Note: ACPI20 has an architectural limit of 256 nodes. When we upgrade + * to ACPI3.0, this limit will be removed. The notion of "compact nodes" + * should be deleted and TIOs should be included in MAX_NUMNODES. + */ +#define MAX_COMPACT_NODES 512 + +/* + * Maximum number of nodes in all partitions and in all coherency domains. + * This is the total number of nodes accessible in the numalink fabric. It + * includes all C & M bricks, plus all TIOs. + * + * This value is also the value of the maximum number of NASIDs in the numalink + * fabric. + */ +#define MAX_NUMALINK_NODES 2048 + /* * The following defines attributes of the HUB chip. These attributes are * frequently referenced. They are kept in the per-cpu data areas of each cpu. 
@@ -40,15 +66,6 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); #define enable_shub_wars_1_1() (sn_hub_info->shub_1_1_found) -/* - * This is the maximum number of nodes that can be part of a kernel. - * Effectively, it's the maximum number of compact node ids (cnodeid_t). - * This is not necessarily the same as MAX_NASIDS. - */ -#define MAX_COMPACT_NODES 2048 -#define CPUS_PER_NODE 4 - - /* * Compact node ID to nasid mappings kept in the per-cpu data areas of each * cpu. @@ -57,7 +74,6 @@ DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); #define sn_cnodeid_to_nasid (&__get_cpu_var(__sn_cnodeid_to_nasid[0])) - extern u8 sn_partition_id; extern u8 sn_system_size; extern u8 sn_sharing_domain_size; diff --git a/include/asm-ia64/sn/io.h b/include/asm-ia64/sn/io.h index 42209733f6b1..ac30c747c5ab 100644 --- a/include/asm-ia64/sn/io.h +++ b/include/asm-ia64/sn/io.h @@ -14,7 +14,7 @@ extern void * sn_io_addr(unsigned long port) __attribute_const__; /* Forward definition */ extern void __sn_mmiowb(void); /* Forward definition */ -extern int numionodes; +extern int num_cnodes; #define __sn_mf_a() ia64_mfa() diff --git a/include/asm-ia64/sn/klconfig.h b/include/asm-ia64/sn/klconfig.h index 9f920c70a62a..bcbf209d63be 100644 --- a/include/asm-ia64/sn/klconfig.h +++ b/include/asm-ia64/sn/klconfig.h @@ -208,19 +208,6 @@ typedef struct lboard_s { klconf_off_t brd_next_same; /* Next BOARD with same nasid */ } lboard_t; -#define KLCF_NUM_COMPS(_brd) ((_brd)->brd_numcompts) -#define NODE_OFFSET_TO_KLINFO(n,off) ((klinfo_t*) TO_NODE_CAC(n,off)) -#define KLCF_NEXT(_brd) \ - ((_brd)->brd_next_same ? \ - (NODE_OFFSET_TO_LBOARD((_brd)->brd_next_same_host, (_brd)->brd_next_same)): NULL) -#define KLCF_NEXT_ANY(_brd) \ - ((_brd)->brd_next_any ? \ - (NODE_OFFSET_TO_LBOARD(NASID_GET(_brd), (_brd)->brd_next_any)): NULL) -#define KLCF_COMP(_brd, _ndx) \ - ((((_brd)->brd_compts[(_ndx)]) == 0) ? 0 : \ - (NODE_OFFSET_TO_KLINFO(NASID_GET(_brd), (_brd)->brd_compts[(_ndx)]))) - - /* * Generic info structure. This stores common info about a * component. @@ -249,24 +236,11 @@ typedef struct klinfo_s { /* Generic info */ } klinfo_t ; -static inline lboard_t *find_lboard_any(lboard_t * start, unsigned char brd_type) +static inline lboard_t *find_lboard_next(lboard_t * brd) { - /* Search all boards stored on this node. */ - - while (start) { - if (start->brd_type == brd_type) - return start; - start = KLCF_NEXT_ANY(start); - } - /* Didn't find it. */ - return (lboard_t *) NULL; + if (brd && brd->brd_next_any) + return NODE_OFFSET_TO_LBOARD(NASID_GET(brd), brd->brd_next_any); + return NULL; } - -/* external declarations of Linux kernel functions. 
*/ - -extern lboard_t *root_lboard[]; -extern klinfo_t *find_component(lboard_t *brd, klinfo_t *kli, unsigned char type); -extern klinfo_t *find_first_component(lboard_t *brd, unsigned char type); - #endif /* _ASM_IA64_SN_KLCONFIG_H */ diff --git a/include/asm-ia64/sn/sn_cpuid.h b/include/asm-ia64/sn/sn_cpuid.h index d2c1d34dcce4..749deb2ca6c1 100644 --- a/include/asm-ia64/sn/sn_cpuid.h +++ b/include/asm-ia64/sn/sn_cpuid.h @@ -105,7 +105,6 @@ extern short physical_node_map[]; /* indexed by nasid to get cnode */ #define cpuid_to_nasid(cpuid) (sn_nodepda->phys_cpuid[cpuid].nasid) #define cpuid_to_subnode(cpuid) (sn_nodepda->phys_cpuid[cpuid].subnode) #define cpuid_to_slice(cpuid) (sn_nodepda->phys_cpuid[cpuid].slice) -#define cpuid_to_cnodeid(cpuid) (physical_node_map[cpuid_to_nasid(cpuid)]) /* @@ -113,8 +112,6 @@ extern short physical_node_map[]; /* indexed by nasid to get cnode */ * of potentially large tables. */ extern int nasid_slice_to_cpuid(int, int); -#define nasid_slice_to_cpu_physical_id(nasid, slice) \ - cpu_physical_id(nasid_slice_to_cpuid(nasid, slice)) /* * cnodeid_to_nasid - convert a cnodeid to a NASID diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index fea35b33d4e4..5ad855db8464 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -198,26 +198,16 @@ ia64_sn_get_master_baseio_nasid(void) return ret_stuff.v0; } -static inline char * +static inline void * ia64_sn_get_klconfig_addr(nasid_t nasid) { struct ia64_sal_retval ret_stuff; - int cnodeid; - cnodeid = nasid_to_cnodeid(nasid); ret_stuff.status = 0; ret_stuff.v0 = 0; ret_stuff.v1 = 0; ret_stuff.v2 = 0; SAL_CALL(ret_stuff, SN_SAL_GET_KLCONFIG_ADDR, (u64)nasid, 0, 0, 0, 0, 0, 0); - - /* - * We should panic if a valid cnode nasid does not produce - * a klconfig address. - */ - if (ret_stuff.status != 0) { - panic("ia64_sn_get_klconfig_addr: Returned error %lx\n", ret_stuff.status); - } return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL; } diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h index 1df1c9f61a65..75a2f39c6ac6 100644 --- a/include/asm-ia64/sn/xp.h +++ b/include/asm-ia64/sn/xp.h @@ -49,7 +49,7 @@ * C-brick nasids, thus the need for bitmaps which don't account for * odd-numbered (non C-brick) nasids. */ -#define XP_MAX_PHYSNODE_ID (MAX_PHYSNODE_ID / 2) +#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2) #define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8) #define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64) -- cgit v1.2.3-59-g8ed1b From e54af724c1ae3530c95135157776c9be65cdb747 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 25 Oct 2005 14:07:43 -0500 Subject: [IA64-SGI] fixes for XPC disengage and open/close protocol This patch addresses a few issues with the open/close protocol that were revealed by the newly added disengage functionality combined with more extensive testing. 
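One of those issues: open/close IPI flags can arrive for a channel that is already disconnected but still has a thread blocked in xpc_disconnect_wait(). The fix below stashes such flags in a new delayed_IPI_flags field and replays them only after the waiter has observed the disconnect. A stripped-down sketch of that pattern, with the struct and constants reduced to illustrative stand-ins for the xpc_channel fields this patch touches:

    struct chan {
            unsigned int flags;
            unsigned char delayed_ipi;    /* cf. ch->delayed_IPI_flags */
    };

    enum { C_DISCONNECTED = 0x1, C_WDISCONNECT = 0x2 };

    static void handle_openclose_ipi(struct chan *ch, unsigned char ipi_flags)
    {
            if ((ch->flags & C_DISCONNECTED) && (ch->flags & C_WDISCONNECT)) {
                    /* a waiter hasn't seen the disconnect yet; defer these flags */
                    ch->delayed_ipi |= ipi_flags;
                    return;
            }
            /* ... normal open/close processing ... */
    }

When the waiter finally runs, it clears the wait flag and pushes any deferred bits back into the partition's IPI AMO so the channel manager reprocesses them; that is the shape of the xpc_process_openclose_IPI() and xpc_disconnect_wait() changes in the diff that follows.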
Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc.h | 21 +++--- arch/ia64/sn/kernel/xpc_channel.c | 117 ++++++++++++++++++++--------- arch/ia64/sn/kernel/xpc_main.c | 146 ++++++++++++++++++++++++++---------- arch/ia64/sn/kernel/xpc_partition.c | 8 +- include/asm-ia64/sn/xp.h | 6 +- 5 files changed, 208 insertions(+), 90 deletions(-) (limited to 'arch/ia64/sn/kernel/xpc_partition.c') diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index 565822ab3d08..ae51d7b4c42e 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h @@ -417,6 +417,9 @@ struct xpc_channel { atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + u8 delayed_IPI_flags; /* IPI flags received, but delayed */ + /* action until channel disconnected */ + /* queue of msg senders who want to be notified when msg received */ atomic_t n_to_notify; /* #of msg senders to notify */ @@ -478,7 +481,8 @@ struct xpc_channel { #define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ #define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ -#define XPC_C_WDISCONNECT 0x00008000 /* waiting for channel disconnect */ +#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ +#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ @@ -508,13 +512,13 @@ struct xpc_partition { int reason_line; /* line# deactivation initiated from */ int reactivate_nasid; /* nasid in partition to reactivate */ - unsigned long disengage_request_timeout; /* timeout in XPC_TICKS */ + unsigned long disengage_request_timeout; /* timeout in jiffies */ struct timer_list disengage_request_timer; /* XPC infrastructure referencing and teardown control */ - volatile u8 setup_state; /* infrastructure setup state */ + volatile u8 setup_state; /* infrastructure setup state */ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ atomic_t references; /* #of references to infrastructure */ @@ -604,7 +608,7 @@ struct xpc_partition { /* number of seconds to wait for other partitions to disengage */ -#define XPC_DISENGAGE_REQUEST_TIMELIMIT 90 +#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90 /* interval in seconds to print 'waiting disengagement' messages */ #define XPC_DISENGAGE_PRINTMSG_INTERVAL 10 @@ -618,20 +622,18 @@ struct xpc_partition { extern struct xpc_registration xpc_registrations[]; -/* >>> found in xpc_main.c only */ +/* found in xpc_main.c */ extern struct device *xpc_part; extern struct device *xpc_chan; +extern int xpc_disengage_request_timelimit; extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); extern void xpc_dropped_IPI_check(struct xpc_partition *); +extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int); extern void xpc_disconnect_wait(int); -/* found in xpc_main.c and efi-xpc.c */ -extern void xpc_activate_partition(struct xpc_partition *); - - /* found in xpc_partition.c */ extern int xpc_exiting; extern struct xpc_vars *xpc_vars; @@ -1077,6 +1079,7 @@ xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag, /* given an AMO variable and a channel#, get its associated IPI flags */ #define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) +#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8)) #define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) 
((_amo) & 0x0f0f0f0f0f0f0f0f) #define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010) diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 195ac1b8e262..abf4fc2a87bb 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -792,11 +792,20 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) "reason=%d\n", ch->number, ch->partid, ch->reason); } - /* wake the thread that is waiting for this channel to disconnect */ if (ch->flags & XPC_C_WDISCONNECT) { spin_unlock_irqrestore(&ch->lock, *irq_flags); up(&ch->wdisconnect_sema); spin_lock_irqsave(&ch->lock, *irq_flags); + + } else if (ch->delayed_IPI_flags) { + if (part->act_state != XPC_P_DEACTIVATING) { + /* time to take action on any delayed IPI flags */ + spin_lock(&part->IPI_lock); + XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number, + ch->delayed_IPI_flags); + spin_unlock(&part->IPI_lock); + } + ch->delayed_IPI_flags = 0; } } @@ -818,6 +827,19 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, spin_lock_irqsave(&ch->lock, irq_flags); +again: + + if ((ch->flags & XPC_C_DISCONNECTED) && + (ch->flags & XPC_C_WDISCONNECT)) { + /* + * Delay processing IPI flags until thread waiting disconnect + * has had a chance to see that the channel is disconnected. + */ + ch->delayed_IPI_flags |= IPI_flags; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + if (IPI_flags & XPC_IPI_CLOSEREQUEST) { @@ -843,14 +865,22 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, /* both sides have finished disconnecting */ xpc_process_disconnect(ch, &irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + goto again; } if (ch->flags & XPC_C_DISCONNECTED) { - // >>> explain this section - if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { - DBUG_ON(part->act_state != - XPC_P_DEACTIVATING); + if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, + ch_number) & XPC_IPI_OPENREQUEST)) { + + DBUG_ON(ch->delayed_IPI_flags != 0); + spin_lock(&part->IPI_lock); + XPC_SET_IPI_FLAGS(part->local_IPI_amo, + ch_number, + XPC_IPI_CLOSEREQUEST); + spin_unlock(&part->IPI_lock); + } spin_unlock_irqrestore(&ch->lock, irq_flags); return; } @@ -880,9 +910,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, } XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); - } else { - xpc_process_disconnect(ch, &irq_flags); + + DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } + + xpc_process_disconnect(ch, &irq_flags); } @@ -898,7 +932,20 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, } DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST)); + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { + if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number) + & XPC_IPI_CLOSEREQUEST)) { + + DBUG_ON(ch->delayed_IPI_flags != 0); + spin_lock(&part->IPI_lock); + XPC_SET_IPI_FLAGS(part->local_IPI_amo, + ch_number, XPC_IPI_CLOSEREPLY); + spin_unlock(&part->IPI_lock); + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } ch->flags |= XPC_C_RCLOSEREPLY; @@ -916,8 +963,14 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, "channel=%d\n", args->msg_size, args->local_nentries, ch->partid, ch->number); - if ((ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) || - part->act_state == XPC_P_DEACTIVATING) { + if (part->act_state == XPC_P_DEACTIVATING || + (ch->flags & XPC_C_ROPENREQUEST)) { + spin_unlock_irqrestore(&ch->lock, 
irq_flags); + return; + } + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { + ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST; spin_unlock_irqrestore(&ch->lock, irq_flags); return; } @@ -931,8 +984,11 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, * msg_size = size of channel's messages in bytes * local_nentries = remote partition's local_nentries */ - DBUG_ON(args->msg_size == 0); - DBUG_ON(args->local_nentries == 0); + if (args->msg_size == 0 || args->local_nentries == 0) { + /* assume OPENREQUEST was delayed by mistake */ + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); ch->remote_nentries = args->local_nentries; @@ -970,7 +1026,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, spin_unlock_irqrestore(&ch->lock, irq_flags); return; } - DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST)); + if (!(ch->flags & XPC_C_OPENREQUEST)) { + XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); DBUG_ON(ch->flags & XPC_C_CONNECTED); @@ -1024,8 +1086,8 @@ xpc_connect_channel(struct xpc_channel *ch) struct xpc_registration *registration = &xpc_registrations[ch->number]; - if (down_interruptible(®istration->sema) != 0) { - return xpcInterrupted; + if (down_trylock(®istration->sema) != 0) { + return xpcRetry; } if (!XPC_CHANNEL_REGISTERED(ch->number)) { @@ -1445,19 +1507,11 @@ xpc_initiate_connect(int ch_number) if (xpc_part_ref(part)) { ch = &part->channels[ch_number]; - if (!(ch->flags & XPC_C_DISCONNECTING)) { - DBUG_ON(ch->flags & XPC_C_OPENREQUEST); - DBUG_ON(ch->flags & XPC_C_CONNECTED); - DBUG_ON(ch->flags & XPC_C_SETUP); - - /* - * Initiate the establishment of a connection - * on the newly registered channel to the - * remote partition. - */ - xpc_wakeup_channel_mgr(part); - } - + /* + * Initiate the establishment of a connection on the + * newly registered channel to the remote partition. 
+ */ + xpc_wakeup_channel_mgr(part); xpc_part_deref(part); } } @@ -1467,9 +1521,6 @@ xpc_initiate_connect(int ch_number) void xpc_connected_callout(struct xpc_channel *ch) { - unsigned long irq_flags; - - /* let the registerer know that a connection has been established */ if (ch->func != NULL) { @@ -1482,10 +1533,6 @@ xpc_connected_callout(struct xpc_channel *ch) dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, " "partid=%d, channel=%d\n", ch->partid, ch->number); } - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_CONNECTCALLOUT; - spin_unlock_irqrestore(&ch->lock, irq_flags); } diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index feece200b3c3..db349c6d4c58 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -91,6 +91,10 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; static int xpc_hb_check_min_interval = 10; static int xpc_hb_check_max_interval = 120; +int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT; +static int xpc_disengage_request_min_timelimit = 0; +static int xpc_disengage_request_max_timelimit = 120; + static ctl_table xpc_sys_xpc_hb_dir[] = { { 1, @@ -129,6 +133,19 @@ static ctl_table xpc_sys_xpc_dir[] = { 0555, xpc_sys_xpc_hb_dir }, + { + 2, + "disengage_request_timelimit", + &xpc_disengage_request_timelimit, + sizeof(int), + 0644, + NULL, + &proc_dointvec_minmax, + &sysctl_intvec, + NULL, + &xpc_disengage_request_min_timelimit, + &xpc_disengage_request_max_timelimit + }, {0} }; static ctl_table xpc_sys_dir[] = { @@ -153,11 +170,11 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); static unsigned long xpc_hb_check_timeout; -/* used as an indication of when the xpc_hb_checker thread is inactive */ -static DECLARE_MUTEX_LOCKED(xpc_hb_checker_inactive); +/* notification that the xpc_hb_checker thread has exited */ +static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); -/* used as an indication of when the xpc_discovery thread is inactive */ -static DECLARE_MUTEX_LOCKED(xpc_discovery_inactive); +/* notification that the xpc_discovery thread has exited */ +static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); static struct timer_list xpc_hb_timer; @@ -181,7 +198,7 @@ xpc_timeout_partition_disengage_request(unsigned long data) struct xpc_partition *part = (struct xpc_partition *) data; - DBUG_ON(XPC_TICKS < part->disengage_request_timeout); + DBUG_ON(jiffies < part->disengage_request_timeout); (void) xpc_partition_disengaged(part); @@ -292,8 +309,8 @@ xpc_hb_checker(void *ignore) dev_dbg(xpc_part, "heartbeat checker is exiting\n"); - /* mark this thread as inactive */ - up(&xpc_hb_checker_inactive); + /* mark this thread as having exited */ + up(&xpc_hb_checker_exited); return 0; } @@ -312,8 +329,8 @@ xpc_initiate_discovery(void *ignore) dev_dbg(xpc_part, "discovery thread is exiting\n"); - /* mark this thread as inactive */ - up(&xpc_discovery_inactive); + /* mark this thread as having exited */ + up(&xpc_discovery_exited); return 0; } @@ -703,6 +720,7 @@ xpc_daemonize_kthread(void *args) struct xpc_partition *part = &xpc_partitions[partid]; struct xpc_channel *ch; int n_needed; + unsigned long irq_flags; daemonize("xpc%02dc%d", partid, ch_number); @@ -713,11 +731,14 @@ xpc_daemonize_kthread(void *args) ch = &part->channels[ch_number]; if (!(ch->flags & XPC_C_DISCONNECTING)) { - DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); /* let registerer know that connection has been established */ - if (atomic_read(&ch->kthreads_assigned) == 1) { + 
spin_lock_irqsave(&ch->lock, irq_flags); + if (!(ch->flags & XPC_C_CONNECTCALLOUT)) { + ch->flags |= XPC_C_CONNECTCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_connected_callout(ch); /* @@ -732,14 +753,23 @@ xpc_daemonize_kthread(void *args) !(ch->flags & XPC_C_DISCONNECTING)) { xpc_activate_kthreads(ch, n_needed); } + } else { + spin_unlock_irqrestore(&ch->lock, irq_flags); } xpc_kthread_waitmsgs(part, ch); } if (atomic_dec_return(&ch->kthreads_assigned) == 0) { - if (ch->flags & XPC_C_CONNECTCALLOUT) { + spin_lock_irqsave(&ch->lock, irq_flags); + if ((ch->flags & XPC_C_CONNECTCALLOUT) && + !(ch->flags & XPC_C_DISCONNECTCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_disconnecting_callout(ch); + } else { + spin_unlock_irqrestore(&ch->lock, irq_flags); } if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); @@ -780,9 +810,29 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed) while (needed-- > 0) { + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + (void) xpc_part_ref(part); + xpc_msgqueue_ref(ch); + if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_mark_partition_engaged(part); + } + pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); if (pid < 0) { /* the fork failed */ + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_mark_partition_disengaged(part); + xpc_IPI_send_disengage(part); + } + xpc_msgqueue_deref(ch); + xpc_part_deref(part); if (atomic_read(&ch->kthreads_assigned) < ch->kthreads_idle_limit) { @@ -802,18 +852,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed) break; } - /* - * The following is done on behalf of the newly created - * kthread. That kthread is responsible for doing the - * counterpart to the following before it exits. - */ - (void) xpc_part_ref(part); - xpc_msgqueue_ref(ch); - if (atomic_inc_return(&ch->kthreads_assigned) == 1) { - if (atomic_inc_return(&part->nchannels_engaged) == 1) { - xpc_mark_partition_engaged(part); - } - } ch->kthreads_created++; // >>> temporary debug only!!! 
} } @@ -826,26 +864,49 @@ xpc_disconnect_wait(int ch_number) partid_t partid; struct xpc_partition *part; struct xpc_channel *ch; + int wakeup_channel_mgr; /* now wait for all callouts to the caller's function to cease */ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { part = &xpc_partitions[partid]; - if (xpc_part_ref(part)) { - ch = &part->channels[ch_number]; + if (!xpc_part_ref(part)) { + continue; + } - if (ch->flags & XPC_C_WDISCONNECT) { - if (!(ch->flags & XPC_C_DISCONNECTED)) { - (void) down(&ch->wdisconnect_sema); - } - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags &= ~XPC_C_WDISCONNECT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - } + ch = &part->channels[ch_number]; + if (!(ch->flags & XPC_C_WDISCONNECT)) { xpc_part_deref(part); + continue; } + + (void) down(&ch->wdisconnect_sema); + + spin_lock_irqsave(&ch->lock, irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + wakeup_channel_mgr = 0; + + if (ch->delayed_IPI_flags) { + if (part->act_state != XPC_P_DEACTIVATING) { + spin_lock(&part->IPI_lock); + XPC_SET_IPI_FLAGS(part->local_IPI_amo, + ch->number, ch->delayed_IPI_flags); + spin_unlock(&part->IPI_lock); + wakeup_channel_mgr = 1; + } + ch->delayed_IPI_flags = 0; + } + + ch->flags &= ~XPC_C_WDISCONNECT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (wakeup_channel_mgr) { + xpc_wakeup_channel_mgr(part); + } + + xpc_part_deref(part); } } @@ -873,11 +934,11 @@ xpc_do_exit(enum xpc_retval reason) /* ignore all incoming interrupts */ free_irq(SGI_XPC_ACTIVATE, NULL); - /* wait for the discovery thread to mark itself inactive */ - down(&xpc_discovery_inactive); + /* wait for the discovery thread to exit */ + down(&xpc_discovery_exited); - /* wait for the heartbeat checker thread to mark itself inactive */ - down(&xpc_hb_checker_inactive); + /* wait for the heartbeat checker thread to exit */ + down(&xpc_hb_checker_exited); /* sleep for a 1/3 of a second or so */ @@ -893,6 +954,7 @@ xpc_do_exit(enum xpc_retval reason) for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { part = &xpc_partitions[partid]; + if (xpc_partition_disengaged(part) && part->act_state == XPC_P_INACTIVE) { continue; @@ -930,7 +992,7 @@ xpc_do_exit(enum xpc_retval reason) /* now it's time to eliminate our heartbeat */ del_timer_sync(&xpc_hb_timer); - DBUG_ON(xpc_vars->heartbeating_to_mask == 0); + DBUG_ON(xpc_vars->heartbeating_to_mask != 0); /* take ourselves off of the reboot_notifier_list */ (void) unregister_reboot_notifier(&xpc_reboot_notifier); @@ -1134,7 +1196,7 @@ xpc_init(void) dev_err(xpc_part, "failed while forking discovery thread\n"); /* mark this new thread as a non-starter */ - up(&xpc_discovery_inactive); + up(&xpc_discovery_exited); xpc_do_exit(xpcUnloading); return -EBUSY; @@ -1172,3 +1234,7 @@ module_param(xpc_hb_check_interval, int, 0); MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " "heartbeat checks."); +module_param(xpc_disengage_request_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " + "for disengage request to complete."); + diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 79a0fc4c860c..958488f55699 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -578,7 +578,7 @@ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, /* - * Prior code has determine the nasid which generated an IPI. Inspect + * Prior code has determined the nasid which generated an IPI. 
Inspect * that nasid to determine if its partition needs to be activated or * deactivated. * @@ -942,14 +942,14 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, /* set a timelimit on the disengage request */ part->disengage_request_timeout = jiffies + - (XPC_DISENGAGE_REQUEST_TIMELIMIT * HZ); + (xpc_disengage_request_timelimit * HZ); part->disengage_request_timer.expires = part->disengage_request_timeout; add_timer(&part->disengage_request_timer); } - dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, - reason); + dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", + XPC_PARTID(part), reason); xpc_partition_going_down(part, reason); } diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h index f3052a54932b..30312be31206 100644 --- a/include/asm-ia64/sn/xp.h +++ b/include/asm-ia64/sn/xp.h @@ -225,7 +225,9 @@ enum xpc_retval { xpcDisconnecting, /* 49: channel disconnecting (closing) */ - xpcUnknownReason /* 50: unknown reason -- must be last in list */ + xpcOpenCloseError, /* 50: channel open/close protocol error */ + + xpcUnknownReason /* 51: unknown reason -- must be last in list */ }; @@ -350,7 +352,7 @@ typedef void (*xpc_notify_func)(enum xpc_retval reason, partid_t partid, * * The 'func' field points to the function to call when aynchronous * notification is required for such events as: a connection established/lost, - * or an incomming message received, or an error condition encountered. A + * or an incoming message received, or an error condition encountered. A * non-NULL 'func' field indicates that there is an active registration for * the channel. */ -- cgit v1.2.3-59-g8ed1b From 4b38fcd4858204cb3667eb7b3aca48ffb1002f05 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 25 Oct 2005 14:09:51 -0500 Subject: [IA64-SGI] XPC changes to support more than 2k nasids XPC needs to be changed to support up to 16k nasids on an SGI Altix system. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc.h | 77 ++++++++++++++------ arch/ia64/sn/kernel/xpc_main.c | 8 +-- arch/ia64/sn/kernel/xpc_partition.c | 140 ++++++++++++++++++++---------------- 3 files changed, 141 insertions(+), 84 deletions(-) (limited to 'arch/ia64/sn/kernel/xpc_partition.c') diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index ae51d7b4c42e..33df1b3758b6 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h @@ -68,29 +68,58 @@ /* - * Reserved Page provided by SAL. + * the reserved page * - * SAL provides one page per partition of reserved memory. When SAL - * initialization is complete, SAL_signature, SAL_version, partid, - * part_nasids, and mach_nasids are set. + * SAL reserves one page of memory per partition for XPC. Though a full page + * in length (16384 bytes), its starting address is not page aligned, but it + * is cacheline aligned. The reserved page consists of the following: + * + * reserved page header + * + * The first cacheline of the reserved page contains the header + * (struct xpc_rsvd_page). Before SAL initialization has completed, + * SAL has set up the following fields of the reserved page header: + * SAL_signature, SAL_version, partid, and nasids_size. The other + * fields are set up by XPC. (xpc_rsvd_page points to the local + * partition's reserved page.) 
+ * + * part_nasids mask + * mach_nasids mask + * + * SAL also sets up two bitmaps (or masks), one that reflects the actual + * nasids in this partition (part_nasids), and the other that reflects + * the actual nasids in the entire machine (mach_nasids). We're only + * interested in the even numbered nasids (which contain the processors + * and/or memory), so we only need half as many bits to represent the + * nasids. The part_nasids mask is located starting at the first cacheline + * following the reserved page header. The mach_nasids mask follows right + * after the part_nasids mask. The size in bytes of each mask is reflected + * by the reserved page header field 'nasids_size'. (Local partition's + * mask pointers are xpc_part_nasids and xpc_mach_nasids.) + * + * vars + * vars part + * + * Immediately following the mach_nasids mask are the XPC variables + * required by other partitions. First are those that are generic to all + * partitions (vars), followed on the next available cacheline by those + * which are partition specific (vars part). These are setup by XPC. + * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) * * Note: Until vars_pa is set, the partition XPC code has not been initialized. */ struct xpc_rsvd_page { - u64 SAL_signature; /* SAL unique signature */ - u64 SAL_version; /* SAL specified version */ - u8 partid; /* partition ID from SAL */ + u64 SAL_signature; /* SAL: unique signature */ + u64 SAL_version; /* SAL: version */ + u8 partid; /* SAL: partition ID */ u8 version; - u8 pad[6]; /* pad to u64 align */ + u8 pad1[6]; /* align to next u64 in cacheline */ volatile u64 vars_pa; - struct timespec stamp; /* time when reserved page was initialized */ - u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; - u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; + struct timespec stamp; /* time when reserved page was setup by XPC */ + u64 pad2[9]; /* align to last u64 in cacheline */ + u64 nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RSVD_PAGE_ALIGNED_SIZE \ - (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))) - #define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */ #define XPC_SUPPORTS_RP_STAMP(_version) \ @@ -142,8 +171,6 @@ struct xpc_vars { AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ }; -#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars))) - #define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */ #define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \ @@ -184,7 +211,7 @@ xpc_disallow_hb(partid_t partid, struct xpc_vars *vars) /* * The AMOs page consists of a number of AMO variables which are divided into * four groups, The first two groups are used to identify an IRQ's sender. - * These two groups consist of 64 and 16 AMO variables respectively. The last + * These two groups consist of 64 and 128 AMO variables respectively. The last * two groups, consisting of just one AMO variable each, are used to identify * the remote partitions that are currently engaged (from the viewpoint of * the XPC running on the remote partition). 
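The layout just described is plain offset arithmetic from the reserved page's base: one cacheline of header, two nasid masks of nasids_size bytes each, then the cacheline-aligned vars structures. A sketch of that derivation with hypothetical names; the hunk below encodes the same offsets in the XPC_RP_PART_NASIDS, XPC_RP_MACH_NASIDS, and XPC_RP_VARS macros:

    #include <stddef.h>
    #include <stdint.h>

    struct rp_layout {
            uint8_t *part_nasids;
            uint8_t *mach_nasids;
            uint8_t *vars;
            uint8_t *vars_part;
    };

    static struct rp_layout
    rp_map(uint8_t *rp, size_t hdr_size, size_t nasids_size, size_t vars_size)
    {
            struct rp_layout l;

            l.part_nasids = rp + hdr_size;                /* first cacheline after header */
            l.mach_nasids = l.part_nasids + nasids_size;  /* follows immediately */
            l.vars        = l.mach_nasids + nasids_size;
            l.vars_part   = l.vars + vars_size;           /* next cacheline after vars */
            return l;
    }

Here hdr_size and vars_size play the roles of XPC_RP_HEADER_SIZE and XPC_RP_VARS_SIZE, the cacheline-aligned sizes of struct xpc_rsvd_page and struct xpc_vars.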
@@ -233,6 +260,16 @@ struct xpc_vars_part { #define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ +/* the reserved page sizes and offsets */ + +#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) +#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars)) + +#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE) +#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words) +#define XPC_RP_VARS(_rp) ((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words) +#define XPC_RP_VARS_PART(_rp) (struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(rp) + XPC_RP_VARS_SIZE) + /* * Functions registered by add_timer() or called by kernel_thread() only @@ -1147,9 +1184,9 @@ xpc_IPI_send_local_msgrequest(struct xpc_channel *ch) * cacheable mapping for the entire region. This will prevent speculative * reading of cached copies of our lines from being issued which will cause * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 - * (XP_MAX_PARTITIONS) AMO variables for message notification and an - * additional 16 (XP_NASID_MASK_WORDS) AMO variables for partition activation - * and 2 AMO variables for partition deactivation. + * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an + * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition + * activation and 2 AMO variables for partition deactivation. */ static inline AMO_t * xpc_IPI_init(int index) diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index db349c6d4c58..38f2c699192c 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -1049,11 +1049,11 @@ xpc_init(void) /* * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng - * both a partition's reserved page and its XPC variables. Its size was - * based on the size of a reserved page. So we need to ensure that the - * XPC variables will fit as well. + * various portions of a partition's reserved page. Its size is based + * on the size of the reserved page header and part_nasids mask. So we + * need to ensure that the other items will fit as well. */ - if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) { + if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) { dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n"); return -EPERM; } diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 958488f55699..6bb1091f2a4d 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -47,13 +47,16 @@ static u64 xpc_sh2_IPI_access3; u64 xpc_prot_vec[MAX_COMPACT_NODES]; -/* this partition's reserved page */ +/* this partition's reserved page pointers */ struct xpc_rsvd_page *xpc_rsvd_page; - -/* this partition's XPC variables (within the reserved page) */ +static u64 *xpc_part_nasids; +static u64 *xpc_mach_nasids; struct xpc_vars *xpc_vars; struct xpc_vars_part *xpc_vars_part; +static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */ +static int xp_nasid_mask_words; /* actual size in words of nasid mask */ + /* * For performance reasons, each entry of xpc_partitions[] is cacheline @@ -65,15 +68,16 @@ struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; /* - * Generic buffer used to store a local copy of the remote partitions - * reserved page or XPC variables. 
+ * Generic buffer used to store a local copy of portions of a remote + * partition's reserved page (either its header and part_nasids mask, + * or its vars). * * xpc_discovery runs only once and is a seperate thread that is * very likely going to be processing in parallel with receiving * interrupts. */ -char ____cacheline_aligned - xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE]; +char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE + + XP_NASID_MASK_BYTES]; /* @@ -136,7 +140,7 @@ xpc_rsvd_page_init(void) { struct xpc_rsvd_page *rp; AMO_t *amos_page; - u64 rp_pa, next_cl, nasid_array = 0; + u64 rp_pa, nasid_array = 0; int i, ret; @@ -144,7 +148,8 @@ xpc_rsvd_page_init(void) rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), (u64) xpc_remote_copy_buffer, - XPC_RSVD_PAGE_ALIGNED_SIZE); + XPC_RP_HEADER_SIZE + + L1_CACHE_BYTES); if (rp_pa == 0) { dev_err(xpc_part, "SAL failed to locate the reserved page\n"); return NULL; @@ -159,12 +164,19 @@ xpc_rsvd_page_init(void) rp->version = XPC_RP_VERSION; - /* - * Place the XPC variables on the cache line following the - * reserved page structure. - */ - next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE; - xpc_vars = (struct xpc_vars *) next_cl; + /* establish the actual sizes of the nasid masks */ + if (rp->SAL_version == 1) { + /* SAL_version 1 didn't set the nasids_size field */ + rp->nasids_size = 128; + } + xp_nasid_mask_bytes = rp->nasids_size; + xp_nasid_mask_words = xp_nasid_mask_bytes / 8; + + /* setup the pointers to the various items in the reserved page */ + xpc_part_nasids = XPC_RP_PART_NASIDS(rp); + xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); + xpc_vars = XPC_RP_VARS(rp); + xpc_vars_part = XPC_RP_VARS_PART(rp); /* * Before clearing xpc_vars, see if a page of AMOs had been previously @@ -216,26 +228,23 @@ xpc_rsvd_page_init(void) amos_page = (AMO_t *) TO_AMO((u64) amos_page); } + /* clear xpc_vars */ memset(xpc_vars, 0, sizeof(struct xpc_vars)); - /* - * Place the XPC per partition specific variables on the cache line - * following the XPC variables structure. 
- */ - next_cl += XPC_VARS_ALIGNED_SIZE; - memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) * - XP_MAX_PARTITIONS); - xpc_vars_part = (struct xpc_vars_part *) next_cl; - xpc_vars->vars_part_pa = __pa(next_cl); - xpc_vars->version = XPC_V_VERSION; xpc_vars->act_nasid = cpuid_to_nasid(0); xpc_vars->act_phys_cpuid = cpu_physical_id(0); + xpc_vars->vars_part_pa = __pa(xpc_vars_part); + xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page); xpc_vars->amos_page = amos_page; /* save for next load of XPC */ + /* clear xpc_vars_part */ + memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) * + XP_MAX_PARTITIONS); + /* initialize the activate IRQ related AMO variables */ - for (i = 0; i < XP_NASID_MASK_WORDS; i++) { + for (i = 0; i < xp_nasid_mask_words; i++) { (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); } @@ -243,10 +252,7 @@ xpc_rsvd_page_init(void) (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); - /* export AMO page's physical address to other partitions */ - xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); - - /* timestamp of when reserved page was initialized */ + /* timestamp of when reserved page was setup by XPC */ rp->stamp = CURRENT_TIME; /* @@ -406,7 +412,7 @@ xpc_check_remote_hb(void) /* pull the remote_hb cache line */ bres = xp_bte_copy(part->remote_vars_pa, ia64_tpa((u64) remote_vars), - XPC_VARS_ALIGNED_SIZE, + XPC_RP_VARS_SIZE, (BTE_NOTIFY | BTE_WACQUIRE), NULL); if (bres != BTE_SUCCESS) { XPC_DEACTIVATE_PARTITION(part, @@ -434,10 +440,11 @@ xpc_check_remote_hb(void) /* - * Get a copy of the remote partition's rsvd page. + * Get a copy of a portion of the remote partition's rsvd page. * * remote_rp points to a buffer that is cacheline aligned for BTE copies and - * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE. + * is large enough to contain a copy of their reserved page header and + * part_nasids mask. */ static enum xpc_retval xpc_get_remote_rp(int nasid, u64 *discovered_nasids, @@ -449,16 +456,17 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids, /* get the reserved page's physical address */ *remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, - XPC_RSVD_PAGE_ALIGNED_SIZE); + XPC_RP_HEADER_SIZE + + xp_nasid_mask_bytes); if (*remote_rp_pa == 0) { return xpcNoRsvdPageAddr; } - /* pull over the reserved page structure */ + /* pull over the reserved page header and part_nasids mask */ bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp), - XPC_RSVD_PAGE_ALIGNED_SIZE, + XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL); if (bres != BTE_SUCCESS) { return xpc_map_bte_errors(bres); @@ -466,8 +474,11 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids, if (discovered_nasids != NULL) { - for (i = 0; i < XP_NASID_MASK_WORDS; i++) { - discovered_nasids[i] |= remote_rp->part_nasids[i]; + u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp); + + + for (i = 0; i < xp_nasid_mask_words; i++) { + discovered_nasids[i] |= remote_part_nasids[i]; } } @@ -494,10 +505,10 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids, /* - * Get a copy of the remote partition's XPC variables. + * Get a copy of the remote partition's XPC variables from the reserved page. * * remote_vars points to a buffer that is cacheline aligned for BTE copies and - * assumed to be of size XPC_VARS_ALIGNED_SIZE. + * assumed to be of size XPC_RP_VARS_SIZE. 
*/ static enum xpc_retval xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) @@ -513,7 +524,7 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) /* pull over the cross partition variables */ bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars), - XPC_VARS_ALIGNED_SIZE, + XPC_RP_VARS_SIZE, (BTE_NOTIFY | BTE_WACQUIRE), NULL); if (bres != BTE_SUCCESS) { return xpc_map_bte_errors(bres); @@ -778,14 +789,13 @@ xpc_identify_act_IRQ_sender(void) u64 nasid; /* remote nasid */ int n_IRQs_detected = 0; AMO_t *act_amos; - struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS; /* scan through act AMO variable looking for non-zero entries */ - for (word = 0; word < XP_NASID_MASK_WORDS; word++) { + for (word = 0; word < xp_nasid_mask_words; word++) { if (xpc_exiting) { break; @@ -807,7 +817,7 @@ xpc_identify_act_IRQ_sender(void) * remote nasid in our reserved pages machine mask. * This is used in the event of module reload. */ - rp->mach_nasids[word] |= nasid_mask; + xpc_mach_nasids[word] |= nasid_mask; /* locate the nasid(s) which sent interrupts */ @@ -992,6 +1002,7 @@ xpc_discovery(void) u64 remote_rp_pa; u64 remote_vars_pa; int region; + int region_size; int max_regions; int nasid; struct xpc_rsvd_page *rp; @@ -1001,7 +1012,8 @@ xpc_discovery(void) enum xpc_retval ret; - remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE, + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + + xp_nasid_mask_bytes, GFP_KERNEL, &remote_rp_base); if (remote_rp == NULL) { return; @@ -1009,13 +1021,13 @@ xpc_discovery(void) remote_vars = (struct xpc_vars *) remote_rp; - discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS, + discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words, GFP_KERNEL); if (discovered_nasids == NULL) { kfree(remote_rp_base); return; } - memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS); + memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words); rp = (struct xpc_rsvd_page *) xpc_rsvd_page; @@ -1024,11 +1036,19 @@ xpc_discovery(void) * nodes that can comprise an access protection grouping. The access * protection is in regards to memory, IOI and IPI. */ -//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or -//>>> include/asm-ia64/sn/addrs.h -#define SH1_MAX_REGIONS 64 -#define SH2_MAX_REGIONS 256 - max_regions = is_shub2() ? 
SH2_MAX_REGIONS : SH1_MAX_REGIONS; + max_regions = 64; + region_size = sn_region_size; + + switch (region_size) { + case 128: + max_regions *= 2; + case 64: + max_regions *= 2; + case 32: + max_regions *= 2; + region_size = 16; + DBUG_ON(!is_shub2()); + } for (region = 0; region < max_regions; region++) { @@ -1038,8 +1058,8 @@ xpc_discovery(void) dev_dbg(xpc_part, "searching region %d\n", region); - for (nasid = (region * sn_region_size * 2); - nasid < ((region + 1) * sn_region_size * 2); + for (nasid = (region * region_size * 2); + nasid < ((region + 1) * region_size * 2); nasid += 2) { if ((volatile int) xpc_exiting) { @@ -1049,14 +1069,14 @@ xpc_discovery(void) dev_dbg(xpc_part, "checking nasid %d\n", nasid); - if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) { + if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) { dev_dbg(xpc_part, "PROM indicates Nasid %d is " "part of the local partition; skipping " "region\n", nasid); break; } - if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) { + if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) { dev_dbg(xpc_part, "PROM indicates Nasid %d was " "not on Numa-Link network at reset\n", nasid); @@ -1178,12 +1198,12 @@ xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) return xpcPartitionDown; } - part_nasid_pa = part->remote_rp_pa + - (u64) &((struct xpc_rsvd_page *) 0)->part_nasids; + memset(nasid_mask, 0, XP_NASID_MASK_BYTES); + + part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa); bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask), - L1_CACHE_ALIGN(XP_NASID_MASK_BYTES), - (BTE_NOTIFY | BTE_WACQUIRE), NULL); + xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL); return xpc_map_bte_errors(bte_res); } -- cgit v1.2.3-59-g8ed1b From 279290294662d4a35d209fb7d7c46523cfa3d63d Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 25 Oct 2005 14:11:53 -0500 Subject: [IA64-SGI] cleanup the way XPC locates the reserved page Eliminate the passing in of a scratch buffer used for locating the reserved page setup for XPC. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc_partition.c | 39 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'arch/ia64/sn/kernel/xpc_partition.c') diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 6bb1091f2a4d..ce5a37ff4388 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -85,13 +85,16 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE + * for that nasid. This function returns 0 on any error. 
*/ static u64 -xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) +xpc_get_rsvd_page_pa(int nasid) { bte_result_t bte_res; s64 status; u64 cookie = 0; u64 rp_pa = nasid; /* seed with nasid */ u64 len = 0; + u64 buf = buf; + u64 buf_len = 0; + void *buf_base = NULL; while (1) { @@ -107,13 +110,22 @@ xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) break; } - if (len > buf_size) { - dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len); - status = SALRET_ERROR; - break; + if (L1_CACHE_ALIGN(len) > buf_len) { + if (buf_base != NULL) { + kfree(buf_base); + } + buf_len = L1_CACHE_ALIGN(len); + buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len, + GFP_KERNEL, &buf_base); + if (buf_base == NULL) { + dev_err(xpc_part, "unable to kmalloc " + "len=0x%016lx\n", buf_len); + status = SALRET_ERROR; + break; + } } - bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size, + bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len, (BTE_NOTIFY | BTE_WACQUIRE), NULL); if (bte_res != BTE_SUCCESS) { dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); @@ -122,6 +134,10 @@ xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) } } + if (buf_base != NULL) { + kfree(buf_base); + } + if (status != SALRET_OK) { rp_pa = 0; } @@ -146,10 +162,9 @@ xpc_rsvd_page_init(void) /* get the local reserved page's address */ - rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), - (u64) xpc_remote_copy_buffer, - XPC_RP_HEADER_SIZE + - L1_CACHE_BYTES); + preempt_disable(); + rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); + preempt_enable(); if (rp_pa == 0) { dev_err(xpc_part, "SAL failed to locate the reserved page\n"); return NULL; @@ -455,9 +470,7 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids, /* get the reserved page's physical address */ - *remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, - XPC_RP_HEADER_SIZE + - xp_nasid_mask_bytes); + *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); if (*remote_rp_pa == 0) { return xpcNoRsvdPageAddr; } -- cgit v1.2.3-59-g8ed1b