/* $Id$
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc.
 * Copyright (C) 2000 by Colin Ngam
 */
#ifndef _ASM_SN_NODEPDA_H
#define _ASM_SN_NODEPDA_H

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * NOTE(review): the header names of the #include directives below are
 * missing from this copy of the file (they appear to have been stripped
 * together with their angle brackets).  They must be restored from the
 * original source before this header can be compiled.
 */
#include
#include
#include
#include
#include
/* #include */
#ifdef LATER
typedef struct module_s module_t;	/* Avoids sys/SN/module.h */
#else
#include
#endif
/* #include */

/*
 * NUMA Node-Specific Data structures are defined in this file.
 * In particular, this is the location of the node PDA.
 * A pointer to the right node PDA is saved in each CPU PDA.
 */

/*
 * Subnode PDA structures. Each node needs a few data structures that
 * correspond to the PIs on the HUB chip that supports the node.
 *
 * WARNING!!!! 6.5.x compatibility requirements prevent us from
 * changing or reordering fields in the following structure for IP27.
 * It is essential that the data mappings not change for IP27 platforms.
 * It is OK to add fields that are IP35 specific if they are under #ifdef IP35.
 */
struct subnodepda_s {
	intr_vecblk_t	intr_dispatch0;
	intr_vecblk_t	intr_dispatch1;
	uint64_t	next_prof_timeout;
	int		prof_count;
};

/* Convenience name for struct subnodepda_s. */
typedef struct subnodepda_s subnode_pda_t;

/*
 * Forward declarations: struct nodepda_s only stores pointers to these
 * types, so their full definitions are not needed here.
 */
struct ptpool_s;

#if defined(CONFIG_IA64_SGI_SYNERGY_PERF)
struct synergy_perf_s;
#endif

/*
 * Node-specific data structure.
 *
 * One of these structures is allocated on each node of a NUMA system.
 * Non-NUMA systems are considered to be systems with one node, and
 * hence there will be one of this structure for the entire system.
 *
 * This structure provides a convenient way of keeping together
 * all per-node data structures.
 */

#ifdef LATER
/*
 * The following structure is contained in the nodepda & contains
 * a lock & queue-head for sanon pages that belong to the node.
 * See the anon manager for more details.
*/
typedef struct {
	lock_t	sal_lock;
	plist_t	sal_listhead;
} sanon_list_head_t;
#endif

/*
 * Per-node data area (node PDA).
 *
 * Several members are guarded by NUMA_BASE, LATER or CONFIG_* conditionals,
 * so the set of members, and therefore the layout of this structure,
 * depends on the build configuration.
 */
struct nodepda_s {

#ifdef	NUMA_BASE

	/*
	 * Pointer to this node's copy of Nodepdaindr
	 */
	struct nodepda_s	**pernode_pdaindr;

	/*
	 * Data used for migration control
	 */
	struct migr_control_data_s *mcd;

	/*
	 * Data used for replication control
	 */
	struct repl_control_data_s *rcd;

	/*
	 * Numa statistics
	 */
	struct numa_stats_s *numa_stats;

	/*
	 * Load distribution
	 */
	uint memfit_assign;

	/*
	 * New extended memory reference counters
	 */
	void *migr_refcnt_counterbase;
	void *migr_refcnt_counterbuffer;
	size_t migr_refcnt_cbsize;
	int migr_refcnt_numsets;

	/*
	 * mem_tick quiescing lock
	 */
	uint mem_tick_lock;

	/*
	 * Migration candidate set
	 * by migration prologue intr handler
	 */
	uint64_t migr_candidate;

	/*
	 * Each node gets its own syswait counter to remove contention
	 * on the global one.
	 */
#ifdef LATER
	struct syswait syswait;
#endif

#endif	/* NUMA_BASE */

	/*
	 * Node-specific Zone structures.
	 */
#ifdef LATER
	zoneset_element_t	node_zones;
	pg_data_t	node_pg_data;	/* VM page data structures */
	plist_t		error_discard_plist;
#endif
	uint		error_discard_count;
	uint		error_page_count;
	uint		error_cleaned_count;
	spinlock_t	error_discard_lock;

	/* Information needed for SN Hub chip interrupt handling. */
	subnode_pda_t	snpda[NUM_SUBNODES];

	/* Distributed kernel support */
#ifdef LATER
	kern_vars_t	kern_vars;
#endif

	/* Vector operation support */
	/* Change this to a sleep lock? */
	spinlock_t	vector_lock;
	/* State of the vector unit for this node */
	char		vector_unit_busy;
	cpuid_t		node_first_cpu;	/* Starting cpu number for node */
	ushort		node_num_cpus;	/* Number of cpus present */

	/* node utlbmiss info */
	spinlock_t	node_utlbswitchlock;
	volatile cpumask_t	node_utlbmiss_flush;
	volatile signed char	node_need_utlbmiss_patch;
	volatile char	node_utlbmiss_patched;

	/*
	 * Router bookkeeping.
	 * NOTE(review): npda_rip_first / npda_rip_last look like the head and
	 * tail link of a list of router info entries -- confirm against the
	 * code that maintains them.
	 */
	nodepda_router_info_t	*npda_rip_first;
	nodepda_router_info_t	**npda_rip_last;
	int		dependent_routers;

#if defined(CONFIG_IA64_SGI_SYNERGY_PERF)
	int		synergy_perf_enabled;
	int		synergy_perf_freq;
	spinlock_t	synergy_perf_lock;
	uint64_t	synergy_inactive_intervals;
	uint64_t	synergy_active_intervals;
	struct synergy_perf_s	*synergy_perf_data;
	struct synergy_perf_s	*synergy_perf_first; /* reporting consistency .. */
#endif /* CONFIG_IA64_SGI_SYNERGY_PERF */

	devfs_handle_t	xbow_vhdl;
	nasid_t		xbow_peer;	/* NASID of our peer hub on xbow */
	struct semaphore xbow_sema;	/* Sema for xbow synchronization */
	slotid_t	slotdesc;
	moduleid_t	module_id;	/* Module ID (redundant local copy) */
	module_t	*module;	/* Pointer to containing module */
	int		hub_chip_rev;	/* Rev of my Hub chip */
	char		nasid_mask[NASID_MASK_BYTES];
					/* Need a copy of the nasid mask
					 * on every node */
	xwidgetnum_t	basew_id;
	devfs_handle_t	basew_xc;
	spinlock_t	fprom_lock;
	char		ni_error_print;	/* For printing ni error state
					 * only once during system panic
					 */
#ifdef LATER
	md_perf_monitor_t node_md_perfmon;
	hubstat_t	hubstats;
	int		hubticks;
	sbe_info_t	*sbe_info;	/* ECC single-bit error statistics */
#endif	/* LATER */
	int		huberror_ticks;

	router_queue_t	*visited_router_q;
	router_queue_t	*bfs_router_q;
					/* Used for router traversal */
#if defined (CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)
	router_map_ent_t router_map[MAX_RTR_BREADTH];
#endif
	int		num_routers;	/* Total routers in the system */

	char		membank_flavor;
					/* Indicates what sort of memory
					 * banks are present on this node
					 */

	char		*hwg_node_name;	/* hwgraph node name */

	struct widget_info_t *widget_info;	/* Node as xtalk widget */
	devfs_handle_t	node_vertex;	/* Hwgraph vertex for this node */

	void		*pdinfo;	/* Platform-dependent per-node info */
	uint64_t	*dump_stack;	/* Dump stack during nmi handling */
	int		dump_count;	/* To allow only one cpu-per-node */
#ifdef LATER
	io_perf_monitor_t node_io_perfmon;
#endif

	/*
	 * Each node gets its own pdcount counter to remove contention
	 * on the global one.
	 */

	int		pdcount;	/* count of pdinserted pages */

#ifdef	NUMA_BASE
	void		*cached_global_pool;	/* pointer to cached vmpool */
#endif /* NUMA_BASE */

#ifdef LATER
	sanon_list_head_t sanon_list_head;	/* head for sanon pages */
#endif
#ifdef	NUMA_BASE
	struct ptpool_s	*ptpool;	/* ptpool for this node */
#endif /* NUMA_BASE */

	/*
	 * The BTEs on this node are shared by the local cpus
	 */
#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)
#ifdef LATER
	bteinfo_t	*node_bte_info[BTES_PER_NODE];
#endif
#endif
};

/* Convenience name for struct nodepda_s. */
typedef struct nodepda_s nodepda_t;

/*
 * Module ID and slot descriptor stored in the nodepda selected by
 * NODEPDA(_node).
 */
#define NODE_MODULEID(_node)	(NODEPDA(_node)->module_id)
#define NODE_SLOTID(_node)	(NODEPDA(_node)->slotdesc)

#ifdef	NUMA_BASE
/*
 * Access Functions for node PDA.
 * Since there is one nodepda for each node, we need a convenient mechanism
 * to access these nodepdas without cluttering code with #ifdefs.
 * The next set of definitions provides this.
 * Routines are expected to use
 *
 *	nodepda		-> to access PDA for the node on which code is running
 *	subnodepda	-> to access subnode PDA for the node on which code is running
 *
 *	NODEPDA(x)	-> to access node PDA for cnodeid 'x'
 *	SUBNODEPDA(x,s)	-> to access subnode PDA for cnodeid/slice 'x'
 */

#ifdef LATER
#define	nodepda		private.p_nodepda	/* Ptr to this node's PDA */
#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC
#define	subnodepda	private.p_subnodepda	/* Ptr to this node's subnode PDA */
#endif

#else
/*
 * Until we have a shared node local area defined, do it this way ..
* like in Caliase space.  See above.
 */
extern nodepda_t	*nodepda;
extern subnode_pda_t	*subnodepda;
#endif

/*
 * Nodepdaindr[]
 * This is a private data structure for use only in early initialization.
 * All users of nodepda should use the macro NODEPDA(nodenum) to get
 * the suitable nodepda structure.
 * This macro has the advantage of not requiring #ifdefs for NUMA and
 * non-NUMA code.
 */
extern nodepda_t	*Nodepdaindr[];

/*
 * NODEPDA_GLOBAL(x) macro should ONLY be used during early initialization.
 * Once meminit is complete, NODEPDA(x) is ready to use.
 * During early init, the system fills up Nodepdaindr.  By the time we
 * are in meminit(), all nodepdas are initialized, and hence
 * we can fill up the pernode_pdaindr array in each nodepda structure.
 */
#define	NODEPDA_GLOBAL(x)	Nodepdaindr[x]

/*
 * Returns a pointer to a given node's nodepda.
 * The lookup goes through the pernode_pdaindr table of the nodepda that
 * the 'nodepda' pointer refers to.
 */
#define	NODEPDA(x)		(nodepda->pernode_pdaindr[x])

/*
 * Returns a pointer to a given node/slice's subnodepda.
 *	SUBNODEPDA(cnode, subnode) - uses cnode as first arg
 *	SNPDA(npda, subnode)	- uses pointer to nodepda as first arg
 */
#define	SUBNODEPDA(x,sn)	(&nodepda->pernode_pdaindr[x]->snpda[sn])
#define	SNPDA(npda,sn)		(&(npda)->snpda[sn])

/*
 * NOTE(review): struct nodepda_s in this header has no error_stamp member,
 * so NODEPDA_ERROR_FOOTPRINT cannot be expanded as the structure stands.
 * node_md_perfmon and node_io_perfmon are declared only under LATER, so
 * NODEPDA_MDP_MON and NODEPDA_IOP_MON are usable only in that configuration.
 */
#define NODEPDA_ERROR_FOOTPRINT(node, cpu) \
		(&(NODEPDA(node)->error_stamp[cpu]))
#define NODEPDA_MDP_MON(node)	(&(NODEPDA(node)->node_md_perfmon))
#define NODEPDA_IOP_MON(node)	(&(NODEPDA(node)->node_io_perfmon))

/*
 * Macros to access data structures inside nodepda
 *
 * NOTE(review): struct nodepda_s in this header has no lrs member, so
 * NODEPDA_LRS cannot be expanded as the structure stands.
 */
#if NUMA_MIGR_CONTROL
#define NODEPDA_MCD(node) (NODEPDA(node)->mcd)
#endif /* NUMA_MIGR_CONTROL */

#if NUMA_REPL_CONTROL
#define NODEPDA_RCD(node) (NODEPDA(node)->rcd)
#endif /* NUMA_REPL_CONTROL */

#if (NUMA_MIGR_CONTROL || NUMA_REPL_CONTROL)
#define NODEPDA_LRS(node) (NODEPDA(node)->lrs)
#endif /* (NUMA_MIGR_CONTROL || NUMA_REPL_CONTROL) */

/*
 * Exported functions
 */
extern nodepda_t *nodepda_alloc(void);

#else	/* !NUMA_BASE */
/*
 * For a single-node system we will just have one global nodepda pointer
 * allocated at startup.
The global nodepda will point to this nodepda * structure. */ extern nodepda_t *Nodepdaindr; /* * On non-NUMA systems, NODEPDA_GLOBAL and NODEPDA macros collapse to * be the same. */ #define NODEPDA_GLOBAL(x) Nodepdaindr /* * Returns a pointer to a given node's nodepda. */ #define NODEPDA(x) Nodepdaindr /* * nodepda can also be defined as private.p_nodepda. * But on non-NUMA systems, there is only one nodepda, and there is * no reason to go through the PDA to access this pointer. * Hence nodepda aliases to the global nodepda directly. * * Routines should use nodepda to access the local node's PDA. */ #define nodepda (Nodepdaindr) #endif /* NUMA_BASE */ /* Quickly convert a compact node ID into a hwgraph vertex */ #define cnodeid_to_vertex(cnodeid) (NODEPDA(cnodeid)->node_vertex) /* Check if given a compact node id the corresponding node has all the * cpus disabled. */ #define is_headless_node(_cnode) ((_cnode == CNODEID_NONE) || \ (CNODE_NUM_CPUS(_cnode) == 0)) /* Check if given a node vertex handle the corresponding node has all the * cpus disabled. */ #define is_headless_node_vertex(_nodevhdl) \ is_headless_node(nodevertex_to_cnodeid(_nodevhdl)) #ifdef __cplusplus } #endif #ifdef NUMA_BASE /* * To remove contention on the global syswait counter each node will have * its own. Each clock tick the clock cpu will re-calculate the global * syswait counter by summing from each of the nodes. The other cpus will * continue to read the global one during their clock ticks. This does * present a problem when a thread increments the count on one node and wakes * up on a different node and decrements it there. Eventually the count could * overflow if this happens continually for a long period. To prevent this * second_thread() periodically preserves the current syswait state and * resets the counters. 
*/ #define ADD_SYSWAIT(_field) atomicAddInt(&nodepda->syswait._field, 1) #define SUB_SYSWAIT(_field) atomicAddInt(&nodepda->syswait._field, -1) #else #define ADD_SYSWAIT(_field) \ { \ ASSERT(syswait._field >= 0); \ atomicAddInt(&syswait._field, 1); \ } #define SUB_SYSWAIT(_field) \ { \ ASSERT(syswait._field > 0); \ atomicAddInt(&syswait._field, -1); \ } #endif /* NUMA_BASE */ #ifdef NUMA_BASE /* * Another global variable to remove contention from: pdcount. * See above comments for SYSWAIT. */ #define ADD_PDCOUNT(_n) \ { \ atomicAddInt(&nodepda->pdcount, _n); \ if (_n > 0 && !pdflag) \ pdflag = 1; \ } #else #define ADD_PDCOUNT(_n) \ { \ ASSERT(&pdcount >= 0); \ atomicAddInt(&pdcount, _n); \ if (_n > 0 && !pdflag) \ pdflag = 1; \ } #endif /* NUMA_BASE */ #endif /* _ASM_SN_NODEPDA_H */