summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/sn/kernel/sn2/sn2_smp.c')
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c295
1 files changed, 295 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
new file mode 100644
index 000000000000..7af05a7ac743
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -0,0 +1,295 @@
1/*
2 * SN2 Platform specific SMP Support
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/spinlock.h>
14#include <linux/threads.h>
15#include <linux/sched.h>
16#include <linux/smp.h>
17#include <linux/interrupt.h>
18#include <linux/irq.h>
19#include <linux/mmzone.h>
20#include <linux/module.h>
21#include <linux/bitops.h>
22#include <linux/nodemask.h>
23
24#include <asm/processor.h>
25#include <asm/irq.h>
26#include <asm/sal.h>
27#include <asm/system.h>
28#include <asm/delay.h>
29#include <asm/io.h>
30#include <asm/smp.h>
31#include <asm/tlb.h>
32#include <asm/numa.h>
33#include <asm/hw_irq.h>
34#include <asm/current.h>
35#include <asm/sn/sn_cpuid.h>
36#include <asm/sn/sn_sal.h>
37#include <asm/sn/addrs.h>
38#include <asm/sn/shub_mmr.h>
39#include <asm/sn/nodepda.h>
40#include <asm/sn/rw_mmr.h>
41
42void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0,
43 volatile unsigned long *, unsigned long data1);
44
45static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
46
47static unsigned long sn2_ptc_deadlock_count;
48
49static inline unsigned long wait_piowc(void)
50{
51 volatile unsigned long *piows, zeroval;
52 unsigned long ws;
53
54 piows = pda->pio_write_status_addr;
55 zeroval = pda->pio_write_status_val;
56 do {
57 cpu_relax();
58 } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
59 return ws;
60}
61
62void sn_tlb_migrate_finish(struct mm_struct *mm)
63{
64 if (mm == current->mm)
65 flush_tlb_mm(mm);
66}
67
68/**
69 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
70 * @start: start of virtual address range
71 * @end: end of virtual address range
72 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
73 *
74 * Purges the translation caches of all processors of the given virtual address
75 * range.
76 *
77 * Note:
78 * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
79 * - cpu_vm_mask is converted into a nodemask of the nodes containing the
80 * cpus in cpu_vm_mask.
81 * - if only one bit is set in cpu_vm_mask & it is the current cpu,
82 * then only the local TLB needs to be flushed. This flushing can be done
83 * using ptc.l. This is the common case & avoids the global spinlock.
84 * - if multiple cpus have loaded the context, then flushing has to be
85 * done with ptc.g/MMRs under protection of the global ptc_lock.
86 */
87
88void
89sn2_global_tlb_purge(unsigned long start, unsigned long end,
90 unsigned long nbits)
91{
92 int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
93 volatile unsigned long *ptc0, *ptc1;
94 unsigned long flags = 0, data0 = 0, data1 = 0;
95 struct mm_struct *mm = current->active_mm;
96 short nasids[MAX_NUMNODES], nix;
97 nodemask_t nodes_flushed;
98
99 nodes_clear(nodes_flushed);
100 i = 0;
101
102 for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
103 cnode = cpu_to_node(cpu);
104 node_set(cnode, nodes_flushed);
105 lcpu = cpu;
106 i++;
107 }
108
109 preempt_disable();
110
111 if (likely(i == 1 && lcpu == smp_processor_id())) {
112 do {
113 ia64_ptcl(start, nbits << 2);
114 start += (1UL << nbits);
115 } while (start < end);
116 ia64_srlz_i();
117 preempt_enable();
118 return;
119 }
120
121 if (atomic_read(&mm->mm_users) == 1) {
122 flush_tlb_mm(mm);
123 preempt_enable();
124 return;
125 }
126
127 nix = 0;
128 for_each_node_mask(cnode, nodes_flushed)
129 nasids[nix++] = cnodeid_to_nasid(cnode);
130
131 shub1 = is_shub1();
132 if (shub1) {
133 data0 = (1UL << SH1_PTC_0_A_SHFT) |
134 (nbits << SH1_PTC_0_PS_SHFT) |
135 ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) |
136 (1UL << SH1_PTC_0_START_SHFT);
137 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
138 ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
139 } else {
140 data0 = (1UL << SH2_PTC_A_SHFT) |
141 (nbits << SH2_PTC_PS_SHFT) |
142 (1UL << SH2_PTC_START_SHFT);
143 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
144 ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) );
145 ptc1 = NULL;
146 }
147
148
149 mynasid = get_nasid();
150
151 spin_lock_irqsave(&sn2_global_ptc_lock, flags);
152
153 do {
154 if (shub1)
155 data1 = start | (1UL << SH1_PTC_1_START_SHFT);
156 else
157 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
158 for (i = 0; i < nix; i++) {
159 nasid = nasids[i];
160 if (unlikely(nasid == mynasid)) {
161 ia64_ptcga(start, nbits << 2);
162 ia64_srlz_i();
163 } else {
164 ptc0 = CHANGE_NASID(nasid, ptc0);
165 if (ptc1)
166 ptc1 = CHANGE_NASID(nasid, ptc1);
167 pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
168 data1);
169 flushed = 1;
170 }
171 }
172
173 if (flushed
174 && (wait_piowc() &
175 SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) {
176 sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1);
177 }
178
179 start += (1UL << nbits);
180
181 } while (start < end);
182
183 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
184
185 preempt_enable();
186}
187
188/*
189 * sn2_ptc_deadlock_recovery
190 *
191 * Recover from PTC deadlocks conditions. Recovery requires stepping thru each
192 * TLB flush transaction. The recovery sequence is somewhat tricky & is
193 * coded in assembly language.
194 */
195void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0,
196 volatile unsigned long *ptc1, unsigned long data1)
197{
198 extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
199 volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
200 int cnode, mycnode, nasid;
201 volatile unsigned long *piows;
202 volatile unsigned long zeroval;
203
204 sn2_ptc_deadlock_count++;
205
206 piows = pda->pio_write_status_addr;
207 zeroval = pda->pio_write_status_val;
208
209 mycnode = numa_node_id();
210
211 for_each_online_node(cnode) {
212 if (is_headless_node(cnode) || cnode == mycnode)
213 continue;
214 nasid = cnodeid_to_nasid(cnode);
215 ptc0 = CHANGE_NASID(nasid, ptc0);
216 if (ptc1)
217 ptc1 = CHANGE_NASID(nasid, ptc1);
218 sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
219 }
220}
221
222/**
223 * sn_send_IPI_phys - send an IPI to a Nasid and slice
224 * @nasid: nasid to receive the interrupt (may be outside partition)
225 * @physid: physical cpuid to receive the interrupt.
226 * @vector: command to send
227 * @delivery_mode: delivery mechanism
228 *
229 * Sends an IPI (interprocessor interrupt) to the processor specified by
230 * @physid
231 *
232 * @delivery_mode can be one of the following
233 *
234 * %IA64_IPI_DM_INT - pend an interrupt
235 * %IA64_IPI_DM_PMI - pend a PMI
236 * %IA64_IPI_DM_NMI - pend an NMI
237 * %IA64_IPI_DM_INIT - pend an INIT interrupt
238 */
239void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
240{
241 long val;
242 unsigned long flags = 0;
243 volatile long *p;
244
245 p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
246 val = (1UL << SH_IPI_INT_SEND_SHFT) |
247 (physid << SH_IPI_INT_PID_SHFT) |
248 ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
249 ((long)vector << SH_IPI_INT_IDX_SHFT) |
250 (0x000feeUL << SH_IPI_INT_BASE_SHFT);
251
252 mb();
253 if (enable_shub_wars_1_1()) {
254 spin_lock_irqsave(&sn2_global_ptc_lock, flags);
255 }
256 pio_phys_write_mmr(p, val);
257 if (enable_shub_wars_1_1()) {
258 wait_piowc();
259 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
260 }
261
262}
263
264EXPORT_SYMBOL(sn_send_IPI_phys);
265
266/**
267 * sn2_send_IPI - send an IPI to a processor
268 * @cpuid: target of the IPI
269 * @vector: command to send
270 * @delivery_mode: delivery mechanism
271 * @redirect: redirect the IPI?
272 *
273 * Sends an IPI (InterProcessor Interrupt) to the processor specified by
274 * @cpuid. @vector specifies the command to send, while @delivery_mode can
275 * be one of the following
276 *
277 * %IA64_IPI_DM_INT - pend an interrupt
278 * %IA64_IPI_DM_PMI - pend a PMI
279 * %IA64_IPI_DM_NMI - pend an NMI
280 * %IA64_IPI_DM_INIT - pend an INIT interrupt
281 */
282void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
283{
284 long physid;
285 int nasid;
286
287 physid = cpu_physical_id(cpuid);
288 nasid = cpuid_to_nasid(cpuid);
289
290 /* the following is used only when starting cpus at boot time */
291 if (unlikely(nasid == -1))
292 ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
293
294 sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
295}