aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner2013-04-09 02:33:34 -0500
committerGreg Kroah-Hartman2013-04-25 14:51:09 -0500
commit0e55be921acde0e057419d570005913252f4efb7 (patch)
treeb6540d47d244c80d2fa3636e4b1e66c9836a40e1
parent3d9203cb7216794cd49cfb2aa90e7f69976fbe6d (diff)
downloadkernel-omap-0e55be921acde0e057419d570005913252f4efb7.tar.gz
kernel-omap-0e55be921acde0e057419d570005913252f4efb7.tar.xz
kernel-omap-0e55be921acde0e057419d570005913252f4efb7.zip
kthread: Prevent unpark race which puts threads on the wrong cpu
commit f2530dc71cf0822f90bb63ea4600caaef33a66bb upstream.

The smpboot threads rely on the park/unpark mechanism which binds per cpu threads on a particular core. Though the functionality is racy:

CPU0                    CPU1                                CPU2
unpark(T)                                                   wake_up_process(T)
  clear(SHOULD_PARK)    T runs
                        leave parkme() due to !SHOULD_PARK
  bind_to(CPU2)         BUG_ON(wrong CPU)

We cannot let the tasks move themselves to the target CPU as one of those tasks is actually the migration thread itself, which requires that it starts running on the target cpu right away.

The solution to this problem is to prevent wakeups in park mode which are not from unpark(). That way we can guarantee that the association of the task to the target cpu is working correctly.

Add a new task state (TASK_PARKED) which prevents other wakeups and use this state explicitly for the unpark wakeup.

Peter noticed: Also, since the task state is visible to userspace and all the parked tasks are still in the PID space, it's a good hint in ps and friends that these tasks aren't really there for the moment.

The migration thread has another related issue.

CPU0                     CPU1
Bring up CPU2
create_thread(T)
park(T)
 wait_for_completion()
                         parkme()
                         complete()
sched_set_stop_task()
                         schedule(TASK_PARKED)

The sched_set_stop_task() call is issued while the task is on the runqueue of CPU1 and that confuses the hell out of the stop_task class on that cpu. So we need the same synchronization before sched_set_stop_task().

Reported-by: Dave Jones <davej@redhat.com>
Reported-and-tested-by: Dave Hansen <dave@sr71.net>
Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: dhillf@gmail.com
Cc: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304091635430.21884@ionos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--fs/proc/array.c1
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/trace/events/sched.h2
-rw-r--r--kernel/kthread.c52
4 files changed, 33 insertions, 27 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..be3c22f5729a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -143,6 +143,7 @@ static const char * const task_state_array[] = {
143 "x (dead)", /* 64 */ 143 "x (dead)", /* 64 */
144 "K (wakekill)", /* 128 */ 144 "K (wakekill)", /* 128 */
145 "W (waking)", /* 256 */ 145 "W (waking)", /* 256 */
146 "P (parked)", /* 512 */
146}; 147};
147 148
148static inline const char *get_task_state(struct task_struct *tsk) 149static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..7e492701142e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -163,9 +163,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
163#define TASK_DEAD 64 163#define TASK_DEAD 64
164#define TASK_WAKEKILL 128 164#define TASK_WAKEKILL 128
165#define TASK_WAKING 256 165#define TASK_WAKING 256
166#define TASK_STATE_MAX 512 166#define TASK_PARKED 512
167#define TASK_STATE_MAX 1024
167 168
168#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" 169#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
169 170
170extern char ___assert_task_state[1 - 2*!!( 171extern char ___assert_task_state[1 - 2*!!(
171 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; 172 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 5a8671e8a67f..e5586caff67a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -147,7 +147,7 @@ TRACE_EVENT(sched_switch,
147 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", 147 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
148 { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, 148 { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
149 { 16, "Z" }, { 32, "X" }, { 64, "x" }, 149 { 16, "Z" }, { 32, "X" }, { 64, "x" },
150 { 128, "W" }) : "R", 150 { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
151 __entry->prev_state & TASK_STATE_MAX ? "+" : "", 151 __entry->prev_state & TASK_STATE_MAX ? "+" : "",
152 __entry->next_comm, __entry->next_pid, __entry->next_prio) 152 __entry->next_comm, __entry->next_pid, __entry->next_prio)
153); 153);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9baf..9eb7fed0bbaa 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task)
124 124
125static void __kthread_parkme(struct kthread *self) 125static void __kthread_parkme(struct kthread *self)
126{ 126{
127 __set_current_state(TASK_INTERRUPTIBLE); 127 __set_current_state(TASK_PARKED);
128 while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { 128 while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
129 if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) 129 if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
130 complete(&self->parked); 130 complete(&self->parked);
131 schedule(); 131 schedule();
132 __set_current_state(TASK_INTERRUPTIBLE); 132 __set_current_state(TASK_PARKED);
133 } 133 }
134 clear_bit(KTHREAD_IS_PARKED, &self->flags); 134 clear_bit(KTHREAD_IS_PARKED, &self->flags);
135 __set_current_state(TASK_RUNNING); 135 __set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
256} 256}
257EXPORT_SYMBOL(kthread_create_on_node); 257EXPORT_SYMBOL(kthread_create_on_node);
258 258
259static void __kthread_bind(struct task_struct *p, unsigned int cpu) 259static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
260{ 260{
261 /* Must have done schedule() in kthread() before we set_task_cpu */
262 if (!wait_task_inactive(p, state)) {
263 WARN_ON(1);
264 return;
265 }
261 /* It's safe because the task is inactive. */ 266 /* It's safe because the task is inactive. */
262 do_set_cpus_allowed(p, cpumask_of(cpu)); 267 do_set_cpus_allowed(p, cpumask_of(cpu));
263 p->flags |= PF_THREAD_BOUND; 268 p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
274 */ 279 */
275void kthread_bind(struct task_struct *p, unsigned int cpu) 280void kthread_bind(struct task_struct *p, unsigned int cpu)
276{ 281{
277 /* Must have done schedule() in kthread() before we set_task_cpu */ 282 __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
278 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
279 WARN_ON(1);
280 return;
281 }
282 __kthread_bind(p, cpu);
283} 283}
284EXPORT_SYMBOL(kthread_bind); 284EXPORT_SYMBOL(kthread_bind);
285 285
@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
324 return NULL; 324 return NULL;
325} 325}
326 326
327static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
328{
329 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
330 /*
331 * We clear the IS_PARKED bit here as we don't wait
332 * until the task has left the park code. So if we'd
333 * park before that happens we'd see the IS_PARKED bit
334 * which might be about to be cleared.
335 */
336 if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
337 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
338 __kthread_bind(k, kthread->cpu, TASK_PARKED);
339 wake_up_state(k, TASK_PARKED);
340 }
341}
342
327/** 343/**
328 * kthread_unpark - unpark a thread created by kthread_create(). 344 * kthread_unpark - unpark a thread created by kthread_create().
329 * @k: thread created by kthread_create(). 345 * @k: thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k)
336{ 352{
337 struct kthread *kthread = task_get_live_kthread(k); 353 struct kthread *kthread = task_get_live_kthread(k);
338 354
339 if (kthread) { 355 if (kthread)
340 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 356 __kthread_unpark(k, kthread);
341 /*
342 * We clear the IS_PARKED bit here as we don't wait
343 * until the task has left the park code. So if we'd
344 * park before that happens we'd see the IS_PARKED bit
345 * which might be about to be cleared.
346 */
347 if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
348 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
349 __kthread_bind(k, kthread->cpu);
350 wake_up_process(k);
351 }
352 }
353 put_task_struct(k); 357 put_task_struct(k);
354} 358}
355 359
@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
407 trace_sched_kthread_stop(k); 411 trace_sched_kthread_stop(k);
408 if (kthread) { 412 if (kthread) {
409 set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); 413 set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
410 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 414 __kthread_unpark(k, kthread);
411 wake_up_process(k); 415 wake_up_process(k);
412 wait_for_completion(&kthread->exited); 416 wait_for_completion(&kthread->exited);
413 } 417 }