Diffstat (limited to 'mm/mmu_notifier.c')
-rw-r--r--   mm/mmu_notifier.c   147
1 file changed, 86 insertions(+), 61 deletions(-)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 71c78115c45..88fa54d158e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -14,10 +14,14 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/err.h>
+#include <linux/srcu.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 
+/* global SRCU for all MMs */
+static struct srcu_struct srcu;
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -25,58 +29,61 @@
  * in parallel despite there being no task using this mm any more,
  * through the vmas outside of the exit_mmap context, such as with
  * vmtruncate. This serializes against mmu_notifier_unregister with
- * the mmu_notifier_mm->lock in addition to RCU and it serializes
- * against the other mmu notifiers with RCU. struct mmu_notifier_mm
+ * the mmu_notifier_mm->lock in addition to SRCU and it serializes
+ * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
  * can't go away from under us as exit_mmap holds an mm_count pin
  * itself.
  */
 void __mmu_notifier_release(struct mm_struct *mm)
 {
         struct mmu_notifier *mn;
-        struct hlist_node *n;
+        int id;
 
         /*
-         * RCU here will block mmu_notifier_unregister until
-         * ->release returns.
+         * srcu_read_lock() here will block synchronize_srcu() in
+         * mmu_notifier_unregister() until all registered
+         * ->release() callouts this function makes have
+         * returned.
          */
-        rcu_read_lock();
-        hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
-                /*
-                 * if ->release runs before mmu_notifier_unregister it
-                 * must be handled as it's the only way for the driver
-                 * to flush all existing sptes and stop the driver
-                 * from establishing any more sptes before all the
-                 * pages in the mm are freed.
-                 */
-                if (mn->ops->release)
-                        mn->ops->release(mn, mm);
-        rcu_read_unlock();
-
+        id = srcu_read_lock(&srcu);
         spin_lock(&mm->mmu_notifier_mm->lock);
         while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
                 mn = hlist_entry(mm->mmu_notifier_mm->list.first,
                                  struct mmu_notifier,
                                  hlist);
+
                 /*
-                 * We arrived before mmu_notifier_unregister so
-                 * mmu_notifier_unregister will do nothing other than
-                 * to wait ->release to finish and
-                 * mmu_notifier_unregister to return.
+                 * Unlink. This will prevent mmu_notifier_unregister()
+                 * from also making the ->release() callout.
                  */
                 hlist_del_init_rcu(&mn->hlist);
+                spin_unlock(&mm->mmu_notifier_mm->lock);
+
+                /*
+                 * Clear sptes. (see 'release' description in mmu_notifier.h)
+                 */
+                if (mn->ops->release)
+                        mn->ops->release(mn, mm);
+
+                spin_lock(&mm->mmu_notifier_mm->lock);
         }
         spin_unlock(&mm->mmu_notifier_mm->lock);
 
         /*
-         * synchronize_rcu here prevents mmu_notifier_release to
-         * return to exit_mmap (which would proceed freeing all pages
-         * in the mm) until the ->release method returns, if it was
-         * invoked by mmu_notifier_unregister.
-         *
-         * The mmu_notifier_mm can't go away from under us because one
-         * mm_count is hold by exit_mmap.
+         * All callouts to ->release() which we have done are complete.
+         * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
+         */
+        srcu_read_unlock(&srcu, id);
+
+        /*
+         * mmu_notifier_unregister() may have unlinked a notifier and may
+         * still be calling out to it. Additionally, other notifiers
+         * may have been active via vmtruncate() et. al. Block here
+         * to ensure that all notifier callouts for this mm have been
+         * completed and the sptes are really cleaned up before returning
+         * to exit_mmap().
          */
-        synchronize_rcu();
+        synchronize_srcu(&srcu);
 }
 
 /*
@@ -89,14 +96,14 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 {
         struct mmu_notifier *mn;
         struct hlist_node *n;
-        int young = 0;
+        int young = 0, id;
 
-        rcu_read_lock();
+        id = srcu_read_lock(&srcu);
         hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
                 if (mn->ops->clear_flush_young)
                         young |= mn->ops->clear_flush_young(mn, mm, address);
         }
-        rcu_read_unlock();
+        srcu_read_unlock(&srcu, id);
 
         return young;
 }
@@ -106,9 +113,9 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
 {
         struct mmu_notifier *mn;
         struct hlist_node *n;
-        int young = 0;
+        int young = 0, id;
 
-        rcu_read_lock();
+        id = srcu_read_lock(&srcu);
         hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
                 if (mn->ops->test_young) {
                         young = mn->ops->test_young(mn, mm, address);
@@ -116,7 +123,7 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
                                 break;
                 }
         }
-        rcu_read_unlock();
+        srcu_read_unlock(&srcu, id);
 
         return young;
 }
@@ -126,8 +133,9 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 {
         struct mmu_notifier *mn;
         struct hlist_node *n;
+        int id;
 
-        rcu_read_lock();
+        id = srcu_read_lock(&srcu);
         hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
                 if (mn->ops->change_pte)
                         mn->ops->change_pte(mn, mm, address, pte);
@@ -138,7 +146,7 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
                 else if (mn->ops->invalidate_page)
                         mn->ops->invalidate_page(mn, mm, address);
         }
-        rcu_read_unlock();
+        srcu_read_unlock(&srcu, id);
 }
 
 void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -146,13 +154,14 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm,
 {
         struct mmu_notifier *mn;
         struct hlist_node *n;
+        int id;
 
-        rcu_read_lock();
+        id = srcu_read_lock(&srcu);
         hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
                 if (mn->ops->invalidate_page)
                         mn->ops->invalidate_page(mn, mm, address);
         }
-        rcu_read_unlock();
+        srcu_read_unlock(&srcu, id);
 }
 
 void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
@@ -160,13 +169,14 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 {
         struct mmu_notifier *mn;
         struct hlist_node *n;
+        int id;
 
-        rcu_read_lock();
+        id = srcu_read_lock(&srcu);
         hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
                 if (mn->ops->invalidate_range_start)
                         mn->ops->invalidate_range_start(mn, mm, start, end);
         }
-        rcu_read_unlock();
+        srcu_read_unlock(&srcu, id);
 }
 
 void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -174,13 +184,14 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 {
         struct mmu_notifier *mn;
         struct hlist_node *n;
+        int id;
 
-        rcu_read_lock();
+        id = srcu_read_lock(&srcu);
         hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
                 if (mn->ops->invalidate_range_end)
                         mn->ops->invalidate_range_end(mn, mm, start, end);
         }
-        rcu_read_unlock();
+        srcu_read_unlock(&srcu, id);
 }
 
 static int do_mmu_notifier_register(struct mmu_notifier *mn,
@@ -192,6 +203,12 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 
         BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
+        /*
+         * Verify that mmu_notifier_init() already run and the global srcu is
+         * initialized.
+         */
+        BUG_ON(!srcu.per_cpu_ref);
+
         ret = -ENOMEM;
         mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
         if (unlikely(!mmu_notifier_mm))
@@ -274,8 +291,8 @@ void __mmu_notifier_mm_destroy(struct mm_struct *mm)
 /*
  * This releases the mm_count pin automatically and frees the mm
  * structure if it was the last user of it. It serializes against
- * running mmu notifiers with RCU and against mmu_notifier_unregister
- * with the unregister lock + RCU. All sptes must be dropped before
+ * running mmu notifiers with SRCU and against mmu_notifier_unregister
+ * with the unregister lock + SRCU. All sptes must be dropped before
  * calling mmu_notifier_unregister. ->release or any other notifier
  * method may be invoked concurrently with mmu_notifier_unregister,
  * and only after mmu_notifier_unregister returned we're guaranteed
@@ -285,35 +302,43 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
         BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
+        spin_lock(&mm->mmu_notifier_mm->lock);
         if (!hlist_unhashed(&mn->hlist)) {
-                /*
-                 * RCU here will force exit_mmap to wait ->release to finish
-                 * before freeing the pages.
-                 */
-                rcu_read_lock();
+                int id;
 
                 /*
-                 * exit_mmap will block in mmu_notifier_release to
-                 * guarantee ->release is called before freeing the
-                 * pages.
+                 * Ensure we synchronize up with __mmu_notifier_release().
                  */
+                id = srcu_read_lock(&srcu);
+
+                hlist_del_rcu(&mn->hlist);
+                spin_unlock(&mm->mmu_notifier_mm->lock);
+
                 if (mn->ops->release)
                         mn->ops->release(mn, mm);
-                rcu_read_unlock();
 
-                spin_lock(&mm->mmu_notifier_mm->lock);
-                hlist_del_rcu(&mn->hlist);
+                /*
+                 * Allow __mmu_notifier_release() to complete.
+                 */
+                srcu_read_unlock(&srcu, id);
+        } else
                 spin_unlock(&mm->mmu_notifier_mm->lock);
-        }
 
         /*
-         * Wait any running method to finish, of course including
-         * ->release if it was run by mmu_notifier_relase instead of us.
+         * Wait for any running method to finish, including ->release() if it
+         * was run by __mmu_notifier_release() instead of us.
          */
-        synchronize_rcu();
+        synchronize_srcu(&srcu);
 
         BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
         mmdrop(mm);
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
+
+static int __init mmu_notifier_init(void)
+{
+        return init_srcu_struct(&srcu);
+}
+
+module_init(mmu_notifier_init);
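
For reference, the pattern this patch adopts is the standard SRCU read-side/writer pairing: each notifier walk wraps its (possibly sleeping) callouts in srcu_read_lock()/srcu_read_unlock() against one global srcu_struct, and the teardown paths call synchronize_srcu() to wait for every such reader to drain after the notifier has been unlinked. A minimal, self-contained sketch of that pairing follows; it is illustrative only, and the names example_srcu, reader_walk and writer_teardown are hypothetical, not part of this patch.

#include <linux/module.h>
#include <linux/srcu.h>

/* Hypothetical global SRCU domain, mirroring the 'srcu' added by this patch. */
static struct srcu_struct example_srcu;

static void reader_walk(void)
{
        int idx;

        /* Read side: unlike rcu_read_lock(), the critical section may sleep. */
        idx = srcu_read_lock(&example_srcu);
        /* ... walk an RCU-protected list and make sleepable callouts ... */
        srcu_read_unlock(&example_srcu, idx);
}

static void writer_teardown(void)
{
        /*
         * Writer side: after unlinking an element (e.g. with hlist_del_rcu()),
         * wait for every srcu_read_lock() section that might still see it.
         */
        synchronize_srcu(&example_srcu);
}

static int __init example_init(void)
{
        /* The SRCU domain must be initialized before its first use. */
        return init_srcu_struct(&example_srcu);
}

static void __exit example_exit(void)
{
        cleanup_srcu_struct(&example_srcu);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");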