about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
author	Colin Cross	2013-03-13 19:16:02 -0500
committer	Colin Cross	2013-03-13 19:16:02 -0500
commit0b203ab4aacdb6e6dfb8c277aa290f0a02428e6f (patch)
tree9bab760a750d9cf4504d92603ee7fc82beeb2e38 /mm
parenta276def548828763cf5ac228adf8c1ca73f8d4d3 (diff)
parente28c3f2b514b5581e15614f7cf976131092cf4b6 (diff)
downloadkernel-common-0b203ab4aacdb6e6dfb8c277aa290f0a02428e6f.tar.gz
kernel-common-0b203ab4aacdb6e6dfb8c277aa290f0a02428e6f.tar.xz
kernel-common-0b203ab4aacdb6e6dfb8c277aa290f0a02428e6f.zip
Merge tag 'v3.0.68' into android-3.0
This is the 3.0.68 stable release.

Conflicts:
	kernel/cgroup.c

Change-Id: I067982d25e18e3a12de93a5eb6429b8829d7ca11
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c6
-rw-r--r--mm/fadvise.c18
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/memory.c18
-rw-r--r--mm/mmu_notifier.c147
-rw-r--r--mm/page_alloc.c17
-rw-r--r--mm/shmem.c10
7 files changed, 140 insertions, 79 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 8ea7308601b..b4689f8117b 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -714,14 +714,12 @@ static int compact_node(int nid)
714} 714}
715 715
716/* Compact all nodes in the system */ 716/* Compact all nodes in the system */
717static int compact_nodes(void) 717static void compact_nodes(void)
718{ 718{
719 int nid; 719 int nid;
720 720
721 for_each_online_node(nid) 721 for_each_online_node(nid)
722 compact_node(nid); 722 compact_node(nid);
723
724 return COMPACT_COMPLETE;
725} 723}
726 724
727/* The written value is actually unused, all memory is compacted */ 725/* The written value is actually unused, all memory is compacted */
@@ -732,7 +730,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
732 void __user *buffer, size_t *length, loff_t *ppos) 730 void __user *buffer, size_t *length, loff_t *ppos)
733{ 731{
734 if (write) 732 if (write)
735 return compact_nodes(); 733 compact_nodes();
736 734
737 return 0; 735 return 0;
738} 736}
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 8d723c9e8b7..35b2bb089a1 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -17,6 +17,7 @@
17#include <linux/fadvise.h> 17#include <linux/fadvise.h>
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/syscalls.h> 19#include <linux/syscalls.h>
20#include <linux/swap.h>
20 21
21#include <asm/unistd.h> 22#include <asm/unistd.h>
22 23
@@ -123,9 +124,22 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
123 start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT; 124 start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
124 end_index = (endbyte >> PAGE_CACHE_SHIFT); 125 end_index = (endbyte >> PAGE_CACHE_SHIFT);
125 126
126 if (end_index >= start_index) 127 if (end_index >= start_index) {
127 invalidate_mapping_pages(mapping, start_index, 128 unsigned long count = invalidate_mapping_pages(mapping,
129 start_index, end_index);
130
131 /*
132 * If fewer pages were invalidated than expected then
133 * it is possible that some of the pages were on
134 * a per-cpu pagevec for a remote CPU. Drain all
135 * pagevecs and try again.
136 */
137 if (count < (end_index - start_index + 1)) {
138 lru_add_drain_all();
139 invalidate_mapping_pages(mapping, start_index,
128 end_index); 140 end_index);
141 }
142 }
129 break; 143 break;
130 default: 144 default:
131 ret = -EINVAL; 145 ret = -EINVAL;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8cc11dda6a7..a9ab45ec7d5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -920,6 +920,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
920 count_vm_event(THP_FAULT_FALLBACK); 920 count_vm_event(THP_FAULT_FALLBACK);
921 ret = do_huge_pmd_wp_page_fallback(mm, vma, address, 921 ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
922 pmd, orig_pmd, page, haddr); 922 pmd, orig_pmd, page, haddr);
923 if (ret & VM_FAULT_OOM)
924 split_huge_page(page);
923 put_page(page); 925 put_page(page);
924 goto out; 926 goto out;
925 } 927 }
@@ -927,6 +929,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
927 929
928 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 930 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
929 put_page(new_page); 931 put_page(new_page);
932 split_huge_page(page);
930 put_page(page); 933 put_page(page);
931 ret |= VM_FAULT_OOM; 934 ret |= VM_FAULT_OOM;
932 goto out; 935 goto out;
diff --git a/mm/memory.c b/mm/memory.c
index 7292acb92f9..4da0f8ad142 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3470,6 +3470,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3470 if (unlikely(is_vm_hugetlb_page(vma))) 3470 if (unlikely(is_vm_hugetlb_page(vma)))
3471 return hugetlb_fault(mm, vma, address, flags); 3471 return hugetlb_fault(mm, vma, address, flags);
3472 3472
3473retry:
3473 pgd = pgd_offset(mm, address); 3474 pgd = pgd_offset(mm, address);
3474 pud = pud_alloc(mm, pgd, address); 3475 pud = pud_alloc(mm, pgd, address);
3475 if (!pud) 3476 if (!pud)
@@ -3483,13 +3484,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3483 pmd, flags); 3484 pmd, flags);
3484 } else { 3485 } else {
3485 pmd_t orig_pmd = *pmd; 3486 pmd_t orig_pmd = *pmd;
3487 int ret;
3488
3486 barrier(); 3489 barrier();
3487 if (pmd_trans_huge(orig_pmd)) { 3490 if (pmd_trans_huge(orig_pmd)) {
3488 if (flags & FAULT_FLAG_WRITE && 3491 if (flags & FAULT_FLAG_WRITE &&
3489 !pmd_write(orig_pmd) && 3492 !pmd_write(orig_pmd) &&
3490 !pmd_trans_splitting(orig_pmd)) 3493 !pmd_trans_splitting(orig_pmd)) {
3491 return do_huge_pmd_wp_page(mm, vma, address, 3494 ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
3492 pmd, orig_pmd); 3495 orig_pmd);
3496 /*
3497 * If COW results in an oom, the huge pmd will
3498 * have been split, so retry the fault on the
3499 * pte for a smaller charge.
3500 */
3501 if (unlikely(ret & VM_FAULT_OOM))
3502 goto retry;
3503 return ret;
3504 }
3493 return 0; 3505 return 0;
3494 } 3506 }
3495 } 3507 }
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 71c78115c45..88fa54d158e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -14,10 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/err.h> 16#include <linux/err.h>
17#include <linux/srcu.h>
17#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20 21
22/* global SRCU for all MMs */
23static struct srcu_struct srcu;
24
21/* 25/*
22 * This function can't run concurrently against mmu_notifier_register 26 * This function can't run concurrently against mmu_notifier_register
23 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap 27 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -25,58 +29,61 @@
25 * in parallel despite there being no task using this mm any more, 29 * in parallel despite there being no task using this mm any more,
26 * through the vmas outside of the exit_mmap context, such as with 30 * through the vmas outside of the exit_mmap context, such as with
27 * vmtruncate. This serializes against mmu_notifier_unregister with 31 * vmtruncate. This serializes against mmu_notifier_unregister with
28 * the mmu_notifier_mm->lock in addition to RCU and it serializes 32 * the mmu_notifier_mm->lock in addition to SRCU and it serializes
29 * against the other mmu notifiers with RCU. struct mmu_notifier_mm 33 * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
30 * can't go away from under us as exit_mmap holds an mm_count pin 34 * can't go away from under us as exit_mmap holds an mm_count pin
31 * itself. 35 * itself.
32 */ 36 */
33void __mmu_notifier_release(struct mm_struct *mm) 37void __mmu_notifier_release(struct mm_struct *mm)
34{ 38{
35 struct mmu_notifier *mn; 39 struct mmu_notifier *mn;
36 struct hlist_node *n; 40 int id;
37 41
38 /* 42 /*
39 * RCU here will block mmu_notifier_unregister until 43 * srcu_read_lock() here will block synchronize_srcu() in
40 * ->release returns. 44 * mmu_notifier_unregister() until all registered
45 * ->release() callouts this function makes have
46 * returned.
41 */ 47 */
42 rcu_read_lock(); 48 id = srcu_read_lock(&srcu);
43 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
44 /*
45 * if ->release runs before mmu_notifier_unregister it
46 * must be handled as it's the only way for the driver
47 * to flush all existing sptes and stop the driver
48 * from establishing any more sptes before all the
49 * pages in the mm are freed.
50 */
51 if (mn->ops->release)
52 mn->ops->release(mn, mm);
53 rcu_read_unlock();
54
55 spin_lock(&mm->mmu_notifier_mm->lock); 49 spin_lock(&mm->mmu_notifier_mm->lock);
56 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { 50 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
57 mn = hlist_entry(mm->mmu_notifier_mm->list.first, 51 mn = hlist_entry(mm->mmu_notifier_mm->list.first,
58 struct mmu_notifier, 52 struct mmu_notifier,
59 hlist); 53 hlist);
54
60 /* 55 /*
61 * We arrived before mmu_notifier_unregister so 56 * Unlink. This will prevent mmu_notifier_unregister()
62 * mmu_notifier_unregister will do nothing other than 57 * from also making the ->release() callout.
63 * to wait ->release to finish and
64 * mmu_notifier_unregister to return.
65 */ 58 */
66 hlist_del_init_rcu(&mn->hlist); 59 hlist_del_init_rcu(&mn->hlist);
60 spin_unlock(&mm->mmu_notifier_mm->lock);
61
62 /*
63 * Clear sptes. (see 'release' description in mmu_notifier.h)
64 */
65 if (mn->ops->release)
66 mn->ops->release(mn, mm);
67
68 spin_lock(&mm->mmu_notifier_mm->lock);
67 } 69 }
68 spin_unlock(&mm->mmu_notifier_mm->lock); 70 spin_unlock(&mm->mmu_notifier_mm->lock);
69 71
70 /* 72 /*
71 * synchronize_rcu here prevents mmu_notifier_release to 73 * All callouts to ->release() which we have done are complete.
72 * return to exit_mmap (which would proceed freeing all pages 74 * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
73 * in the mm) until the ->release method returns, if it was 75 */
74 * invoked by mmu_notifier_unregister. 76 srcu_read_unlock(&srcu, id);
75 * 77
76 * The mmu_notifier_mm can't go away from under us because one 78 /*
77 * mm_count is hold by exit_mmap. 79 * mmu_notifier_unregister() may have unlinked a notifier and may
80 * still be calling out to it. Additionally, other notifiers
81 * may have been active via vmtruncate() et. al. Block here
82 * to ensure that all notifier callouts for this mm have been
83 * completed and the sptes are really cleaned up before returning
84 * to exit_mmap().
78 */ 85 */
79 synchronize_rcu(); 86 synchronize_srcu(&srcu);
80} 87}
81 88
82/* 89/*
@@ -89,14 +96,14 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
89{ 96{
90 struct mmu_notifier *mn; 97 struct mmu_notifier *mn;
91 struct hlist_node *n; 98 struct hlist_node *n;
92 int young = 0; 99 int young = 0, id;
93 100
94 rcu_read_lock(); 101 id = srcu_read_lock(&srcu);
95 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { 102 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
96 if (mn->ops->clear_flush_young) 103 if (mn->ops->clear_flush_young)
97 young |= mn->ops->clear_flush_young(mn, mm, address); 104 young |= mn->ops->clear_flush_young(mn, mm, address);
98 } 105 }
99 rcu_read_unlock(); 106 srcu_read_unlock(&srcu, id);
100 107
101 return young; 108 return young;
102} 109}
@@ -106,9 +113,9 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
106{ 113{
107 struct mmu_notifier *mn; 114 struct mmu_notifier *mn;
108 struct hlist_node *n; 115 struct hlist_node *n;
109 int young = 0; 116 int young = 0, id;
110 117
111 rcu_read_lock(); 118 id = srcu_read_lock(&srcu);
112 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { 119 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
113 if (mn->ops->test_young) { 120 if (mn->ops->test_young) {
114 young = mn->ops->test_young(mn, mm, address); 121 young = mn->ops->test_young(mn, mm, address);
@@ -116,7 +123,7 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
116 break; 123 break;
117 } 124 }
118 } 125 }
119 rcu_read_unlock(); 126 srcu_read_unlock(&srcu, id);
120 127
121 return young; 128 return young;
122} 129}
@@ -126,8 +133,9 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
126{ 133{
127 struct mmu_notifier *mn; 134 struct mmu_notifier *mn;
128 struct hlist_node *n; 135 struct hlist_node *n;
136 int id;
129 137
130 rcu_read_lock(); 138 id = srcu_read_lock(&srcu);
131 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { 139 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
132 if (mn->ops->change_pte) 140 if (mn->ops->change_pte)
133 mn->ops->change_pte(mn, mm, address, pte); 141 mn->ops->change_pte(mn, mm, address, pte);
@@ -138,7 +146,7 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
138 else if (mn->ops->invalidate_page) 146 else if (mn->ops->invalidate_page)
139 mn->ops->invalidate_page(mn, mm, address); 147 mn->ops->invalidate_page(mn, mm, address);
140 } 148 }
141 rcu_read_unlock(); 149 srcu_read_unlock(&srcu, id);
142} 150}
143 151
144void __mmu_notifier_invalidate_page(struct mm_struct *mm, 152void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -146,13 +154,14 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm,
146{ 154{
147 struct mmu_notifier *mn; 155 struct mmu_notifier *mn;
148 struct hlist_node *n; 156 struct hlist_node *n;
157 int id;
149 158
150 rcu_read_lock(); 159 id = srcu_read_lock(&srcu);
151 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { 160 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
152 if (mn->ops->invalidate_page) 161 if (mn->ops->invalidate_page)
153 mn->ops->invalidate_page(mn, mm, address); 162 mn->ops->invalidate_page(mn, mm, address);
154 } 163 }
155 rcu_read_unlock(); 164 srcu_read_unlock(&srcu, id);
156} 165}
157 166
158void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, 167void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
@@ -160,13 +169,14 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
160{ 169{
161 struct mmu_notifier *mn; 170 struct mmu_notifier *mn;
162 struct hlist_node *n; 171 struct hlist_node *n;
172 int id;
163 173
164 rcu_read_lock(); 174 id = srcu_read_lock(&srcu);
165 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { 175 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
166 if (mn->ops->invalidate_range_start) 176 if (mn->ops->invalidate_range_start)
167 mn->ops->invalidate_range_start(mn, mm, start, end); 177 mn->ops->invalidate_range_start(mn, mm, start, end);
168 } 178 }
169 rcu_read_unlock(); 179 srcu_read_unlock(&srcu, id);
170} 180}
171 181
172void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, 182void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -174,13 +184,14 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
174{ 184{
175 struct mmu_notifier *mn; 185 struct mmu_notifier *mn;
176 struct hlist_node *n; 186 struct hlist_node *n;
187 int id;
177 188
178 rcu_read_lock(); 189 id = srcu_read_lock(&srcu);
179 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { 190 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
180 if (mn->ops->invalidate_range_end) 191 if (mn->ops->invalidate_range_end)
181 mn->ops->invalidate_range_end(mn, mm, start, end); 192 mn->ops->invalidate_range_end(mn, mm, start, end);
182 } 193 }
183 rcu_read_unlock(); 194 srcu_read_unlock(&srcu, id);
184} 195}
185 196
186static int do_mmu_notifier_register(struct mmu_notifier *mn, 197static int do_mmu_notifier_register(struct mmu_notifier *mn,
@@ -192,6 +203,12 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
192 203
193 BUG_ON(atomic_read(&mm->mm_users) <= 0); 204 BUG_ON(atomic_read(&mm->mm_users) <= 0);
194 205
206 /*
207 * Verify that mmu_notifier_init() already run and the global srcu is
208 * initialized.
209 */
210 BUG_ON(!srcu.per_cpu_ref);
211
195 ret = -ENOMEM; 212 ret = -ENOMEM;
196 mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); 213 mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
197 if (unlikely(!mmu_notifier_mm)) 214 if (unlikely(!mmu_notifier_mm))
@@ -274,8 +291,8 @@ void __mmu_notifier_mm_destroy(struct mm_struct *mm)
274/* 291/*
275 * This releases the mm_count pin automatically and frees the mm 292 * This releases the mm_count pin automatically and frees the mm
276 * structure if it was the last user of it. It serializes against 293 * structure if it was the last user of it. It serializes against
277 * running mmu notifiers with RCU and against mmu_notifier_unregister 294 * running mmu notifiers with SRCU and against mmu_notifier_unregister
278 * with the unregister lock + RCU. All sptes must be dropped before 295 * with the unregister lock + SRCU. All sptes must be dropped before
279 * calling mmu_notifier_unregister. ->release or any other notifier 296 * calling mmu_notifier_unregister. ->release or any other notifier
280 * method may be invoked concurrently with mmu_notifier_unregister, 297 * method may be invoked concurrently with mmu_notifier_unregister,
281 * and only after mmu_notifier_unregister returned we're guaranteed 298 * and only after mmu_notifier_unregister returned we're guaranteed
@@ -285,35 +302,43 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
285{ 302{
286 BUG_ON(atomic_read(&mm->mm_count) <= 0); 303 BUG_ON(atomic_read(&mm->mm_count) <= 0);
287 304
305 spin_lock(&mm->mmu_notifier_mm->lock);
288 if (!hlist_unhashed(&mn->hlist)) { 306 if (!hlist_unhashed(&mn->hlist)) {
289 /* 307 int id;
290 * RCU here will force exit_mmap to wait ->release to finish
291 * before freeing the pages.
292 */
293 rcu_read_lock();
294 308
295 /* 309 /*
296 * exit_mmap will block in mmu_notifier_release to 310 * Ensure we synchronize up with __mmu_notifier_release().
297 * guarantee ->release is called before freeing the
298 * pages.
299 */ 311 */
312 id = srcu_read_lock(&srcu);
313
314 hlist_del_rcu(&mn->hlist);
315 spin_unlock(&mm->mmu_notifier_mm->lock);
316
300 if (mn->ops->release) 317 if (mn->ops->release)
301 mn->ops->release(mn, mm); 318 mn->ops->release(mn, mm);
302 rcu_read_unlock();
303 319
304 spin_lock(&mm->mmu_notifier_mm->lock); 320 /*
305 hlist_del_rcu(&mn->hlist); 321 * Allow __mmu_notifier_release() to complete.
322 */
323 srcu_read_unlock(&srcu, id);
324 } else
306 spin_unlock(&mm->mmu_notifier_mm->lock); 325 spin_unlock(&mm->mmu_notifier_mm->lock);
307 }
308 326
309 /* 327 /*
310 * Wait any running method to finish, of course including 328 * Wait for any running method to finish, including ->release() if it
311 * ->release if it was run by mmu_notifier_relase instead of us. 329 * was run by __mmu_notifier_release() instead of us.
312 */ 330 */
313 synchronize_rcu(); 331 synchronize_srcu(&srcu);
314 332
315 BUG_ON(atomic_read(&mm->mm_count) <= 0); 333 BUG_ON(atomic_read(&mm->mm_count) <= 0);
316 334
317 mmdrop(mm); 335 mmdrop(mm);
318} 336}
319EXPORT_SYMBOL_GPL(mmu_notifier_unregister); 337EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
338
339static int __init mmu_notifier_init(void)
340{
341 return init_srcu_struct(&srcu);
342}
343
344module_init(mmu_notifier_init);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bfe789472b4..aed2f5598d3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4287,10 +4287,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
4287 * round what is now in bits to nearest long in bits, then return it in 4287 * round what is now in bits to nearest long in bits, then return it in
4288 * bytes. 4288 * bytes.
4289 */ 4289 */
4290static unsigned long __init usemap_size(unsigned long zonesize) 4290static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
4291{ 4291{
4292 unsigned long usemapsize; 4292 unsigned long usemapsize;
4293 4293
4294 zonesize += zone_start_pfn & (pageblock_nr_pages-1);
4294 usemapsize = roundup(zonesize, pageblock_nr_pages); 4295 usemapsize = roundup(zonesize, pageblock_nr_pages);
4295 usemapsize = usemapsize >> pageblock_order; 4296 usemapsize = usemapsize >> pageblock_order;
4296 usemapsize *= NR_PAGEBLOCK_BITS; 4297 usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4300,17 +4301,19 @@ static unsigned long __init usemap_size(unsigned long zonesize)
4300} 4301}
4301 4302
4302static void __init setup_usemap(struct pglist_data *pgdat, 4303static void __init setup_usemap(struct pglist_data *pgdat,
4303 struct zone *zone, unsigned long zonesize) 4304 struct zone *zone,
4305 unsigned long zone_start_pfn,
4306 unsigned long zonesize)
4304{ 4307{
4305 unsigned long usemapsize = usemap_size(zonesize); 4308 unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
4306 zone->pageblock_flags = NULL; 4309 zone->pageblock_flags = NULL;
4307 if (usemapsize) 4310 if (usemapsize)
4308 zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, 4311 zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
4309 usemapsize); 4312 usemapsize);
4310} 4313}
4311#else 4314#else
4312static inline void setup_usemap(struct pglist_data *pgdat, 4315static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
4313 struct zone *zone, unsigned long zonesize) {} 4316 unsigned long zone_start_pfn, unsigned long zonesize) {}
4314#endif /* CONFIG_SPARSEMEM */ 4317#endif /* CONFIG_SPARSEMEM */
4315 4318
4316#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 4319#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4438,7 +4441,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4438 continue; 4441 continue;
4439 4442
4440 set_pageblock_order(pageblock_default_order()); 4443 set_pageblock_order(pageblock_default_order());
4441 setup_usemap(pgdat, zone, size); 4444 setup_usemap(pgdat, zone, zone_start_pfn, size);
4442 ret = init_currently_empty_zone(zone, zone_start_pfn, 4445 ret = init_currently_empty_zone(zone, zone_start_pfn,
4443 size, MEMMAP_EARLY); 4446 size, MEMMAP_EARLY);
4444 BUG_ON(ret); 4447 BUG_ON(ret);
@@ -5515,7 +5518,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
5515 pfn &= (PAGES_PER_SECTION-1); 5518 pfn &= (PAGES_PER_SECTION-1);
5516 return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; 5519 return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
5517#else 5520#else
5518 pfn = pfn - zone->zone_start_pfn; 5521 pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
5519 return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; 5522 return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
5520#endif /* CONFIG_SPARSEMEM */ 5523#endif /* CONFIG_SPARSEMEM */
5521} 5524}
diff --git a/mm/shmem.c b/mm/shmem.c
index 492584c6f74..bcfa97dcc0a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2505,6 +2505,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2505 unsigned long inodes; 2505 unsigned long inodes;
2506 int error = -EINVAL; 2506 int error = -EINVAL;
2507 2507
2508 config.mpol = NULL;
2508 if (shmem_parse_options(data, &config, true)) 2509 if (shmem_parse_options(data, &config, true))
2509 return error; 2510 return error;
2510 2511
@@ -2530,8 +2531,13 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2530 sbinfo->max_inodes = config.max_inodes; 2531 sbinfo->max_inodes = config.max_inodes;
2531 sbinfo->free_inodes = config.max_inodes - inodes; 2532 sbinfo->free_inodes = config.max_inodes - inodes;
2532 2533
2533 mpol_put(sbinfo->mpol); 2534 /*
2534 sbinfo->mpol = config.mpol; /* transfers initial ref */ 2535 * Preserve previous mempolicy unless mpol remount option was specified.
2536 */
2537 if (config.mpol) {
2538 mpol_put(sbinfo->mpol);
2539 sbinfo->mpol = config.mpol; /* transfers initial ref */
2540 }
2535out: 2541out:
2536 spin_unlock(&sbinfo->stat_lock); 2542 spin_unlock(&sbinfo->stat_lock);
2537 return error; 2543 return error;