path: root/mm
author    Praneeth Bajjuri    2017-08-28 18:03:19 -0500
committer Praneeth Bajjuri    2017-08-28 18:03:19 -0500
commit    dda8acaf6f2d9bddce62c305edf6b3071f04c136 (patch)
tree      612f8b7b18ff3a84184ddc0ce456b9b0287cace6 /mm
parent    b22bbe0c5fa4fd5eb4609108a750b28a744a643e (diff)
parent    eabbcea7629d5f2ec91568f7bd104536614107db (diff)
Merge branch 'p-ti-lsk-android-linux-4.4.y' of git://git.omapzoom.org/kernel/omap into 6AM.1.3-rvc-video
* 'p-ti-lsk-android-linux-4.4.y' of git://git.omapzoom.org/kernel/omap: (2048 commits)
  ARM: dts: dra7: Remove deprecated PCI compatible string
  ARM: dts: dra76-evm: Enable x2 PCIe lanes
  ARM: dts: DRA72x: Use PCIe compatible specific to dra72
  ARM: dts: DRA74x: Use PCIe compatible specific to dra74
  ARM: dts: dra7: Add properties to enable PCIe x2 lane mode
  PCI: dwc: pci-dra7xx: Enable x2 mode support
  PCI: dwc: dra7xx: Add support for SoC specific compatible strings
  dt-bindings: PCI: dra7xx: Add properties to enable x2 lane in dra7
  dt-bindings: PCI: dra7xx: Add SoC specific compatible strings
  ARM: dts: dra7-evm: Move pcie RC node to common file
  ARM: dts: dra76-evm: add higher speed MMC/SD modes
  Linux 4.4.84
  usb: qmi_wwan: add D-Link DWM-222 device ID
  usb: optimize acpi companion search for usb port devices
  perf/x86: Fix LBR related crashes on Intel Atom
  pids: make task_tgid_nr_ns() safe
  Sanitize 'move_pages()' permission checks
  irqchip/atmel-aic: Fix unbalanced refcount in aic_common_rtc_irq_fixup()
  irqchip/atmel-aic: Fix unbalanced of_node_put() in aic_common_irq_fixup()
  x86/asm/64: Clear AC on NMI entries
  ...

Signed-off-by: Praneeth Bajjuri <praneeth@ti.com>

Conflicts:
	arch/arm/boot/dts/Makefile
	drivers/gpu/drm/omapdrm/dss/dispc.c
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c     |   9
-rw-r--r--  mm/filemap.c         |  12
-rw-r--r--  mm/gup.c             |   5
-rw-r--r--  mm/huge_memory.c     |  18
-rw-r--r--  mm/hugetlb.c         |   6
-rw-r--r--  mm/internal.h        |   5
-rw-r--r--  mm/kasan/report.c    |   3
-rw-r--r--  mm/list_lru.c        |  14
-rw-r--r--  mm/memblock.c        | 111
-rw-r--r--  mm/memcontrol.c      |  40
-rw-r--r--  mm/memory-failure.c  |  13
-rw-r--r--  mm/memory.c          |  39
-rw-r--r--  mm/memory_hotplug.c  |  28
-rw-r--r--  mm/mempolicy.c       |  27
-rw-r--r--  mm/mempool.c         |   2
-rw-r--r--  mm/migrate.c         |  11
-rw-r--r--  mm/mlock.c           |   5
-rw-r--r--  mm/mmap.c            | 160
-rw-r--r--  mm/mprotect.c        |   1
-rw-r--r--  mm/mremap.c          |   1
-rw-r--r--  mm/page_alloc.c      |  39
-rw-r--r--  mm/percpu.c          |   5
-rw-r--r--  mm/rmap.c            |  36
-rw-r--r--  mm/slab.c            |  98
-rw-r--r--  mm/slub.c            |   6
-rw-r--r--  mm/swap_cgroup.c     |   5
-rw-r--r--  mm/truncate.c        |   2
-rw-r--r--  mm/vmpressure.c      |  10
-rw-r--r--  mm/vmscan.c          |   2
-rw-r--r--  mm/zswap.c           |  30
30 files changed, 487 insertions, 256 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 9ef80bf441b3..a988d4ef39da 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -757,15 +757,20 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	if (!bdi->wb_congested)
 		return -ENOMEM;
 
+	atomic_set(&bdi->wb_congested->refcnt, 1);
+
 	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (err) {
-		kfree(bdi->wb_congested);
+		wb_congested_put(bdi->wb_congested);
 		return err;
 	}
 	return 0;
 }
 
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
+static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+{
+	wb_congested_put(bdi->wb_congested);
+}
 
 #endif /* CONFIG_CGROUP_WRITEBACK */
 
diff --git a/mm/filemap.c b/mm/filemap.c
index c588d1222b2a..69f75c77c098 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -865,9 +865,12 @@ void page_endio(struct page *page, int rw, int err)
 		unlock_page(page);
 	} else { /* rw == WRITE */
 		if (err) {
+			struct address_space *mapping;
+
 			SetPageError(page);
-			if (page->mapping)
-				mapping_set_error(page->mapping, err);
+			mapping = page_mapping(page);
+			if (mapping)
+				mapping_set_error(mapping, err);
 		}
 		end_page_writeback(page);
 	}
@@ -1559,6 +1562,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 
 		cond_resched();
 find_page:
+		if (fatal_signal_pending(current)) {
+			error = -EINTR;
+			goto out;
+		}
+
 		page = find_get_page(mapping, index);
 		if (!page) {
 			page_cache_sync_readahead(mapping,
diff --git a/mm/gup.c b/mm/gup.c
index 4b0b7e7d1136..b599526db9f7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -312,11 +312,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
-	/* For mm_populate(), just skip the stack guard page. */
-	if ((*flags & FOLL_POPULATE) &&
-			(stack_guard_page_start(vma, address) ||
-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
-		return -ENOENT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (nonblocking)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 530e6427f823..6c6f5ccfcda1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1269,6 +1269,16 @@ out_unlock:
 	return ret;
 }
 
+/*
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+{
+	return pmd_write(pmd) ||
+	       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+}
+
 struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 				   unsigned long addr,
 				   pmd_t *pmd,
@@ -1279,7 +1289,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
-	if (flags & FOLL_WRITE && !pmd_write(*pmd))
+	if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
 		goto out;
 
 	/* Avoid dumping huge zero page */
@@ -1353,8 +1363,11 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	if (unlikely(pmd_trans_migrating(*pmdp))) {
 		page = pmd_page(*pmdp);
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
+		put_page(page);
 		goto out;
 	}
 
@@ -1386,8 +1399,11 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
+		put_page(page);
 		page_nid = -1;
 		goto out;
 	}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea11123a9249..7294301d8495 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4362,6 +4362,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
+	pte_t pte;
 retry:
 	ptl = pmd_lockptr(mm, pmd);
 	spin_lock(ptl);
@@ -4371,12 +4372,13 @@ retry:
 	 */
 	if (!pmd_huge(*pmd))
 		goto out;
-	if (pmd_present(*pmd)) {
+	pte = huge_ptep_get((pte_t *)pmd);
+	if (pte_present(pte)) {
 		page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
 		if (flags & FOLL_GET)
 			get_page(page);
 	} else {
-		if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+		if (is_hugetlb_entry_migration(pte)) {
 			spin_unlock(ptl);
 			__migration_entry_wait(mm, (pte_t *)pmd, ptl);
 			goto retry;
diff --git a/mm/internal.h b/mm/internal.h
index 6979b2bd3227..f63f4393d633 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -453,6 +453,7 @@ struct tlbflush_unmap_batch;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
+void flush_tlb_batched_pending(struct mm_struct *mm);
 #else
 static inline void try_to_unmap_flush(void)
 {
@@ -460,6 +461,8 @@ static inline void try_to_unmap_flush(void)
 static inline void try_to_unmap_flush_dirty(void)
 {
 }
-
+static inline void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 #endif /* __MM_INTERNAL_H */
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 12f222d0224b..b4e31f78ae69 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/printk.h>
@@ -251,6 +252,8 @@ void kasan_report(unsigned long addr, size_t size,
 	if (likely(!kasan_report_enabled()))
 		return;
 
+	disable_trace_on_warning();
+
 	info.access_addr = (void *)addr;
 	info.access_size = size;
 	info.is_write = is_write;
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 5d8dffd5b57c..786176b1a0ee 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_add_tail(item, &l->list);
 		l->nr_items++;
+		nlru->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_del_init(item);
 		l->nr_items--;
+		nlru->nr_items--;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one);
 
 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
 {
-	long count = 0;
-	int memcg_idx;
+	struct list_lru_node *nlru;
 
-	count += __list_lru_count_one(lru, nid, -1);
-	if (list_lru_memcg_aware(lru)) {
-		for_each_memcg_cache_index(memcg_idx)
-			count += __list_lru_count_one(lru, nid, memcg_idx);
-	}
-	return count;
+	nlru = &lru->node[nid];
+	return nlru->nr_items;
 }
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
@@ -226,6 +223,7 @@ restart:
 			assert_spin_locked(&nlru->lock);
 		case LRU_REMOVED:
 			isolated++;
+			nlru->nr_items--;
 			/*
 			 * If the lru lock has been dropped, our list
 			 * traversal is now invalid and so we have to
diff --git a/mm/memblock.c b/mm/memblock.c
index c3b7ebcc22a6..007b6fc1da9b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -835,6 +835,18 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
 }
 
 /**
+ * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
+}
+
+/**
  * __next_reserved_mem_region - next function for for_each_reserved_region()
  * @idx: pointer to u64 loop variable
  * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
@@ -1486,15 +1498,16 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
 	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
 }
 
-void __init memblock_enforce_memory_limit(phys_addr_t limit)
+static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
 {
 	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
 	struct memblock_region *r;
 
-	if (!limit)
-		return;
-
-	/* find out max address */
+	/*
+	 * translate the memory @limit size into the max address within one of
+	 * the memory memblock regions, if the @limit exceeds the total size
+	 * of those regions, max_addr will keep original value ULLONG_MAX
+	 */
 	for_each_memblock(memory, r) {
 		if (limit <= r->size) {
 			max_addr = r->base + limit;
@@ -1503,6 +1516,22 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
 		limit -= r->size;
 	}
 
+	return max_addr;
+}
+
+void __init memblock_enforce_memory_limit(phys_addr_t limit)
+{
+	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
+
+	if (!limit)
+		return;
+
+	max_addr = __find_max_addr(limit);
+
+	/* @limit exceeds the total size of the memory, do nothing */
+	if (max_addr == (phys_addr_t)ULLONG_MAX)
+		return;
+
 	/* truncate both memory and reserved regions */
 	memblock_remove_range(&memblock.memory, max_addr,
 			      (phys_addr_t)ULLONG_MAX);
@@ -1510,6 +1539,50 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
 			      (phys_addr_t)ULLONG_MAX);
 }
 
+void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
+{
+	int start_rgn, end_rgn;
+	int i, ret;
+
+	if (!size)
+		return;
+
+	ret = memblock_isolate_range(&memblock.memory, base, size,
+				     &start_rgn, &end_rgn);
+	if (ret)
+		return;
+
+	/* remove all the MAP regions */
+	for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	for (i = start_rgn - 1; i >= 0; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	/* truncate the reserved regions */
+	memblock_remove_range(&memblock.reserved, 0, base);
+	memblock_remove_range(&memblock.reserved,
+			base + size, (phys_addr_t)ULLONG_MAX);
+}
+
+void __init memblock_mem_limit_remove_map(phys_addr_t limit)
+{
+	phys_addr_t max_addr;
+
+	if (!limit)
+		return;
+
+	max_addr = __find_max_addr(limit);
+
+	/* @limit exceeds the total size of the memory, do nothing */
+	if (max_addr == (phys_addr_t)ULLONG_MAX)
+		return;
+
+	memblock_cap_memory_range(0, max_addr);
+}
+
 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
 {
 	unsigned int left = 0, right = type->cnt;
@@ -1528,12 +1601,12 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
 	return -1;
 }
 
-int __init memblock_is_reserved(phys_addr_t addr)
+bool __init memblock_is_reserved(phys_addr_t addr)
 {
 	return memblock_search(&memblock.reserved, addr) != -1;
 }
 
-int __init_memblock memblock_is_memory(phys_addr_t addr)
+bool __init_memblock memblock_is_memory(phys_addr_t addr)
 {
 	return memblock_search(&memblock.memory, addr) != -1;
 }
@@ -1662,6 +1735,30 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name
 	}
 }
 
+extern unsigned long __init_memblock
+memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+	struct memblock_type *type = &memblock.reserved;
+	unsigned long size = 0;
+	int idx;
+
+	for (idx = 0; idx < type->cnt; idx++) {
+		struct memblock_region *rgn = &type->regions[idx];
+		phys_addr_t start, end;
+
+		if (rgn->base + rgn->size < start_addr)
+			continue;
+		if (rgn->base > end_addr)
+			continue;
+
+		start = rgn->base;
+		end = start + rgn->size;
+		size += end - start;
+	}
+
+	return size;
+}
+
 void __init_memblock __memblock_dump_all(void)
 {
 	pr_info("MEMBLOCK configuration:\n");
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 17dfe70f3309..484bedd8d811 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4150,24 +4150,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 	atomic_add(n, &memcg->id.ref);
 }
 
-static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-{
-	while (!atomic_inc_not_zero(&memcg->id.ref)) {
-		/*
-		 * The root cgroup cannot be destroyed, so it's refcount must
-		 * always be >= 1.
-		 */
-		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-			VM_BUG_ON(1);
-			break;
-		}
-		memcg = parent_mem_cgroup(memcg);
-		if (!memcg)
-			memcg = root_mem_cgroup;
-	}
-	return memcg;
-}
-
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
 	if (atomic_sub_and_test(n, &memcg->id.ref)) {
@@ -4496,9 +4478,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
 		return ret;
 	}
 
-	/* Try charges one by one with reclaim */
+	/* Try charges one by one with reclaim, but do not retry */
 	while (count--) {
-		ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -5752,6 +5734,24 @@ static int __init mem_cgroup_init(void)
 subsys_initcall(mem_cgroup_init);
 
 #ifdef CONFIG_MEMCG_SWAP
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+	while (!atomic_inc_not_zero(&memcg->id.ref)) {
+		/*
+		 * The root cgroup cannot be destroyed, so it's refcount must
+		 * always be >= 1.
+		 */
+		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+			VM_BUG_ON(1);
+			break;
+		}
+		memcg = parent_mem_cgroup(memcg);
+		if (!memcg)
+			memcg = root_mem_cgroup;
+	}
+	return memcg;
+}
+
 /**
  * mem_cgroup_swapout - transfer a memsw charge to swap
  * @page: page whose memsw charge to transfer
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 750b7893ee3a..091fe9b06663 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1208,7 +1208,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * page_remove_rmap() in try_to_unmap_one(). So to determine page status
 	 * correctly, we save a copy of the page flags at this time.
 	 */
-	page_flags = p->flags;
+	if (PageHuge(p))
+		page_flags = hpage->flags;
+	else
+		page_flags = p->flags;
 
 	/*
 	 * unpoison always clear PG_hwpoison inside page lock
@@ -1619,12 +1622,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	if (ret) {
 		pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
 			pfn, ret, page->flags);
-		/*
-		 * We know that soft_offline_huge_page() tries to migrate
-		 * only one hugepage pointed to by hpage, so we need not
-		 * run through the pagelist here.
-		 */
-		putback_active_hugepage(hpage);
+		if (!list_empty(&pagelist))
+			putback_movable_pages(&pagelist);
 		if (ret > 0)
 			ret = -EIO;
 	} else {
diff --git a/mm/memory.c b/mm/memory.c
index 76dcee317714..9ac55172aa7b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1127,6 +1127,7 @@ again:
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	pte = start_pte;
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
@@ -2662,40 +2663,6 @@ out_release:
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2715,10 +2682,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, address) < 0)
-		return VM_FAULT_SIGSEGV;
-
 	/* Use the zero-page for reads */
 	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a042a9d537bb..a18923e4359d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1371,17 +1371,20 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 }
 
 /*
- * Confirm all pages in a range [start, end) is belongs to the same zone.
+ * Confirm all pages in a range [start, end) belong to the same zone.
+ * When true, return its valid [start, end).
  */
-int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
+int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
+			 unsigned long *valid_start, unsigned long *valid_end)
 {
 	unsigned long pfn, sec_end_pfn;
+	unsigned long start, end;
 	struct zone *zone = NULL;
 	struct page *page;
 	int i;
-	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn);
+	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
 	     pfn < end_pfn;
-	     pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) {
+	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
 		/* Make sure the memory section is present first */
 		if (!present_section_nr(pfn_to_section_nr(pfn)))
 			continue;
@@ -1397,10 +1400,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 			page = pfn_to_page(pfn + i);
 			if (zone && page_zone(page) != zone)
 				return 0;
+			if (!zone)
+				start = pfn + i;
 			zone = page_zone(page);
+			end = pfn + MAX_ORDER_NR_PAGES;
 		}
 	}
-	return 1;
+
+	if (zone) {
+		*valid_start = start;
+		*valid_end = end;
+		return 1;
+	} else {
+		return 0;
+	}
 }
 
 /*
@@ -1718,6 +1731,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	long offlined_pages;
 	int ret, drain, retry_max, node;
 	unsigned long flags;
+	unsigned long valid_start, valid_end;
 	struct zone *zone;
 	struct memory_notify arg;
 
@@ -1728,10 +1742,10 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		return -EINVAL;
 	/* This makes hotplug much easier...and readable.
 	   we assume this for now. .*/
-	if (!test_pages_in_a_zone(start_pfn, end_pfn))
+	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
 		return -EINVAL;
 
-	zone = page_zone(pfn_to_page(start_pfn));
+	zone = page_zone(pfn_to_page(valid_start));
 	node = zone_to_nid(zone);
 	nr_pages = end_pfn - start_pfn;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f20eb4e8c4cc..177668a9c267 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -895,11 +895,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		*policy |= (pol->flags & MPOL_MODE_FLAGS);
 	}
 
-	if (vma) {
-		up_read(&current->mm->mmap_sem);
-		vma = NULL;
-	}
-
 	err = 0;
 	if (nmask) {
 		if (mpol_store_user_nodemask(pol)) {
@@ -1493,7 +1488,6 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
 		       compat_ulong_t, maxnode)
 {
-	long err = 0;
 	unsigned long __user *nm = NULL;
 	unsigned long nr_bits, alloc_size;
 	DECLARE_BITMAP(bm, MAX_NUMNODES);
@@ -1502,14 +1496,13 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
 	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
 
 	if (nmask) {
-		err = compat_get_bitmap(bm, nmask, nr_bits);
+		if (compat_get_bitmap(bm, nmask, nr_bits))
+			return -EFAULT;
 		nm = compat_alloc_user_space(alloc_size);
-		err |= copy_to_user(nm, bm, alloc_size);
+		if (copy_to_user(nm, bm, alloc_size))
+			return -EFAULT;
 	}
 
-	if (err)
-		return -EFAULT;
-
 	return sys_set_mempolicy(mode, nm, nr_bits+1);
 }
 
@@ -1517,7 +1510,6 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
 		       compat_ulong_t, mode, compat_ulong_t __user *, nmask,
 		       compat_ulong_t, maxnode, compat_ulong_t, flags)
 {
-	long err = 0;
 	unsigned long __user *nm = NULL;
 	unsigned long nr_bits, alloc_size;
 	nodemask_t bm;
@@ -1526,14 +1518,13 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
 	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
 
 	if (nmask) {
-		err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
+		if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits))
+			return -EFAULT;
 		nm = compat_alloc_user_space(alloc_size);
-		err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
+		if (copy_to_user(nm, nodes_addr(bm), alloc_size))
+			return -EFAULT;
 	}
 
-	if (err)
-		return -EFAULT;
-
 	return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
 }
 
@@ -2007,8 +1998,8 @@ retry_cpuset:
 
 	nmask = policy_nodemask(gfp, pol);
 	zl = policy_zonelist(gfp, pol, node);
-	mpol_cond_put(pol);
 	page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+	mpol_cond_put(pol);
 out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
diff --git a/mm/mempool.c b/mm/mempool.c
index 004d42b1dfaf..7924f4f58a6d 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -135,8 +135,8 @@ static void *remove_element(mempool_t *pool)
 	void *element = pool->elements[--pool->curr_nr];
 
 	BUG_ON(pool->curr_nr < 0);
-	check_element(pool, element);
 	kasan_unpoison_element(pool, element);
+	check_element(pool, element);
 	return element;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 72c09dea6526..afedcfab60e2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -38,6 +38,7 @@
 #include <linux/balloon_compaction.h>
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
+#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 
@@ -1483,7 +1484,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
 		const int __user *, nodes,
 		int __user *, status, int, flags)
 {
-	const struct cred *cred = current_cred(), *tcred;
 	struct task_struct *task;
 	struct mm_struct *mm;
 	int err;
@@ -1507,14 +1507,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
 
 	/*
 	 * Check if this process has the right to modify the specified
-	 * process. The right exists if the process has administrative
-	 * capabilities, superuser privileges or the same
-	 * userid as the target process.
+	 * process. Use the regular "ptrace_may_access()" checks.
 	 */
-	tcred = __task_cred(task);
-	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
-	    !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) &&
-	    !capable(CAP_SYS_NICE)) {
+	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
 		rcu_read_unlock();
 		err = -EPERM;
 		goto out;
diff --git a/mm/mlock.c b/mm/mlock.c
index d843bc9d32dd..206e86b98a03 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -277,7 +277,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
 	int i;
 	int nr = pagevec_count(pvec);
-	int delta_munlocked;
+	int delta_munlocked = -nr;
 	struct pagevec pvec_putback;
 	int pgrescued = 0;
 
@@ -297,6 +297,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 				continue;
 			else
 				__munlock_isolation_failed(page);
+		} else {
+			delta_munlocked++;
 		}
 
 		/*
@@ -308,7 +310,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 			pagevec_add(&pvec_putback, pvec->pages[i]);
 			pvec->pages[i] = NULL;
 		}
-	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(&zone->lru_lock);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index a089cca8d79a..19823fc3dcfa 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -300,6 +300,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long retval;
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *next;
 	unsigned long min_brk;
 	bool populate;
 
@@ -344,7 +345,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	}
 
 	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+	next = find_vma(mm, oldbrk);
+	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
@@ -367,10 +369,22 @@ out:
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, subtree_gap;
-	max = vma->vm_start;
-	if (vma->vm_prev)
-		max -= vma->vm_prev->vm_end;
+	unsigned long max, prev_end, subtree_gap;
+
+	/*
+	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+	 * allow two stack_guard_gaps between them here, and when choosing
+	 * an unmapped area; whereas when expanding we only require one.
+	 * That's a little inconsistent, but keeps the code here simpler.
+	 */
+	max = vm_start_gap(vma);
+	if (vma->vm_prev) {
+		prev_end = vm_end_gap(vma->vm_prev);
+		if (max > prev_end)
+			max -= prev_end;
+		else
+			max = 0;
+	}
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -463,7 +477,7 @@ static void validate_mm(struct mm_struct *mm)
 			anon_vma_unlock_read(anon_vma);
 		}
 
-		highest_address = vma->vm_end;
+		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
@@ -632,7 +646,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
 	else
-		mm->highest_vm_end = vma->vm_end;
+		mm->highest_vm_end = vm_end_gap(vma);
 
 	/*
 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -878,7 +892,7 @@ again: remove_next = 1 + (end > next->vm_end);
 			vma_gap_update(vma);
 		if (end_changed) {
 			if (!next)
-				mm->highest_vm_end = end;
+				mm->highest_vm_end = vm_end_gap(vma);
 			else if (!adjust_next)
 				vma_gap_update(next);
 		}
@@ -921,7 +935,7 @@ again: remove_next = 1 + (end > next->vm_end);
 		else if (next)
 			vma_gap_update(next);
 		else
-			mm->highest_vm_end = end;
+			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 	}
 	if (insert && file)
 		uprobe_mmap(insert);
@@ -1762,7 +1776,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit left subtree if it looks promising */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
 			struct vm_area_struct *left =
 				rb_entry(vma->vm_rb.rb_left,
@@ -1773,12 +1787,13 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 			}
 		}
 
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
 		/* Check if current node has a suitable gap */
 		if (gap_start > high_limit)
 			return -ENOMEM;
-		if (gap_end >= low_limit && gap_end - gap_start >= length)
+		if (gap_end >= low_limit &&
+		    gap_end > gap_start && gap_end - gap_start >= length)
 			goto found;
 
 		/* Visit right subtree if it looks promising */
@@ -1800,8 +1815,8 @@ check_current:
 			vma = rb_entry(rb_parent(prev),
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_left) {
-				gap_start = vma->vm_prev->vm_end;
-				gap_end = vma->vm_start;
+				gap_start = vm_end_gap(vma->vm_prev);
+				gap_end = vm_start_gap(vma);
 				goto check_current;
 			}
 		}
@@ -1865,7 +1880,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit right subtree if it looks promising */
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
 			struct vm_area_struct *right =
 				rb_entry(vma->vm_rb.rb_right,
@@ -1878,10 +1893,11 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 check_current:
 		/* Check if current node has a suitable gap */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end < low_limit)
 			return -ENOMEM;
-		if (gap_start <= high_limit && gap_end - gap_start >= length)
+		if (gap_start <= high_limit &&
+		    gap_end > gap_start && gap_end - gap_start >= length)
 			goto found;
 
 		/* Visit left subtree if it looks promising */
@@ -1904,7 +1920,7 @@ check_current:
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_right) {
 				gap_start = vma->vm_prev ?
-					vma->vm_prev->vm_end : 0;
+					vm_end_gap(vma->vm_prev) : 0;
 				goto check_current;
 			}
 		}
@@ -1942,7 +1958,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -1953,9 +1969,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -1978,7 +1995,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
@@ -1993,9 +2010,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)) &&
+				(!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2120,21 +2138,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start, actual_size;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
-	actual_size = size;
-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-		actual_size -= PAGE_SIZE;
-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2172,16 +2188,32 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next;
+	unsigned long gap_addr;
 	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
-	/* Guard against wrapping around to address 0. */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else
+	/* Guard against exceeding limits of the address space. */
+	address &= PAGE_MASK;
+	if (address >= (TASK_SIZE & PAGE_MASK))
 		return -ENOMEM;
+	address += PAGE_SIZE;
+
+	/* Enforce stack_guard_gap */
+	gap_addr = address + stack_guard_gap;
+
+	/* Guard against overflow */
+	if (gap_addr < address || gap_addr > TASK_SIZE)
+		gap_addr = TASK_SIZE;
+
+	next = vma->vm_next;
+	if (next && next->vm_start < gap_addr) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
 
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
@@ -2227,7 +2259,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				if (vma->vm_next)
 					vma_gap_update(vma->vm_next);
 				else
-					mm->highest_vm_end = address;
+					mm->highest_vm_end = vm_end_gap(vma);
 				spin_unlock(&mm->page_table_lock);
 
 				perf_event_mmap(vma);
@@ -2248,6 +2280,8 @@ int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *prev;
+	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2255,6 +2289,17 @@ int expand_downwards(struct vm_area_struct *vma,
 	if (error)
 		return error;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address - stack_guard_gap;
+	if (gap_addr > address)
+		return -ENOMEM;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end > gap_addr) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2310,28 +2355,25 @@ int expand_downwards(struct vm_area_struct *vma,
 	return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = simple_strtoul(p, &endptr, 10);
+	if (!*endptr)
+		stack_guard_gap = val << PAGE_SHIFT;
+
+	return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *next;
-
-	address &= PAGE_MASK;
-	next = vma->vm_next;
-	if (next && next->vm_start == address + PAGE_SIZE) {
-		if (!(next->vm_flags & VM_GROWSUP))
-			return -ENOMEM;
-	}
 	return expand_upwards(vma, address);
 }
 
@@ -2353,14 +2395,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *prev;
-
-	address &= PAGE_MASK;
-	prev = vma->vm_prev;
-	if (prev && prev->vm_end == address) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
-			return -ENOMEM;
-	}
 	return expand_downwards(vma, address);
 }
 
@@ -2458,7 +2492,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma->vm_prev = prev;
 		vma_gap_update(vma);
 	} else
-		mm->highest_vm_end = prev ? prev->vm_end : 0;
+		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
 	tail_vma->vm_next = NULL;
 
 	/* Kill the cache */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index bddb2c75492d..b8849a3930a0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -72,6 +72,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	if (!pte)
 		return 0;
 
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
diff --git a/mm/mremap.c b/mm/mremap.c
index c25bc6268e46..fe7b7f65f4f4 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -135,6 +135,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 45e79be95e8d..4ae77db917f2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -274,6 +274,26 @@ int page_group_by_mobility_disabled __read_mostly;
274#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 274#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
275static inline void reset_deferred_meminit(pg_data_t *pgdat) 275static inline void reset_deferred_meminit(pg_data_t *pgdat)
276{ 276{
277 unsigned long max_initialise;
278 unsigned long reserved_lowmem;
279
280 /*
281 * Initialise at least 2G of a node but also take into account that
282 * two large system hashes that can take up 1GB for 0.25TB/node.
283 */
284 max_initialise = max(2UL << (30 - PAGE_SHIFT),
285 (pgdat->node_spanned_pages >> 8));
286
287 /*
288 * Compensate the all the memblock reservations (e.g. crash kernel)
289 * from the initial estimation to make sure we will initialize enough
290 * memory to boot.
291 */
292 reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
293 pgdat->node_start_pfn + max_initialise);
294 max_initialise += reserved_lowmem;
295
296 pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
277 pgdat->first_deferred_pfn = ULONG_MAX; 297 pgdat->first_deferred_pfn = ULONG_MAX;
278} 298}
279 299
@@ -307,10 +327,9 @@ static inline bool update_defer_init(pg_data_t *pgdat,
307 /* Always populate low zones for address-contrained allocations */ 327 /* Always populate low zones for address-contrained allocations */
308 if (zone_end < pgdat_end_pfn(pgdat)) 328 if (zone_end < pgdat_end_pfn(pgdat))
309 return true; 329 return true;
310
311 /* Initialise at least 2G of the highest zone */ 330 /* Initialise at least 2G of the highest zone */
312 (*nr_initialised)++; 331 (*nr_initialised)++;
313 if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) && 332 if ((*nr_initialised > pgdat->static_init_size) &&
314 (pfn & (PAGES_PER_SECTION - 1)) == 0) { 333 (pfn & (PAGES_PER_SECTION - 1)) == 0) {
315 pgdat->first_deferred_pfn = pfn; 334 pgdat->first_deferred_pfn = pfn;
316 return false; 335 return false;
@@ -1513,14 +1532,14 @@ int move_freepages(struct zone *zone,
1513#endif 1532#endif
1514 1533
1515 for (page = start_page; page <= end_page;) { 1534 for (page = start_page; page <= end_page;) {
1516 /* Make sure we are not inadvertently changing nodes */
1517 VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
1518
1519 if (!pfn_valid_within(page_to_pfn(page))) { 1535 if (!pfn_valid_within(page_to_pfn(page))) {
1520 page++; 1536 page++;
1521 continue; 1537 continue;
1522 } 1538 }
1523 1539
1540 /* Make sure we are not inadvertently changing nodes */
1541 VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
1542
1524 if (!PageBuddy(page)) { 1543 if (!PageBuddy(page)) {
1525 page++; 1544 page++;
1526 continue; 1545 continue;
@@ -2472,7 +2491,7 @@ static bool zone_local(struct zone *local_zone, struct zone *zone)
2472 2491
2473static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) 2492static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
2474{ 2493{
2475 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < 2494 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
2476 RECLAIM_DISTANCE; 2495 RECLAIM_DISTANCE;
2477} 2496}
2478#else /* CONFIG_NUMA */ 2497#else /* CONFIG_NUMA */
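The only behavioural difference in zone_allows_reclaim() is at the boundary where node_distance() equals RECLAIM_DISTANCE; with the common default of 30 (architectures may override it, so treat the numbers here as assumptions), such nodes were previously excluded from reclaim. A minimal check of that boundary, with node_distance() reduced to a plain argument:

#include <stdbool.h>
#include <stdio.h>

#define RECLAIM_DISTANCE 30	/* common default; architectures may override */

/* Old and new forms of the predicate, node_distance() stubbed out. */
static bool allows_reclaim_old(int distance) { return distance <  RECLAIM_DISTANCE; }
static bool allows_reclaim_new(int distance) { return distance <= RECLAIM_DISTANCE; }

int main(void)
{
	/* example SLIT distances: local, one hop, exactly at the cutoff, beyond */
	int d[] = { 10, 20, 30, 40 };

	for (unsigned int i = 0; i < sizeof(d) / sizeof(d[0]); i++)
		printf("distance %2d: old=%d new=%d\n",
		       d[i], allows_reclaim_old(d[i]), allows_reclaim_new(d[i]));
	return 0;
}

Only the distance-30 case changes, which is the point of the one-character fix.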
@@ -5348,7 +5367,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
5348 /* pg_data_t should be reset to zero when it's allocated */ 5367 /* pg_data_t should be reset to zero when it's allocated */
5349 WARN_ON(pgdat->nr_zones || pgdat->classzone_idx); 5368 WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
5350 5369
5351 reset_deferred_meminit(pgdat);
5352 pgdat->node_id = nid; 5370 pgdat->node_id = nid;
5353 pgdat->node_start_pfn = node_start_pfn; 5371 pgdat->node_start_pfn = node_start_pfn;
5354#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 5372#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
@@ -5367,6 +5385,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
5367 (unsigned long)pgdat->node_mem_map); 5385 (unsigned long)pgdat->node_mem_map);
5368#endif 5386#endif
5369 5387
5388 reset_deferred_meminit(pgdat);
5370 free_area_init_core(pgdat); 5389 free_area_init_core(pgdat);
5371} 5390}
5372 5391
@@ -5833,8 +5852,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
5833 } 5852 }
5834 5853
5835 if (pages && s) 5854 if (pages && s)
5836 pr_info("Freeing %s memory: %ldK (%p - %p)\n", 5855 pr_info("Freeing %s memory: %ldK\n",
5837 s, pages << (PAGE_SHIFT - 10), start, end); 5856 s, pages << (PAGE_SHIFT - 10));
5838 5857
5839 return pages; 5858 return pages;
5840} 5859}
@@ -6788,7 +6807,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
6788 6807
6789 /* Make sure the range is really isolated. */ 6808 /* Make sure the range is really isolated. */
6790 if (test_pages_isolated(outer_start, end, false)) { 6809 if (test_pages_isolated(outer_start, end, false)) {
6791 pr_info("%s: [%lx, %lx) PFNs busy\n", 6810 pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
6792 __func__, outer_start, end); 6811 __func__, outer_start, end);
6793 ret = -EBUSY; 6812 ret = -EBUSY;
6794 goto done; 6813 goto done;
diff --git a/mm/percpu.c b/mm/percpu.c
index 1f376bce413c..ef6353f0adbd 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1012,8 +1012,11 @@ area_found:
1012 mutex_unlock(&pcpu_alloc_mutex); 1012 mutex_unlock(&pcpu_alloc_mutex);
1013 } 1013 }
1014 1014
1015 if (chunk != pcpu_reserved_chunk) 1015 if (chunk != pcpu_reserved_chunk) {
1016 spin_lock_irqsave(&pcpu_lock, flags);
1016 pcpu_nr_empty_pop_pages -= occ_pages; 1017 pcpu_nr_empty_pop_pages -= occ_pages;
1018 spin_unlock_irqrestore(&pcpu_lock, flags);
1019 }
1017 1020
1018 if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) 1021 if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
1019 pcpu_schedule_balance_work(); 1022 pcpu_schedule_balance_work();
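The percpu.c hunk takes pcpu_lock around the decrement of pcpu_nr_empty_pop_pages so the counter is only ever modified under the lock that protects it. A userspace analogue of the same lost-update concern, using a pthread mutex as a stand-in for spin_lock_irqsave(); all names here are illustrative:

#include <pthread.h>
#include <stdio.h>

/*
 * A shared counter must be updated under a lock; otherwise concurrent
 * read-modify-write sequences can lose updates.
 */
static long nr_empty_pop_pages = 1000000;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		pthread_mutex_lock(&lock);	/* analogous to spin_lock_irqsave() */
		nr_empty_pop_pages -= 1;
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	/* With the lock the result is deterministic: 1000000 - 4 * 100000. */
	printf("remaining: %ld\n", nr_empty_pop_pages);
	return 0;
}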
diff --git a/mm/rmap.c b/mm/rmap.c
index b577fbb98d4b..ede183c32f45 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -649,6 +649,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
649 tlb_ubc->flush_required = true; 649 tlb_ubc->flush_required = true;
650 650
651 /* 651 /*
652 * Ensure compiler does not re-order the setting of tlb_flush_batched
653 * before the PTE is cleared.
654 */
655 barrier();
656 mm->tlb_flush_batched = true;
657
658 /*
652 * If the PTE was dirty then it's best to assume it's writable. The 659 * If the PTE was dirty then it's best to assume it's writable. The
653 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() 660 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
654 * before the page is queued for IO. 661 * before the page is queued for IO.
@@ -675,6 +682,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
675 682
676 return should_defer; 683 return should_defer;
677} 684}
685
686/*
687 * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
688 * releasing the PTL if TLB flushes are batched. It's possible for a parallel
689 * operation such as mprotect or munmap to race between reclaim unmapping
690 * the page and flushing the page. If this race occurs, it potentially allows
691 * access to data via a stale TLB entry. Tracking all mm's that have TLB
692 * batching in flight would be expensive during reclaim so instead track
693 * whether TLB batching occurred in the past and if so then do a flush here
694 * if required. This will cost one additional flush per reclaim cycle paid
695 * by the first operation at risk such as mprotect and munmap.
696 *
697 * This must be called under the PTL so that an access to tlb_flush_batched
698 * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
699 * via the PTL.
700 */
701void flush_tlb_batched_pending(struct mm_struct *mm)
702{
703 if (mm->tlb_flush_batched) {
704 flush_tlb_mm(mm);
705
706 /*
707 * Do not allow the compiler to re-order the clearing of
708 * tlb_flush_batched before the tlb is flushed.
709 */
710 barrier();
711 mm->tlb_flush_batched = false;
712 }
713}
678#else 714#else
679static void set_tlb_ubc_flush_pending(struct mm_struct *mm, 715static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
680 struct page *page, bool writable) 716 struct page *page, bool writable)
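Together with the flush_tlb_batched_pending() calls added earlier in mm/mprotect.c and mm/mremap.c, the comment above describes a set-flag-then-flush handshake: reclaim raises a per-mm flag after clearing PTEs with the flush still deferred, and any later PTL-holding operation flushes first if the flag is still set. A deliberately simplified userspace model of that handshake follows; it reduces the mm to a single flag and the flush to a printf, and does not model the PTL or the compiler barriers that provide the real ordering.

#include <stdbool.h>
#include <stdio.h>

/* Simplified model of the deferred-flush handshake. */
struct mm_model {
	bool tlb_flush_batched;
};

static void model_flush_tlb_mm(struct mm_model *mm)
{
	printf("TLB flushed for mm %p\n", (void *)mm);
}

/* Reclaim side: PTE cleared, flush deferred, flag raised. */
static void model_set_flush_pending(struct mm_model *mm)
{
	mm->tlb_flush_batched = true;
}

/* mprotect/munmap/mremap side: called under the (modelled) PTL. */
static void model_flush_batched_pending(struct mm_model *mm)
{
	if (mm->tlb_flush_batched) {
		model_flush_tlb_mm(mm);
		mm->tlb_flush_batched = false;
	}
}

int main(void)
{
	struct mm_model mm = { .tlb_flush_batched = false };

	model_set_flush_pending(&mm);		/* reclaim unmapped a page */
	model_flush_batched_pending(&mm);	/* mprotect catches up before touching PTEs */
	model_flush_batched_pending(&mm);	/* a second caller finds nothing pending */
	return 0;
}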
diff --git a/mm/slab.c b/mm/slab.c
index 24a615d42d74..462938fc7cb9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1670,6 +1670,14 @@ static void kmem_rcu_free(struct rcu_head *head)
1670} 1670}
1671 1671
1672#if DEBUG 1672#if DEBUG
1673static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
1674{
1675 if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
1676 (cachep->size % PAGE_SIZE) == 0)
1677 return true;
1678
1679 return false;
1680}
1673 1681
1674#ifdef CONFIG_DEBUG_PAGEALLOC 1682#ifdef CONFIG_DEBUG_PAGEALLOC
1675static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, 1683static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
@@ -1703,6 +1711,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1703 } 1711 }
1704 *addr++ = 0x87654321; 1712 *addr++ = 0x87654321;
1705} 1713}
1714
1715static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
1716 int map, unsigned long caller)
1717{
1718 if (!is_debug_pagealloc_cache(cachep))
1719 return;
1720
1721 if (caller)
1722 store_stackinfo(cachep, objp, caller);
1723
1724 kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
1725}
1726
1727#else
1728static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
1729 int map, unsigned long caller) {}
1730
1706#endif 1731#endif
1707 1732
1708static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) 1733static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
@@ -1781,6 +1806,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1781 int size, i; 1806 int size, i;
1782 int lines = 0; 1807 int lines = 0;
1783 1808
1809 if (is_debug_pagealloc_cache(cachep))
1810 return;
1811
1784 realobj = (char *)objp + obj_offset(cachep); 1812 realobj = (char *)objp + obj_offset(cachep);
1785 size = cachep->object_size; 1813 size = cachep->object_size;
1786 1814
@@ -1846,16 +1874,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
1846 void *objp = index_to_obj(cachep, page, i); 1874 void *objp = index_to_obj(cachep, page, i);
1847 1875
1848 if (cachep->flags & SLAB_POISON) { 1876 if (cachep->flags & SLAB_POISON) {
1849#ifdef CONFIG_DEBUG_PAGEALLOC
1850 if (cachep->size % PAGE_SIZE == 0 &&
1851 OFF_SLAB(cachep))
1852 kernel_map_pages(virt_to_page(objp),
1853 cachep->size / PAGE_SIZE, 1);
1854 else
1855 check_poison_obj(cachep, objp);
1856#else
1857 check_poison_obj(cachep, objp); 1877 check_poison_obj(cachep, objp);
1858#endif 1878 slab_kernel_map(cachep, objp, 1, 0);
1859 } 1879 }
1860 if (cachep->flags & SLAB_RED_ZONE) { 1880 if (cachep->flags & SLAB_RED_ZONE) {
1861 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 1881 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
@@ -2179,7 +2199,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2179 else 2199 else
2180 size += BYTES_PER_WORD; 2200 size += BYTES_PER_WORD;
2181 } 2201 }
2182#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2183 /* 2202 /*
2184 * To activate debug pagealloc, off-slab management is necessary 2203 * To activate debug pagealloc, off-slab management is necessary
2185 * requirement. In early phase of initialization, small sized slab 2204 * requirement. In early phase of initialization, small sized slab
@@ -2187,14 +2206,14 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2187 * to check size >= 256. It guarantees that all necessary small 2206 * to check size >= 256. It guarantees that all necessary small
2188 * sized slab is initialized in current slab initialization sequence. 2207 * sized slab is initialized in current slab initialization sequence.
2189 */ 2208 */
2190 if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && 2209 if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
2210 !slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
2191 size >= 256 && cachep->object_size > cache_line_size() && 2211 size >= 256 && cachep->object_size > cache_line_size() &&
2192 ALIGN(size, cachep->align) < PAGE_SIZE) { 2212 ALIGN(size, cachep->align) < PAGE_SIZE) {
2193 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); 2213 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
2194 size = PAGE_SIZE; 2214 size = PAGE_SIZE;
2195 } 2215 }
2196#endif 2216#endif
2197#endif
2198 2217
2199 /* 2218 /*
2200 * Determine if the slab management is 'on' or 'off' slab. 2219 * Determine if the slab management is 'on' or 'off' slab.
@@ -2237,15 +2256,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2237 if (flags & CFLGS_OFF_SLAB) { 2256 if (flags & CFLGS_OFF_SLAB) {
2238 /* really off slab. No need for manual alignment */ 2257 /* really off slab. No need for manual alignment */
2239 freelist_size = calculate_freelist_size(cachep->num, 0); 2258 freelist_size = calculate_freelist_size(cachep->num, 0);
2240
2241#ifdef CONFIG_PAGE_POISONING
2242 /* If we're going to use the generic kernel_map_pages()
2243 * poisoning, then it's going to smash the contents of
2244 * the redzone and userword anyhow, so switch them off.
2245 */
2246 if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
2247 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2248#endif
2249 } 2259 }
2250 2260
2251 cachep->colour_off = cache_line_size(); 2261 cachep->colour_off = cache_line_size();
@@ -2261,7 +2271,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2261 cachep->size = size; 2271 cachep->size = size;
2262 cachep->reciprocal_buffer_size = reciprocal_value(size); 2272 cachep->reciprocal_buffer_size = reciprocal_value(size);
2263 2273
2264 if (flags & CFLGS_OFF_SLAB) { 2274#if DEBUG
2275 /*
2276 * If we're going to use the generic kernel_map_pages()
2277 * poisoning, then it's going to smash the contents of
2278 * the redzone and userword anyhow, so switch them off.
2279 */
2280 if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
2281 (cachep->flags & SLAB_POISON) &&
2282 is_debug_pagealloc_cache(cachep))
2283 cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2284#endif
2285
2286 if (OFF_SLAB(cachep)) {
2265 cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); 2287 cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
2266 /* 2288 /*
2267 * This is a possibility for one of the kmalloc_{dma,}_caches. 2289 * This is a possibility for one of the kmalloc_{dma,}_caches.
@@ -2488,9 +2510,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
2488 for (i = 0; i < cachep->num; i++) { 2510 for (i = 0; i < cachep->num; i++) {
2489 void *objp = index_to_obj(cachep, page, i); 2511 void *objp = index_to_obj(cachep, page, i);
2490#if DEBUG 2512#if DEBUG
2491 /* need to poison the objs? */
2492 if (cachep->flags & SLAB_POISON)
2493 poison_obj(cachep, objp, POISON_FREE);
2494 if (cachep->flags & SLAB_STORE_USER) 2513 if (cachep->flags & SLAB_STORE_USER)
2495 *dbg_userword(cachep, objp) = NULL; 2514 *dbg_userword(cachep, objp) = NULL;
2496 2515
@@ -2514,10 +2533,11 @@ static void cache_init_objs(struct kmem_cache *cachep,
2514 slab_error(cachep, "constructor overwrote the" 2533 slab_error(cachep, "constructor overwrote the"
2515 " start of an object"); 2534 " start of an object");
2516 } 2535 }
2517 if ((cachep->size % PAGE_SIZE) == 0 && 2536 /* need to poison the objs? */
2518 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) 2537 if (cachep->flags & SLAB_POISON) {
2519 kernel_map_pages(virt_to_page(objp), 2538 poison_obj(cachep, objp, POISON_FREE);
2520 cachep->size / PAGE_SIZE, 0); 2539 slab_kernel_map(cachep, objp, 0, 0);
2540 }
2521#else 2541#else
2522 if (cachep->ctor) 2542 if (cachep->ctor)
2523 cachep->ctor(objp); 2543 cachep->ctor(objp);
@@ -2736,17 +2756,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2736 2756
2737 set_obj_status(page, objnr, OBJECT_FREE); 2757 set_obj_status(page, objnr, OBJECT_FREE);
2738 if (cachep->flags & SLAB_POISON) { 2758 if (cachep->flags & SLAB_POISON) {
2739#ifdef CONFIG_DEBUG_PAGEALLOC
2740 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2741 store_stackinfo(cachep, objp, caller);
2742 kernel_map_pages(virt_to_page(objp),
2743 cachep->size / PAGE_SIZE, 0);
2744 } else {
2745 poison_obj(cachep, objp, POISON_FREE);
2746 }
2747#else
2748 poison_obj(cachep, objp, POISON_FREE); 2759 poison_obj(cachep, objp, POISON_FREE);
2749#endif 2760 slab_kernel_map(cachep, objp, 0, caller);
2750 } 2761 }
2751 return objp; 2762 return objp;
2752} 2763}
@@ -2873,15 +2884,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
2873 if (!objp) 2884 if (!objp)
2874 return objp; 2885 return objp;
2875 if (cachep->flags & SLAB_POISON) { 2886 if (cachep->flags & SLAB_POISON) {
2876#ifdef CONFIG_DEBUG_PAGEALLOC
2877 if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
2878 kernel_map_pages(virt_to_page(objp),
2879 cachep->size / PAGE_SIZE, 1);
2880 else
2881 check_poison_obj(cachep, objp);
2882#else
2883 check_poison_obj(cachep, objp); 2887 check_poison_obj(cachep, objp);
2884#endif 2888 slab_kernel_map(cachep, objp, 1, 0);
2885 poison_obj(cachep, objp, POISON_INUSE); 2889 poison_obj(cachep, objp, POISON_INUSE);
2886 } 2890 }
2887 if (cachep->flags & SLAB_STORE_USER) 2891 if (cachep->flags & SLAB_STORE_USER)
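The slab.c changes above consolidate several open-coded CONFIG_DEBUG_PAGEALLOC blocks into one predicate plus one mapping helper, and gate both on the runtime debug_pagealloc_enabled() state. The shape of that refactoring, stripped of slab internals (all names here are illustrative stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Illustrative stand-ins for the slab fields and debug hooks. */
struct cache { unsigned long size; bool off_slab; };

static bool debug_pagealloc_enabled(void) { return true; }	/* assumed on */

static void map_pages(void *obj, unsigned long pages, int map)
{
	printf("map %lu page(s) at %p -> %d\n", pages, obj, map);
}

/* One predicate decides whether page-level mapping tricks apply... */
static bool is_debug_pagealloc_cache(const struct cache *c)
{
	return debug_pagealloc_enabled() && c->off_slab &&
	       (c->size % PAGE_SIZE) == 0;
}

/* ...and one helper replaces the scattered #ifdef blocks at the call sites. */
static void cache_kernel_map(const struct cache *c, void *obj, int map)
{
	if (!is_debug_pagealloc_cache(c))
		return;
	map_pages(obj, c->size / PAGE_SIZE, map);
}

int main(void)
{
	struct cache c = { .size = 2 * PAGE_SIZE, .off_slab = true };
	int obj[1];

	cache_kernel_map(&c, obj, 0);	/* poison path: unmap */
	cache_kernel_map(&c, obj, 1);	/* check path: map back */
	return 0;
}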
diff --git a/mm/slub.c b/mm/slub.c
index 41f7cae64a49..d6fe997c0577 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5343,6 +5343,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5343 char mbuf[64]; 5343 char mbuf[64];
5344 char *buf; 5344 char *buf;
5345 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); 5345 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5346 ssize_t len;
5346 5347
5347 if (!attr || !attr->store || !attr->show) 5348 if (!attr || !attr->store || !attr->show)
5348 continue; 5349 continue;
@@ -5367,8 +5368,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5367 buf = buffer; 5368 buf = buffer;
5368 } 5369 }
5369 5370
5370 attr->show(root_cache, buf); 5371 len = attr->show(root_cache, buf);
5371 attr->store(s, buf, strlen(buf)); 5372 if (len > 0)
5373 attr->store(s, buf, len);
5372 } 5374 }
5373 5375
5374 if (buffer) 5376 if (buffer)
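The slub.c fix feeds store() the length actually reported by show(), and skips the copy when show() returned an error or nothing, instead of trusting strlen() on a buffer that may never have been written. The same defensive pattern in a standalone form (attr_show/attr_store are illustrative, not the kernel API):

#include <stdio.h>
#include <sys/types.h>

/*
 * Illustrative show/store pair; a real sysfs attribute fills 'buf' and
 * returns the number of bytes written, or a negative error code.
 */
static ssize_t attr_show(char *buf, size_t size)
{
	return snprintf(buf, size, "42\n");
}

static void attr_store(const char *buf, size_t len)
{
	printf("storing %zu byte(s): %.*s", len, (int)len, buf);
}

int main(void)
{
	char buf[64];
	ssize_t len = attr_show(buf, sizeof(buf));

	/* Only propagate when show() actually produced data. */
	if (len > 0)
		attr_store(buf, (size_t)len);
	return 0;
}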
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index b5f7f24b8dd1..09f733b0424a 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -48,6 +48,9 @@ static int swap_cgroup_prepare(int type)
48 if (!page) 48 if (!page)
49 goto not_enough_page; 49 goto not_enough_page;
50 ctrl->map[idx] = page; 50 ctrl->map[idx] = page;
51
52 if (!(idx % SWAP_CLUSTER_MAX))
53 cond_resched();
51 } 54 }
52 return 0; 55 return 0;
53not_enough_page: 56not_enough_page:
@@ -202,6 +205,8 @@ void swap_cgroup_swapoff(int type)
202 struct page *page = map[i]; 205 struct page *page = map[i];
203 if (page) 206 if (page)
204 __free_page(page); 207 __free_page(page);
208 if (!(i % SWAP_CLUSTER_MAX))
209 cond_resched();
205 } 210 }
206 vfree(map); 211 vfree(map);
207 } 212 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 76e35ad97102..f4c8270f7b84 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -732,7 +732,7 @@ EXPORT_SYMBOL(truncate_setsize);
732 */ 732 */
733void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) 733void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
734{ 734{
735 int bsize = 1 << inode->i_blkbits; 735 int bsize = i_blocksize(inode);
736 loff_t rounded_from; 736 loff_t rounded_from;
737 struct page *page; 737 struct page *page;
738 pgoff_t index; 738 pgoff_t index;
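The truncate.c change swaps the open-coded shift for the i_blocksize() helper, which is equivalent to the expression it replaces, i.e. 1 << inode->i_blkbits. A minimal standalone illustration of that equivalence, using a reduced inode stand-in rather than the real struct inode:

#include <assert.h>
#include <stdio.h>

/* Minimal stand-in for the relevant inode field. */
struct inode_model { unsigned char i_blkbits; };

/* Block size derived from the log2 field, as the helper does. */
static unsigned int model_i_blocksize(const struct inode_model *inode)
{
	return 1U << inode->i_blkbits;
}

int main(void)
{
	struct inode_model inode = { .i_blkbits = 12 };	/* 4K blocks */

	assert(model_i_blocksize(&inode) == (1U << inode.i_blkbits));
	printf("block size: %u\n", model_i_blocksize(&inode));
	return 0;
}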
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index c5afd573d7da..3fb15c25af87 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -112,9 +112,16 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
112 unsigned long reclaimed) 112 unsigned long reclaimed)
113{ 113{
114 unsigned long scale = scanned + reclaimed; 114 unsigned long scale = scanned + reclaimed;
115 unsigned long pressure; 115 unsigned long pressure = 0;
116 116
117 /* 117 /*
118 * reclaimed can be greater than scanned in cases
119 * like THP, where the scanned is 1 and reclaimed
120 * could be 512
121 */
122 if (reclaimed >= scanned)
123 goto out;
124 /*
118 * We calculate the ratio (in percents) of how many pages were 125 * We calculate the ratio (in percents) of how many pages were
119 * scanned vs. reclaimed in a given time frame (window). Note that 126 * scanned vs. reclaimed in a given time frame (window). Note that
120 * time is in VM reclaimer's "ticks", i.e. number of pages 127 * time is in VM reclaimer's "ticks", i.e. number of pages
@@ -124,6 +131,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
124 pressure = scale - (reclaimed * scale / scanned); 131 pressure = scale - (reclaimed * scale / scanned);
125 pressure = pressure * 100 / scale; 132 pressure = pressure * 100 / scale;
126 133
134out:
127 pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, 135 pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure,
128 scanned, reclaimed); 136 scanned, reclaimed);
129 137
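The new guard handles reclaim counts that exceed the scan count (for example a THP scanned as one page but reclaimed as 512), which would otherwise drive the unsigned subtraction in the pressure calculation through zero. A standalone version of the arithmetic with worked inputs:

#include <stdio.h>

/*
 * Simplified vmpressure_calc_level() arithmetic: returns the pressure
 * percentage, 0 when reclaim kept pace with (or outpaced) scanning.
 */
static unsigned long vmpressure_pct(unsigned long scanned, unsigned long reclaimed)
{
	unsigned long scale = scanned + reclaimed;
	unsigned long pressure = 0;

	/* THP-style case: scanned == 1, reclaimed == 512 would underflow. */
	if (reclaimed >= scanned)
		goto out;

	pressure = scale - (reclaimed * scale / scanned);
	pressure = pressure * 100 / scale;
out:
	return pressure;
}

int main(void)
{
	printf("scanned=100 reclaimed=25  -> %lu%%\n", vmpressure_pct(100, 25));
	printf("scanned=100 reclaimed=90  -> %lu%%\n", vmpressure_pct(100, 90));
	printf("scanned=1   reclaimed=512 -> %lu%%\n", vmpressure_pct(1, 512));
	return 0;
}

With these inputs the sketch prints 75%, 10% and 0% respectively.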
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bfc5050cbd01..440c2df9be82 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2529,7 +2529,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2529 if (!populated_zone(zone)) 2529 if (!populated_zone(zone))
2530 continue; 2530 continue;
2531 2531
2532 classzone_idx = requested_highidx; 2532 classzone_idx = gfp_zone(sc->gfp_mask);
2533 while (!populated_zone(zone->zone_pgdat->node_zones + 2533 while (!populated_zone(zone->zone_pgdat->node_zones +
2534 classzone_idx)) 2534 classzone_idx))
2535 classzone_idx--; 2535 classzone_idx--;
diff --git a/mm/zswap.c b/mm/zswap.c
index 340261946fda..45476f429789 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -78,7 +78,13 @@ static u64 zswap_duplicate_entry;
78 78
79/* Enable/disable zswap (disabled by default) */ 79/* Enable/disable zswap (disabled by default) */
80static bool zswap_enabled; 80static bool zswap_enabled;
81module_param_named(enabled, zswap_enabled, bool, 0644); 81static int zswap_enabled_param_set(const char *,
82 const struct kernel_param *);
83static struct kernel_param_ops zswap_enabled_param_ops = {
84 .set = zswap_enabled_param_set,
85 .get = param_get_bool,
86};
87module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
82 88
83/* Crypto compressor to use */ 89/* Crypto compressor to use */
84#define ZSWAP_COMPRESSOR_DEFAULT "lzo" 90#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
@@ -176,6 +182,9 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0);
176/* used by param callback function */ 182/* used by param callback function */
177static bool zswap_init_started; 183static bool zswap_init_started;
178 184
185/* fatal error during init */
186static bool zswap_init_failed;
187
179/********************************* 188/*********************************
180* helpers and fwd declarations 189* helpers and fwd declarations
181**********************************/ 190**********************************/
@@ -702,6 +711,11 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
702 char *s = strstrip((char *)val); 711 char *s = strstrip((char *)val);
703 int ret; 712 int ret;
704 713
714 if (zswap_init_failed) {
715 pr_err("can't set param, initialization failed\n");
716 return -ENODEV;
717 }
718
705 /* no change required */ 719 /* no change required */
706 if (!strcmp(s, *(char **)kp->arg)) 720 if (!strcmp(s, *(char **)kp->arg))
707 return 0; 721 return 0;
@@ -781,6 +795,17 @@ static int zswap_zpool_param_set(const char *val,
781 return __zswap_param_set(val, kp, NULL, zswap_compressor); 795 return __zswap_param_set(val, kp, NULL, zswap_compressor);
782} 796}
783 797
798static int zswap_enabled_param_set(const char *val,
799 const struct kernel_param *kp)
800{
801 if (zswap_init_failed) {
802 pr_err("can't enable, initialization failed\n");
803 return -ENODEV;
804 }
805
806 return param_set_bool(val, kp);
807}
808
784/********************************* 809/*********************************
785* writeback code 810* writeback code
786**********************************/ 811**********************************/
@@ -1267,6 +1292,9 @@ pool_fail:
1267dstmem_fail: 1292dstmem_fail:
1268 zswap_entry_cache_destroy(); 1293 zswap_entry_cache_destroy();
1269cache_fail: 1294cache_fail:
1295 /* if built-in, we aren't unloaded on failure; don't allow use */
1296 zswap_init_failed = true;
1297 zswap_enabled = false;
1270 return -ENOMEM; 1298 return -ENOMEM;
1271} 1299}
1272/* must be late so crypto has time to come up */ 1300/* must be late so crypto has time to come up */
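The zswap change replaces a plain module_param_named() with module_param_cb() so the setter can refuse to enable the feature after a failed init. A sketch of that general pattern as a self-contained module, not the zswap code itself; the feature_* names are illustrative:

#include <linux/module.h>
#include <linux/moduleparam.h>

static bool feature_enabled;
static bool feature_init_failed;	/* set by the driver's init path on error */

static int feature_enabled_set(const char *val, const struct kernel_param *kp)
{
	if (feature_init_failed) {
		pr_err("can't enable, initialization failed\n");
		return -ENODEV;
	}
	return param_set_bool(val, kp);
}

static const struct kernel_param_ops feature_enabled_ops = {
	.set = feature_enabled_set,
	.get = param_get_bool,
};

/* Exposes /sys/module/<module>/parameters/enabled with the guarded setter. */
module_param_cb(enabled, &feature_enabled_ops, &feature_enabled, 0644);

static int __init feature_init(void)
{
	/* A real driver would set feature_init_failed on setup errors. */
	return 0;
}
module_init(feature_init);

MODULE_LICENSE("GPL");

Once feature_init_failed is set, writes to the sysfs parameter should be rejected with -ENODEV rather than silently enabling a half-initialised feature, which is the behaviour the zswap hunk is after.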