aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMinchan Kim2017-01-10 18:58:15 -0600
committerLinus Torvalds2017-01-10 20:31:55 -0600
commitf05714293a591038304ddae7cb0dd747bb3786cc (patch)
tree22cf7f1dda97d459e735a18273875f8c3018f82a
parent4d09d0f45dd5d78b3a301c238272211d1ea7d9e6 (diff)
downloadmkaricheri-ti-linux-kernel-f05714293a591038304ddae7cb0dd747bb3786cc.tar.gz
mkaricheri-ti-linux-kernel-f05714293a591038304ddae7cb0dd747bb3786cc.tar.xz
mkaricheri-ti-linux-kernel-f05714293a591038304ddae7cb0dd747bb3786cc.zip
mm: support anonymous stable page
During development of zram-swap asynchronous writeback, I found strange corruption of a compressed page, resulting in: Modules linked in: zram(E) CPU: 3 PID: 1520 Comm: zramd-1 Tainted: G E 4.8.0-mm1-00320-ge0d4894c9c38-dirty #3274 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff88007620b840 task.stack: ffff880078090000 RIP: set_freeobj.part.43+0x1c/0x1f RSP: 0018:ffff880078093ca8 EFLAGS: 00010246 RAX: 0000000000000018 RBX: ffff880076798d88 RCX: ffffffff81c408c8 RDX: 0000000000000018 RSI: 0000000000000000 RDI: 0000000000000246 RBP: ffff880078093cb0 R08: 0000000000000000 R09: 0000000000000000 R10: ffff88005bc43030 R11: 0000000000001df3 R12: ffff880076798d88 R13: 000000000005bc43 R14: ffff88007819d1b8 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88007e380000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc934048f20 CR3: 0000000077b01000 CR4: 00000000000406e0 Call Trace: obj_malloc+0x22b/0x260 zs_malloc+0x1e4/0x580 zram_bvec_rw+0x4cd/0x830 [zram] page_requests_rw+0x9c/0x130 [zram] zram_thread+0xe6/0x173 [zram] kthread+0xca/0xe0 ret_from_fork+0x25/0x30 Investigation revealed that stable pages currently do not cover anonymous pages. IOW, reuse_swap_page can reuse the page without waiting for writeback completion, so it can overwrite a page that zram is compressing. Unfortunately, zram has used the per-cpu stream feature since v4.7. It aims to increase the cache hit ratio of the scratch buffer used for compression. The downside of that approach is that zram has to ask for memory space for the compressed page in per-cpu context, which requires a stricter gfp flag, so the allocation can fail. If it does, zram retries the allocation outside of per-cpu context, where it can get the memory this time, compresses the data again, and copies it to the memory space. In this scenario, zram assumes the data never changes, but that is not true unless stable pages are supported for anonymous pages. 
So, if the data is changed under us, zram can cause a buffer overrun, because the second compression size could be bigger than the one we got in the previous trial, and zram blindly copies the bigger object into the smaller buffer. The overrun breaks zsmalloc free object chaining, so the system crashes like above. I think below is the same problem. https://bugzilla.suse.com/show_bug.cgi?id=997574 Unfortunately, reuse_swap_page must be atomic, so we cannot wait on writeback there; the approach in this patch is simply to return false if we find that the page needs to be stable. Although this increases the memory footprint temporarily, it happens rarely, and the memory should be reclaimed easily even when it does happen. Also, it is better than waiting for IO completion, which is on the critical path for application latency. Fixes: da9556a2367c ("zram: user per-cpu compression streams") Link: http://lkml.kernel.org/r/20161120233015.GA14113@bbox Link: http://lkml.kernel.org/r/1482366980-3782-2-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Acked-by: Hugh Dickins <hughd@google.com> Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Darrick J. Wong <darrick.wong@oracle.com> Cc: Takashi Iwai <tiwai@suse.de> Cc: Hyeoncheol Lee <cheol.lee@lge.com> Cc: <yjay.kim@lge.com> Cc: Sangseok Lee <sangseok.lee@lge.com> Cc: <stable@vger.kernel.org> [4.7+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/swap.h3
-rw-r--r--mm/swapfile.c20
2 files changed, 21 insertions, 2 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 09f4be179ff3..7f47b7098b1b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -150,8 +150,9 @@ enum {
150 SWP_FILE = (1 << 7), /* set after swap_activate success */ 150 SWP_FILE = (1 << 7), /* set after swap_activate success */
151 SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ 151 SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
152 SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ 152 SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
153 SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */
153 /* add others here before... */ 154 /* add others here before... */
154 SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */ 155 SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */
155}; 156};
156 157
157#define SWAP_CLUSTER_MAX 32UL 158#define SWAP_CLUSTER_MAX 32UL
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1c6e0321205d..4761701d1721 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -943,11 +943,25 @@ bool reuse_swap_page(struct page *page, int *total_mapcount)
943 count = page_trans_huge_mapcount(page, total_mapcount); 943 count = page_trans_huge_mapcount(page, total_mapcount);
944 if (count <= 1 && PageSwapCache(page)) { 944 if (count <= 1 && PageSwapCache(page)) {
945 count += page_swapcount(page); 945 count += page_swapcount(page);
946 if (count == 1 && !PageWriteback(page)) { 946 if (count != 1)
947 goto out;
948 if (!PageWriteback(page)) {
947 delete_from_swap_cache(page); 949 delete_from_swap_cache(page);
948 SetPageDirty(page); 950 SetPageDirty(page);
951 } else {
952 swp_entry_t entry;
953 struct swap_info_struct *p;
954
955 entry.val = page_private(page);
956 p = swap_info_get(entry);
957 if (p->flags & SWP_STABLE_WRITES) {
958 spin_unlock(&p->lock);
959 return false;
960 }
961 spin_unlock(&p->lock);
949 } 962 }
950 } 963 }
964out:
951 return count <= 1; 965 return count <= 1;
952} 966}
953 967
@@ -2448,6 +2462,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2448 error = -ENOMEM; 2462 error = -ENOMEM;
2449 goto bad_swap; 2463 goto bad_swap;
2450 } 2464 }
2465
2466 if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
2467 p->flags |= SWP_STABLE_WRITES;
2468
2451 if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2469 if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
2452 int cpu; 2470 int cpu;
2453 2471