author	Jan Kara	2012-10-25 15:37:31 -0500
committer	Greg Kroah-Hartman	2012-11-05 02:44:26 -0600
commit	e418b3bbe9a34fc75a148ff890e0b3442628c5c7 (patch)
tree	c842824518fbee37b39a0d8604e78f9fd2c59e2b /mm
parent	05e02741ed77cace45997d4a7d4092f5ac84e19a (diff)
mm: fix XFS oops due to dirty pages without buffers on s390
commit ef5d437f71afdf4afdbab99213add99f4b1318fd upstream.

On s390, any write to a page (even from the kernel itself) sets the architecture-specific page dirty bit. Thus when a page is written to via a buffered write, the HW dirty bit gets set, and when we later map and unmap the page, page_remove_rmap() finds the dirty bit and calls set_page_dirty().

Dirtying a page which shouldn't be dirty can cause all sorts of problems for filesystems. The bug we observed in practice is that buffers from the page get freed, so when the page later gets marked dirty and writeback writes it, XFS crashes due to the assertion BUG_ON(!PagePrivate(page)) in page_buffers(), called from xfs_count_page_state().

A similar problem can also happen when the zero_user_segment() call from xfs_vm_writepage() (or block_write_full_page(), for that matter) sets the hardware dirty bit during writeback, buffers later get freed, and the page is then unmapped.

Fix the issue by ignoring the s390 HW dirty bit for page cache pages of mappings with mapping_cap_account_dirty(). This is safe because for such mappings, when a page gets marked writeable in the PTE it is also marked dirty in do_wp_page() or do_page_fault(). When the dirty bit is cleared by clear_page_dirty_for_io(), the page gets write-protected in page_mkclean(). So a pagecache page is writeable if and only if it is dirty.

Thanks to Hugh Dickins for pointing out that the mapping has to have mapping_cap_account_dirty() for things to work, and for proposing a cleaned-up variant of the patch.

The patch has survived about two hours of running fsx-linux on tmpfs while heavily swapping, and several days of running on our build machines where the original problem was triggered.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
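In short: the patch snapshots page_mapping() before the mapcount drop and consults mapping_cap_account_dirty() before honouring the s390 storage-key dirty bit. For orientation before reading the diff, here is a minimal sketch of the resulting logic; it is condensed from the hunks below, not the verbatim patched source, and the mapping_cap_account_dirty() body is paraphrased from memory of that era's <linux/backing-dev.h> (which is why that include is added), so treat it as an assumption:

	#include <linux/backing-dev.h>	/* for mapping_cap_account_dirty() */

	/* Paraphrase of the ~3.x definition: a mapping accounts dirtying
	 * iff its backing device does. */
	static inline int mapping_cap_account_dirty(struct address_space *mapping)
	{
		return bdi_cap_account_dirty(mapping->backing_dev_info);
	}

	void page_remove_rmap(struct page *page)
	{
		/* Snapshot the mapping BEFORE dropping the last mapcount:
		 * once the page is unmapped, it may be truncated and
		 * page->mapping reset to NULL at any moment. */
		struct address_space *mapping = page_mapping(page);

		/* page still mapped by someone else? */
		if (!atomic_add_negative(-1, &page->_mapcount))
			return;

		/* Honour the s390 storage-key dirty bit only for mappings
		 * that do NOT account dirtying (shm, tmpfs, ramfs, swap
		 * cache).  For mapping_cap_account_dirty() mappings the
		 * generic code keeps "PTE writeable <=> page dirty", so
		 * the hardware bit can safely be ignored. */
		if (mapping && !mapping_cap_account_dirty(mapping) &&
		    page_test_and_clear_dirty(page_to_pfn(page), 1))
			set_page_dirty(page);

		/* ... remainder of page_remove_rmap() unchanged ... */
	}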
Diffstat (limited to 'mm')
-rw-r--r--	mm/rmap.c	| 21
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 23295f65ae4..30e44cb236b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -57,6 +57,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/hugetlb.h>
+#include <linux/backing-dev.h>
 
 #include <asm/tlbflush.h>
 
@@ -936,11 +937,8 @@ int page_mkclean(struct page *page)
 
 	if (page_mapped(page)) {
 		struct address_space *mapping = page_mapping(page);
-		if (mapping) {
+		if (mapping)
 			ret = page_mkclean_file(mapping, page);
-			if (page_test_and_clear_dirty(page_to_pfn(page), 1))
-				ret = 1;
-		}
 	}
 
 	return ret;
@@ -1121,6 +1119,8 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
+	struct address_space *mapping = page_mapping(page);
+
 	/* page still mapped by someone else? */
 	if (!atomic_add_negative(-1, &page->_mapcount))
 		return;
@@ -1131,8 +1131,19 @@ void page_remove_rmap(struct page *page)
 	 * this if the page is anon, so about to be freed; but perhaps
 	 * not if it's in swapcache - there might be another pte slot
 	 * containing the swap entry, but page not yet written to swap.
+	 *
+	 * And we can skip it on file pages, so long as the filesystem
+	 * participates in dirty tracking; but need to catch shm and tmpfs
+	 * and ramfs pages which have been modified since creation by read
+	 * fault.
+	 *
+	 * Note that mapping must be decided above, before decrementing
+	 * mapcount (which luckily provides a barrier): once page is unmapped,
+	 * it could be truncated and page->mapping reset to NULL at any moment.
+	 * Note also that we are relying on page_mapping(page) to set mapping
+	 * to &swapper_space when PageSwapCache(page).
 	 */
-	if ((!PageAnon(page) || PageSwapCache(page)) &&
+	if (mapping && !mapping_cap_account_dirty(mapping) &&
 	    page_test_and_clear_dirty(page_to_pfn(page), 1))
 		set_page_dirty(page);
 	/*