about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
authorChris Mason2013-03-26 12:07:00 -0500
committerGreg Kroah-Hartman2013-04-05 11:26:02 -0500
commitda3cbc8328e6062dbb2bf4a7d6c5f7a4afbec9df (patch)
tree36032fbc1ee5cd3de96f89f8718eb367d945d8a3 /fs
parent8e0bf542fadde9a9ef58c46fa1411dd6cdfb3b14 (diff)
downloadkernel-omap-da3cbc8328e6062dbb2bf4a7d6c5f7a4afbec9df.tar.gz
kernel-omap-da3cbc8328e6062dbb2bf4a7d6c5f7a4afbec9df.tar.xz
kernel-omap-da3cbc8328e6062dbb2bf4a7d6c5f7a4afbec9df.zip
Btrfs: fix race between mmap writes and compression
commit 4adaa611020fa6ac65b0ac8db78276af4ec04e63 upstream.

Btrfs uses page_mkwrite to ensure stable pages during crc calculations and mmap workloads. We call clear_page_dirty_for_io before we do any crcs, and this forces any application with the file mapped to wait for the crc to finish before it is allowed to change the file.

With compression on, the clear_page_dirty_for_io step is happening after we've compressed the pages. This means the applications might be changing the pages while we are compressing them, and some of those modifications might not hit the disk.

This commit adds the clear_page_dirty_for_io before compression starts and makes sure to redirty the page if we have to fallback to uncompressed IO as well.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Reported-by: Alexandre Oliva <oliva@gnu.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/inode.c14
3 files changed, 49 insertions, 0 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1b319df29eee..125397eb111d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1258,6 +1258,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1258 GFP_NOFS); 1258 GFP_NOFS);
1259} 1259}
1260 1260
1261int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1262{
1263 unsigned long index = start >> PAGE_CACHE_SHIFT;
1264 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1265 struct page *page;
1266
1267 while (index <= end_index) {
1268 page = find_get_page(inode->i_mapping, index);
1269 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1270 clear_page_dirty_for_io(page);
1271 page_cache_release(page);
1272 index++;
1273 }
1274 return 0;
1275}
1276
1277int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1278{
1279 unsigned long index = start >> PAGE_CACHE_SHIFT;
1280 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1281 struct page *page;
1282
1283 while (index <= end_index) {
1284 page = find_get_page(inode->i_mapping, index);
1285 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1286 account_page_redirty(page);
1287 __set_page_dirty_nobuffers(page);
1288 page_cache_release(page);
1289 index++;
1290 }
1291 return 0;
1292}
1293
1261/* 1294/*
1262 * helper function to set both pages and extents in the tree writeback 1295 * helper function to set both pages and extents in the tree writeback
1263 */ 1296 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2eacfabd3263..715b4748c974 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -329,6 +329,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
329 unsigned long *map_len); 329 unsigned long *map_len);
330int extent_range_uptodate(struct extent_io_tree *tree, 330int extent_range_uptodate(struct extent_io_tree *tree,
331 u64 start, u64 end); 331 u64 start, u64 end);
332int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
333int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
332int extent_clear_unlock_delalloc(struct inode *inode, 334int extent_clear_unlock_delalloc(struct inode *inode,
333 struct extent_io_tree *tree, 335 struct extent_io_tree *tree,
334 u64 start, u64 end, struct page *locked_page, 336 u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 659ea815dbf3..7c4e6ccdba3f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -352,6 +352,7 @@ static noinline int compress_file_range(struct inode *inode,
352 int i; 352 int i;
353 int will_compress; 353 int will_compress;
354 int compress_type = root->fs_info->compress_type; 354 int compress_type = root->fs_info->compress_type;
355 int redirty = 0;
355 356
356 /* if this is a small write inside eof, kick off a defrag */ 357 /* if this is a small write inside eof, kick off a defrag */
357 if ((end - start + 1) < 16 * 1024 && 358 if ((end - start + 1) < 16 * 1024 &&
@@ -414,6 +415,17 @@ again:
414 if (BTRFS_I(inode)->force_compress) 415 if (BTRFS_I(inode)->force_compress)
415 compress_type = BTRFS_I(inode)->force_compress; 416 compress_type = BTRFS_I(inode)->force_compress;
416 417
418 /*
419 * we need to call clear_page_dirty_for_io on each
420 * page in the range. Otherwise applications with the file
421 * mmap'd can wander in and change the page contents while
422 * we are compressing them.
423 *
424 * If the compression fails for any reason, we set the pages
425 * dirty again later on.
426 */
427 extent_range_clear_dirty_for_io(inode, start, end);
428 redirty = 1;
417 ret = btrfs_compress_pages(compress_type, 429 ret = btrfs_compress_pages(compress_type,
418 inode->i_mapping, start, 430 inode->i_mapping, start,
419 total_compressed, pages, 431 total_compressed, pages,
@@ -555,6 +567,8 @@ cleanup_and_bail_uncompressed:
555 __set_page_dirty_nobuffers(locked_page); 567 __set_page_dirty_nobuffers(locked_page);
556 /* unlocked later on in the async handlers */ 568 /* unlocked later on in the async handlers */
557 } 569 }
570 if (redirty)
571 extent_range_redirty_for_io(inode, start, end);
558 add_async_extent(async_cow, start, end - start + 1, 572 add_async_extent(async_cow, start, end - start + 1,
559 0, NULL, 0, BTRFS_COMPRESS_NONE); 573 0, NULL, 0, BTRFS_COMPRESS_NONE);
560 *num_added += 1; 574 *num_added += 1;