aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o2013-04-03 21:02:52 -0500
committerGreg Kroah-Hartman2013-05-07 22:08:24 -0500
commitaab8960fc9c05f82842586235fae4de1082708e1 (patch)
tree637768d60fb3ad49f416ca92882cb543bc5cf665
parentc7e1e426d0b6bc1ab07779615c81faf6176901e5 (diff)
downloadkernel-omap-aab8960fc9c05f82842586235fae4de1082708e1.tar.gz
kernel-omap-aab8960fc9c05f82842586235fae4de1082708e1.tar.xz
kernel-omap-aab8960fc9c05f82842586235fae4de1082708e1.zip
ext4/jbd2: don't wait (forever) for stale tid caused by wraparound
commit d76a3a77113db020d9bb1e894822869410450bd9 upstream. In the case where an inode has a very stale transaction id (tid) in i_datasync_tid or i_sync_tid, it's possible that, after a very large (2**31) number of transactions, the tid number space might wrap, causing tid_geq()'s calculations to fail. Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily", attempted to fix this problem, but it only avoided kjournald spinning forever by fixing the logic in jbd2_log_start_commit(). Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c that might call jbd2_log_start_commit() with a stale tid, those functions will subsequently call jbd2_log_wait_commit() with the same stale tid, and then wait for a very long time. To fix this, we replace the calls to jbd2_log_start_commit() and jbd2_log_wait_commit() with a call to a new function, jbd2_complete_transaction(), which will correctly handle stale tids. As a bonus, jbd2_complete_transaction() will avoid locking j_state_lock for writing unless a commit needs to be started. This should have a small (but probably not measurable) improvement for ext4's scalability. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Reported-by: Ben Hutchings <ben@decadent.org.uk> Reported-by: George Barnett <gbarnett@atlassian.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--fs/ext4/fsync.c3
-rw-r--r--fs/ext4/inode.c3
-rw-r--r--fs/jbd2/journal.c31
-rw-r--r--include/linux/jbd2.h1
4 files changed, 34 insertions, 4 deletions
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 3278e64e57b6..e0ba8a408def 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
166 if (journal->j_flags & JBD2_BARRIER && 166 if (journal->j_flags & JBD2_BARRIER &&
167 !jbd2_trans_will_send_data_barrier(journal, commit_tid)) 167 !jbd2_trans_will_send_data_barrier(journal, commit_tid))
168 needs_barrier = true; 168 needs_barrier = true;
169 jbd2_log_start_commit(journal, commit_tid); 169 ret = jbd2_complete_transaction(journal, commit_tid);
170 ret = jbd2_log_wait_commit(journal, commit_tid);
171 if (needs_barrier) { 170 if (needs_barrier) {
172 err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 171 err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
173 if (!ret) 172 if (!ret)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 22c5c67ab4d1..0dbc84a13385 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -216,8 +216,7 @@ void ext4_evict_inode(struct inode *inode)
216 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 216 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
217 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 217 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
218 218
219 jbd2_log_start_commit(journal, commit_tid); 219 jbd2_complete_transaction(journal, commit_tid);
220 jbd2_log_wait_commit(journal, commit_tid);
221 filemap_write_and_wait(&inode->i_data); 220 filemap_write_and_wait(&inode->i_data);
222 } 221 }
223 truncate_inode_pages(&inode->i_data, 0); 222 truncate_inode_pages(&inode->i_data, 0);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index dbf41f9452db..42f8cf6cd5da 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -698,6 +698,37 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
698} 698}
699 699
700/* 700/*
701 * When this function returns the transaction corresponding to tid
702 * will be completed. If the transaction is currently running, start
703 * committing that transaction before waiting for it to complete. If
704 * the transaction id is stale, it is by definition already completed,
705 * so just return SUCCESS.
706 */
707int jbd2_complete_transaction(journal_t *journal, tid_t tid)
708{
709 int need_to_wait = 1;
710
711 read_lock(&journal->j_state_lock);
712 if (journal->j_running_transaction &&
713 journal->j_running_transaction->t_tid == tid) {
714 if (journal->j_commit_request != tid) {
715 /* transaction not yet started, so request it */
716 read_unlock(&journal->j_state_lock);
717 jbd2_log_start_commit(journal, tid);
718 goto wait_commit;
719 }
720 } else if (!(journal->j_committing_transaction &&
721 journal->j_committing_transaction->t_tid == tid))
722 need_to_wait = 0;
723 read_unlock(&journal->j_state_lock);
724 if (!need_to_wait)
725 return 0;
726wait_commit:
727 return jbd2_log_wait_commit(journal, tid);
728}
729EXPORT_SYMBOL(jbd2_complete_transaction);
730
731/*
701 * Log buffer allocation routines: 732 * Log buffer allocation routines:
702 */ 733 */
703 734
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index e30b66346942..1ac5255d57ca 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1210,6 +1210,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
1210int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); 1210int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
1211int jbd2_journal_force_commit_nested(journal_t *journal); 1211int jbd2_journal_force_commit_nested(journal_t *journal);
1212int jbd2_log_wait_commit(journal_t *journal, tid_t tid); 1212int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
1213int jbd2_complete_transaction(journal_t *journal, tid_t tid);
1213int jbd2_log_do_checkpoint(journal_t *journal); 1214int jbd2_log_do_checkpoint(journal_t *journal);
1214int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); 1215int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
1215 1216