diff options
author | Theodore Ts'o | 2013-04-03 21:02:52 -0500 |
---|---|---|
committer | Greg Kroah-Hartman | 2013-05-07 22:08:24 -0500 |
commit | aab8960fc9c05f82842586235fae4de1082708e1 (patch) | |
tree | 637768d60fb3ad49f416ca92882cb543bc5cf665 | |
parent | c7e1e426d0b6bc1ab07779615c81faf6176901e5 (diff) | |
download | kernel-omap-aab8960fc9c05f82842586235fae4de1082708e1.tar.gz kernel-omap-aab8960fc9c05f82842586235fae4de1082708e1.tar.xz kernel-omap-aab8960fc9c05f82842586235fae4de1082708e1.zip |
ext4/jbd2: don't wait (forever) for stale tid caused by wraparound
commit d76a3a77113db020d9bb1e894822869410450bd9 upstream.
In the case where an inode has a very stale transaction id (tid) in
i_datasync_tid or i_sync_tid, it's possible that, after a very large
(2**31) number of transactions, the tid number space might wrap,
causing tid_geq()'s calculations to fail.
Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified
by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily",
attempted to fix this problem, but it only avoided kjournald spinning
forever by fixing the logic in jbd2_log_start_commit().
Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c
that might call jbd2_log_start_commit() with a stale tid, those
functions will subsequently call jbd2_log_wait_commit() with the same
stale tid, and then wait for a very long time. To fix this, we
replace the calls to jbd2_log_start_commit() and
jbd2_log_wait_commit() with a call to a new function,
jbd2_complete_transaction(), which will correctly handle stale tids.
As a bonus, jbd2_complete_transaction() will avoid locking
j_state_lock for writing unless a commit needs to be started. This
should have a small (but probably not measurable) improvement for
ext4's scalability.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Reported-by: George Barnett <gbarnett@atlassian.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | fs/ext4/fsync.c | 3 | ||||
-rw-r--r-- | fs/ext4/inode.c | 3 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 31 | ||||
-rw-r--r-- | include/linux/jbd2.h | 1 |
4 files changed, 34 insertions, 4 deletions
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 3278e64e57b6..e0ba8a408def 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
166 | if (journal->j_flags & JBD2_BARRIER && | 166 | if (journal->j_flags & JBD2_BARRIER && |
167 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) | 167 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
168 | needs_barrier = true; | 168 | needs_barrier = true; |
169 | jbd2_log_start_commit(journal, commit_tid); | 169 | ret = jbd2_complete_transaction(journal, commit_tid); |
170 | ret = jbd2_log_wait_commit(journal, commit_tid); | ||
171 | if (needs_barrier) { | 170 | if (needs_barrier) { |
172 | err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 171 | err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
173 | if (!ret) | 172 | if (!ret) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 22c5c67ab4d1..0dbc84a13385 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -216,8 +216,7 @@ void ext4_evict_inode(struct inode *inode) | |||
216 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 216 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
217 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | 217 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; |
218 | 218 | ||
219 | jbd2_log_start_commit(journal, commit_tid); | 219 | jbd2_complete_transaction(journal, commit_tid); |
220 | jbd2_log_wait_commit(journal, commit_tid); | ||
221 | filemap_write_and_wait(&inode->i_data); | 220 | filemap_write_and_wait(&inode->i_data); |
222 | } | 221 | } |
223 | truncate_inode_pages(&inode->i_data, 0); | 222 | truncate_inode_pages(&inode->i_data, 0); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index dbf41f9452db..42f8cf6cd5da 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -698,6 +698,37 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
698 | } | 698 | } |
699 | 699 | ||
700 | /* | 700 | /* |
701 | * When this function returns the transaction corresponding to tid | ||
702 | * will be completed. If the transaction is currently running, start | ||
703 | * committing that transaction before waiting for it to complete. If | ||
704 | * the transaction id is stale, it is by definition already completed, | ||
705 | * so just return 0 (success). | ||
706 | */ | ||
707 | int jbd2_complete_transaction(journal_t *journal, tid_t tid) | ||
708 | { | ||
709 | int need_to_wait = 1; | ||
710 | |||
711 | read_lock(&journal->j_state_lock); | ||
712 | if (journal->j_running_transaction && | ||
713 | journal->j_running_transaction->t_tid == tid) { | ||
714 | if (journal->j_commit_request != tid) { | ||
715 | /* transaction not yet started, so request it */ | ||
716 | read_unlock(&journal->j_state_lock); | ||
717 | jbd2_log_start_commit(journal, tid); | ||
718 | goto wait_commit; | ||
719 | } | ||
720 | } else if (!(journal->j_committing_transaction && | ||
721 | journal->j_committing_transaction->t_tid == tid)) | ||
722 | need_to_wait = 0; | ||
723 | read_unlock(&journal->j_state_lock); | ||
724 | if (!need_to_wait) | ||
725 | return 0; | ||
726 | wait_commit: | ||
727 | return jbd2_log_wait_commit(journal, tid); | ||
728 | } | ||
729 | EXPORT_SYMBOL(jbd2_complete_transaction); | ||
730 | |||
731 | /* | ||
701 | * Log buffer allocation routines: | 732 | * Log buffer allocation routines: |
702 | */ | 733 | */ |
703 | 734 | ||
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e30b66346942..1ac5255d57ca 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -1210,6 +1210,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t tid); | |||
1210 | int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); | 1210 | int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); |
1211 | int jbd2_journal_force_commit_nested(journal_t *journal); | 1211 | int jbd2_journal_force_commit_nested(journal_t *journal); |
1212 | int jbd2_log_wait_commit(journal_t *journal, tid_t tid); | 1212 | int jbd2_log_wait_commit(journal_t *journal, tid_t tid); |
1213 | int jbd2_complete_transaction(journal_t *journal, tid_t tid); | ||
1213 | int jbd2_log_do_checkpoint(journal_t *journal); | 1214 | int jbd2_log_do_checkpoint(journal_t *journal); |
1214 | int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); | 1215 | int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); |
1215 | 1216 | ||