about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--Documentation/block/biodoc.txt5
-rw-r--r--block/blk-core.c540
-rw-r--r--block/blk-exec.c4
-rw-r--r--block/blk-flush.c6
-rw-r--r--block/blk-settings.c8
-rw-r--r--block/blk-throttle.c4
-rw-r--r--block/blk.h2
-rw-r--r--block/cfq-iosched.c8
-rw-r--r--block/deadline-iosched.c9
-rw-r--r--block/elevator.c49
-rw-r--r--block/noop-iosched.c8
-rw-r--r--drivers/block/cciss.c6
-rw-r--r--drivers/block/cpqarray.c3
-rw-r--r--drivers/block/drbd/drbd_actlog.c4
-rw-r--r--drivers/block/drbd/drbd_bitmap.c1
-rw-r--r--drivers/block/drbd/drbd_int.h16
-rw-r--r--drivers/block/drbd/drbd_main.c36
-rw-r--r--drivers/block/drbd/drbd_receiver.c29
-rw-r--r--drivers/block/drbd/drbd_req.c4
-rw-r--r--drivers/block/drbd/drbd_worker.c1
-rw-r--r--drivers/block/drbd/drbd_wrappers.h18
-rw-r--r--drivers/block/floppy.c1
-rw-r--r--drivers/block/loop.c13
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/umem.c16
-rw-r--r--drivers/ide/ide-atapi.c3
-rw-r--r--drivers/ide/ide-cd.c13
-rw-r--r--drivers/ide/ide-io.c4
-rw-r--r--drivers/ide/ide-park.c2
-rw-r--r--drivers/md/bitmap.c5
-rw-r--r--drivers/md/dm-crypt.c9
-rw-r--r--drivers/md/dm-io.c2
-rw-r--r--drivers/md/dm-kcopyd.c55
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-raid1.c2
-rw-r--r--drivers/md/dm-table.c24
-rw-r--r--drivers/md/dm.c33
-rw-r--r--drivers/md/linear.c17
-rw-r--r--drivers/md/md.c12
-rw-r--r--drivers/md/multipath.c31
-rw-r--r--drivers/md/raid0.c16
-rw-r--r--drivers/md/raid1.c86
-rw-r--r--drivers/md/raid10.c90
-rw-r--r--drivers/md/raid5.c63
-rw-r--r--drivers/md/raid5.h2
-rw-r--r--drivers/message/i2o/i2o_block.c6
-rw-r--r--drivers/mmc/card/queue.c3
-rw-r--r--drivers/s390/block/dasd.c2
-rw-r--r--drivers/s390/char/tape_block.c1
-rw-r--r--drivers/scsi/scsi_lib.c44
-rw-r--r--drivers/scsi/scsi_transport_fc.c2
-rw-r--r--drivers/scsi/scsi_transport_sas.c6
-rw-r--r--drivers/target/target_core_iblock.c7
-rw-r--r--fs/adfs/inode.c1
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/aio.c77
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/bfs/file.c1
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/disk-io.c79
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/inode.c1
-rw-r--r--fs/btrfs/volumes.c91
-rw-r--r--fs/buffer.c45
-rw-r--r--fs/cifs/file.c30
-rw-r--r--fs/direct-io.c7
-rw-r--r--fs/efs/inode.c1
-rw-r--r--fs/exofs/inode.c1
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/inode.c3
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/page-io.c3
-rw-r--r--fs/fat/inode.c1
-rw-r--r--fs/freevxfs/vxfs_subr.c1
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/lops.c12
-rw-r--r--fs/gfs2/meta_io.c3
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hpfs/file.c1
-rw-r--r--fs/isofs/inode.c1
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/jfs/jfs_metapage.c1
-rw-r--r--fs/logfs/dev_bdev.c2
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/mpage.c8
-rw-r--r--fs/nilfs2/btnode.c7
-rw-r--r--fs/nilfs2/gcinode.c1
-rw-r--r--fs/nilfs2/inode.c1
-rw-r--r--fs/nilfs2/mdt.c9
-rw-r--r--fs/nilfs2/page.c5
-rw-r--r--fs/nilfs2/page.h3
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/ntfs/aops.c4
-rw-r--r--fs/ntfs/compress.c3
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/ocfs2/cluster/heartbeat.c4
-rw-r--r--fs/omfs/file.c1
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/sysv/itree.c1
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/ufs/truncate.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c13
-rw-r--r--include/linux/backing-dev.h16
-rw-r--r--include/linux/blk_types.h4
-rw-r--r--include/linux/blkdev.h73
-rw-r--r--include/linux/buffer_head.h1
-rw-r--r--include/linux/device-mapper.h5
-rw-r--r--include/linux/elevator.h8
-rw-r--r--include/linux/fs.h29
-rw-r--r--include/linux/pagemap.h12
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/swap.h2
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/power/block_io.c2
-rw-r--r--kernel/sched.c12
-rw-r--r--mm/backing-dev.c6
-rw-r--r--mm/filemap.c74
-rw-r--r--mm/memory-failure.c8
-rw-r--r--mm/nommu.c4
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/page_io.c2
-rw-r--r--mm/readahead.c18
-rw-r--r--mm/shmem.c1
-rw-r--r--mm/swap_state.c5
-rw-r--r--mm/swapfile.c37
-rw-r--r--mm/vmscan.c2
137 files changed, 606 insertions, 1533 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index b9a83dd2473..2a7b38c832c 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -963,11 +963,6 @@ elevator_dispatch_fn* fills the dispatch queue with ready requests.
963 963
964elevator_add_req_fn* called to add a new request into the scheduler 964elevator_add_req_fn* called to add a new request into the scheduler
965 965
966elevator_queue_empty_fn returns true if the merge queue is empty.
967 Drivers shouldn't use this, but rather check
968 if elv_next_request is NULL (without losing the
969 request if one exists!)
970
971elevator_former_req_fn 966elevator_former_req_fn
972elevator_latter_req_fn These return the request before or after the 967elevator_latter_req_fn These return the request before or after the
973 one specified in disk sort order. Used by the 968 one specified in disk sort order. Used by the
diff --git a/block/blk-core.c b/block/blk-core.c
index 74d496ccf4d..e1fcf7a2466 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -27,6 +27,7 @@
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/task_io_accounting_ops.h> 28#include <linux/task_io_accounting_ops.h>
29#include <linux/fault-inject.h> 29#include <linux/fault-inject.h>
30#include <linux/list_sort.h>
30 31
31#define CREATE_TRACE_POINTS 32#define CREATE_TRACE_POINTS
32#include <trace/events/block.h> 33#include <trace/events/block.h>
@@ -198,135 +199,43 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
198EXPORT_SYMBOL(blk_dump_rq_flags); 199EXPORT_SYMBOL(blk_dump_rq_flags);
199 200
200/* 201/*
201 * "plug" the device if there are no outstanding requests: this will 202 * Make sure that plugs that were pending when this function was entered,
202 * force the transfer to start only after we have put all the requests 203 * are now complete and requests pushed to the queue.
203 * on the list. 204*/
204 * 205static inline void queue_sync_plugs(struct request_queue *q)
205 * This is called with interrupts off and no requests on the queue and
206 * with the queue lock held.
207 */
208void blk_plug_device(struct request_queue *q)
209{ 206{
210 WARN_ON(!irqs_disabled());
211
212 /* 207 /*
213 * don't plug a stopped queue, it must be paired with blk_start_queue() 208 * If the current process is plugged and has barriers submitted,
214 * which will restart the queueing 209 * we will livelock if we don't unplug first.
215 */ 210 */
216 if (blk_queue_stopped(q)) 211 blk_flush_plug(current);
217 return;
218
219 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
220 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
221 trace_block_plug(q);
222 }
223}
224EXPORT_SYMBOL(blk_plug_device);
225
226/**
227 * blk_plug_device_unlocked - plug a device without queue lock held
228 * @q: The &struct request_queue to plug
229 *
230 * Description:
231 * Like @blk_plug_device(), but grabs the queue lock and disables
232 * interrupts.
233 **/
234void blk_plug_device_unlocked(struct request_queue *q)
235{
236 unsigned long flags;
237
238 spin_lock_irqsave(q->queue_lock, flags);
239 blk_plug_device(q);
240 spin_unlock_irqrestore(q->queue_lock, flags);
241}
242EXPORT_SYMBOL(blk_plug_device_unlocked);
243
244/*
245 * remove the queue from the plugged list, if present. called with
246 * queue lock held and interrupts disabled.
247 */
248int blk_remove_plug(struct request_queue *q)
249{
250 WARN_ON(!irqs_disabled());
251
252 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
253 return 0;
254
255 del_timer(&q->unplug_timer);
256 return 1;
257} 212}
258EXPORT_SYMBOL(blk_remove_plug);
259 213
260/* 214static void blk_delay_work(struct work_struct *work)
261 * remove the plug and let it rip..
262 */
263void __generic_unplug_device(struct request_queue *q)
264{ 215{
265 if (unlikely(blk_queue_stopped(q))) 216 struct request_queue *q;
266 return;
267 if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
268 return;
269 217
270 q->request_fn(q); 218 q = container_of(work, struct request_queue, delay_work.work);
219 spin_lock_irq(q->queue_lock);
220 __blk_run_queue(q, false);
221 spin_unlock_irq(q->queue_lock);
271} 222}
272 223
273/** 224/**
274 * generic_unplug_device - fire a request queue 225 * blk_delay_queue - restart queueing after defined interval
275 * @q: The &struct request_queue in question 226 * @q: The &struct request_queue in question
227 * @msecs: Delay in msecs
276 * 228 *
277 * Description: 229 * Description:
278 * Linux uses plugging to build bigger requests queues before letting 230 * Sometimes queueing needs to be postponed for a little while, to allow
279 * the device have at them. If a queue is plugged, the I/O scheduler 231 * resources to come back. This function will make sure that queueing is
280 * is still adding and merging requests on the queue. Once the queue 232 * restarted around the specified time.
281 * gets unplugged, the request_fn defined for the queue is invoked and 233 */
282 * transfers started. 234void blk_delay_queue(struct request_queue *q, unsigned long msecs)
283 **/
284void generic_unplug_device(struct request_queue *q)
285{
286 if (blk_queue_plugged(q)) {
287 spin_lock_irq(q->queue_lock);
288 __generic_unplug_device(q);
289 spin_unlock_irq(q->queue_lock);
290 }
291}
292EXPORT_SYMBOL(generic_unplug_device);
293
294static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
295 struct page *page)
296{
297 struct request_queue *q = bdi->unplug_io_data;
298
299 blk_unplug(q);
300}
301
302void blk_unplug_work(struct work_struct *work)
303{
304 struct request_queue *q =
305 container_of(work, struct request_queue, unplug_work);
306
307 trace_block_unplug_io(q);
308 q->unplug_fn(q);
309}
310
311void blk_unplug_timeout(unsigned long data)
312{
313 struct request_queue *q = (struct request_queue *)data;
314
315 trace_block_unplug_timer(q);
316 kblockd_schedule_work(q, &q->unplug_work);
317}
318
319void blk_unplug(struct request_queue *q)
320{ 235{
321 /* 236 schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
322 * devices don't necessarily have an ->unplug_fn defined
323 */
324 if (q->unplug_fn) {
325 trace_block_unplug_io(q);
326 q->unplug_fn(q);
327 }
328} 237}
329EXPORT_SYMBOL(blk_unplug); 238EXPORT_SYMBOL(blk_delay_queue);
330 239
331/** 240/**
332 * blk_start_queue - restart a previously stopped queue 241 * blk_start_queue - restart a previously stopped queue
@@ -362,7 +271,7 @@ EXPORT_SYMBOL(blk_start_queue);
362 **/ 271 **/
363void blk_stop_queue(struct request_queue *q) 272void blk_stop_queue(struct request_queue *q)
364{ 273{
365 blk_remove_plug(q); 274 cancel_delayed_work(&q->delay_work);
366 queue_flag_set(QUEUE_FLAG_STOPPED, q); 275 queue_flag_set(QUEUE_FLAG_STOPPED, q);
367} 276}
368EXPORT_SYMBOL(blk_stop_queue); 277EXPORT_SYMBOL(blk_stop_queue);
@@ -387,9 +296,9 @@ EXPORT_SYMBOL(blk_stop_queue);
387 */ 296 */
388void blk_sync_queue(struct request_queue *q) 297void blk_sync_queue(struct request_queue *q)
389{ 298{
390 del_timer_sync(&q->unplug_timer);
391 del_timer_sync(&q->timeout); 299 del_timer_sync(&q->timeout);
392 cancel_work_sync(&q->unplug_work); 300 cancel_delayed_work_sync(&q->delay_work);
301 queue_sync_plugs(q);
393} 302}
394EXPORT_SYMBOL(blk_sync_queue); 303EXPORT_SYMBOL(blk_sync_queue);
395 304
@@ -405,14 +314,9 @@ EXPORT_SYMBOL(blk_sync_queue);
405 */ 314 */
406void __blk_run_queue(struct request_queue *q, bool force_kblockd) 315void __blk_run_queue(struct request_queue *q, bool force_kblockd)
407{ 316{
408 blk_remove_plug(q);
409
410 if (unlikely(blk_queue_stopped(q))) 317 if (unlikely(blk_queue_stopped(q)))
411 return; 318 return;
412 319
413 if (elv_queue_empty(q))
414 return;
415
416 /* 320 /*
417 * Only recurse once to avoid overrunning the stack, let the unplug 321 * Only recurse once to avoid overrunning the stack, let the unplug
418 * handling reinvoke the handler shortly if we already got there. 322 * handling reinvoke the handler shortly if we already got there.
@@ -420,10 +324,8 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
420 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 324 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
421 q->request_fn(q); 325 q->request_fn(q);
422 queue_flag_clear(QUEUE_FLAG_REENTER, q); 326 queue_flag_clear(QUEUE_FLAG_REENTER, q);
423 } else { 327 } else
424 queue_flag_set(QUEUE_FLAG_PLUGGED, q); 328 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
425 kblockd_schedule_work(q, &q->unplug_work);
426 }
427} 329}
428EXPORT_SYMBOL(__blk_run_queue); 330EXPORT_SYMBOL(__blk_run_queue);
429 331
@@ -517,8 +419,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
517 if (!q) 419 if (!q)
518 return NULL; 420 return NULL;
519 421
520 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
521 q->backing_dev_info.unplug_io_data = q;
522 q->backing_dev_info.ra_pages = 422 q->backing_dev_info.ra_pages =
523 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 423 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
524 q->backing_dev_info.state = 0; 424 q->backing_dev_info.state = 0;
@@ -538,13 +438,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
538 438
539 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, 439 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
540 laptop_mode_timer_fn, (unsigned long) q); 440 laptop_mode_timer_fn, (unsigned long) q);
541 init_timer(&q->unplug_timer);
542 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); 441 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
543 INIT_LIST_HEAD(&q->timeout_list); 442 INIT_LIST_HEAD(&q->timeout_list);
544 INIT_LIST_HEAD(&q->flush_queue[0]); 443 INIT_LIST_HEAD(&q->flush_queue[0]);
545 INIT_LIST_HEAD(&q->flush_queue[1]); 444 INIT_LIST_HEAD(&q->flush_queue[1]);
546 INIT_LIST_HEAD(&q->flush_data_in_flight); 445 INIT_LIST_HEAD(&q->flush_data_in_flight);
547 INIT_WORK(&q->unplug_work, blk_unplug_work); 446 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
548 447
549 kobject_init(&q->kobj, &blk_queue_ktype); 448 kobject_init(&q->kobj, &blk_queue_ktype);
550 449
@@ -639,7 +538,6 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
639 q->request_fn = rfn; 538 q->request_fn = rfn;
640 q->prep_rq_fn = NULL; 539 q->prep_rq_fn = NULL;
641 q->unprep_rq_fn = NULL; 540 q->unprep_rq_fn = NULL;
642 q->unplug_fn = generic_unplug_device;
643 q->queue_flags = QUEUE_FLAG_DEFAULT; 541 q->queue_flags = QUEUE_FLAG_DEFAULT;
644 542
645 /* Override internal queue lock with supplied lock pointer */ 543 /* Override internal queue lock with supplied lock pointer */
@@ -677,6 +575,8 @@ int blk_get_queue(struct request_queue *q)
677 575
678static inline void blk_free_request(struct request_queue *q, struct request *rq) 576static inline void blk_free_request(struct request_queue *q, struct request *rq)
679{ 577{
578 BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
579
680 if (rq->cmd_flags & REQ_ELVPRIV) 580 if (rq->cmd_flags & REQ_ELVPRIV)
681 elv_put_request(q, rq); 581 elv_put_request(q, rq);
682 mempool_free(rq, q->rq.rq_pool); 582 mempool_free(rq, q->rq.rq_pool);
@@ -898,8 +798,8 @@ out:
898} 798}
899 799
900/* 800/*
901 * No available requests for this queue, unplug the device and wait for some 801 * No available requests for this queue, wait for some requests to become
902 * requests to become available. 802 * available.
903 * 803 *
904 * Called with q->queue_lock held, and returns with it unlocked. 804 * Called with q->queue_lock held, and returns with it unlocked.
905 */ 805 */
@@ -920,7 +820,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
920 820
921 trace_block_sleeprq(q, bio, rw_flags & 1); 821 trace_block_sleeprq(q, bio, rw_flags & 1);
922 822
923 __generic_unplug_device(q);
924 spin_unlock_irq(q->queue_lock); 823 spin_unlock_irq(q->queue_lock);
925 io_schedule(); 824 io_schedule();
926 825
@@ -1042,6 +941,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
1042} 941}
1043EXPORT_SYMBOL(blk_requeue_request); 942EXPORT_SYMBOL(blk_requeue_request);
1044 943
944static void add_acct_request(struct request_queue *q, struct request *rq,
945 int where)
946{
947 drive_stat_acct(rq, 1);
948 __elv_add_request(q, rq, where);
949}
950
1045/** 951/**
1046 * blk_insert_request - insert a special request into a request queue 952 * blk_insert_request - insert a special request into a request queue
1047 * @q: request queue where request should be inserted 953 * @q: request queue where request should be inserted
@@ -1084,8 +990,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
1084 if (blk_rq_tagged(rq)) 990 if (blk_rq_tagged(rq))
1085 blk_queue_end_tag(q, rq); 991 blk_queue_end_tag(q, rq);
1086 992
1087 drive_stat_acct(rq, 1); 993 add_acct_request(q, rq, where);
1088 __elv_add_request(q, rq, where, 0);
1089 __blk_run_queue(q, false); 994 __blk_run_queue(q, false);
1090 spin_unlock_irqrestore(q->queue_lock, flags); 995 spin_unlock_irqrestore(q->queue_lock, flags);
1091} 996}
@@ -1206,6 +1111,113 @@ void blk_add_request_payload(struct request *rq, struct page *page,
1206} 1111}
1207EXPORT_SYMBOL_GPL(blk_add_request_payload); 1112EXPORT_SYMBOL_GPL(blk_add_request_payload);
1208 1113
1114static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1115 struct bio *bio)
1116{
1117 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1118
1119 /*
1120 * Debug stuff, kill later
1121 */
1122 if (!rq_mergeable(req)) {
1123 blk_dump_rq_flags(req, "back");
1124 return false;
1125 }
1126
1127 if (!ll_back_merge_fn(q, req, bio))
1128 return false;
1129
1130 trace_block_bio_backmerge(q, bio);
1131
1132 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1133 blk_rq_set_mixed_merge(req);
1134
1135 req->biotail->bi_next = bio;
1136 req->biotail = bio;
1137 req->__data_len += bio->bi_size;
1138 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1139
1140 drive_stat_acct(req, 0);
1141 return true;
1142}
1143
1144static bool bio_attempt_front_merge(struct request_queue *q,
1145 struct request *req, struct bio *bio)
1146{
1147 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1148 sector_t sector;
1149
1150 /*
1151 * Debug stuff, kill later
1152 */
1153 if (!rq_mergeable(req)) {
1154 blk_dump_rq_flags(req, "front");
1155 return false;
1156 }
1157
1158 if (!ll_front_merge_fn(q, req, bio))
1159 return false;
1160
1161 trace_block_bio_frontmerge(q, bio);
1162
1163 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1164 blk_rq_set_mixed_merge(req);
1165
1166 sector = bio->bi_sector;
1167
1168 bio->bi_next = req->bio;
1169 req->bio = bio;
1170
1171 /*
1172 * may not be valid. if the low level driver said
1173 * it didn't need a bounce buffer then it better
1174 * not touch req->buffer either...
1175 */
1176 req->buffer = bio_data(bio);
1177 req->__sector = bio->bi_sector;
1178 req->__data_len += bio->bi_size;
1179 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1180
1181 drive_stat_acct(req, 0);
1182 return true;
1183}
1184
1185/*
1186 * Attempts to merge with the plugged list in the current process. Returns
1187 * true if merge was successful, otherwise false.
1188 */
1189static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
1190 struct bio *bio)
1191{
1192 struct blk_plug *plug;
1193 struct request *rq;
1194 bool ret = false;
1195
1196 plug = tsk->plug;
1197 if (!plug)
1198 goto out;
1199
1200 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1201 int el_ret;
1202
1203 if (rq->q != q)
1204 continue;
1205
1206 el_ret = elv_try_merge(rq, bio);
1207 if (el_ret == ELEVATOR_BACK_MERGE) {
1208 ret = bio_attempt_back_merge(q, rq, bio);
1209 if (ret)
1210 break;
1211 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1212 ret = bio_attempt_front_merge(q, rq, bio);
1213 if (ret)
1214 break;
1215 }
1216 }
1217out:
1218 return ret;
1219}
1220
1209void init_request_from_bio(struct request *req, struct bio *bio) 1221void init_request_from_bio(struct request *req, struct bio *bio)
1210{ 1222{
1211 req->cpu = bio->bi_comp_cpu; 1223 req->cpu = bio->bi_comp_cpu;
@@ -1221,26 +1233,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1221 blk_rq_bio_prep(req->q, req, bio); 1233 blk_rq_bio_prep(req->q, req, bio);
1222} 1234}
1223 1235
1224/*
1225 * Only disabling plugging for non-rotational devices if it does tagging
1226 * as well, otherwise we do need the proper merging
1227 */
1228static inline bool queue_should_plug(struct request_queue *q)
1229{
1230 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1231}
1232
1233static int __make_request(struct request_queue *q, struct bio *bio) 1236static int __make_request(struct request_queue *q, struct bio *bio)
1234{ 1237{
1235 struct request *req;
1236 int el_ret;
1237 unsigned int bytes = bio->bi_size;
1238 const unsigned short prio = bio_prio(bio);
1239 const bool sync = !!(bio->bi_rw & REQ_SYNC); 1238 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1240 const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); 1239 struct blk_plug *plug;
1241 const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; 1240 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1242 int where = ELEVATOR_INSERT_SORT; 1241 struct request *req;
1243 int rw_flags;
1244 1242
1245 /* 1243 /*
1246 * low level driver can indicate that it wants pages above a 1244 * low level driver can indicate that it wants pages above a
@@ -1249,78 +1247,36 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1249 */ 1247 */
1250 blk_queue_bounce(q, &bio); 1248 blk_queue_bounce(q, &bio);
1251 1249
1252 spin_lock_irq(q->queue_lock);
1253
1254 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { 1250 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1251 spin_lock_irq(q->queue_lock);
1255 where = ELEVATOR_INSERT_FLUSH; 1252 where = ELEVATOR_INSERT_FLUSH;
1256 goto get_rq; 1253 goto get_rq;
1257 } 1254 }
1258 1255
1259 if (elv_queue_empty(q)) 1256 /*
1260 goto get_rq; 1257 * Check if we can merge with the plugged list before grabbing
1261 1258 * any locks.
1262 el_ret = elv_merge(q, &req, bio); 1259 */
1263 switch (el_ret) { 1260 if (attempt_plug_merge(current, q, bio))
1264 case ELEVATOR_BACK_MERGE:
1265 BUG_ON(!rq_mergeable(req));
1266
1267 if (!ll_back_merge_fn(q, req, bio))
1268 break;
1269
1270 trace_block_bio_backmerge(q, bio);
1271
1272 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1273 blk_rq_set_mixed_merge(req);
1274
1275 req->biotail->bi_next = bio;
1276 req->biotail = bio;
1277 req->__data_len += bytes;
1278 req->ioprio = ioprio_best(req->ioprio, prio);
1279 if (!blk_rq_cpu_valid(req))
1280 req->cpu = bio->bi_comp_cpu;
1281 drive_stat_acct(req, 0);
1282 elv_bio_merged(q, req, bio);
1283 if (!attempt_back_merge(q, req))
1284 elv_merged_request(q, req, el_ret);
1285 goto out; 1261 goto out;
1286 1262
1287 case ELEVATOR_FRONT_MERGE: 1263 spin_lock_irq(q->queue_lock);
1288 BUG_ON(!rq_mergeable(req));
1289
1290 if (!ll_front_merge_fn(q, req, bio))
1291 break;
1292
1293 trace_block_bio_frontmerge(q, bio);
1294 1264
1295 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { 1265 el_ret = elv_merge(q, &req, bio);
1296 blk_rq_set_mixed_merge(req); 1266 if (el_ret == ELEVATOR_BACK_MERGE) {
1297 req->cmd_flags &= ~REQ_FAILFAST_MASK; 1267 BUG_ON(req->cmd_flags & REQ_ON_PLUG);
1298 req->cmd_flags |= ff; 1268 if (bio_attempt_back_merge(q, req, bio)) {
1269 if (!attempt_back_merge(q, req))
1270 elv_merged_request(q, req, el_ret);
1271 goto out_unlock;
1272 }
1273 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1274 BUG_ON(req->cmd_flags & REQ_ON_PLUG);
1275 if (bio_attempt_front_merge(q, req, bio)) {
1276 if (!attempt_front_merge(q, req))
1277 elv_merged_request(q, req, el_ret);
1278 goto out_unlock;
1299 } 1279 }
1300
1301 bio->bi_next = req->bio;
1302 req->bio = bio;
1303
1304 /*
1305 * may not be valid. if the low level driver said
1306 * it didn't need a bounce buffer then it better
1307 * not touch req->buffer either...
1308 */
1309 req->buffer = bio_data(bio);
1310 req->__sector = bio->bi_sector;
1311 req->__data_len += bytes;
1312 req->ioprio = ioprio_best(req->ioprio, prio);
1313 if (!blk_rq_cpu_valid(req))
1314 req->cpu = bio->bi_comp_cpu;
1315 drive_stat_acct(req, 0);
1316 elv_bio_merged(q, req, bio);
1317 if (!attempt_front_merge(q, req))
1318 elv_merged_request(q, req, el_ret);
1319 goto out;
1320
1321 /* ELV_NO_MERGE: elevator says don't/can't merge. */
1322 default:
1323 ;
1324 } 1280 }
1325 1281
1326get_rq: 1282get_rq:
@@ -1347,20 +1303,35 @@ get_rq:
1347 */ 1303 */
1348 init_request_from_bio(req, bio); 1304 init_request_from_bio(req, bio);
1349 1305
1350 spin_lock_irq(q->queue_lock);
1351 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1306 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1352 bio_flagged(bio, BIO_CPU_AFFINE)) 1307 bio_flagged(bio, BIO_CPU_AFFINE)) {
1353 req->cpu = blk_cpu_to_group(smp_processor_id()); 1308 req->cpu = blk_cpu_to_group(get_cpu());
1354 if (queue_should_plug(q) && elv_queue_empty(q)) 1309 put_cpu();
1355 blk_plug_device(q); 1310 }
1356 1311
1357 /* insert the request into the elevator */ 1312 plug = current->plug;
1358 drive_stat_acct(req, 1); 1313 if (plug) {
1359 __elv_add_request(q, req, where, 0); 1314 if (!plug->should_sort && !list_empty(&plug->list)) {
1315 struct request *__rq;
1316
1317 __rq = list_entry_rq(plug->list.prev);
1318 if (__rq->q != q)
1319 plug->should_sort = 1;
1320 }
1321 /*
1322 * Debug flag, kill later
1323 */
1324 req->cmd_flags |= REQ_ON_PLUG;
1325 list_add_tail(&req->queuelist, &plug->list);
1326 drive_stat_acct(req, 1);
1327 } else {
1328 spin_lock_irq(q->queue_lock);
1329 add_acct_request(q, req, where);
1330 __blk_run_queue(q, false);
1331out_unlock:
1332 spin_unlock_irq(q->queue_lock);
1333 }
1360out: 1334out:
1361 if (unplug || !queue_should_plug(q))
1362 __generic_unplug_device(q);
1363 spin_unlock_irq(q->queue_lock);
1364 return 0; 1335 return 0;
1365} 1336}
1366 1337
@@ -1763,9 +1734,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1763 */ 1734 */
1764 BUG_ON(blk_queued_rq(rq)); 1735 BUG_ON(blk_queued_rq(rq));
1765 1736
1766 drive_stat_acct(rq, 1); 1737 add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
1767 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1768
1769 spin_unlock_irqrestore(q->queue_lock, flags); 1738 spin_unlock_irqrestore(q->queue_lock, flags);
1770 1739
1771 return 0; 1740 return 0;
@@ -2643,6 +2612,113 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2643} 2612}
2644EXPORT_SYMBOL(kblockd_schedule_work); 2613EXPORT_SYMBOL(kblockd_schedule_work);
2645 2614
2615int kblockd_schedule_delayed_work(struct request_queue *q,
2616 struct delayed_work *dwork, unsigned long delay)
2617{
2618 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2619}
2620EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2621
2622#define PLUG_MAGIC 0x91827364
2623
2624void blk_start_plug(struct blk_plug *plug)
2625{
2626 struct task_struct *tsk = current;
2627
2628 plug->magic = PLUG_MAGIC;
2629 INIT_LIST_HEAD(&plug->list);
2630 plug->should_sort = 0;
2631
2632 /*
2633 * If this is a nested plug, don't actually assign it. It will be
2634 * flushed on its own.
2635 */
2636 if (!tsk->plug) {
2637 /*
2638 * Store ordering should not be needed here, since a potential
2639 * preempt will imply a full memory barrier
2640 */
2641 tsk->plug = plug;
2642 }
2643}
2644EXPORT_SYMBOL(blk_start_plug);
2645
2646static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
2647{
2648 struct request *rqa = container_of(a, struct request, queuelist);
2649 struct request *rqb = container_of(b, struct request, queuelist);
2650
2651 return !(rqa->q == rqb->q);
2652}
2653
2654static void flush_plug_list(struct blk_plug *plug)
2655{
2656 struct request_queue *q;
2657 unsigned long flags;
2658 struct request *rq;
2659
2660 BUG_ON(plug->magic != PLUG_MAGIC);
2661
2662 if (list_empty(&plug->list))
2663 return;
2664
2665 if (plug->should_sort)
2666 list_sort(NULL, &plug->list, plug_rq_cmp);
2667
2668 q = NULL;
2669 local_irq_save(flags);
2670 while (!list_empty(&plug->list)) {
2671 rq = list_entry_rq(plug->list.next);
2672 list_del_init(&rq->queuelist);
2673 BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
2674 BUG_ON(!rq->q);
2675 if (rq->q != q) {
2676 if (q) {
2677 __blk_run_queue(q, false);
2678 spin_unlock(q->queue_lock);
2679 }
2680 q = rq->q;
2681 spin_lock(q->queue_lock);
2682 }
2683 rq->cmd_flags &= ~REQ_ON_PLUG;
2684
2685 /*
2686 * rq is already accounted, so use raw insert
2687 */
2688 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT);
2689 }
2690
2691 if (q) {
2692 __blk_run_queue(q, false);
2693 spin_unlock(q->queue_lock);
2694 }
2695
2696 BUG_ON(!list_empty(&plug->list));
2697 local_irq_restore(flags);
2698}
2699
2700static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
2701{
2702 flush_plug_list(plug);
2703
2704 if (plug == tsk->plug)
2705 tsk->plug = NULL;
2706}
2707
2708void blk_finish_plug(struct blk_plug *plug)
2709{
2710 if (plug)
2711 __blk_finish_plug(current, plug);
2712}
2713EXPORT_SYMBOL(blk_finish_plug);
2714
2715void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
2716{
2717 __blk_finish_plug(tsk, plug);
2718 tsk->plug = plug;
2719}
2720EXPORT_SYMBOL(__blk_flush_plug);
2721
2646int __init blk_dev_init(void) 2722int __init blk_dev_init(void)
2647{ 2723{
2648 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2724 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-exec.c b/block/blk-exec.c
index cf1456a02ac..7482b7fa863 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
54 rq->end_io = done; 54 rq->end_io = done;
55 WARN_ON(irqs_disabled()); 55 WARN_ON(irqs_disabled());
56 spin_lock_irq(q->queue_lock); 56 spin_lock_irq(q->queue_lock);
57 __elv_add_request(q, rq, where, 1); 57 __elv_add_request(q, rq, where);
58 __generic_unplug_device(q); 58 __blk_run_queue(q, false);
59 /* the queue is stopped so it won't be plugged+unplugged */ 59 /* the queue is stopped so it won't be plugged+unplugged */
60 if (rq->cmd_type == REQ_TYPE_PM_RESUME) 60 if (rq->cmd_type == REQ_TYPE_PM_RESUME)
61 q->request_fn(q); 61 q->request_fn(q);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 0bd8c9c5d6e..93d5fd8e51e 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -194,7 +194,6 @@ static void flush_end_io(struct request *flush_rq, int error)
194{ 194{
195 struct request_queue *q = flush_rq->q; 195 struct request_queue *q = flush_rq->q;
196 struct list_head *running = &q->flush_queue[q->flush_running_idx]; 196 struct list_head *running = &q->flush_queue[q->flush_running_idx];
197 bool was_empty = elv_queue_empty(q);
198 bool queued = false; 197 bool queued = false;
199 struct request *rq, *n; 198 struct request *rq, *n;
200 199
@@ -218,7 +217,7 @@ static void flush_end_io(struct request *flush_rq, int error)
218 * from request completion path and calling directly into 217 * from request completion path and calling directly into
219 * request_fn may confuse the driver. Always use kblockd. 218 * request_fn may confuse the driver. Always use kblockd.
220 */ 219 */
221 if (queued && was_empty) 220 if (queued)
222 __blk_run_queue(q, true); 221 __blk_run_queue(q, true);
223} 222}
224 223
@@ -269,13 +268,12 @@ static bool blk_kick_flush(struct request_queue *q)
269static void flush_data_end_io(struct request *rq, int error) 268static void flush_data_end_io(struct request *rq, int error)
270{ 269{
271 struct request_queue *q = rq->q; 270 struct request_queue *q = rq->q;
272 bool was_empty = elv_queue_empty(q);
273 271
274 /* 272 /*
275 * After populating an empty queue, kick it to avoid stall. Read 273 * After populating an empty queue, kick it to avoid stall. Read
276 * the comment in flush_end_io(). 274 * the comment in flush_end_io().
277 */ 275 */
278 if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty) 276 if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
279 __blk_run_queue(q, true); 277 __blk_run_queue(q, true);
280} 278}
281 279
diff --git a/block/blk-settings.c b/block/blk-settings.c
index df649fa59de..1fa76929359 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -164,14 +164,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
164 blk_queue_congestion_threshold(q); 164 blk_queue_congestion_threshold(q);
165 q->nr_batching = BLK_BATCH_REQ; 165 q->nr_batching = BLK_BATCH_REQ;
166 166
167 q->unplug_thresh = 4; /* hmm */
168 q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */
169 if (q->unplug_delay == 0)
170 q->unplug_delay = 1;
171
172 q->unplug_timer.function = blk_unplug_timeout;
173 q->unplug_timer.data = (unsigned long)q;
174
175 blk_set_default_limits(&q->limits); 167 blk_set_default_limits(&q->limits);
176 blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); 168 blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
177 169
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 32dd3e4b041..37abbfc6859 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -768,6 +768,7 @@ static int throtl_dispatch(struct request_queue *q)
768 unsigned int nr_disp = 0; 768 unsigned int nr_disp = 0;
769 struct bio_list bio_list_on_stack; 769 struct bio_list bio_list_on_stack;
770 struct bio *bio; 770 struct bio *bio;
771 struct blk_plug plug;
771 772
772 spin_lock_irq(q->queue_lock); 773 spin_lock_irq(q->queue_lock);
773 774
@@ -796,9 +797,10 @@ out:
796 * immediate dispatch 797 * immediate dispatch
797 */ 798 */
798 if (nr_disp) { 799 if (nr_disp) {
800 blk_start_plug(&plug);
799 while((bio = bio_list_pop(&bio_list_on_stack))) 801 while((bio = bio_list_pop(&bio_list_on_stack)))
800 generic_make_request(bio); 802 generic_make_request(bio);
801 blk_unplug(q); 803 blk_finish_plug(&plug);
802 } 804 }
803 return nr_disp; 805 return nr_disp;
804} 806}
diff --git a/block/blk.h b/block/blk.h
index 284b500852b..49d21af81d0 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
18void blk_dequeue_request(struct request *rq); 18void blk_dequeue_request(struct request *rq);
19void __blk_queue_free_tags(struct request_queue *q); 19void __blk_queue_free_tags(struct request_queue *q);
20 20
21void blk_unplug_work(struct work_struct *work);
22void blk_unplug_timeout(unsigned long data);
23void blk_rq_timed_out_timer(unsigned long data); 21void blk_rq_timed_out_timer(unsigned long data);
24void blk_delete_timer(struct request *); 22void blk_delete_timer(struct request *);
25void blk_add_timer(struct request *); 23void blk_add_timer(struct request *);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9697053f80b..c826ef81c67 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -500,13 +500,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
500 } 500 }
501} 501}
502 502
503static int cfq_queue_empty(struct request_queue *q)
504{
505 struct cfq_data *cfqd = q->elevator->elevator_data;
506
507 return !cfqd->rq_queued;
508}
509
510/* 503/*
511 * Scale schedule slice based on io priority. Use the sync time slice only 504 * Scale schedule slice based on io priority. Use the sync time slice only
512 * if a queue is marked sync and has sync io queued. A sync queue with async 505 * if a queue is marked sync and has sync io queued. A sync queue with async
@@ -4080,7 +4073,6 @@ static struct elevator_type iosched_cfq = {
4080 .elevator_add_req_fn = cfq_insert_request, 4073 .elevator_add_req_fn = cfq_insert_request,
4081 .elevator_activate_req_fn = cfq_activate_request, 4074 .elevator_activate_req_fn = cfq_activate_request,
4082 .elevator_deactivate_req_fn = cfq_deactivate_request, 4075 .elevator_deactivate_req_fn = cfq_deactivate_request,
4083 .elevator_queue_empty_fn = cfq_queue_empty,
4084 .elevator_completed_req_fn = cfq_completed_request, 4076 .elevator_completed_req_fn = cfq_completed_request,
4085 .elevator_former_req_fn = elv_rb_former_request, 4077 .elevator_former_req_fn = elv_rb_former_request,
4086 .elevator_latter_req_fn = elv_rb_latter_request, 4078 .elevator_latter_req_fn = elv_rb_latter_request,
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index b547cbca7b2..5139c0ea186 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -326,14 +326,6 @@ dispatch_request:
326 return 1; 326 return 1;
327} 327}
328 328
329static int deadline_queue_empty(struct request_queue *q)
330{
331 struct deadline_data *dd = q->elevator->elevator_data;
332
333 return list_empty(&dd->fifo_list[WRITE])
334 && list_empty(&dd->fifo_list[READ]);
335}
336
337static void deadline_exit_queue(struct elevator_queue *e) 329static void deadline_exit_queue(struct elevator_queue *e)
338{ 330{
339 struct deadline_data *dd = e->elevator_data; 331 struct deadline_data *dd = e->elevator_data;
@@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = {
445 .elevator_merge_req_fn = deadline_merged_requests, 437 .elevator_merge_req_fn = deadline_merged_requests,
446 .elevator_dispatch_fn = deadline_dispatch_requests, 438 .elevator_dispatch_fn = deadline_dispatch_requests,
447 .elevator_add_req_fn = deadline_add_request, 439 .elevator_add_req_fn = deadline_add_request,
448 .elevator_queue_empty_fn = deadline_queue_empty,
449 .elevator_former_req_fn = elv_rb_former_request, 440 .elevator_former_req_fn = elv_rb_former_request,
450 .elevator_latter_req_fn = elv_rb_latter_request, 441 .elevator_latter_req_fn = elv_rb_latter_request,
451 .elevator_init_fn = deadline_init_queue, 442 .elevator_init_fn = deadline_init_queue,
diff --git a/block/elevator.c b/block/elevator.c
index fabf3675c91..542ce826b40 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
113} 113}
114EXPORT_SYMBOL(elv_rq_merge_ok); 114EXPORT_SYMBOL(elv_rq_merge_ok);
115 115
116static inline int elv_try_merge(struct request *__rq, struct bio *bio) 116int elv_try_merge(struct request *__rq, struct bio *bio)
117{ 117{
118 int ret = ELEVATOR_NO_MERGE; 118 int ret = ELEVATOR_NO_MERGE;
119 119
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
421 struct list_head *entry; 421 struct list_head *entry;
422 int stop_flags; 422 int stop_flags;
423 423
424 BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
425
424 if (q->last_merge == rq) 426 if (q->last_merge == rq)
425 q->last_merge = NULL; 427 q->last_merge = NULL;
426 428
@@ -617,21 +619,12 @@ void elv_quiesce_end(struct request_queue *q)
617 619
618void elv_insert(struct request_queue *q, struct request *rq, int where) 620void elv_insert(struct request_queue *q, struct request *rq, int where)
619{ 621{
620 int unplug_it = 1;
621
622 trace_block_rq_insert(q, rq); 622 trace_block_rq_insert(q, rq);
623 623
624 rq->q = q; 624 rq->q = q;
625 625
626 switch (where) { 626 switch (where) {
627 case ELEVATOR_INSERT_REQUEUE: 627 case ELEVATOR_INSERT_REQUEUE:
628 /*
629 * Most requeues happen because of a busy condition,
630 * don't force unplug of the queue for that case.
631 * Clear unplug_it and fall through.
632 */
633 unplug_it = 0;
634
635 case ELEVATOR_INSERT_FRONT: 628 case ELEVATOR_INSERT_FRONT:
636 rq->cmd_flags |= REQ_SOFTBARRIER; 629 rq->cmd_flags |= REQ_SOFTBARRIER;
637 list_add(&rq->queuelist, &q->queue_head); 630 list_add(&rq->queuelist, &q->queue_head);
@@ -677,25 +670,17 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
677 rq->cmd_flags |= REQ_SOFTBARRIER; 670 rq->cmd_flags |= REQ_SOFTBARRIER;
678 blk_insert_flush(rq); 671 blk_insert_flush(rq);
679 break; 672 break;
680
681 default: 673 default:
682 printk(KERN_ERR "%s: bad insertion point %d\n", 674 printk(KERN_ERR "%s: bad insertion point %d\n",
683 __func__, where); 675 __func__, where);
684 BUG(); 676 BUG();
685 } 677 }
686
687 if (unplug_it && blk_queue_plugged(q)) {
688 int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
689 - queue_in_flight(q);
690
691 if (nrq >= q->unplug_thresh)
692 __generic_unplug_device(q);
693 }
694} 678}
695 679
696void __elv_add_request(struct request_queue *q, struct request *rq, int where, 680void __elv_add_request(struct request_queue *q, struct request *rq, int where)
697 int plug)
698{ 681{
682 BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
683
699 if (rq->cmd_flags & REQ_SOFTBARRIER) { 684 if (rq->cmd_flags & REQ_SOFTBARRIER) {
700 /* barriers are scheduling boundary, update end_sector */ 685 /* barriers are scheduling boundary, update end_sector */
701 if (rq->cmd_type == REQ_TYPE_FS || 686 if (rq->cmd_type == REQ_TYPE_FS ||
@@ -707,38 +692,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
707 where == ELEVATOR_INSERT_SORT) 692 where == ELEVATOR_INSERT_SORT)
708 where = ELEVATOR_INSERT_BACK; 693 where = ELEVATOR_INSERT_BACK;
709 694
710 if (plug)
711 blk_plug_device(q);
712
713 elv_insert(q, rq, where); 695 elv_insert(q, rq, where);
714} 696}
715EXPORT_SYMBOL(__elv_add_request); 697EXPORT_SYMBOL(__elv_add_request);
716 698
717void elv_add_request(struct request_queue *q, struct request *rq, int where, 699void elv_add_request(struct request_queue *q, struct request *rq, int where)
718 int plug)
719{ 700{
720 unsigned long flags; 701 unsigned long flags;
721 702
722 spin_lock_irqsave(q->queue_lock, flags); 703 spin_lock_irqsave(q->queue_lock, flags);
723 __elv_add_request(q, rq, where, plug); 704 __elv_add_request(q, rq, where);
724 spin_unlock_irqrestore(q->queue_lock, flags); 705 spin_unlock_irqrestore(q->queue_lock, flags);
725} 706}
726EXPORT_SYMBOL(elv_add_request); 707EXPORT_SYMBOL(elv_add_request);
727 708
728int elv_queue_empty(struct request_queue *q)
729{
730 struct elevator_queue *e = q->elevator;
731
732 if (!list_empty(&q->queue_head))
733 return 0;
734
735 if (e->ops->elevator_queue_empty_fn)
736 return e->ops->elevator_queue_empty_fn(q);
737
738 return 1;
739}
740EXPORT_SYMBOL(elv_queue_empty);
741
742struct request *elv_latter_request(struct request_queue *q, struct request *rq) 709struct request *elv_latter_request(struct request_queue *q, struct request *rq)
743{ 710{
744 struct elevator_queue *e = q->elevator; 711 struct elevator_queue *e = q->elevator;
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 232c4b38cd3..06389e9ef96 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq)
39 list_add_tail(&rq->queuelist, &nd->queue); 39 list_add_tail(&rq->queuelist, &nd->queue);
40} 40}
41 41
42static int noop_queue_empty(struct request_queue *q)
43{
44 struct noop_data *nd = q->elevator->elevator_data;
45
46 return list_empty(&nd->queue);
47}
48
49static struct request * 42static struct request *
50noop_former_request(struct request_queue *q, struct request *rq) 43noop_former_request(struct request_queue *q, struct request *rq)
51{ 44{
@@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = {
90 .elevator_merge_req_fn = noop_merged_requests, 83 .elevator_merge_req_fn = noop_merged_requests,
91 .elevator_dispatch_fn = noop_dispatch, 84 .elevator_dispatch_fn = noop_dispatch,
92 .elevator_add_req_fn = noop_add_request, 85 .elevator_add_req_fn = noop_add_request,
93 .elevator_queue_empty_fn = noop_queue_empty,
94 .elevator_former_req_fn = noop_former_request, 86 .elevator_former_req_fn = noop_former_request,
95 .elevator_latter_req_fn = noop_latter_request, 87 .elevator_latter_req_fn = noop_latter_request,
96 .elevator_init_fn = noop_init_queue, 88 .elevator_init_fn = noop_init_queue,
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9279272b373..35658f445fc 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3170,12 +3170,6 @@ static void do_cciss_request(struct request_queue *q)
3170 int sg_index = 0; 3170 int sg_index = 0;
3171 int chained = 0; 3171 int chained = 0;
3172 3172
3173 /* We call start_io here in case there is a command waiting on the
3174 * queue that has not been sent.
3175 */
3176 if (blk_queue_plugged(q))
3177 goto startio;
3178
3179 queue: 3173 queue:
3180 creq = blk_peek_request(q); 3174 creq = blk_peek_request(q);
3181 if (!creq) 3175 if (!creq)
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 946dad4caef..b2fceb53e80 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -911,9 +911,6 @@ static void do_ida_request(struct request_queue *q)
911 struct scatterlist tmp_sg[SG_MAX]; 911 struct scatterlist tmp_sg[SG_MAX];
912 int i, dir, seg; 912 int i, dir, seg;
913 913
914 if (blk_queue_plugged(q))
915 goto startio;
916
917queue_next: 914queue_next:
918 creq = blk_peek_request(q); 915 creq = blk_peek_request(q);
919 if (!creq) 916 if (!creq)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index ba95cba192b..aca302492ff 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -80,7 +80,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
80 80
81 if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) 81 if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
82 rw |= REQ_FUA; 82 rw |= REQ_FUA;
83 rw |= REQ_UNPLUG | REQ_SYNC; 83 rw |= REQ_SYNC;
84 84
85 bio = bio_alloc(GFP_NOIO, 1); 85 bio = bio_alloc(GFP_NOIO, 1);
86 bio->bi_bdev = bdev->md_bdev; 86 bio->bi_bdev = bdev->md_bdev;
@@ -689,8 +689,6 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
689 } 689 }
690 } 690 }
691 691
692 drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
693
694 /* always (try to) flush bitmap to stable storage */ 692 /* always (try to) flush bitmap to stable storage */
695 drbd_md_flush(mdev); 693 drbd_md_flush(mdev);
696 694
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index fd42832f785..0645ca829a9 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -840,7 +840,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
840 for (i = 0; i < num_pages; i++) 840 for (i = 0; i < num_pages; i++)
841 bm_page_io_async(mdev, b, i, rw); 841 bm_page_io_async(mdev, b, i, rw);
842 842
843 drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
844 wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); 843 wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
845 844
846 if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { 845 if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 3803a034893..b0bd27dfc1e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -377,7 +377,7 @@ union p_header {
377#define DP_HARDBARRIER 1 /* depricated */ 377#define DP_HARDBARRIER 1 /* depricated */
378#define DP_RW_SYNC 2 /* equals REQ_SYNC */ 378#define DP_RW_SYNC 2 /* equals REQ_SYNC */
379#define DP_MAY_SET_IN_SYNC 4 379#define DP_MAY_SET_IN_SYNC 4
380#define DP_UNPLUG 8 /* equals REQ_UNPLUG */ 380#define DP_UNPLUG 8 /* not used anymore */
381#define DP_FUA 16 /* equals REQ_FUA */ 381#define DP_FUA 16 /* equals REQ_FUA */
382#define DP_FLUSH 32 /* equals REQ_FLUSH */ 382#define DP_FLUSH 32 /* equals REQ_FLUSH */
383#define DP_DISCARD 64 /* equals REQ_DISCARD */ 383#define DP_DISCARD 64 /* equals REQ_DISCARD */
@@ -2382,20 +2382,6 @@ static inline int drbd_queue_order_type(struct drbd_conf *mdev)
2382 return QUEUE_ORDERED_NONE; 2382 return QUEUE_ORDERED_NONE;
2383} 2383}
2384 2384
2385static inline void drbd_blk_run_queue(struct request_queue *q)
2386{
2387 if (q && q->unplug_fn)
2388 q->unplug_fn(q);
2389}
2390
2391static inline void drbd_kick_lo(struct drbd_conf *mdev)
2392{
2393 if (get_ldev(mdev)) {
2394 drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
2395 put_ldev(mdev);
2396 }
2397}
2398
2399static inline void drbd_md_flush(struct drbd_conf *mdev) 2385static inline void drbd_md_flush(struct drbd_conf *mdev)
2400{ 2386{
2401 int r; 2387 int r;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 29cd0dc9fe4..8a43ce0edee 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2477,12 +2477,11 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
2477{ 2477{
2478 if (mdev->agreed_pro_version >= 95) 2478 if (mdev->agreed_pro_version >= 95)
2479 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | 2479 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
2480 (bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
2481 (bi_rw & REQ_FUA ? DP_FUA : 0) | 2480 (bi_rw & REQ_FUA ? DP_FUA : 0) |
2482 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) | 2481 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
2483 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0); 2482 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
2484 else 2483 else
2485 return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0; 2484 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
2486} 2485}
2487 2486
2488/* Used to send write requests 2487/* Used to send write requests
@@ -2719,35 +2718,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
2719 return 0; 2718 return 0;
2720} 2719}
2721 2720
2722static void drbd_unplug_fn(struct request_queue *q)
2723{
2724 struct drbd_conf *mdev = q->queuedata;
2725
2726 /* unplug FIRST */
2727 spin_lock_irq(q->queue_lock);
2728 blk_remove_plug(q);
2729 spin_unlock_irq(q->queue_lock);
2730
2731 /* only if connected */
2732 spin_lock_irq(&mdev->req_lock);
2733 if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
2734 D_ASSERT(mdev->state.role == R_PRIMARY);
2735 if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
2736 /* add to the data.work queue,
2737 * unless already queued.
2738 * XXX this might be a good addition to drbd_queue_work
2739 * anyways, to detect "double queuing" ... */
2740 if (list_empty(&mdev->unplug_work.list))
2741 drbd_queue_work(&mdev->data.work,
2742 &mdev->unplug_work);
2743 }
2744 }
2745 spin_unlock_irq(&mdev->req_lock);
2746
2747 if (mdev->state.disk >= D_INCONSISTENT)
2748 drbd_kick_lo(mdev);
2749}
2750
2751static void drbd_set_defaults(struct drbd_conf *mdev) 2721static void drbd_set_defaults(struct drbd_conf *mdev)
2752{ 2722{
2753 /* This way we get a compile error when sync_conf grows, 2723 /* This way we get a compile error when sync_conf grows,
@@ -3222,9 +3192,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
3222 blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); 3192 blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
3223 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); 3193 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
3224 blk_queue_merge_bvec(q, drbd_merge_bvec); 3194 blk_queue_merge_bvec(q, drbd_merge_bvec);
3225 q->queue_lock = &mdev->req_lock; /* needed since we use */ 3195 q->queue_lock = &mdev->req_lock;
3226 /* plugging on a queue, that actually has no requests! */
3227 q->unplug_fn = drbd_unplug_fn;
3228 3196
3229 mdev->md_io_page = alloc_page(GFP_KERNEL); 3197 mdev->md_io_page = alloc_page(GFP_KERNEL);
3230 if (!mdev->md_io_page) 3198 if (!mdev->md_io_page)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 24487d4fb20..8e68be939de 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -187,15 +187,6 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int
187 return NULL; 187 return NULL;
188} 188}
189 189
190/* kick lower level device, if we have more than (arbitrary number)
191 * reference counts on it, which typically are locally submitted io
192 * requests. don't use unacked_cnt, so we speed up proto A and B, too. */
193static void maybe_kick_lo(struct drbd_conf *mdev)
194{
195 if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark)
196 drbd_kick_lo(mdev);
197}
198
199static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) 190static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
200{ 191{
201 struct drbd_epoch_entry *e; 192 struct drbd_epoch_entry *e;
@@ -219,7 +210,6 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
219 LIST_HEAD(reclaimed); 210 LIST_HEAD(reclaimed);
220 struct drbd_epoch_entry *e, *t; 211 struct drbd_epoch_entry *e, *t;
221 212
222 maybe_kick_lo(mdev);
223 spin_lock_irq(&mdev->req_lock); 213 spin_lock_irq(&mdev->req_lock);
224 reclaim_net_ee(mdev, &reclaimed); 214 reclaim_net_ee(mdev, &reclaimed);
225 spin_unlock_irq(&mdev->req_lock); 215 spin_unlock_irq(&mdev->req_lock);
@@ -436,8 +426,7 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
436 while (!list_empty(head)) { 426 while (!list_empty(head)) {
437 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 427 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
438 spin_unlock_irq(&mdev->req_lock); 428 spin_unlock_irq(&mdev->req_lock);
439 drbd_kick_lo(mdev); 429 io_schedule();
440 schedule();
441 finish_wait(&mdev->ee_wait, &wait); 430 finish_wait(&mdev->ee_wait, &wait);
442 spin_lock_irq(&mdev->req_lock); 431 spin_lock_irq(&mdev->req_lock);
443 } 432 }
@@ -1111,8 +1100,6 @@ next_bio:
1111 /* > e->sector, unless this is the first bio */ 1100 /* > e->sector, unless this is the first bio */
1112 bio->bi_sector = sector; 1101 bio->bi_sector = sector;
1113 bio->bi_bdev = mdev->ldev->backing_bdev; 1102 bio->bi_bdev = mdev->ldev->backing_bdev;
1114 /* we special case some flags in the multi-bio case, see below
1115 * (REQ_UNPLUG) */
1116 bio->bi_rw = rw; 1103 bio->bi_rw = rw;
1117 bio->bi_private = e; 1104 bio->bi_private = e;
1118 bio->bi_end_io = drbd_endio_sec; 1105 bio->bi_end_io = drbd_endio_sec;
@@ -1141,13 +1128,8 @@ next_bio:
1141 bios = bios->bi_next; 1128 bios = bios->bi_next;
1142 bio->bi_next = NULL; 1129 bio->bi_next = NULL;
1143 1130
1144 /* strip off REQ_UNPLUG unless it is the last bio */
1145 if (bios)
1146 bio->bi_rw &= ~REQ_UNPLUG;
1147
1148 drbd_generic_make_request(mdev, fault_type, bio); 1131 drbd_generic_make_request(mdev, fault_type, bio);
1149 } while (bios); 1132 } while (bios);
1150 maybe_kick_lo(mdev);
1151 return 0; 1133 return 0;
1152 1134
1153fail: 1135fail:
@@ -1167,9 +1149,6 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1167 1149
1168 inc_unacked(mdev); 1150 inc_unacked(mdev);
1169 1151
1170 if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
1171 drbd_kick_lo(mdev);
1172
1173 mdev->current_epoch->barrier_nr = p->barrier; 1152 mdev->current_epoch->barrier_nr = p->barrier;
1174 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); 1153 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1175 1154
@@ -1636,12 +1615,11 @@ static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
1636{ 1615{
1637 if (mdev->agreed_pro_version >= 95) 1616 if (mdev->agreed_pro_version >= 95)
1638 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | 1617 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1639 (dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
1640 (dpf & DP_FUA ? REQ_FUA : 0) | 1618 (dpf & DP_FUA ? REQ_FUA : 0) |
1641 (dpf & DP_FLUSH ? REQ_FUA : 0) | 1619 (dpf & DP_FLUSH ? REQ_FUA : 0) |
1642 (dpf & DP_DISCARD ? REQ_DISCARD : 0); 1620 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
1643 else 1621 else
1644 return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0; 1622 return dpf & DP_RW_SYNC ? REQ_SYNC : 0;
1645} 1623}
1646 1624
1647/* mirrored write */ 1625/* mirrored write */
@@ -3556,9 +3534,6 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3556 3534
3557static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3535static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3558{ 3536{
3559 if (mdev->state.disk >= D_INCONSISTENT)
3560 drbd_kick_lo(mdev);
3561
3562 /* Make sure we've acked all the TCP data associated 3537 /* Make sure we've acked all the TCP data associated
3563 * with the data requests being unplugged */ 3538 * with the data requests being unplugged */
3564 drbd_tcp_quickack(mdev->data.socket); 3539 drbd_tcp_quickack(mdev->data.socket);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 11a75d32a2e..ad3fc6228f2 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -960,10 +960,6 @@ allocate_barrier:
960 bio_endio(req->private_bio, -EIO); 960 bio_endio(req->private_bio, -EIO);
961 } 961 }
962 962
963 /* we need to plug ALWAYS since we possibly need to kick lo_dev.
964 * we plug after submit, so we won't miss an unplug event */
965 drbd_plug_device(mdev);
966
967 return 0; 963 return 0;
968 964
969fail_conflicting: 965fail_conflicting:
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 34f224b018b..e027446590d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -792,7 +792,6 @@ int drbd_resync_finished(struct drbd_conf *mdev)
792 * queue (or even the read operations for those packets 792 * queue (or even the read operations for those packets
793 * is not finished by now). Retry in 100ms. */ 793 * is not finished by now). Retry in 100ms. */
794 794
795 drbd_kick_lo(mdev);
796 __set_current_state(TASK_INTERRUPTIBLE); 795 __set_current_state(TASK_INTERRUPTIBLE);
797 schedule_timeout(HZ / 10); 796 schedule_timeout(HZ / 10);
798 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); 797 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
index defdb5013ea..53586fa5ae1 100644
--- a/drivers/block/drbd/drbd_wrappers.h
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -45,24 +45,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
45 generic_make_request(bio); 45 generic_make_request(bio);
46} 46}
47 47
48static inline void drbd_plug_device(struct drbd_conf *mdev)
49{
50 struct request_queue *q;
51 q = bdev_get_queue(mdev->this_bdev);
52
53 spin_lock_irq(q->queue_lock);
54
55/* XXX the check on !blk_queue_plugged is redundant,
56 * implicitly checked in blk_plug_device */
57
58 if (!blk_queue_plugged(q)) {
59 blk_plug_device(q);
60 del_timer(&q->unplug_timer);
61 /* unplugging should not happen automatically... */
62 }
63 spin_unlock_irq(q->queue_lock);
64}
65
66static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) 48static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
67{ 49{
68 return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK) 50 return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 3851dbc03e0..301d7a9a41a 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3838,7 +3838,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
3838 bio.bi_end_io = floppy_rb0_complete; 3838 bio.bi_end_io = floppy_rb0_complete;
3839 3839
3840 submit_bio(READ, &bio); 3840 submit_bio(READ, &bio);
3841 generic_unplug_device(bdev_get_queue(bdev));
3842 process_fd_request(); 3841 process_fd_request();
3843 wait_for_completion(&complete); 3842 wait_for_completion(&complete);
3844 3843
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 79c3079a620..a076a14ca72 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -540,17 +540,6 @@ out:
540 return 0; 540 return 0;
541} 541}
542 542
543/*
544 * kick off io on the underlying address space
545 */
546static void loop_unplug(struct request_queue *q)
547{
548 struct loop_device *lo = q->queuedata;
549
550 queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
551 blk_run_address_space(lo->lo_backing_file->f_mapping);
552}
553
554struct switch_request { 543struct switch_request {
555 struct file *file; 544 struct file *file;
556 struct completion wait; 545 struct completion wait;
@@ -917,7 +906,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
917 */ 906 */
918 blk_queue_make_request(lo->lo_queue, loop_make_request); 907 blk_queue_make_request(lo->lo_queue, loop_make_request);
919 lo->lo_queue->queuedata = lo; 908 lo->lo_queue->queuedata = lo;
920 lo->lo_queue->unplug_fn = loop_unplug;
921 909
922 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 910 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
923 blk_queue_flush(lo->lo_queue, REQ_FLUSH); 911 blk_queue_flush(lo->lo_queue, REQ_FLUSH);
@@ -1019,7 +1007,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1019 1007
1020 kthread_stop(lo->lo_thread); 1008 kthread_stop(lo->lo_thread);
1021 1009
1022 lo->lo_queue->unplug_fn = NULL;
1023 lo->lo_backing_file = NULL; 1010 lo->lo_backing_file = NULL;
1024 1011
1025 loop_release_xfer(lo); 1012 loop_release_xfer(lo);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index a077db27b7c..07a382eaf0a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1606,8 +1606,6 @@ static int kcdrwd(void *foobar)
1606 min_sleep_time = pkt->sleep_time; 1606 min_sleep_time = pkt->sleep_time;
1607 } 1607 }
1608 1608
1609 generic_unplug_device(bdev_get_queue(pd->bdev));
1610
1611 VPRINTK("kcdrwd: sleeping\n"); 1609 VPRINTK("kcdrwd: sleeping\n");
1612 residue = schedule_timeout(min_sleep_time); 1610 residue = schedule_timeout(min_sleep_time);
1613 VPRINTK("kcdrwd: wake up\n"); 1611 VPRINTK("kcdrwd: wake up\n");
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 9b102abe209..031ca720d92 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -241,8 +241,7 @@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
241 * 241 *
242 * Whenever IO on the active page completes, the Ready page is activated 242 * Whenever IO on the active page completes, the Ready page is activated
243 * and the ex-Active page is clean out and made Ready. 243 * and the ex-Active page is clean out and made Ready.
244 * Otherwise the Ready page is only activated when it becomes full, or 244 * Otherwise the Ready page is only activated when it becomes full.
245 * when mm_unplug_device is called via the unplug_io_fn.
246 * 245 *
247 * If a request arrives while both pages a full, it is queued, and b_rdev is 246 * If a request arrives while both pages a full, it is queued, and b_rdev is
248 * overloaded to record whether it was a read or a write. 247 * overloaded to record whether it was a read or a write.
@@ -333,17 +332,6 @@ static inline void reset_page(struct mm_page *page)
333 page->biotail = &page->bio; 332 page->biotail = &page->bio;
334} 333}
335 334
336static void mm_unplug_device(struct request_queue *q)
337{
338 struct cardinfo *card = q->queuedata;
339 unsigned long flags;
340
341 spin_lock_irqsave(&card->lock, flags);
342 if (blk_remove_plug(q))
343 activate(card);
344 spin_unlock_irqrestore(&card->lock, flags);
345}
346
347/* 335/*
348 * If there is room on Ready page, take 336 * If there is room on Ready page, take
349 * one bh off list and add it. 337 * one bh off list and add it.
@@ -535,7 +523,6 @@ static int mm_make_request(struct request_queue *q, struct bio *bio)
535 *card->biotail = bio; 523 *card->biotail = bio;
536 bio->bi_next = NULL; 524 bio->bi_next = NULL;
537 card->biotail = &bio->bi_next; 525 card->biotail = &bio->bi_next;
538 blk_plug_device(q);
539 spin_unlock_irq(&card->lock); 526 spin_unlock_irq(&card->lock);
540 527
541 return 0; 528 return 0;
@@ -897,7 +884,6 @@ static int __devinit mm_pci_probe(struct pci_dev *dev,
897 blk_queue_make_request(card->queue, mm_make_request); 884 blk_queue_make_request(card->queue, mm_make_request);
898 card->queue->queue_lock = &card->lock; 885 card->queue->queue_lock = &card->lock;
899 card->queue->queuedata = card; 886 card->queue->queuedata = card;
900 card->queue->unplug_fn = mm_unplug_device;
901 887
902 tasklet_init(&card->tasklet, process_page, (unsigned long)card); 888 tasklet_init(&card->tasklet, process_page, (unsigned long)card);
903 889
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index e88a2cf1771..6f218e014e9 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -233,8 +233,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
233 233
234 drive->hwif->rq = NULL; 234 drive->hwif->rq = NULL;
235 235
236 elv_add_request(drive->queue, &drive->sense_rq, 236 elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT);
237 ELEVATOR_INSERT_FRONT, 0);
238 return 0; 237 return 0;
239} 238}
240EXPORT_SYMBOL_GPL(ide_queue_sense_rq); 239EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a2e29099ee0..fd1e1179913 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -258,17 +258,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
258 if (time_after(jiffies, info->write_timeout)) 258 if (time_after(jiffies, info->write_timeout))
259 return 0; 259 return 0;
260 else { 260 else {
261 struct request_queue *q = drive->queue;
262 unsigned long flags;
263
264 /* 261 /*
265 * take a breather relying on the unplug timer to kick us again 262 * take a breather
266 */ 263 */
267 264 blk_delay_queue(drive->queue, 1);
268 spin_lock_irqsave(q->queue_lock, flags);
269 blk_plug_device(q);
270 spin_unlock_irqrestore(q->queue_lock, flags);
271
272 return 1; 265 return 1;
273 } 266 }
274} 267}
@@ -1514,8 +1507,6 @@ static int ide_cdrom_setup(ide_drive_t *drive)
1514 blk_queue_dma_alignment(q, 31); 1507 blk_queue_dma_alignment(q, 31);
1515 blk_queue_update_dma_pad(q, 15); 1508 blk_queue_update_dma_pad(q, 15);
1516 1509
1517 q->unplug_delay = max((1 * HZ) / 1000, 1);
1518
1519 drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED; 1510 drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
1520 drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id); 1511 drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
1521 1512
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 999dac054bc..f4077840d3a 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -549,8 +549,6 @@ plug_device_2:
549 549
550 if (rq) 550 if (rq)
551 blk_requeue_request(q, rq); 551 blk_requeue_request(q, rq);
552 if (!elv_queue_empty(q))
553 blk_plug_device(q);
554} 552}
555 553
556void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) 554void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
@@ -562,8 +560,6 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
562 560
563 if (rq) 561 if (rq)
564 blk_requeue_request(q, rq); 562 blk_requeue_request(q, rq);
565 if (!elv_queue_empty(q))
566 blk_plug_device(q);
567 563
568 spin_unlock_irqrestore(q->queue_lock, flags); 564 spin_unlock_irqrestore(q->queue_lock, flags);
569} 565}
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 88a380c5a47..6ab9ab2a508 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -52,7 +52,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
52 rq->cmd[0] = REQ_UNPARK_HEADS; 52 rq->cmd[0] = REQ_UNPARK_HEADS;
53 rq->cmd_len = 1; 53 rq->cmd_len = 1;
54 rq->cmd_type = REQ_TYPE_SPECIAL; 54 rq->cmd_type = REQ_TYPE_SPECIAL;
55 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); 55 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
56 56
57out: 57out:
58 return; 58 return;
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a35320fb59..ca203cb23f3 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -347,7 +347,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
347 atomic_inc(&bitmap->pending_writes); 347 atomic_inc(&bitmap->pending_writes);
348 set_buffer_locked(bh); 348 set_buffer_locked(bh);
349 set_buffer_mapped(bh); 349 set_buffer_mapped(bh);
350 submit_bh(WRITE | REQ_UNPLUG | REQ_SYNC, bh); 350 submit_bh(WRITE | REQ_SYNC, bh);
351 bh = bh->b_this_page; 351 bh = bh->b_this_page;
352 } 352 }
353 353
@@ -1339,8 +1339,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
1339 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1339 prepare_to_wait(&bitmap->overflow_wait, &__wait,
1340 TASK_UNINTERRUPTIBLE); 1340 TASK_UNINTERRUPTIBLE);
1341 spin_unlock_irq(&bitmap->lock); 1341 spin_unlock_irq(&bitmap->lock);
1342 md_unplug(bitmap->mddev); 1342 io_schedule();
1343 schedule();
1344 finish_wait(&bitmap->overflow_wait, &__wait); 1343 finish_wait(&bitmap->overflow_wait, &__wait);
1345 continue; 1344 continue;
1346 } 1345 }
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4e054bd9166..2c62c1169f7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -991,11 +991,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
991 clone->bi_destructor = dm_crypt_bio_destructor; 991 clone->bi_destructor = dm_crypt_bio_destructor;
992} 992}
993 993
994static void kcryptd_unplug(struct crypt_config *cc)
995{
996 blk_unplug(bdev_get_queue(cc->dev->bdev));
997}
998
999static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) 994static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
1000{ 995{
1001 struct crypt_config *cc = io->target->private; 996 struct crypt_config *cc = io->target->private;
@@ -1008,10 +1003,8 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
1008 * one in order to decrypt the whole bio data *afterwards*. 1003 * one in order to decrypt the whole bio data *afterwards*.
1009 */ 1004 */
1010 clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); 1005 clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
1011 if (!clone) { 1006 if (!clone)
1012 kcryptd_unplug(cc);
1013 return 1; 1007 return 1;
1014 }
1015 1008
1016 crypt_inc_pending(io); 1009 crypt_inc_pending(io);
1017 1010
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 136d4f71a11..76a5af00a26 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -352,7 +352,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
352 BUG_ON(num_regions > DM_IO_MAX_REGIONS); 352 BUG_ON(num_regions > DM_IO_MAX_REGIONS);
353 353
354 if (sync) 354 if (sync)
355 rw |= REQ_SYNC | REQ_UNPLUG; 355 rw |= REQ_SYNC;
356 356
357 /* 357 /*
358 * For multiple regions we need to be careful to rewind 358 * For multiple regions we need to be careful to rewind
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 924f5f0084c..1bb73a13ca4 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -37,13 +37,6 @@ struct dm_kcopyd_client {
37 unsigned int nr_pages; 37 unsigned int nr_pages;
38 unsigned int nr_free_pages; 38 unsigned int nr_free_pages;
39 39
40 /*
41 * Block devices to unplug.
42 * Non-NULL pointer means that a block device has some pending requests
43 * and needs to be unplugged.
44 */
45 struct block_device *unplug[2];
46
47 struct dm_io_client *io_client; 40 struct dm_io_client *io_client;
48 41
49 wait_queue_head_t destroyq; 42 wait_queue_head_t destroyq;
@@ -315,31 +308,6 @@ static int run_complete_job(struct kcopyd_job *job)
315 return 0; 308 return 0;
316} 309}
317 310
318/*
319 * Unplug the block device at the specified index.
320 */
321static void unplug(struct dm_kcopyd_client *kc, int rw)
322{
323 if (kc->unplug[rw] != NULL) {
324 blk_unplug(bdev_get_queue(kc->unplug[rw]));
325 kc->unplug[rw] = NULL;
326 }
327}
328
329/*
330 * Prepare block device unplug. If there's another device
331 * to be unplugged at the same array index, we unplug that
332 * device first.
333 */
334static void prepare_unplug(struct dm_kcopyd_client *kc, int rw,
335 struct block_device *bdev)
336{
337 if (likely(kc->unplug[rw] == bdev))
338 return;
339 unplug(kc, rw);
340 kc->unplug[rw] = bdev;
341}
342
343static void complete_io(unsigned long error, void *context) 311static void complete_io(unsigned long error, void *context)
344{ 312{
345 struct kcopyd_job *job = (struct kcopyd_job *) context; 313 struct kcopyd_job *job = (struct kcopyd_job *) context;
@@ -386,16 +354,10 @@ static int run_io_job(struct kcopyd_job *job)
386 .client = job->kc->io_client, 354 .client = job->kc->io_client,
387 }; 355 };
388 356
389 if (job->rw == READ) { 357 if (job->rw == READ)
390 r = dm_io(&io_req, 1, &job->source, NULL); 358 r = dm_io(&io_req, 1, &job->source, NULL);
391 prepare_unplug(job->kc, READ, job->source.bdev); 359 else
392 } else {
393 if (job->num_dests > 1)
394 io_req.bi_rw |= REQ_UNPLUG;
395 r = dm_io(&io_req, job->num_dests, job->dests, NULL); 360 r = dm_io(&io_req, job->num_dests, job->dests, NULL);
396 if (!(io_req.bi_rw & REQ_UNPLUG))
397 prepare_unplug(job->kc, WRITE, job->dests[0].bdev);
398 }
399 361
400 return r; 362 return r;
401} 363}
@@ -466,6 +428,7 @@ static void do_work(struct work_struct *work)
466{ 428{
467 struct dm_kcopyd_client *kc = container_of(work, 429 struct dm_kcopyd_client *kc = container_of(work,
468 struct dm_kcopyd_client, kcopyd_work); 430 struct dm_kcopyd_client, kcopyd_work);
431 struct blk_plug plug;
469 432
470 /* 433 /*
471 * The order that these are called is *very* important. 434 * The order that these are called is *very* important.
@@ -473,18 +436,12 @@ static void do_work(struct work_struct *work)
473 * Pages jobs when successful will jump onto the io jobs 436 * Pages jobs when successful will jump onto the io jobs
474 * list. io jobs call wake when they complete and it all 437 * list. io jobs call wake when they complete and it all
475 * starts again. 438 * starts again.
476 *
477 * Note that io_jobs add block devices to the unplug array,
478 * this array is cleared with "unplug" calls. It is thus
479 * forbidden to run complete_jobs after io_jobs and before
480 * unplug because the block device could be destroyed in
481 * job completion callback.
482 */ 439 */
440 blk_start_plug(&plug);
483 process_jobs(&kc->complete_jobs, kc, run_complete_job); 441 process_jobs(&kc->complete_jobs, kc, run_complete_job);
484 process_jobs(&kc->pages_jobs, kc, run_pages_job); 442 process_jobs(&kc->pages_jobs, kc, run_pages_job);
485 process_jobs(&kc->io_jobs, kc, run_io_job); 443 process_jobs(&kc->io_jobs, kc, run_io_job);
486 unplug(kc, READ); 444 blk_finish_plug(&plug);
487 unplug(kc, WRITE);
488} 445}
489 446
490/* 447/*
@@ -665,8 +622,6 @@ int dm_kcopyd_client_create(unsigned int nr_pages,
665 INIT_LIST_HEAD(&kc->io_jobs); 622 INIT_LIST_HEAD(&kc->io_jobs);
666 INIT_LIST_HEAD(&kc->pages_jobs); 623 INIT_LIST_HEAD(&kc->pages_jobs);
667 624
668 memset(kc->unplug, 0, sizeof(kc->unplug));
669
670 kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); 625 kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
671 if (!kc->job_pool) 626 if (!kc->job_pool)
672 goto bad_slab; 627 goto bad_slab;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b9e1e15ef11..5ef136cdba9 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -394,7 +394,7 @@ static void raid_unplug(struct dm_target_callbacks *cb)
394{ 394{
395 struct raid_set *rs = container_of(cb, struct raid_set, callbacks); 395 struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
396 396
397 md_raid5_unplug_device(rs->md.private); 397 md_raid5_kick_device(rs->md.private);
398} 398}
399 399
400/* 400/*
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index dee326775c6..976ad4688af 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -842,8 +842,6 @@ static void do_mirror(struct work_struct *work)
842 do_reads(ms, &reads); 842 do_reads(ms, &reads);
843 do_writes(ms, &writes); 843 do_writes(ms, &writes);
844 do_failures(ms, &failures); 844 do_failures(ms, &failures);
845
846 dm_table_unplug_all(ms->ti->table);
847} 845}
848 846
849/*----------------------------------------------------------------- 847/*-----------------------------------------------------------------
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 38e4eb1bb96..f50a7b95225 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1275,29 +1275,6 @@ int dm_table_any_busy_target(struct dm_table *t)
1275 return 0; 1275 return 0;
1276} 1276}
1277 1277
1278void dm_table_unplug_all(struct dm_table *t)
1279{
1280 struct dm_dev_internal *dd;
1281 struct list_head *devices = dm_table_get_devices(t);
1282 struct dm_target_callbacks *cb;
1283
1284 list_for_each_entry(dd, devices, list) {
1285 struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
1286 char b[BDEVNAME_SIZE];
1287
1288 if (likely(q))
1289 blk_unplug(q);
1290 else
1291 DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
1292 dm_device_name(t->md),
1293 bdevname(dd->dm_dev.bdev, b));
1294 }
1295
1296 list_for_each_entry(cb, &t->target_callbacks, list)
1297 if (cb->unplug_fn)
1298 cb->unplug_fn(cb);
1299}
1300
1301struct mapped_device *dm_table_get_md(struct dm_table *t) 1278struct mapped_device *dm_table_get_md(struct dm_table *t)
1302{ 1279{
1303 return t->md; 1280 return t->md;
@@ -1345,4 +1322,3 @@ EXPORT_SYMBOL(dm_table_get_mode);
1345EXPORT_SYMBOL(dm_table_get_md); 1322EXPORT_SYMBOL(dm_table_get_md);
1346EXPORT_SYMBOL(dm_table_put); 1323EXPORT_SYMBOL(dm_table_put);
1347EXPORT_SYMBOL(dm_table_get); 1324EXPORT_SYMBOL(dm_table_get);
1348EXPORT_SYMBOL(dm_table_unplug_all);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eaa3af0e063..d22b9905c16 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -807,8 +807,6 @@ void dm_requeue_unmapped_request(struct request *clone)
807 dm_unprep_request(rq); 807 dm_unprep_request(rq);
808 808
809 spin_lock_irqsave(q->queue_lock, flags); 809 spin_lock_irqsave(q->queue_lock, flags);
810 if (elv_queue_empty(q))
811 blk_plug_device(q);
812 blk_requeue_request(q, rq); 810 blk_requeue_request(q, rq);
813 spin_unlock_irqrestore(q->queue_lock, flags); 811 spin_unlock_irqrestore(q->queue_lock, flags);
814 812
@@ -1613,10 +1611,10 @@ static void dm_request_fn(struct request_queue *q)
1613 * number of in-flight I/Os after the queue is stopped in 1611 * number of in-flight I/Os after the queue is stopped in
1614 * dm_suspend(). 1612 * dm_suspend().
1615 */ 1613 */
1616 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { 1614 while (!blk_queue_stopped(q)) {
1617 rq = blk_peek_request(q); 1615 rq = blk_peek_request(q);
1618 if (!rq) 1616 if (!rq)
1619 goto plug_and_out; 1617 goto delay_and_out;
1620 1618
1621 /* always use block 0 to find the target for flushes for now */ 1619 /* always use block 0 to find the target for flushes for now */
1622 pos = 0; 1620 pos = 0;
@@ -1627,7 +1625,7 @@ static void dm_request_fn(struct request_queue *q)
1627 BUG_ON(!dm_target_is_valid(ti)); 1625 BUG_ON(!dm_target_is_valid(ti));
1628 1626
1629 if (ti->type->busy && ti->type->busy(ti)) 1627 if (ti->type->busy && ti->type->busy(ti))
1630 goto plug_and_out; 1628 goto delay_and_out;
1631 1629
1632 blk_start_request(rq); 1630 blk_start_request(rq);
1633 clone = rq->special; 1631 clone = rq->special;
@@ -1647,11 +1645,8 @@ requeued:
1647 BUG_ON(!irqs_disabled()); 1645 BUG_ON(!irqs_disabled());
1648 spin_lock(q->queue_lock); 1646 spin_lock(q->queue_lock);
1649 1647
1650plug_and_out: 1648delay_and_out:
1651 if (!elv_queue_empty(q)) 1649 blk_delay_queue(q, HZ / 10);
1652 /* Some requests still remain, retry later */
1653 blk_plug_device(q);
1654
1655out: 1650out:
1656 dm_table_put(map); 1651 dm_table_put(map);
1657 1652
@@ -1680,20 +1675,6 @@ static int dm_lld_busy(struct request_queue *q)
1680 return r; 1675 return r;
1681} 1676}
1682 1677
1683static void dm_unplug_all(struct request_queue *q)
1684{
1685 struct mapped_device *md = q->queuedata;
1686 struct dm_table *map = dm_get_live_table(md);
1687
1688 if (map) {
1689 if (dm_request_based(md))
1690 generic_unplug_device(q);
1691
1692 dm_table_unplug_all(map);
1693 dm_table_put(map);
1694 }
1695}
1696
1697static int dm_any_congested(void *congested_data, int bdi_bits) 1678static int dm_any_congested(void *congested_data, int bdi_bits)
1698{ 1679{
1699 int r = bdi_bits; 1680 int r = bdi_bits;
@@ -1817,7 +1798,6 @@ static void dm_init_md_queue(struct mapped_device *md)
1817 md->queue->backing_dev_info.congested_data = md; 1798 md->queue->backing_dev_info.congested_data = md;
1818 blk_queue_make_request(md->queue, dm_request); 1799 blk_queue_make_request(md->queue, dm_request);
1819 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1800 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1820 md->queue->unplug_fn = dm_unplug_all;
1821 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1801 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1822 blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); 1802 blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
1823} 1803}
@@ -2263,8 +2243,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2263 int r = 0; 2243 int r = 0;
2264 DECLARE_WAITQUEUE(wait, current); 2244 DECLARE_WAITQUEUE(wait, current);
2265 2245
2266 dm_unplug_all(md->queue);
2267
2268 add_wait_queue(&md->wait, &wait); 2246 add_wait_queue(&md->wait, &wait);
2269 2247
2270 while (1) { 2248 while (1) {
@@ -2539,7 +2517,6 @@ int dm_resume(struct mapped_device *md)
2539 2517
2540 clear_bit(DMF_SUSPENDED, &md->flags); 2518 clear_bit(DMF_SUSPENDED, &md->flags);
2541 2519
2542 dm_table_unplug_all(map);
2543 r = 0; 2520 r = 0;
2544out: 2521out:
2545 dm_table_put(map); 2522 dm_table_put(map);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 0ed7f6bc2a7..338804f8fb3 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -87,22 +87,6 @@ static int linear_mergeable_bvec(struct request_queue *q,
87 return maxsectors << 9; 87 return maxsectors << 9;
88} 88}
89 89
90static void linear_unplug(struct request_queue *q)
91{
92 mddev_t *mddev = q->queuedata;
93 linear_conf_t *conf;
94 int i;
95
96 rcu_read_lock();
97 conf = rcu_dereference(mddev->private);
98
99 for (i=0; i < mddev->raid_disks; i++) {
100 struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
101 blk_unplug(r_queue);
102 }
103 rcu_read_unlock();
104}
105
106static int linear_congested(void *data, int bits) 90static int linear_congested(void *data, int bits)
107{ 91{
108 mddev_t *mddev = data; 92 mddev_t *mddev = data;
@@ -224,7 +208,6 @@ static int linear_run (mddev_t *mddev)
224 md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 208 md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
225 209
226 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); 210 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
227 mddev->queue->unplug_fn = linear_unplug;
228 mddev->queue->backing_dev_info.congested_fn = linear_congested; 211 mddev->queue->backing_dev_info.congested_fn = linear_congested;
229 mddev->queue->backing_dev_info.congested_data = mddev; 212 mddev->queue->backing_dev_info.congested_data = mddev;
230 md_integrity_register(mddev); 213 md_integrity_register(mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 818313e277e..86ba66c0b28 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -780,8 +780,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
780 bio->bi_end_io = super_written; 780 bio->bi_end_io = super_written;
781 781
782 atomic_inc(&mddev->pending_writes); 782 atomic_inc(&mddev->pending_writes);
783 submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA, 783 submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
784 bio);
785} 784}
786 785
787void md_super_wait(mddev_t *mddev) 786void md_super_wait(mddev_t *mddev)
@@ -809,7 +808,7 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
809 struct completion event; 808 struct completion event;
810 int ret; 809 int ret;
811 810
812 rw |= REQ_SYNC | REQ_UNPLUG; 811 rw |= REQ_SYNC;
813 812
814 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? 813 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
815 rdev->meta_bdev : rdev->bdev; 814 rdev->meta_bdev : rdev->bdev;
@@ -4817,7 +4816,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4817 __md_stop_writes(mddev); 4816 __md_stop_writes(mddev);
4818 md_stop(mddev); 4817 md_stop(mddev);
4819 mddev->queue->merge_bvec_fn = NULL; 4818 mddev->queue->merge_bvec_fn = NULL;
4820 mddev->queue->unplug_fn = NULL;
4821 mddev->queue->backing_dev_info.congested_fn = NULL; 4819 mddev->queue->backing_dev_info.congested_fn = NULL;
4822 4820
4823 /* tell userspace to handle 'inactive' */ 4821 /* tell userspace to handle 'inactive' */
@@ -6692,8 +6690,6 @@ EXPORT_SYMBOL_GPL(md_allow_write);
6692 6690
6693void md_unplug(mddev_t *mddev) 6691void md_unplug(mddev_t *mddev)
6694{ 6692{
6695 if (mddev->queue)
6696 blk_unplug(mddev->queue);
6697 if (mddev->plug) 6693 if (mddev->plug)
6698 mddev->plug->unplug_fn(mddev->plug); 6694 mddev->plug->unplug_fn(mddev->plug);
6699} 6695}
@@ -6876,7 +6872,6 @@ void md_do_sync(mddev_t *mddev)
6876 >= mddev->resync_max - mddev->curr_resync_completed 6872 >= mddev->resync_max - mddev->curr_resync_completed
6877 )) { 6873 )) {
6878 /* time to update curr_resync_completed */ 6874 /* time to update curr_resync_completed */
6879 md_unplug(mddev);
6880 wait_event(mddev->recovery_wait, 6875 wait_event(mddev->recovery_wait,
6881 atomic_read(&mddev->recovery_active) == 0); 6876 atomic_read(&mddev->recovery_active) == 0);
6882 mddev->curr_resync_completed = j; 6877 mddev->curr_resync_completed = j;
@@ -6952,7 +6947,6 @@ void md_do_sync(mddev_t *mddev)
6952 * about not overloading the IO subsystem. (things like an 6947 * about not overloading the IO subsystem. (things like an
6953 * e2fsck being done on the RAID array should execute fast) 6948 * e2fsck being done on the RAID array should execute fast)
6954 */ 6949 */
6955 md_unplug(mddev);
6956 cond_resched(); 6950 cond_resched();
6957 6951
6958 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 6952 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -6971,8 +6965,6 @@ void md_do_sync(mddev_t *mddev)
6971 * this also signals 'finished resyncing' to md_stop 6965 * this also signals 'finished resyncing' to md_stop
6972 */ 6966 */
6973 out: 6967 out:
6974 md_unplug(mddev);
6975
6976 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 6968 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
6977 6969
6978 /* tell personality that we are finished */ 6970 /* tell personality that we are finished */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 3a62d440e27..5e694b151c3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -106,36 +106,6 @@ static void multipath_end_request(struct bio *bio, int error)
106 rdev_dec_pending(rdev, conf->mddev); 106 rdev_dec_pending(rdev, conf->mddev);
107} 107}
108 108
109static void unplug_slaves(mddev_t *mddev)
110{
111 multipath_conf_t *conf = mddev->private;
112 int i;
113
114 rcu_read_lock();
115 for (i=0; i<mddev->raid_disks; i++) {
116 mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
117 if (rdev && !test_bit(Faulty, &rdev->flags)
118 && atomic_read(&rdev->nr_pending)) {
119 struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
120
121 atomic_inc(&rdev->nr_pending);
122 rcu_read_unlock();
123
124 blk_unplug(r_queue);
125
126 rdev_dec_pending(rdev, mddev);
127 rcu_read_lock();
128 }
129 }
130 rcu_read_unlock();
131}
132
133static void multipath_unplug(struct request_queue *q)
134{
135 unplug_slaves(q->queuedata);
136}
137
138
139static int multipath_make_request(mddev_t *mddev, struct bio * bio) 109static int multipath_make_request(mddev_t *mddev, struct bio * bio)
140{ 110{
141 multipath_conf_t *conf = mddev->private; 111 multipath_conf_t *conf = mddev->private;
@@ -517,7 +487,6 @@ static int multipath_run (mddev_t *mddev)
517 */ 487 */
518 md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); 488 md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
519 489
520 mddev->queue->unplug_fn = multipath_unplug;
521 mddev->queue->backing_dev_info.congested_fn = multipath_congested; 490 mddev->queue->backing_dev_info.congested_fn = multipath_congested;
522 mddev->queue->backing_dev_info.congested_data = mddev; 491 mddev->queue->backing_dev_info.congested_data = mddev;
523 md_integrity_register(mddev); 492 md_integrity_register(mddev);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c0ac457f121..95916fd6394 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -25,21 +25,6 @@
25#include "raid0.h" 25#include "raid0.h"
26#include "raid5.h" 26#include "raid5.h"
27 27
28static void raid0_unplug(struct request_queue *q)
29{
30 mddev_t *mddev = q->queuedata;
31 raid0_conf_t *conf = mddev->private;
32 mdk_rdev_t **devlist = conf->devlist;
33 int raid_disks = conf->strip_zone[0].nb_dev;
34 int i;
35
36 for (i=0; i < raid_disks; i++) {
37 struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);
38
39 blk_unplug(r_queue);
40 }
41}
42
43static int raid0_congested(void *data, int bits) 28static int raid0_congested(void *data, int bits)
44{ 29{
45 mddev_t *mddev = data; 30 mddev_t *mddev = data;
@@ -272,7 +257,6 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
272 mdname(mddev), 257 mdname(mddev),
273 (unsigned long long)smallest->sectors); 258 (unsigned long long)smallest->sectors);
274 } 259 }
275 mddev->queue->unplug_fn = raid0_unplug;
276 mddev->queue->backing_dev_info.congested_fn = raid0_congested; 260 mddev->queue->backing_dev_info.congested_fn = raid0_congested;
277 mddev->queue->backing_dev_info.congested_data = mddev; 261 mddev->queue->backing_dev_info.congested_data = mddev;
278 262
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 06cd712807d..8f34ad5c478 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -52,23 +52,16 @@
52#define NR_RAID1_BIOS 256 52#define NR_RAID1_BIOS 256
53 53
54 54
55static void unplug_slaves(mddev_t *mddev);
56
57static void allow_barrier(conf_t *conf); 55static void allow_barrier(conf_t *conf);
58static void lower_barrier(conf_t *conf); 56static void lower_barrier(conf_t *conf);
59 57
60static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) 58static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
61{ 59{
62 struct pool_info *pi = data; 60 struct pool_info *pi = data;
63 r1bio_t *r1_bio;
64 int size = offsetof(r1bio_t, bios[pi->raid_disks]); 61 int size = offsetof(r1bio_t, bios[pi->raid_disks]);
65 62
66 /* allocate a r1bio with room for raid_disks entries in the bios array */ 63 /* allocate a r1bio with room for raid_disks entries in the bios array */
67 r1_bio = kzalloc(size, gfp_flags); 64 return kzalloc(size, gfp_flags);
68 if (!r1_bio && pi->mddev)
69 unplug_slaves(pi->mddev);
70
71 return r1_bio;
72} 65}
73 66
74static void r1bio_pool_free(void *r1_bio, void *data) 67static void r1bio_pool_free(void *r1_bio, void *data)
@@ -91,10 +84,8 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
91 int i, j; 84 int i, j;
92 85
93 r1_bio = r1bio_pool_alloc(gfp_flags, pi); 86 r1_bio = r1bio_pool_alloc(gfp_flags, pi);
94 if (!r1_bio) { 87 if (!r1_bio)
95 unplug_slaves(pi->mddev);
96 return NULL; 88 return NULL;
97 }
98 89
99 /* 90 /*
100 * Allocate bios : 1 for reading, n-1 for writing 91 * Allocate bios : 1 for reading, n-1 for writing
@@ -520,37 +511,6 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
520 return new_disk; 511 return new_disk;
521} 512}
522 513
523static void unplug_slaves(mddev_t *mddev)
524{
525 conf_t *conf = mddev->private;
526 int i;
527
528 rcu_read_lock();
529 for (i=0; i<mddev->raid_disks; i++) {
530 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
531 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
532 struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
533
534 atomic_inc(&rdev->nr_pending);
535 rcu_read_unlock();
536
537 blk_unplug(r_queue);
538
539 rdev_dec_pending(rdev, mddev);
540 rcu_read_lock();
541 }
542 }
543 rcu_read_unlock();
544}
545
546static void raid1_unplug(struct request_queue *q)
547{
548 mddev_t *mddev = q->queuedata;
549
550 unplug_slaves(mddev);
551 md_wakeup_thread(mddev->thread);
552}
553
554static int raid1_congested(void *data, int bits) 514static int raid1_congested(void *data, int bits)
555{ 515{
556 mddev_t *mddev = data; 516 mddev_t *mddev = data;
@@ -580,23 +540,16 @@ static int raid1_congested(void *data, int bits)
580} 540}
581 541
582 542
583static int flush_pending_writes(conf_t *conf) 543static void flush_pending_writes(conf_t *conf)
584{ 544{
585 /* Any writes that have been queued but are awaiting 545 /* Any writes that have been queued but are awaiting
586 * bitmap updates get flushed here. 546 * bitmap updates get flushed here.
587 * We return 1 if any requests were actually submitted.
588 */ 547 */
589 int rv = 0;
590
591 spin_lock_irq(&conf->device_lock); 548 spin_lock_irq(&conf->device_lock);
592 549
593 if (conf->pending_bio_list.head) { 550 if (conf->pending_bio_list.head) {
594 struct bio *bio; 551 struct bio *bio;
595 bio = bio_list_get(&conf->pending_bio_list); 552 bio = bio_list_get(&conf->pending_bio_list);
596 /* Only take the spinlock to quiet a warning */
597 spin_lock(conf->mddev->queue->queue_lock);
598 blk_remove_plug(conf->mddev->queue);
599 spin_unlock(conf->mddev->queue->queue_lock);
600 spin_unlock_irq(&conf->device_lock); 553 spin_unlock_irq(&conf->device_lock);
601 /* flush any pending bitmap writes to 554 /* flush any pending bitmap writes to
602 * disk before proceeding w/ I/O */ 555 * disk before proceeding w/ I/O */
@@ -608,10 +561,14 @@ static int flush_pending_writes(conf_t *conf)
608 generic_make_request(bio); 561 generic_make_request(bio);
609 bio = next; 562 bio = next;
610 } 563 }
611 rv = 1;
612 } else 564 } else
613 spin_unlock_irq(&conf->device_lock); 565 spin_unlock_irq(&conf->device_lock);
614 return rv; 566}
567
568static void md_kick_device(mddev_t *mddev)
569{
570 blk_flush_plug(current);
571 md_wakeup_thread(mddev->thread);
615} 572}
616 573
617/* Barriers.... 574/* Barriers....
@@ -643,8 +600,7 @@ static void raise_barrier(conf_t *conf)
643 600
644 /* Wait until no block IO is waiting */ 601 /* Wait until no block IO is waiting */
645 wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, 602 wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
646 conf->resync_lock, 603 conf->resync_lock, md_kick_device(conf->mddev));
647 raid1_unplug(conf->mddev->queue));
648 604
649 /* block any new IO from starting */ 605 /* block any new IO from starting */
650 conf->barrier++; 606 conf->barrier++;
@@ -652,8 +608,7 @@ static void raise_barrier(conf_t *conf)
652 /* Now wait for all pending IO to complete */ 608 /* Now wait for all pending IO to complete */
653 wait_event_lock_irq(conf->wait_barrier, 609 wait_event_lock_irq(conf->wait_barrier,
654 !conf->nr_pending && conf->barrier < RESYNC_DEPTH, 610 !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
655 conf->resync_lock, 611 conf->resync_lock, md_kick_device(conf->mddev));
656 raid1_unplug(conf->mddev->queue));
657 612
658 spin_unlock_irq(&conf->resync_lock); 613 spin_unlock_irq(&conf->resync_lock);
659} 614}
@@ -675,7 +630,7 @@ static void wait_barrier(conf_t *conf)
675 conf->nr_waiting++; 630 conf->nr_waiting++;
676 wait_event_lock_irq(conf->wait_barrier, !conf->barrier, 631 wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
677 conf->resync_lock, 632 conf->resync_lock,
678 raid1_unplug(conf->mddev->queue)); 633 md_kick_device(conf->mddev));
679 conf->nr_waiting--; 634 conf->nr_waiting--;
680 } 635 }
681 conf->nr_pending++; 636 conf->nr_pending++;
@@ -712,7 +667,7 @@ static void freeze_array(conf_t *conf)
712 conf->nr_pending == conf->nr_queued+1, 667 conf->nr_pending == conf->nr_queued+1,
713 conf->resync_lock, 668 conf->resync_lock,
714 ({ flush_pending_writes(conf); 669 ({ flush_pending_writes(conf);
715 raid1_unplug(conf->mddev->queue); })); 670 md_kick_device(conf->mddev); }));
716 spin_unlock_irq(&conf->resync_lock); 671 spin_unlock_irq(&conf->resync_lock);
717} 672}
718static void unfreeze_array(conf_t *conf) 673static void unfreeze_array(conf_t *conf)
@@ -962,7 +917,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
962 atomic_inc(&r1_bio->remaining); 917 atomic_inc(&r1_bio->remaining);
963 spin_lock_irqsave(&conf->device_lock, flags); 918 spin_lock_irqsave(&conf->device_lock, flags);
964 bio_list_add(&conf->pending_bio_list, mbio); 919 bio_list_add(&conf->pending_bio_list, mbio);
965 blk_plug_device_unlocked(mddev->queue);
966 spin_unlock_irqrestore(&conf->device_lock, flags); 920 spin_unlock_irqrestore(&conf->device_lock, flags);
967 } 921 }
968 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); 922 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -971,7 +925,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
971 /* In case raid1d snuck in to freeze_array */ 925 /* In case raid1d snuck in to freeze_array */
972 wake_up(&conf->wait_barrier); 926 wake_up(&conf->wait_barrier);
973 927
974 if (do_sync) 928 if (do_sync || !bitmap)
975 md_wakeup_thread(mddev->thread); 929 md_wakeup_thread(mddev->thread);
976 930
977 return 0; 931 return 0;
@@ -1561,7 +1515,6 @@ static void raid1d(mddev_t *mddev)
1561 unsigned long flags; 1515 unsigned long flags;
1562 conf_t *conf = mddev->private; 1516 conf_t *conf = mddev->private;
1563 struct list_head *head = &conf->retry_list; 1517 struct list_head *head = &conf->retry_list;
1564 int unplug=0;
1565 mdk_rdev_t *rdev; 1518 mdk_rdev_t *rdev;
1566 1519
1567 md_check_recovery(mddev); 1520 md_check_recovery(mddev);
@@ -1569,7 +1522,7 @@ static void raid1d(mddev_t *mddev)
1569 for (;;) { 1522 for (;;) {
1570 char b[BDEVNAME_SIZE]; 1523 char b[BDEVNAME_SIZE];
1571 1524
1572 unplug += flush_pending_writes(conf); 1525 flush_pending_writes(conf);
1573 1526
1574 spin_lock_irqsave(&conf->device_lock, flags); 1527 spin_lock_irqsave(&conf->device_lock, flags);
1575 if (list_empty(head)) { 1528 if (list_empty(head)) {
@@ -1583,10 +1536,9 @@ static void raid1d(mddev_t *mddev)
1583 1536
1584 mddev = r1_bio->mddev; 1537 mddev = r1_bio->mddev;
1585 conf = mddev->private; 1538 conf = mddev->private;
1586 if (test_bit(R1BIO_IsSync, &r1_bio->state)) { 1539 if (test_bit(R1BIO_IsSync, &r1_bio->state))
1587 sync_request_write(mddev, r1_bio); 1540 sync_request_write(mddev, r1_bio);
1588 unplug = 1; 1541 else {
1589 } else {
1590 int disk; 1542 int disk;
1591 1543
1592 /* we got a read error. Maybe the drive is bad. Maybe just 1544 /* we got a read error. Maybe the drive is bad. Maybe just
@@ -1636,14 +1588,11 @@ static void raid1d(mddev_t *mddev)
1636 bio->bi_end_io = raid1_end_read_request; 1588 bio->bi_end_io = raid1_end_read_request;
1637 bio->bi_rw = READ | do_sync; 1589 bio->bi_rw = READ | do_sync;
1638 bio->bi_private = r1_bio; 1590 bio->bi_private = r1_bio;
1639 unplug = 1;
1640 generic_make_request(bio); 1591 generic_make_request(bio);
1641 } 1592 }
1642 } 1593 }
1643 cond_resched(); 1594 cond_resched();
1644 } 1595 }
1645 if (unplug)
1646 unplug_slaves(mddev);
1647} 1596}
1648 1597
1649 1598
@@ -2066,7 +2015,6 @@ static int run(mddev_t *mddev)
2066 2015
2067 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); 2016 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
2068 2017
2069 mddev->queue->unplug_fn = raid1_unplug;
2070 mddev->queue->backing_dev_info.congested_fn = raid1_congested; 2018 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
2071 mddev->queue->backing_dev_info.congested_data = mddev; 2019 mddev->queue->backing_dev_info.congested_data = mddev;
2072 md_integrity_register(mddev); 2020 md_integrity_register(mddev);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 747d061d8e0..c0d0f5f7e40 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -57,23 +57,16 @@
57 */ 57 */
58#define NR_RAID10_BIOS 256 58#define NR_RAID10_BIOS 256
59 59
60static void unplug_slaves(mddev_t *mddev);
61
62static void allow_barrier(conf_t *conf); 60static void allow_barrier(conf_t *conf);
63static void lower_barrier(conf_t *conf); 61static void lower_barrier(conf_t *conf);
64 62
65static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) 63static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
66{ 64{
67 conf_t *conf = data; 65 conf_t *conf = data;
68 r10bio_t *r10_bio;
69 int size = offsetof(struct r10bio_s, devs[conf->copies]); 66 int size = offsetof(struct r10bio_s, devs[conf->copies]);
70 67
71 /* allocate a r10bio with room for raid_disks entries in the bios array */ 68 /* allocate a r10bio with room for raid_disks entries in the bios array */
72 r10_bio = kzalloc(size, gfp_flags); 69 return kzalloc(size, gfp_flags);
73 if (!r10_bio && conf->mddev)
74 unplug_slaves(conf->mddev);
75
76 return r10_bio;
77} 70}
78 71
79static void r10bio_pool_free(void *r10_bio, void *data) 72static void r10bio_pool_free(void *r10_bio, void *data)
@@ -106,10 +99,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
106 int nalloc; 99 int nalloc;
107 100
108 r10_bio = r10bio_pool_alloc(gfp_flags, conf); 101 r10_bio = r10bio_pool_alloc(gfp_flags, conf);
109 if (!r10_bio) { 102 if (!r10_bio)
110 unplug_slaves(conf->mddev);
111 return NULL; 103 return NULL;
112 }
113 104
114 if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) 105 if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
115 nalloc = conf->copies; /* resync */ 106 nalloc = conf->copies; /* resync */
@@ -597,37 +588,6 @@ rb_out:
597 return disk; 588 return disk;
598} 589}
599 590
600static void unplug_slaves(mddev_t *mddev)
601{
602 conf_t *conf = mddev->private;
603 int i;
604
605 rcu_read_lock();
606 for (i=0; i < conf->raid_disks; i++) {
607 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
608 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
609 struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
610
611 atomic_inc(&rdev->nr_pending);
612 rcu_read_unlock();
613
614 blk_unplug(r_queue);
615
616 rdev_dec_pending(rdev, mddev);
617 rcu_read_lock();
618 }
619 }
620 rcu_read_unlock();
621}
622
623static void raid10_unplug(struct request_queue *q)
624{
625 mddev_t *mddev = q->queuedata;
626
627 unplug_slaves(q->queuedata);
628 md_wakeup_thread(mddev->thread);
629}
630
631static int raid10_congested(void *data, int bits) 591static int raid10_congested(void *data, int bits)
632{ 592{
633 mddev_t *mddev = data; 593 mddev_t *mddev = data;
@@ -649,23 +609,16 @@ static int raid10_congested(void *data, int bits)
649 return ret; 609 return ret;
650} 610}
651 611
652static int flush_pending_writes(conf_t *conf) 612static void flush_pending_writes(conf_t *conf)
653{ 613{
654 /* Any writes that have been queued but are awaiting 614 /* Any writes that have been queued but are awaiting
655 * bitmap updates get flushed here. 615 * bitmap updates get flushed here.
656 * We return 1 if any requests were actually submitted.
657 */ 616 */
658 int rv = 0;
659
660 spin_lock_irq(&conf->device_lock); 617 spin_lock_irq(&conf->device_lock);
661 618
662 if (conf->pending_bio_list.head) { 619 if (conf->pending_bio_list.head) {
663 struct bio *bio; 620 struct bio *bio;
664 bio = bio_list_get(&conf->pending_bio_list); 621 bio = bio_list_get(&conf->pending_bio_list);
665 /* Spinlock only taken to quiet a warning */
666 spin_lock(conf->mddev->queue->queue_lock);
667 blk_remove_plug(conf->mddev->queue);
668 spin_unlock(conf->mddev->queue->queue_lock);
669 spin_unlock_irq(&conf->device_lock); 622 spin_unlock_irq(&conf->device_lock);
670 /* flush any pending bitmap writes to disk 623 /* flush any pending bitmap writes to disk
671 * before proceeding w/ I/O */ 624 * before proceeding w/ I/O */
@@ -677,11 +630,16 @@ static int flush_pending_writes(conf_t *conf)
677 generic_make_request(bio); 630 generic_make_request(bio);
678 bio = next; 631 bio = next;
679 } 632 }
680 rv = 1;
681 } else 633 } else
682 spin_unlock_irq(&conf->device_lock); 634 spin_unlock_irq(&conf->device_lock);
683 return rv;
684} 635}
636
637static void md_kick_device(mddev_t *mddev)
638{
639 blk_flush_plug(current);
640 md_wakeup_thread(mddev->thread);
641}
642
685/* Barriers.... 643/* Barriers....
686 * Sometimes we need to suspend IO while we do something else, 644 * Sometimes we need to suspend IO while we do something else,
687 * either some resync/recovery, or reconfigure the array. 645 * either some resync/recovery, or reconfigure the array.
@@ -711,8 +669,7 @@ static void raise_barrier(conf_t *conf, int force)
711 669
712 /* Wait until no block IO is waiting (unless 'force') */ 670 /* Wait until no block IO is waiting (unless 'force') */
713 wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, 671 wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
714 conf->resync_lock, 672 conf->resync_lock, md_kick_device(conf->mddev));
715 raid10_unplug(conf->mddev->queue));
716 673
717 /* block any new IO from starting */ 674 /* block any new IO from starting */
718 conf->barrier++; 675 conf->barrier++;
@@ -720,8 +677,7 @@ static void raise_barrier(conf_t *conf, int force)
720 /* No wait for all pending IO to complete */ 677 /* No wait for all pending IO to complete */
721 wait_event_lock_irq(conf->wait_barrier, 678 wait_event_lock_irq(conf->wait_barrier,
722 !conf->nr_pending && conf->barrier < RESYNC_DEPTH, 679 !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
723 conf->resync_lock, 680 conf->resync_lock, md_kick_device(conf->mddev));
724 raid10_unplug(conf->mddev->queue));
725 681
726 spin_unlock_irq(&conf->resync_lock); 682 spin_unlock_irq(&conf->resync_lock);
727} 683}
@@ -742,7 +698,7 @@ static void wait_barrier(conf_t *conf)
742 conf->nr_waiting++; 698 conf->nr_waiting++;
743 wait_event_lock_irq(conf->wait_barrier, !conf->barrier, 699 wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
744 conf->resync_lock, 700 conf->resync_lock,
745 raid10_unplug(conf->mddev->queue)); 701 md_kick_device(conf->mddev));
746 conf->nr_waiting--; 702 conf->nr_waiting--;
747 } 703 }
748 conf->nr_pending++; 704 conf->nr_pending++;
@@ -779,7 +735,7 @@ static void freeze_array(conf_t *conf)
779 conf->nr_pending == conf->nr_queued+1, 735 conf->nr_pending == conf->nr_queued+1,
780 conf->resync_lock, 736 conf->resync_lock,
781 ({ flush_pending_writes(conf); 737 ({ flush_pending_writes(conf);
782 raid10_unplug(conf->mddev->queue); })); 738 md_kick_device(conf->mddev); }));
783 spin_unlock_irq(&conf->resync_lock); 739 spin_unlock_irq(&conf->resync_lock);
784} 740}
785 741
@@ -974,7 +930,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
974 atomic_inc(&r10_bio->remaining); 930 atomic_inc(&r10_bio->remaining);
975 spin_lock_irqsave(&conf->device_lock, flags); 931 spin_lock_irqsave(&conf->device_lock, flags);
976 bio_list_add(&conf->pending_bio_list, mbio); 932 bio_list_add(&conf->pending_bio_list, mbio);
977 blk_plug_device_unlocked(mddev->queue);
978 spin_unlock_irqrestore(&conf->device_lock, flags); 933 spin_unlock_irqrestore(&conf->device_lock, flags);
979 } 934 }
980 935
@@ -991,7 +946,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
991 /* In case raid10d snuck in to freeze_array */ 946 /* In case raid10d snuck in to freeze_array */
992 wake_up(&conf->wait_barrier); 947 wake_up(&conf->wait_barrier);
993 948
994 if (do_sync) 949 if (do_sync || !mddev->bitmap)
995 md_wakeup_thread(mddev->thread); 950 md_wakeup_thread(mddev->thread);
996 951
997 return 0; 952 return 0;
@@ -1684,7 +1639,6 @@ static void raid10d(mddev_t *mddev)
1684 unsigned long flags; 1639 unsigned long flags;
1685 conf_t *conf = mddev->private; 1640 conf_t *conf = mddev->private;
1686 struct list_head *head = &conf->retry_list; 1641 struct list_head *head = &conf->retry_list;
1687 int unplug=0;
1688 mdk_rdev_t *rdev; 1642 mdk_rdev_t *rdev;
1689 1643
1690 md_check_recovery(mddev); 1644 md_check_recovery(mddev);
@@ -1692,7 +1646,7 @@ static void raid10d(mddev_t *mddev)
1692 for (;;) { 1646 for (;;) {
1693 char b[BDEVNAME_SIZE]; 1647 char b[BDEVNAME_SIZE];
1694 1648
1695 unplug += flush_pending_writes(conf); 1649 flush_pending_writes(conf);
1696 1650
1697 spin_lock_irqsave(&conf->device_lock, flags); 1651 spin_lock_irqsave(&conf->device_lock, flags);
1698 if (list_empty(head)) { 1652 if (list_empty(head)) {
@@ -1706,13 +1660,11 @@ static void raid10d(mddev_t *mddev)
1706 1660
1707 mddev = r10_bio->mddev; 1661 mddev = r10_bio->mddev;
1708 conf = mddev->private; 1662 conf = mddev->private;
1709 if (test_bit(R10BIO_IsSync, &r10_bio->state)) { 1663 if (test_bit(R10BIO_IsSync, &r10_bio->state))
1710 sync_request_write(mddev, r10_bio); 1664 sync_request_write(mddev, r10_bio);
1711 unplug = 1; 1665 else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
1712 } else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) {
1713 recovery_request_write(mddev, r10_bio); 1666 recovery_request_write(mddev, r10_bio);
1714 unplug = 1; 1667 else {
1715 } else {
1716 int mirror; 1668 int mirror;
1717 /* we got a read error. Maybe the drive is bad. Maybe just 1669 /* we got a read error. Maybe the drive is bad. Maybe just
1718 * the block and we can fix it. 1670 * the block and we can fix it.
@@ -1759,14 +1711,11 @@ static void raid10d(mddev_t *mddev)
1759 bio->bi_rw = READ | do_sync; 1711 bio->bi_rw = READ | do_sync;
1760 bio->bi_private = r10_bio; 1712 bio->bi_private = r10_bio;
1761 bio->bi_end_io = raid10_end_read_request; 1713 bio->bi_end_io = raid10_end_read_request;
1762 unplug = 1;
1763 generic_make_request(bio); 1714 generic_make_request(bio);
1764 } 1715 }
1765 } 1716 }
1766 cond_resched(); 1717 cond_resched();
1767 } 1718 }
1768 if (unplug)
1769 unplug_slaves(mddev);
1770} 1719}
1771 1720
1772 1721
@@ -2377,7 +2326,6 @@ static int run(mddev_t *mddev)
2377 md_set_array_sectors(mddev, size); 2326 md_set_array_sectors(mddev, size);
2378 mddev->resync_max_sectors = size; 2327 mddev->resync_max_sectors = size;
2379 2328
2380 mddev->queue->unplug_fn = raid10_unplug;
2381 mddev->queue->backing_dev_info.congested_fn = raid10_congested; 2329 mddev->queue->backing_dev_info.congested_fn = raid10_congested;
2382 mddev->queue->backing_dev_info.congested_data = mddev; 2330 mddev->queue->backing_dev_info.congested_data = mddev;
2383 2331
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 78536fdbd87..e867ee42b15 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -433,8 +433,6 @@ static int has_failed(raid5_conf_t *conf)
433 return 0; 433 return 0;
434} 434}
435 435
436static void unplug_slaves(mddev_t *mddev);
437
438static struct stripe_head * 436static struct stripe_head *
439get_active_stripe(raid5_conf_t *conf, sector_t sector, 437get_active_stripe(raid5_conf_t *conf, sector_t sector,
440 int previous, int noblock, int noquiesce) 438 int previous, int noblock, int noquiesce)
@@ -463,8 +461,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
463 < (conf->max_nr_stripes *3/4) 461 < (conf->max_nr_stripes *3/4)
464 || !conf->inactive_blocked), 462 || !conf->inactive_blocked),
465 conf->device_lock, 463 conf->device_lock,
466 md_raid5_unplug_device(conf) 464 md_raid5_kick_device(conf));
467 );
468 conf->inactive_blocked = 0; 465 conf->inactive_blocked = 0;
469 } else 466 } else
470 init_stripe(sh, sector, previous); 467 init_stripe(sh, sector, previous);
@@ -1473,8 +1470,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1473 wait_event_lock_irq(conf->wait_for_stripe, 1470 wait_event_lock_irq(conf->wait_for_stripe,
1474 !list_empty(&conf->inactive_list), 1471 !list_empty(&conf->inactive_list),
1475 conf->device_lock, 1472 conf->device_lock,
1476 unplug_slaves(conf->mddev) 1473 blk_flush_plug(current));
1477 );
1478 osh = get_free_stripe(conf); 1474 osh = get_free_stripe(conf);
1479 spin_unlock_irq(&conf->device_lock); 1475 spin_unlock_irq(&conf->device_lock);
1480 atomic_set(&nsh->count, 1); 1476 atomic_set(&nsh->count, 1);
@@ -3645,58 +3641,19 @@ static void activate_bit_delay(raid5_conf_t *conf)
3645 } 3641 }
3646} 3642}
3647 3643
3648static void unplug_slaves(mddev_t *mddev) 3644void md_raid5_kick_device(raid5_conf_t *conf)
3649{ 3645{
3650 raid5_conf_t *conf = mddev->private; 3646 blk_flush_plug(current);
3651 int i; 3647 raid5_activate_delayed(conf);
3652 int devs = max(conf->raid_disks, conf->previous_raid_disks);
3653
3654 rcu_read_lock();
3655 for (i = 0; i < devs; i++) {
3656 mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
3657 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
3658 struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
3659
3660 atomic_inc(&rdev->nr_pending);
3661 rcu_read_unlock();
3662
3663 blk_unplug(r_queue);
3664
3665 rdev_dec_pending(rdev, mddev);
3666 rcu_read_lock();
3667 }
3668 }
3669 rcu_read_unlock();
3670}
3671
3672void md_raid5_unplug_device(raid5_conf_t *conf)
3673{
3674 unsigned long flags;
3675
3676 spin_lock_irqsave(&conf->device_lock, flags);
3677
3678 if (plugger_remove_plug(&conf->plug)) {
3679 conf->seq_flush++;
3680 raid5_activate_delayed(conf);
3681 }
3682 md_wakeup_thread(conf->mddev->thread); 3648 md_wakeup_thread(conf->mddev->thread);
3683
3684 spin_unlock_irqrestore(&conf->device_lock, flags);
3685
3686 unplug_slaves(conf->mddev);
3687} 3649}
3688EXPORT_SYMBOL_GPL(md_raid5_unplug_device); 3650EXPORT_SYMBOL_GPL(md_raid5_kick_device);
3689 3651
3690static void raid5_unplug(struct plug_handle *plug) 3652static void raid5_unplug(struct plug_handle *plug)
3691{ 3653{
3692 raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); 3654 raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
3693 md_raid5_unplug_device(conf);
3694}
3695 3655
3696static void raid5_unplug_queue(struct request_queue *q) 3656 md_raid5_kick_device(conf);
3697{
3698 mddev_t *mddev = q->queuedata;
3699 md_raid5_unplug_device(mddev->private);
3700} 3657}
3701 3658
3702int md_raid5_congested(mddev_t *mddev, int bits) 3659int md_raid5_congested(mddev_t *mddev, int bits)
@@ -4100,7 +4057,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
4100 * add failed due to overlap. Flush everything 4057 * add failed due to overlap. Flush everything
4101 * and wait a while 4058 * and wait a while
4102 */ 4059 */
4103 md_raid5_unplug_device(conf); 4060 md_raid5_kick_device(conf);
4104 release_stripe(sh); 4061 release_stripe(sh);
4105 schedule(); 4062 schedule();
4106 goto retry; 4063 goto retry;
@@ -4365,7 +4322,6 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
4365 4322
4366 if (sector_nr >= max_sector) { 4323 if (sector_nr >= max_sector) {
4367 /* just being told to finish up .. nothing much to do */ 4324 /* just being told to finish up .. nothing much to do */
4368 unplug_slaves(mddev);
4369 4325
4370 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { 4326 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
4371 end_reshape(conf); 4327 end_reshape(conf);
@@ -4569,7 +4525,6 @@ static void raid5d(mddev_t *mddev)
4569 spin_unlock_irq(&conf->device_lock); 4525 spin_unlock_irq(&conf->device_lock);
4570 4526
4571 async_tx_issue_pending_all(); 4527 async_tx_issue_pending_all();
4572 unplug_slaves(mddev);
4573 4528
4574 pr_debug("--- raid5d inactive\n"); 4529 pr_debug("--- raid5d inactive\n");
4575} 4530}
@@ -5204,7 +5159,7 @@ static int run(mddev_t *mddev)
5204 5159
5205 mddev->queue->backing_dev_info.congested_data = mddev; 5160 mddev->queue->backing_dev_info.congested_data = mddev;
5206 mddev->queue->backing_dev_info.congested_fn = raid5_congested; 5161 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
5207 mddev->queue->unplug_fn = raid5_unplug_queue; 5162 mddev->queue->queue_lock = &conf->device_lock;
5208 5163
5209 chunk_size = mddev->chunk_sectors << 9; 5164 chunk_size = mddev->chunk_sectors << 9;
5210 blk_queue_io_min(mddev->queue, chunk_size); 5165 blk_queue_io_min(mddev->queue, chunk_size);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2ace0582b40..8d563a4f022 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -503,6 +503,6 @@ static inline int algorithm_is_DDF(int layout)
503} 503}
504 504
505extern int md_raid5_congested(mddev_t *mddev, int bits); 505extern int md_raid5_congested(mddev_t *mddev, int bits);
506extern void md_raid5_unplug_device(raid5_conf_t *conf); 506extern void md_raid5_kick_device(raid5_conf_t *conf);
507extern int raid5_set_cache_size(mddev_t *mddev, int size); 507extern int raid5_set_cache_size(mddev_t *mddev, int size);
508#endif 508#endif
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index f81c25d4a12..47ec5bc0ed2 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -897,11 +897,7 @@ static void i2o_block_request_fn(struct request_queue *q)
897{ 897{
898 struct request *req; 898 struct request *req;
899 899
900 while (!blk_queue_plugged(q)) { 900 while ((req = blk_peek_request(q)) != NULL) {
901 req = blk_peek_request(q);
902 if (!req)
903 break;
904
905 if (req->cmd_type == REQ_TYPE_FS) { 901 if (req->cmd_type == REQ_TYPE_FS) {
906 struct i2o_block_delayed_request *dreq; 902 struct i2o_block_delayed_request *dreq;
907 struct i2o_block_request *ireq = req->special; 903 struct i2o_block_request *ireq = req->special;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 4e42d030e09..2ae727568df 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -55,8 +55,7 @@ static int mmc_queue_thread(void *d)
55 55
56 spin_lock_irq(q->queue_lock); 56 spin_lock_irq(q->queue_lock);
57 set_current_state(TASK_INTERRUPTIBLE); 57 set_current_state(TASK_INTERRUPTIBLE);
58 if (!blk_queue_plugged(q)) 58 req = blk_fetch_request(q);
59 req = blk_fetch_request(q);
60 mq->req = req; 59 mq->req = req;
61 spin_unlock_irq(q->queue_lock); 60 spin_unlock_irq(q->queue_lock);
62 61
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 794bfd96226..4d2df2f76ea 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1917,7 +1917,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
1917 return; 1917 return;
1918 } 1918 }
1919 /* Now we try to fetch requests from the request queue */ 1919 /* Now we try to fetch requests from the request queue */
1920 while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) { 1920 while ((req = blk_peek_request(queue))) {
1921 if (basedev->features & DASD_FEATURE_READONLY && 1921 if (basedev->features & DASD_FEATURE_READONLY &&
1922 rq_data_dir(req) == WRITE) { 1922 rq_data_dir(req) == WRITE) {
1923 DBF_DEV_EVENT(DBF_ERR, basedev, 1923 DBF_DEV_EVENT(DBF_ERR, basedev,
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index ad843163601..83cea9a55e2 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -161,7 +161,6 @@ tapeblock_requeue(struct work_struct *work) {
161 161
162 spin_lock_irq(&device->blk_data.request_queue_lock); 162 spin_lock_irq(&device->blk_data.request_queue_lock);
163 while ( 163 while (
164 !blk_queue_plugged(queue) &&
165 blk_peek_request(queue) && 164 blk_peek_request(queue) &&
166 nr_queued < TAPEBLOCK_MIN_REQUEUE 165 nr_queued < TAPEBLOCK_MIN_REQUEUE
167 ) { 166 ) {
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fb2bb35c62c..bf80a4c5a48 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -67,6 +67,13 @@ static struct scsi_host_sg_pool scsi_sg_pools[] = {
67 67
68struct kmem_cache *scsi_sdb_cache; 68struct kmem_cache *scsi_sdb_cache;
69 69
70/*
71 * When to reinvoke queueing after a resource shortage. It's 3 msecs to
72 * not change behaviour from the previous unplug mechanism, experimentation
73 * may prove this needs changing.
74 */
75#define SCSI_QUEUE_DELAY 3
76
70static void scsi_run_queue(struct request_queue *q); 77static void scsi_run_queue(struct request_queue *q);
71 78
72/* 79/*
@@ -149,14 +156,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
149 /* 156 /*
150 * Requeue this command. It will go before all other commands 157 * Requeue this command. It will go before all other commands
151 * that are already in the queue. 158 * that are already in the queue.
152 * 159 */
153 * NOTE: there is magic here about the way the queue is plugged if
154 * we have no outstanding commands.
155 *
156 * Although we *don't* plug the queue, we call the request
157 * function. The SCSI request function detects the blocked condition
158 * and plugs the queue appropriately.
159 */
160 spin_lock_irqsave(q->queue_lock, flags); 160 spin_lock_irqsave(q->queue_lock, flags);
161 blk_requeue_request(q, cmd->request); 161 blk_requeue_request(q, cmd->request);
162 spin_unlock_irqrestore(q->queue_lock, flags); 162 spin_unlock_irqrestore(q->queue_lock, flags);
@@ -1194,11 +1194,11 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
1194 case BLKPREP_DEFER: 1194 case BLKPREP_DEFER:
1195 /* 1195 /*
1196 * If we defer, the blk_peek_request() returns NULL, but the 1196 * If we defer, the blk_peek_request() returns NULL, but the
1197 * queue must be restarted, so we plug here if no returning 1197 * queue must be restarted, so we schedule a callback to happen
1198 * command will automatically do that. 1198 * shortly.
1199 */ 1199 */
1200 if (sdev->device_busy == 0) 1200 if (sdev->device_busy == 0)
1201 blk_plug_device(q); 1201 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1202 break; 1202 break;
1203 default: 1203 default:
1204 req->cmd_flags |= REQ_DONTPREP; 1204 req->cmd_flags |= REQ_DONTPREP;
@@ -1237,7 +1237,7 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
1237 sdev_printk(KERN_INFO, sdev, 1237 sdev_printk(KERN_INFO, sdev,
1238 "unblocking device at zero depth\n")); 1238 "unblocking device at zero depth\n"));
1239 } else { 1239 } else {
1240 blk_plug_device(q); 1240 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1241 return 0; 1241 return 0;
1242 } 1242 }
1243 } 1243 }
@@ -1467,7 +1467,7 @@ static void scsi_request_fn(struct request_queue *q)
1467 * the host is no longer able to accept any more requests. 1467 * the host is no longer able to accept any more requests.
1468 */ 1468 */
1469 shost = sdev->host; 1469 shost = sdev->host;
1470 while (!blk_queue_plugged(q)) { 1470 for (;;) {
1471 int rtn; 1471 int rtn;
1472 /* 1472 /*
1473 * get next queueable request. We do this early to make sure 1473 * get next queueable request. We do this early to make sure
@@ -1546,15 +1546,8 @@ static void scsi_request_fn(struct request_queue *q)
1546 */ 1546 */
1547 rtn = scsi_dispatch_cmd(cmd); 1547 rtn = scsi_dispatch_cmd(cmd);
1548 spin_lock_irq(q->queue_lock); 1548 spin_lock_irq(q->queue_lock);
1549 if(rtn) { 1549 if (rtn)
1550 /* we're refusing the command; because of 1550 goto out_delay;
1551 * the way locks get dropped, we need to
1552 * check here if plugging is required */
1553 if(sdev->device_busy == 0)
1554 blk_plug_device(q);
1555
1556 break;
1557 }
1558 } 1551 }
1559 1552
1560 goto out; 1553 goto out;
@@ -1573,9 +1566,10 @@ static void scsi_request_fn(struct request_queue *q)
1573 spin_lock_irq(q->queue_lock); 1566 spin_lock_irq(q->queue_lock);
1574 blk_requeue_request(q, req); 1567 blk_requeue_request(q, req);
1575 sdev->device_busy--; 1568 sdev->device_busy--;
1576 if(sdev->device_busy == 0) 1569out_delay:
1577 blk_plug_device(q); 1570 if (sdev->device_busy == 0)
1578 out: 1571 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1572out:
1579 /* must be careful here...if we trigger the ->remove() function 1573 /* must be careful here...if we trigger the ->remove() function
1580 * we cannot be holding the q lock */ 1574 * we cannot be holding the q lock */
1581 spin_unlock_irq(q->queue_lock); 1575 spin_unlock_irq(q->queue_lock);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 5c3ccfc6b62..2941d2d92c9 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3913,7 +3913,7 @@ fc_bsg_request_handler(struct request_queue *q, struct Scsi_Host *shost,
3913 if (!get_device(dev)) 3913 if (!get_device(dev))
3914 return; 3914 return;
3915 3915
3916 while (!blk_queue_plugged(q)) { 3916 while (1) {
3917 if (rport && (rport->port_state == FC_PORTSTATE_BLOCKED) && 3917 if (rport && (rport->port_state == FC_PORTSTATE_BLOCKED) &&
3918 !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) 3918 !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
3919 break; 3919 break;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 927e99cb722..c6fcf76cade 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -173,11 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
173 int ret; 173 int ret;
174 int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); 174 int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
175 175
176 while (!blk_queue_plugged(q)) { 176 while ((req = blk_fetch_request(q)) != NULL) {
177 req = blk_fetch_request(q);
178 if (!req)
179 break;
180
181 spin_unlock_irq(q->queue_lock); 177 spin_unlock_irq(q->queue_lock);
182 178
183 handler = to_sas_internal(shost->transportt)->f->smp_handler; 179 handler = to_sas_internal(shost->transportt)->f->smp_handler;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 67f0c09983c..c1b539d7b0d 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -392,9 +392,8 @@ static int iblock_do_task(struct se_task *task)
392{ 392{
393 struct se_device *dev = task->task_se_cmd->se_dev; 393 struct se_device *dev = task->task_se_cmd->se_dev;
394 struct iblock_req *req = IBLOCK_REQ(task); 394 struct iblock_req *req = IBLOCK_REQ(task);
395 struct iblock_dev *ibd = (struct iblock_dev *)req->ib_dev;
396 struct request_queue *q = bdev_get_queue(ibd->ibd_bd);
397 struct bio *bio = req->ib_bio, *nbio = NULL; 395 struct bio *bio = req->ib_bio, *nbio = NULL;
396 struct blk_plug plug;
398 int rw; 397 int rw;
399 398
400 if (task->task_data_direction == DMA_TO_DEVICE) { 399 if (task->task_data_direction == DMA_TO_DEVICE) {
@@ -412,6 +411,7 @@ static int iblock_do_task(struct se_task *task)
412 rw = READ; 411 rw = READ;
413 } 412 }
414 413
414 blk_start_plug(&plug);
415 while (bio) { 415 while (bio) {
416 nbio = bio->bi_next; 416 nbio = bio->bi_next;
417 bio->bi_next = NULL; 417 bio->bi_next = NULL;
@@ -421,9 +421,8 @@ static int iblock_do_task(struct se_task *task)
421 submit_bio(rw, bio); 421 submit_bio(rw, bio);
422 bio = nbio; 422 bio = nbio;
423 } 423 }
424 blk_finish_plug(&plug);
424 425
425 if (q->unplug_fn)
426 q->unplug_fn(q);
427 return PYX_TRANSPORT_SENT_TO_TRANSPORT; 426 return PYX_TRANSPORT_SENT_TO_TRANSPORT;
428} 427}
429 428
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 65794b8fe79..1cc84b27613 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -73,7 +73,6 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
73static const struct address_space_operations adfs_aops = { 73static const struct address_space_operations adfs_aops = {
74 .readpage = adfs_readpage, 74 .readpage = adfs_readpage,
75 .writepage = adfs_writepage, 75 .writepage = adfs_writepage,
76 .sync_page = block_sync_page,
77 .write_begin = adfs_write_begin, 76 .write_begin = adfs_write_begin,
78 .write_end = generic_write_end, 77 .write_end = generic_write_end,
79 .bmap = _adfs_bmap 78 .bmap = _adfs_bmap
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0a90dcd46de..acf321b70fc 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -429,7 +429,6 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
429const struct address_space_operations affs_aops = { 429const struct address_space_operations affs_aops = {
430 .readpage = affs_readpage, 430 .readpage = affs_readpage,
431 .writepage = affs_writepage, 431 .writepage = affs_writepage,
432 .sync_page = block_sync_page,
433 .write_begin = affs_write_begin, 432 .write_begin = affs_write_begin,
434 .write_end = generic_write_end, 433 .write_end = generic_write_end,
435 .bmap = _affs_bmap 434 .bmap = _affs_bmap
@@ -786,7 +785,6 @@ out:
786const struct address_space_operations affs_aops_ofs = { 785const struct address_space_operations affs_aops_ofs = {
787 .readpage = affs_readpage_ofs, 786 .readpage = affs_readpage_ofs,
788 //.writepage = affs_writepage_ofs, 787 //.writepage = affs_writepage_ofs,
789 //.sync_page = affs_sync_page_ofs,
790 .write_begin = affs_write_begin_ofs, 788 .write_begin = affs_write_begin_ofs,
791 .write_end = affs_write_end_ofs 789 .write_end = affs_write_end_ofs
792}; 790};
diff --git a/fs/aio.c b/fs/aio.c
index 26869cde395..a936b7fe4f6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -34,8 +34,6 @@
34#include <linux/security.h> 34#include <linux/security.h>
35#include <linux/eventfd.h> 35#include <linux/eventfd.h>
36#include <linux/blkdev.h> 36#include <linux/blkdev.h>
37#include <linux/mempool.h>
38#include <linux/hash.h>
39#include <linux/compat.h> 37#include <linux/compat.h>
40 38
41#include <asm/kmap_types.h> 39#include <asm/kmap_types.h>
@@ -65,14 +63,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
65static DEFINE_SPINLOCK(fput_lock); 63static DEFINE_SPINLOCK(fput_lock);
66static LIST_HEAD(fput_head); 64static LIST_HEAD(fput_head);
67 65
68#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
69#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
70struct aio_batch_entry {
71 struct hlist_node list;
72 struct address_space *mapping;
73};
74mempool_t *abe_pool;
75
76static void aio_kick_handler(struct work_struct *); 66static void aio_kick_handler(struct work_struct *);
77static void aio_queue_work(struct kioctx *); 67static void aio_queue_work(struct kioctx *);
78 68
@@ -86,8 +76,7 @@ static int __init aio_setup(void)
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 76 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 77
88 aio_wq = create_workqueue("aio"); 78 aio_wq = create_workqueue("aio");
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 79 BUG_ON(!aio_wq);
90 BUG_ON(!aio_wq || !abe_pool);
91 80
92 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); 81 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
93 82
@@ -1525,57 +1514,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
1525 return 0; 1514 return 0;
1526} 1515}
1527 1516
1528static void aio_batch_add(struct address_space *mapping,
1529 struct hlist_head *batch_hash)
1530{
1531 struct aio_batch_entry *abe;
1532 struct hlist_node *pos;
1533 unsigned bucket;
1534
1535 bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
1536 hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
1537 if (abe->mapping == mapping)
1538 return;
1539 }
1540
1541 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1542
1543 /*
1544 * we should be using igrab here, but
1545 * we don't want to hammer on the global
1546 * inode spinlock just to take an extra
1547 * reference on a file that we must already
1548 * have a reference to.
1549 *
1550 * When we're called, we always have a reference
1551 * on the file, so we must always have a reference
1552 * on the inode, so ihold() is safe here.
1553 */
1554 ihold(mapping->host);
1555 abe->mapping = mapping;
1556 hlist_add_head(&abe->list, &batch_hash[bucket]);
1557 return;
1558}
1559
1560static void aio_batch_free(struct hlist_head *batch_hash)
1561{
1562 struct aio_batch_entry *abe;
1563 struct hlist_node *pos, *n;
1564 int i;
1565
1566 for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
1567 hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
1568 blk_run_address_space(abe->mapping);
1569 iput(abe->mapping->host);
1570 hlist_del(&abe->list);
1571 mempool_free(abe, abe_pool);
1572 }
1573 }
1574}
1575
1576static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1517static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1577 struct iocb *iocb, struct hlist_head *batch_hash, 1518 struct iocb *iocb, bool compat)
1578 bool compat)
1579{ 1519{
1580 struct kiocb *req; 1520 struct kiocb *req;
1581 struct file *file; 1521 struct file *file;
@@ -1666,11 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1666 ; 1606 ;
1667 } 1607 }
1668 spin_unlock_irq(&ctx->ctx_lock); 1608 spin_unlock_irq(&ctx->ctx_lock);
1669 if (req->ki_opcode == IOCB_CMD_PREAD ||
1670 req->ki_opcode == IOCB_CMD_PREADV ||
1671 req->ki_opcode == IOCB_CMD_PWRITE ||
1672 req->ki_opcode == IOCB_CMD_PWRITEV)
1673 aio_batch_add(file->f_mapping, batch_hash);
1674 1609
1675 aio_put_req(req); /* drop extra ref to req */ 1610 aio_put_req(req); /* drop extra ref to req */
1676 return 0; 1611 return 0;
@@ -1687,7 +1622,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1687 struct kioctx *ctx; 1622 struct kioctx *ctx;
1688 long ret = 0; 1623 long ret = 0;
1689 int i; 1624 int i;
1690 struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, }; 1625 struct blk_plug plug;
1691 1626
1692 if (unlikely(nr < 0)) 1627 if (unlikely(nr < 0))
1693 return -EINVAL; 1628 return -EINVAL;
@@ -1704,6 +1639,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1704 return -EINVAL; 1639 return -EINVAL;
1705 } 1640 }
1706 1641
1642 blk_start_plug(&plug);
1643
1707 /* 1644 /*
1708 * AKPM: should this return a partial result if some of the IOs were 1645 * AKPM: should this return a partial result if some of the IOs were
1709 * successfully submitted? 1646 * successfully submitted?
@@ -1722,11 +1659,11 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1722 break; 1659 break;
1723 } 1660 }
1724 1661
1725 ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat); 1662 ret = io_submit_one(ctx, user_iocb, &tmp, compat);
1726 if (ret) 1663 if (ret)
1727 break; 1664 break;
1728 } 1665 }
1729 aio_batch_free(batch_hash); 1666 blk_finish_plug(&plug);
1730 1667
1731 put_ioctx(ctx); 1668 put_ioctx(ctx);
1732 return i ? i : ret; 1669 return i ? i : ret;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b1d0c794747..06457ed8f3e 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -75,7 +75,6 @@ static const struct inode_operations befs_dir_inode_operations = {
75 75
76static const struct address_space_operations befs_aops = { 76static const struct address_space_operations befs_aops = {
77 .readpage = befs_readpage, 77 .readpage = befs_readpage,
78 .sync_page = block_sync_page,
79 .bmap = befs_bmap, 78 .bmap = befs_bmap,
80}; 79};
81 80
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index eb67edd0f8e..f20e8a71062 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -186,7 +186,6 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
186const struct address_space_operations bfs_aops = { 186const struct address_space_operations bfs_aops = {
187 .readpage = bfs_readpage, 187 .readpage = bfs_readpage,
188 .writepage = bfs_writepage, 188 .writepage = bfs_writepage,
189 .sync_page = block_sync_page,
190 .write_begin = bfs_write_begin, 189 .write_begin = bfs_write_begin,
191 .write_end = generic_write_end, 190 .write_end = generic_write_end,
192 .bmap = bfs_bmap, 191 .bmap = bfs_bmap,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d42cad2757a..fbe05cbdd69 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1529,7 +1529,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
1529static const struct address_space_operations def_blk_aops = { 1529static const struct address_space_operations def_blk_aops = {
1530 .readpage = blkdev_readpage, 1530 .readpage = blkdev_readpage,
1531 .writepage = blkdev_writepage, 1531 .writepage = blkdev_writepage,
1532 .sync_page = block_sync_page,
1533 .write_begin = blkdev_write_begin, 1532 .write_begin = blkdev_write_begin,
1534 .write_end = blkdev_write_end, 1533 .write_end = blkdev_write_end,
1535 .writepages = generic_writepages, 1534 .writepages = generic_writepages,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e1aa8d607bc..ada1f6bd0a5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -847,7 +847,6 @@ static const struct address_space_operations btree_aops = {
847 .writepages = btree_writepages, 847 .writepages = btree_writepages,
848 .releasepage = btree_releasepage, 848 .releasepage = btree_releasepage,
849 .invalidatepage = btree_invalidatepage, 849 .invalidatepage = btree_invalidatepage,
850 .sync_page = block_sync_page,
851#ifdef CONFIG_MIGRATION 850#ifdef CONFIG_MIGRATION
852 .migratepage = btree_migratepage, 851 .migratepage = btree_migratepage,
853#endif 852#endif
@@ -1331,82 +1330,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1331} 1330}
1332 1331
1333/* 1332/*
1334 * this unplugs every device on the box, and it is only used when page
1335 * is null
1336 */
1337static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1338{
1339 struct btrfs_device *device;
1340 struct btrfs_fs_info *info;
1341
1342 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1343 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1344 if (!device->bdev)
1345 continue;
1346
1347 bdi = blk_get_backing_dev_info(device->bdev);
1348 if (bdi->unplug_io_fn)
1349 bdi->unplug_io_fn(bdi, page);
1350 }
1351}
1352
1353static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1354{
1355 struct inode *inode;
1356 struct extent_map_tree *em_tree;
1357 struct extent_map *em;
1358 struct address_space *mapping;
1359 u64 offset;
1360
1361 /* the generic O_DIRECT read code does this */
1362 if (1 || !page) {
1363 __unplug_io_fn(bdi, page);
1364 return;
1365 }
1366
1367 /*
1368 * page->mapping may change at any time. Get a consistent copy
1369 * and use that for everything below
1370 */
1371 smp_mb();
1372 mapping = page->mapping;
1373 if (!mapping)
1374 return;
1375
1376 inode = mapping->host;
1377
1378 /*
1379 * don't do the expensive searching for a small number of
1380 * devices
1381 */
1382 if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
1383 __unplug_io_fn(bdi, page);
1384 return;
1385 }
1386
1387 offset = page_offset(page);
1388
1389 em_tree = &BTRFS_I(inode)->extent_tree;
1390 read_lock(&em_tree->lock);
1391 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1392 read_unlock(&em_tree->lock);
1393 if (!em) {
1394 __unplug_io_fn(bdi, page);
1395 return;
1396 }
1397
1398 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1399 free_extent_map(em);
1400 __unplug_io_fn(bdi, page);
1401 return;
1402 }
1403 offset = offset - em->start;
1404 btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
1405 em->block_start + offset, page);
1406 free_extent_map(em);
1407}
1408
1409/*
1410 * If this fails, caller must call bdi_destroy() to get rid of the 1333 * If this fails, caller must call bdi_destroy() to get rid of the
1411 * bdi again. 1334 * bdi again.
1412 */ 1335 */
@@ -1420,8 +1343,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1420 return err; 1343 return err;
1421 1344
1422 bdi->ra_pages = default_backing_dev_info.ra_pages; 1345 bdi->ra_pages = default_backing_dev_info.ra_pages;
1423 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1424 bdi->unplug_io_data = info;
1425 bdi->congested_fn = btrfs_congested_fn; 1346 bdi->congested_fn = btrfs_congested_fn;
1426 bdi->congested_data = info; 1347 bdi->congested_data = info;
1427 return 0; 1348 return 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd3f172e94e..00497d551e5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2188,7 +2188,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2188 unsigned long nr_written = 0; 2188 unsigned long nr_written = 0;
2189 2189
2190 if (wbc->sync_mode == WB_SYNC_ALL) 2190 if (wbc->sync_mode == WB_SYNC_ALL)
2191 write_flags = WRITE_SYNC_PLUG; 2191 write_flags = WRITE_SYNC;
2192 else 2192 else
2193 write_flags = WRITE; 2193 write_flags = WRITE;
2194 2194
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c..02438c91792 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7340,7 +7340,6 @@ static const struct address_space_operations btrfs_aops = {
7340 .writepage = btrfs_writepage, 7340 .writepage = btrfs_writepage,
7341 .writepages = btrfs_writepages, 7341 .writepages = btrfs_writepages,
7342 .readpages = btrfs_readpages, 7342 .readpages = btrfs_readpages,
7343 .sync_page = block_sync_page,
7344 .direct_IO = btrfs_direct_IO, 7343 .direct_IO = btrfs_direct_IO,
7345 .invalidatepage = btrfs_invalidatepage, 7344 .invalidatepage = btrfs_invalidatepage,
7346 .releasepage = btrfs_releasepage, 7345 .releasepage = btrfs_releasepage,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee4..9d554e8e658 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -162,7 +162,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
162 struct bio *cur; 162 struct bio *cur;
163 int again = 0; 163 int again = 0;
164 unsigned long num_run; 164 unsigned long num_run;
165 unsigned long num_sync_run;
166 unsigned long batch_run = 0; 165 unsigned long batch_run = 0;
167 unsigned long limit; 166 unsigned long limit;
168 unsigned long last_waited = 0; 167 unsigned long last_waited = 0;
@@ -173,11 +172,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
173 limit = btrfs_async_submit_limit(fs_info); 172 limit = btrfs_async_submit_limit(fs_info);
174 limit = limit * 2 / 3; 173 limit = limit * 2 / 3;
175 174
176 /* we want to make sure that every time we switch from the sync
177 * list to the normal list, we unplug
178 */
179 num_sync_run = 0;
180
181loop: 175loop:
182 spin_lock(&device->io_lock); 176 spin_lock(&device->io_lock);
183 177
@@ -223,15 +217,6 @@ loop_lock:
223 217
224 spin_unlock(&device->io_lock); 218 spin_unlock(&device->io_lock);
225 219
226 /*
227 * if we're doing the regular priority list, make sure we unplug
228 * for any high prio bios we've sent down
229 */
230 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
231 num_sync_run = 0;
232 blk_run_backing_dev(bdi, NULL);
233 }
234
235 while (pending) { 220 while (pending) {
236 221
237 rmb(); 222 rmb();
@@ -259,19 +244,11 @@ loop_lock:
259 244
260 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 245 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
261 246
262 if (cur->bi_rw & REQ_SYNC)
263 num_sync_run++;
264
265 submit_bio(cur->bi_rw, cur); 247 submit_bio(cur->bi_rw, cur);
266 num_run++; 248 num_run++;
267 batch_run++; 249 batch_run++;
268 if (need_resched()) { 250 if (need_resched())
269 if (num_sync_run) {
270 blk_run_backing_dev(bdi, NULL);
271 num_sync_run = 0;
272 }
273 cond_resched(); 251 cond_resched();
274 }
275 252
276 /* 253 /*
277 * we made progress, there is more work to do and the bdi 254 * we made progress, there is more work to do and the bdi
@@ -304,13 +281,8 @@ loop_lock:
304 * against it before looping 281 * against it before looping
305 */ 282 */
306 last_waited = ioc->last_waited; 283 last_waited = ioc->last_waited;
307 if (need_resched()) { 284 if (need_resched())
308 if (num_sync_run) {
309 blk_run_backing_dev(bdi, NULL);
310 num_sync_run = 0;
311 }
312 cond_resched(); 285 cond_resched();
313 }
314 continue; 286 continue;
315 } 287 }
316 spin_lock(&device->io_lock); 288 spin_lock(&device->io_lock);
@@ -323,22 +295,6 @@ loop_lock:
323 } 295 }
324 } 296 }
325 297
326 if (num_sync_run) {
327 num_sync_run = 0;
328 blk_run_backing_dev(bdi, NULL);
329 }
330 /*
331 * IO has already been through a long path to get here. Checksumming,
332 * async helper threads, perhaps compression. We've done a pretty
333 * good job of collecting a batch of IO and should just unplug
334 * the device right away.
335 *
336 * This will help anyone who is waiting on the IO, they might have
337 * already unplugged, but managed to do so before the bio they
338 * cared about found its way down here.
339 */
340 blk_run_backing_dev(bdi, NULL);
341
342 cond_resched(); 298 cond_resched();
343 if (again) 299 if (again)
344 goto loop; 300 goto loop;
@@ -2955,7 +2911,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
2955static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, 2911static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2956 u64 logical, u64 *length, 2912 u64 logical, u64 *length,
2957 struct btrfs_multi_bio **multi_ret, 2913 struct btrfs_multi_bio **multi_ret,
2958 int mirror_num, struct page *unplug_page) 2914 int mirror_num)
2959{ 2915{
2960 struct extent_map *em; 2916 struct extent_map *em;
2961 struct map_lookup *map; 2917 struct map_lookup *map;
@@ -2987,11 +2943,6 @@ again:
2987 em = lookup_extent_mapping(em_tree, logical, *length); 2943 em = lookup_extent_mapping(em_tree, logical, *length);
2988 read_unlock(&em_tree->lock); 2944 read_unlock(&em_tree->lock);
2989 2945
2990 if (!em && unplug_page) {
2991 kfree(multi);
2992 return 0;
2993 }
2994
2995 if (!em) { 2946 if (!em) {
2996 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2947 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
2997 (unsigned long long)logical, 2948 (unsigned long long)logical,
@@ -3047,13 +2998,13 @@ again:
3047 *length = em->len - offset; 2998 *length = em->len - offset;
3048 } 2999 }
3049 3000
3050 if (!multi_ret && !unplug_page) 3001 if (!multi_ret)
3051 goto out; 3002 goto out;
3052 3003
3053 num_stripes = 1; 3004 num_stripes = 1;
3054 stripe_index = 0; 3005 stripe_index = 0;
3055 if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 3006 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3056 if (unplug_page || (rw & REQ_WRITE)) 3007 if (rw & REQ_WRITE)
3057 num_stripes = map->num_stripes; 3008 num_stripes = map->num_stripes;
3058 else if (mirror_num) 3009 else if (mirror_num)
3059 stripe_index = mirror_num - 1; 3010 stripe_index = mirror_num - 1;
@@ -3075,7 +3026,7 @@ again:
3075 stripe_index = do_div(stripe_nr, factor); 3026 stripe_index = do_div(stripe_nr, factor);
3076 stripe_index *= map->sub_stripes; 3027 stripe_index *= map->sub_stripes;
3077 3028
3078 if (unplug_page || (rw & REQ_WRITE)) 3029 if (rw & REQ_WRITE)
3079 num_stripes = map->sub_stripes; 3030 num_stripes = map->sub_stripes;
3080 else if (mirror_num) 3031 else if (mirror_num)
3081 stripe_index += mirror_num - 1; 3032 stripe_index += mirror_num - 1;
@@ -3095,22 +3046,10 @@ again:
3095 BUG_ON(stripe_index >= map->num_stripes); 3046 BUG_ON(stripe_index >= map->num_stripes);
3096 3047
3097 for (i = 0; i < num_stripes; i++) { 3048 for (i = 0; i < num_stripes; i++) {
3098 if (unplug_page) { 3049 multi->stripes[i].physical =
3099 struct btrfs_device *device; 3050 map->stripes[stripe_index].physical +
3100 struct backing_dev_info *bdi; 3051 stripe_offset + stripe_nr * map->stripe_len;
3101 3052 multi->stripes[i].dev = map->stripes[stripe_index].dev;
3102 device = map->stripes[stripe_index].dev;
3103 if (device->bdev) {
3104 bdi = blk_get_backing_dev_info(device->bdev);
3105 if (bdi->unplug_io_fn)
3106 bdi->unplug_io_fn(bdi, unplug_page);
3107 }
3108 } else {
3109 multi->stripes[i].physical =
3110 map->stripes[stripe_index].physical +
3111 stripe_offset + stripe_nr * map->stripe_len;
3112 multi->stripes[i].dev = map->stripes[stripe_index].dev;
3113 }
3114 stripe_index++; 3053 stripe_index++;
3115 } 3054 }
3116 if (multi_ret) { 3055 if (multi_ret) {
@@ -3128,7 +3067,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3128 struct btrfs_multi_bio **multi_ret, int mirror_num) 3067 struct btrfs_multi_bio **multi_ret, int mirror_num)
3129{ 3068{
3130 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, 3069 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
3131 mirror_num, NULL); 3070 mirror_num);
3132} 3071}
3133 3072
3134int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 3073int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3135,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
3196 return 0; 3135 return 0;
3197} 3136}
3198 3137
3199int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
3200 u64 logical, struct page *page)
3201{
3202 u64 length = PAGE_CACHE_SIZE;
3203 return __btrfs_map_block(map_tree, READ, logical, &length,
3204 NULL, 0, page);
3205}
3206
3207static void end_bio_multi_stripe(struct bio *bio, int err) 3138static void end_bio_multi_stripe(struct bio *bio, int err)
3208{ 3139{
3209 struct btrfs_multi_bio *multi = bio->bi_private; 3140 struct btrfs_multi_bio *multi = bio->bi_private;
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2ca..42534f67d71 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
54} 54}
55EXPORT_SYMBOL(init_buffer); 55EXPORT_SYMBOL(init_buffer);
56 56
57static int sync_buffer(void *word) 57static int sleep_on_buffer(void *word)
58{ 58{
59 struct block_device *bd;
60 struct buffer_head *bh
61 = container_of(word, struct buffer_head, b_state);
62
63 smp_mb();
64 bd = bh->b_bdev;
65 if (bd)
66 blk_run_address_space(bd->bd_inode->i_mapping);
67 io_schedule(); 59 io_schedule();
68 return 0; 60 return 0;
69} 61}
70 62
71void __lock_buffer(struct buffer_head *bh) 63void __lock_buffer(struct buffer_head *bh)
72{ 64{
73 wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, 65 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
74 TASK_UNINTERRUPTIBLE); 66 TASK_UNINTERRUPTIBLE);
75} 67}
76EXPORT_SYMBOL(__lock_buffer); 68EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
90 */ 82 */
91void __wait_on_buffer(struct buffer_head * bh) 83void __wait_on_buffer(struct buffer_head * bh)
92{ 84{
93 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); 85 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
94} 86}
95EXPORT_SYMBOL(__wait_on_buffer); 87EXPORT_SYMBOL(__wait_on_buffer);
96 88
@@ -749,7 +741,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
749{ 741{
750 struct buffer_head *bh; 742 struct buffer_head *bh;
751 struct list_head tmp; 743 struct list_head tmp;
752 struct address_space *mapping, *prev_mapping = NULL; 744 struct address_space *mapping;
753 int err = 0, err2; 745 int err = 0, err2;
754 746
755 INIT_LIST_HEAD(&tmp); 747 INIT_LIST_HEAD(&tmp);
@@ -775,7 +767,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
775 * still in flight on potentially older 767 * still in flight on potentially older
776 * contents. 768 * contents.
777 */ 769 */
778 write_dirty_buffer(bh, WRITE_SYNC_PLUG); 770 write_dirty_buffer(bh, WRITE_SYNC);
779 771
780 /* 772 /*
781 * Kick off IO for the previous mapping. Note 773 * Kick off IO for the previous mapping. Note
@@ -783,10 +775,6 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
783 * wait_on_buffer() will do that for us 775 * wait_on_buffer() will do that for us
784 * through sync_buffer(). 776 * through sync_buffer().
785 */ 777 */
786 if (prev_mapping && prev_mapping != mapping)
787 blk_run_address_space(prev_mapping);
788 prev_mapping = mapping;
789
790 brelse(bh); 778 brelse(bh);
791 spin_lock(lock); 779 spin_lock(lock);
792 } 780 }
@@ -1614,14 +1602,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
1614 * prevents this contention from occurring. 1602 * prevents this contention from occurring.
1615 * 1603 *
1616 * If block_write_full_page() is called with wbc->sync_mode == 1604 * If block_write_full_page() is called with wbc->sync_mode ==
1617 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this 1605 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
1618 * causes the writes to be flagged as synchronous writes, but the 1606 * causes the writes to be flagged as synchronous writes.
1619 * block device queue will NOT be unplugged, since usually many pages
1620 * will be pushed to the out before the higher-level caller actually
1621 * waits for the writes to be completed. The various wait functions,
1622 * such as wait_on_writeback_range() will ultimately call sync_page()
1623 * which will ultimately call blk_run_backing_dev(), which will end up
1624 * unplugging the device queue.
1625 */ 1607 */
1626static int __block_write_full_page(struct inode *inode, struct page *page, 1608static int __block_write_full_page(struct inode *inode, struct page *page,
1627 get_block_t *get_block, struct writeback_control *wbc, 1609 get_block_t *get_block, struct writeback_control *wbc,
@@ -1634,7 +1616,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1634 const unsigned blocksize = 1 << inode->i_blkbits; 1616 const unsigned blocksize = 1 << inode->i_blkbits;
1635 int nr_underway = 0; 1617 int nr_underway = 0;
1636 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? 1618 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1637 WRITE_SYNC_PLUG : WRITE); 1619 WRITE_SYNC : WRITE);
1638 1620
1639 BUG_ON(!PageLocked(page)); 1621 BUG_ON(!PageLocked(page));
1640 1622
@@ -3138,17 +3120,6 @@ out:
3138} 3120}
3139EXPORT_SYMBOL(try_to_free_buffers); 3121EXPORT_SYMBOL(try_to_free_buffers);
3140 3122
3141void block_sync_page(struct page *page)
3142{
3143 struct address_space *mapping;
3144
3145 smp_mb();
3146 mapping = page_mapping(page);
3147 if (mapping)
3148 blk_run_backing_dev(mapping->backing_dev_info, page);
3149}
3150EXPORT_SYMBOL(block_sync_page);
3151
3152/* 3123/*
3153 * There are no bdflush tunables left. But distributions are 3124 * There are no bdflush tunables left. But distributions are
3154 * still running obsolete flush daemons, so we terminate them here. 3125 * still running obsolete flush daemons, so we terminate them here.
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e964b1cd5dd..c27d236738f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1569,34 +1569,6 @@ int cifs_fsync(struct file *file, int datasync)
1569 return rc; 1569 return rc;
1570} 1570}
1571 1571
1572/* static void cifs_sync_page(struct page *page)
1573{
1574 struct address_space *mapping;
1575 struct inode *inode;
1576 unsigned long index = page->index;
1577 unsigned int rpages = 0;
1578 int rc = 0;
1579
1580 cFYI(1, "sync page %p", page);
1581 mapping = page->mapping;
1582 if (!mapping)
1583 return 0;
1584 inode = mapping->host;
1585 if (!inode)
1586 return; */
1587
1588/* fill in rpages then
1589 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1590
1591/* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1592
1593#if 0
1594 if (rc < 0)
1595 return rc;
1596 return 0;
1597#endif
1598} */
1599
1600/* 1572/*
1601 * As file closes, flush all cached write data for this inode checking 1573 * As file closes, flush all cached write data for this inode checking
1602 * for write behind errors. 1574 * for write behind errors.
@@ -2510,7 +2482,6 @@ const struct address_space_operations cifs_addr_ops = {
2510 .set_page_dirty = __set_page_dirty_nobuffers, 2482 .set_page_dirty = __set_page_dirty_nobuffers,
2511 .releasepage = cifs_release_page, 2483 .releasepage = cifs_release_page,
2512 .invalidatepage = cifs_invalidate_page, 2484 .invalidatepage = cifs_invalidate_page,
2513 /* .sync_page = cifs_sync_page, */
2514 /* .direct_IO = */ 2485 /* .direct_IO = */
2515}; 2486};
2516 2487
@@ -2528,6 +2499,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2528 .set_page_dirty = __set_page_dirty_nobuffers, 2499 .set_page_dirty = __set_page_dirty_nobuffers,
2529 .releasepage = cifs_release_page, 2500 .releasepage = cifs_release_page,
2530 .invalidatepage = cifs_invalidate_page, 2501 .invalidatepage = cifs_invalidate_page,
2531 /* .sync_page = cifs_sync_page, */
2532 /* .direct_IO = */ 2502 /* .direct_IO = */
2533}; 2503};
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b044705eedd..42608313609 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1110,11 +1110,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1110 ((rw & READ) || (dio->result == dio->size))) 1110 ((rw & READ) || (dio->result == dio->size)))
1111 ret = -EIOCBQUEUED; 1111 ret = -EIOCBQUEUED;
1112 1112
1113 if (ret != -EIOCBQUEUED) { 1113 if (ret != -EIOCBQUEUED)
1114 /* All IO is now issued, send it on its way */
1115 blk_run_address_space(inode->i_mapping);
1116 dio_await_completion(dio); 1114 dio_await_completion(dio);
1117 }
1118 1115
1119 /* 1116 /*
1120 * Sync will always be dropping the final ref and completing the 1117 * Sync will always be dropping the final ref and completing the
@@ -1176,7 +1173,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1176 struct dio *dio; 1173 struct dio *dio;
1177 1174
1178 if (rw & WRITE) 1175 if (rw & WRITE)
1179 rw = WRITE_ODIRECT_PLUG; 1176 rw = WRITE_ODIRECT;
1180 1177
1181 if (bdev) 1178 if (bdev)
1182 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1179 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index a8e7797b947..9c13412e6c9 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -23,7 +23,6 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
23} 23}
24static const struct address_space_operations efs_aops = { 24static const struct address_space_operations efs_aops = {
25 .readpage = efs_readpage, 25 .readpage = efs_readpage,
26 .sync_page = block_sync_page,
27 .bmap = _efs_bmap 26 .bmap = _efs_bmap
28}; 27};
29 28
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a7555238c41..82b94c8f5d2 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -795,7 +795,6 @@ const struct address_space_operations exofs_aops = {
795 .direct_IO = NULL, /* TODO: Should be trivial to do */ 795 .direct_IO = NULL, /* TODO: Should be trivial to do */
796 796
797 /* With these NULL has special meaning or default is not exported */ 797 /* With these NULL has special meaning or default is not exported */
798 .sync_page = NULL,
799 .get_xip_mem = NULL, 798 .get_xip_mem = NULL,
800 .migratepage = NULL, 799 .migratepage = NULL,
801 .launder_page = NULL, 800 .launder_page = NULL,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 40ad210a504..c47f706878b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -860,7 +860,6 @@ const struct address_space_operations ext2_aops = {
860 .readpage = ext2_readpage, 860 .readpage = ext2_readpage,
861 .readpages = ext2_readpages, 861 .readpages = ext2_readpages,
862 .writepage = ext2_writepage, 862 .writepage = ext2_writepage,
863 .sync_page = block_sync_page,
864 .write_begin = ext2_write_begin, 863 .write_begin = ext2_write_begin,
865 .write_end = ext2_write_end, 864 .write_end = ext2_write_end,
866 .bmap = ext2_bmap, 865 .bmap = ext2_bmap,
@@ -880,7 +879,6 @@ const struct address_space_operations ext2_nobh_aops = {
880 .readpage = ext2_readpage, 879 .readpage = ext2_readpage,
881 .readpages = ext2_readpages, 880 .readpages = ext2_readpages,
882 .writepage = ext2_nobh_writepage, 881 .writepage = ext2_nobh_writepage,
883 .sync_page = block_sync_page,
884 .write_begin = ext2_nobh_write_begin, 882 .write_begin = ext2_nobh_write_begin,
885 .write_end = nobh_write_end, 883 .write_end = nobh_write_end,
886 .bmap = ext2_bmap, 884 .bmap = ext2_bmap,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ae94f6d949f..fe2541d250e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1894,7 +1894,6 @@ static const struct address_space_operations ext3_ordered_aops = {
1894 .readpage = ext3_readpage, 1894 .readpage = ext3_readpage,
1895 .readpages = ext3_readpages, 1895 .readpages = ext3_readpages,
1896 .writepage = ext3_ordered_writepage, 1896 .writepage = ext3_ordered_writepage,
1897 .sync_page = block_sync_page,
1898 .write_begin = ext3_write_begin, 1897 .write_begin = ext3_write_begin,
1899 .write_end = ext3_ordered_write_end, 1898 .write_end = ext3_ordered_write_end,
1900 .bmap = ext3_bmap, 1899 .bmap = ext3_bmap,
@@ -1910,7 +1909,6 @@ static const struct address_space_operations ext3_writeback_aops = {
1910 .readpage = ext3_readpage, 1909 .readpage = ext3_readpage,
1911 .readpages = ext3_readpages, 1910 .readpages = ext3_readpages,
1912 .writepage = ext3_writeback_writepage, 1911 .writepage = ext3_writeback_writepage,
1913 .sync_page = block_sync_page,
1914 .write_begin = ext3_write_begin, 1912 .write_begin = ext3_write_begin,
1915 .write_end = ext3_writeback_write_end, 1913 .write_end = ext3_writeback_write_end,
1916 .bmap = ext3_bmap, 1914 .bmap = ext3_bmap,
@@ -1926,7 +1924,6 @@ static const struct address_space_operations ext3_journalled_aops = {
1926 .readpage = ext3_readpage, 1924 .readpage = ext3_readpage,
1927 .readpages = ext3_readpages, 1925 .readpages = ext3_readpages,
1928 .writepage = ext3_journalled_writepage, 1926 .writepage = ext3_journalled_writepage,
1929 .sync_page = block_sync_page,
1930 .write_begin = ext3_write_begin, 1927 .write_begin = ext3_write_begin,
1931 .write_end = ext3_journalled_write_end, 1928 .write_end = ext3_journalled_write_end,
1932 .set_page_dirty = ext3_journalled_set_page_dirty, 1929 .set_page_dirty = ext3_journalled_set_page_dirty,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914..9297ad46c46 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3903,7 +3903,6 @@ static const struct address_space_operations ext4_ordered_aops = {
3903 .readpage = ext4_readpage, 3903 .readpage = ext4_readpage,
3904 .readpages = ext4_readpages, 3904 .readpages = ext4_readpages,
3905 .writepage = ext4_writepage, 3905 .writepage = ext4_writepage,
3906 .sync_page = block_sync_page,
3907 .write_begin = ext4_write_begin, 3906 .write_begin = ext4_write_begin,
3908 .write_end = ext4_ordered_write_end, 3907 .write_end = ext4_ordered_write_end,
3909 .bmap = ext4_bmap, 3908 .bmap = ext4_bmap,
@@ -3919,7 +3918,6 @@ static const struct address_space_operations ext4_writeback_aops = {
3919 .readpage = ext4_readpage, 3918 .readpage = ext4_readpage,
3920 .readpages = ext4_readpages, 3919 .readpages = ext4_readpages,
3921 .writepage = ext4_writepage, 3920 .writepage = ext4_writepage,
3922 .sync_page = block_sync_page,
3923 .write_begin = ext4_write_begin, 3921 .write_begin = ext4_write_begin,
3924 .write_end = ext4_writeback_write_end, 3922 .write_end = ext4_writeback_write_end,
3925 .bmap = ext4_bmap, 3923 .bmap = ext4_bmap,
@@ -3935,7 +3933,6 @@ static const struct address_space_operations ext4_journalled_aops = {
3935 .readpage = ext4_readpage, 3933 .readpage = ext4_readpage,
3936 .readpages = ext4_readpages, 3934 .readpages = ext4_readpages,
3937 .writepage = ext4_writepage, 3935 .writepage = ext4_writepage,
3938 .sync_page = block_sync_page,
3939 .write_begin = ext4_write_begin, 3936 .write_begin = ext4_write_begin,
3940 .write_end = ext4_journalled_write_end, 3937 .write_end = ext4_journalled_write_end,
3941 .set_page_dirty = ext4_journalled_set_page_dirty, 3938 .set_page_dirty = ext4_journalled_set_page_dirty,
@@ -3951,7 +3948,6 @@ static const struct address_space_operations ext4_da_aops = {
3951 .readpages = ext4_readpages, 3948 .readpages = ext4_readpages,
3952 .writepage = ext4_writepage, 3949 .writepage = ext4_writepage,
3953 .writepages = ext4_da_writepages, 3950 .writepages = ext4_da_writepages,
3954 .sync_page = block_sync_page,
3955 .write_begin = ext4_da_write_begin, 3951 .write_begin = ext4_da_write_begin,
3956 .write_end = ext4_da_write_end, 3952 .write_end = ext4_da_write_end,
3957 .bmap = ext4_bmap, 3953 .bmap = ext4_bmap,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 955cc309142..e2cd90e4bb7 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -310,8 +310,7 @@ static int io_submit_init(struct ext4_io_submit *io,
310 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); 310 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
311 311
312 io->io_bio = bio; 312 io->io_bio = bio;
313 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? 313 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
314 WRITE_SYNC_PLUG : WRITE);
315 io->io_next_block = bh->b_blocknr; 314 io->io_next_block = bh->b_blocknr;
316 return 0; 315 return 0;
317} 316}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd..f4ff09fb79b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -236,7 +236,6 @@ static const struct address_space_operations fat_aops = {
236 .readpages = fat_readpages, 236 .readpages = fat_readpages,
237 .writepage = fat_writepage, 237 .writepage = fat_writepage,
238 .writepages = fat_writepages, 238 .writepages = fat_writepages,
239 .sync_page = block_sync_page,
240 .write_begin = fat_write_begin, 239 .write_begin = fat_write_begin,
241 .write_end = fat_write_end, 240 .write_end = fat_write_end,
242 .direct_IO = fat_direct_IO, 241 .direct_IO = fat_direct_IO,
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 1429f3ae1e8..5d318c44f85 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -44,7 +44,6 @@ static sector_t vxfs_bmap(struct address_space *, sector_t);
44const struct address_space_operations vxfs_aops = { 44const struct address_space_operations vxfs_aops = {
45 .readpage = vxfs_readpage, 45 .readpage = vxfs_readpage,
46 .bmap = vxfs_bmap, 46 .bmap = vxfs_bmap,
47 .sync_page = block_sync_page,
48}; 47};
49 48
50inline void 49inline void
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd..09e8d51eeb6 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -868,7 +868,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
868 868
869 fc->bdi.name = "fuse"; 869 fc->bdi.name = "fuse";
870 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 870 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
871 fc->bdi.unplug_io_fn = default_unplug_io_fn;
872 /* fuse does it's own writeback accounting */ 871 /* fuse does it's own writeback accounting */
873 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; 872 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
874 873
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9..2f87ad27efd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1116,7 +1116,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
1116 .writepages = gfs2_writeback_writepages, 1116 .writepages = gfs2_writeback_writepages,
1117 .readpage = gfs2_readpage, 1117 .readpage = gfs2_readpage,
1118 .readpages = gfs2_readpages, 1118 .readpages = gfs2_readpages,
1119 .sync_page = block_sync_page,
1120 .write_begin = gfs2_write_begin, 1119 .write_begin = gfs2_write_begin,
1121 .write_end = gfs2_write_end, 1120 .write_end = gfs2_write_end,
1122 .bmap = gfs2_bmap, 1121 .bmap = gfs2_bmap,
@@ -1132,7 +1131,6 @@ static const struct address_space_operations gfs2_ordered_aops = {
1132 .writepage = gfs2_ordered_writepage, 1131 .writepage = gfs2_ordered_writepage,
1133 .readpage = gfs2_readpage, 1132 .readpage = gfs2_readpage,
1134 .readpages = gfs2_readpages, 1133 .readpages = gfs2_readpages,
1135 .sync_page = block_sync_page,
1136 .write_begin = gfs2_write_begin, 1134 .write_begin = gfs2_write_begin,
1137 .write_end = gfs2_write_end, 1135 .write_end = gfs2_write_end,
1138 .set_page_dirty = gfs2_set_page_dirty, 1136 .set_page_dirty = gfs2_set_page_dirty,
@@ -1150,7 +1148,6 @@ static const struct address_space_operations gfs2_jdata_aops = {
1150 .writepages = gfs2_jdata_writepages, 1148 .writepages = gfs2_jdata_writepages,
1151 .readpage = gfs2_readpage, 1149 .readpage = gfs2_readpage,
1152 .readpages = gfs2_readpages, 1150 .readpages = gfs2_readpages,
1153 .sync_page = block_sync_page,
1154 .write_begin = gfs2_write_begin, 1151 .write_begin = gfs2_write_begin,
1155 .write_end = gfs2_write_end, 1152 .write_end = gfs2_write_end,
1156 .set_page_dirty = gfs2_set_page_dirty, 1153 .set_page_dirty = gfs2_set_page_dirty,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e1..7f1c1120234 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -121,7 +121,7 @@ __acquires(&sdp->sd_log_lock)
121 lock_buffer(bh); 121 lock_buffer(bh);
122 if (test_clear_buffer_dirty(bh)) { 122 if (test_clear_buffer_dirty(bh)) {
123 bh->b_end_io = end_buffer_write_sync; 123 bh->b_end_io = end_buffer_write_sync;
124 submit_bh(WRITE_SYNC_PLUG, bh); 124 submit_bh(WRITE_SYNC, bh);
125 } else { 125 } else {
126 unlock_buffer(bh); 126 unlock_buffer(bh);
127 brelse(bh); 127 brelse(bh);
@@ -647,7 +647,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
647 lock_buffer(bh); 647 lock_buffer(bh);
648 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { 648 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
649 bh->b_end_io = end_buffer_write_sync; 649 bh->b_end_io = end_buffer_write_sync;
650 submit_bh(WRITE_SYNC_PLUG, bh); 650 submit_bh(WRITE_SYNC, bh);
651 } else { 651 } else {
652 unlock_buffer(bh); 652 unlock_buffer(bh);
653 brelse(bh); 653 brelse(bh);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058..48b545a1979 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -200,7 +200,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
200 } 200 }
201 201
202 gfs2_log_unlock(sdp); 202 gfs2_log_unlock(sdp);
203 submit_bh(WRITE_SYNC_PLUG, bh); 203 submit_bh(WRITE_SYNC, bh);
204 gfs2_log_lock(sdp); 204 gfs2_log_lock(sdp);
205 205
206 n = 0; 206 n = 0;
@@ -210,7 +210,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
210 gfs2_log_unlock(sdp); 210 gfs2_log_unlock(sdp);
211 lock_buffer(bd2->bd_bh); 211 lock_buffer(bd2->bd_bh);
212 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 212 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
213 submit_bh(WRITE_SYNC_PLUG, bh); 213 submit_bh(WRITE_SYNC, bh);
214 gfs2_log_lock(sdp); 214 gfs2_log_lock(sdp);
215 if (++n >= num) 215 if (++n >= num)
216 break; 216 break;
@@ -352,7 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
352 sdp->sd_log_num_revoke--; 352 sdp->sd_log_num_revoke--;
353 353
354 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 354 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
355 submit_bh(WRITE_SYNC_PLUG, bh); 355 submit_bh(WRITE_SYNC, bh);
356 356
357 bh = gfs2_log_get_buf(sdp); 357 bh = gfs2_log_get_buf(sdp);
358 mh = (struct gfs2_meta_header *)bh->b_data; 358 mh = (struct gfs2_meta_header *)bh->b_data;
@@ -369,7 +369,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
369 } 369 }
370 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 370 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
371 371
372 submit_bh(WRITE_SYNC_PLUG, bh); 372 submit_bh(WRITE_SYNC, bh);
373} 373}
374 374
375static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 375static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -571,7 +571,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
571 ptr = bh_log_ptr(bh); 571 ptr = bh_log_ptr(bh);
572 572
573 get_bh(bh); 573 get_bh(bh);
574 submit_bh(WRITE_SYNC_PLUG, bh); 574 submit_bh(WRITE_SYNC, bh);
575 gfs2_log_lock(sdp); 575 gfs2_log_lock(sdp);
576 while(!list_empty(list)) { 576 while(!list_empty(list)) {
577 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); 577 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -597,7 +597,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
597 } else { 597 } else {
598 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); 598 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
599 } 599 }
600 submit_bh(WRITE_SYNC_PLUG, bh1); 600 submit_bh(WRITE_SYNC, bh1);
601 gfs2_log_lock(sdp); 601 gfs2_log_lock(sdp);
602 ptr += 2; 602 ptr += 2;
603 } 603 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f..867b713cba9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
37 struct buffer_head *bh, *head; 37 struct buffer_head *bh, *head;
38 int nr_underway = 0; 38 int nr_underway = 0;
39 int write_op = REQ_META | 39 int write_op = REQ_META |
40 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE); 40 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
41 41
42 BUG_ON(!PageLocked(page)); 42 BUG_ON(!PageLocked(page));
43 BUG_ON(!page_has_buffers(page)); 43 BUG_ON(!page_has_buffers(page));
@@ -94,7 +94,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
94const struct address_space_operations gfs2_meta_aops = { 94const struct address_space_operations gfs2_meta_aops = {
95 .writepage = gfs2_aspace_writepage, 95 .writepage = gfs2_aspace_writepage,
96 .releasepage = gfs2_releasepage, 96 .releasepage = gfs2_releasepage,
97 .sync_page = block_sync_page,
98}; 97};
99 98
100/** 99/**
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dffb4e99664..fff16c968e6 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,6 @@ static int hfs_writepages(struct address_space *mapping,
150const struct address_space_operations hfs_btree_aops = { 150const struct address_space_operations hfs_btree_aops = {
151 .readpage = hfs_readpage, 151 .readpage = hfs_readpage,
152 .writepage = hfs_writepage, 152 .writepage = hfs_writepage,
153 .sync_page = block_sync_page,
154 .write_begin = hfs_write_begin, 153 .write_begin = hfs_write_begin,
155 .write_end = generic_write_end, 154 .write_end = generic_write_end,
156 .bmap = hfs_bmap, 155 .bmap = hfs_bmap,
@@ -160,7 +159,6 @@ const struct address_space_operations hfs_btree_aops = {
160const struct address_space_operations hfs_aops = { 159const struct address_space_operations hfs_aops = {
161 .readpage = hfs_readpage, 160 .readpage = hfs_readpage,
162 .writepage = hfs_writepage, 161 .writepage = hfs_writepage,
163 .sync_page = block_sync_page,
164 .write_begin = hfs_write_begin, 162 .write_begin = hfs_write_begin,
165 .write_end = generic_write_end, 163 .write_end = generic_write_end,
166 .bmap = hfs_bmap, 164 .bmap = hfs_bmap,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a8df651747f..b248a6cfcad 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -146,7 +146,6 @@ static int hfsplus_writepages(struct address_space *mapping,
146const struct address_space_operations hfsplus_btree_aops = { 146const struct address_space_operations hfsplus_btree_aops = {
147 .readpage = hfsplus_readpage, 147 .readpage = hfsplus_readpage,
148 .writepage = hfsplus_writepage, 148 .writepage = hfsplus_writepage,
149 .sync_page = block_sync_page,
150 .write_begin = hfsplus_write_begin, 149 .write_begin = hfsplus_write_begin,
151 .write_end = generic_write_end, 150 .write_end = generic_write_end,
152 .bmap = hfsplus_bmap, 151 .bmap = hfsplus_bmap,
@@ -156,7 +155,6 @@ const struct address_space_operations hfsplus_btree_aops = {
156const struct address_space_operations hfsplus_aops = { 155const struct address_space_operations hfsplus_aops = {
157 .readpage = hfsplus_readpage, 156 .readpage = hfsplus_readpage,
158 .writepage = hfsplus_writepage, 157 .writepage = hfsplus_writepage,
159 .sync_page = block_sync_page,
160 .write_begin = hfsplus_write_begin, 158 .write_begin = hfsplus_write_begin,
161 .write_end = generic_write_end, 159 .write_end = generic_write_end,
162 .bmap = hfsplus_bmap, 160 .bmap = hfsplus_bmap,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index c0340887c7e..9e84257b3ad 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -120,7 +120,6 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
120const struct address_space_operations hpfs_aops = { 120const struct address_space_operations hpfs_aops = {
121 .readpage = hpfs_readpage, 121 .readpage = hpfs_readpage,
122 .writepage = hpfs_writepage, 122 .writepage = hpfs_writepage,
123 .sync_page = block_sync_page,
124 .write_begin = hpfs_write_begin, 123 .write_begin = hpfs_write_begin,
125 .write_end = generic_write_end, 124 .write_end = generic_write_end,
126 .bmap = _hpfs_bmap 125 .bmap = _hpfs_bmap
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a0f3833c0db..3db5ba4568f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1158,7 +1158,6 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
1158 1158
1159static const struct address_space_operations isofs_aops = { 1159static const struct address_space_operations isofs_aops = {
1160 .readpage = isofs_readpage, 1160 .readpage = isofs_readpage,
1161 .sync_page = block_sync_page,
1162 .bmap = _isofs_bmap 1161 .bmap = _isofs_bmap
1163}; 1162};
1164 1163
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 34a4861c14b..66be299acb1 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -333,7 +333,7 @@ void journal_commit_transaction(journal_t *journal)
333 * instead we rely on sync_buffer() doing the unplug for us. 333 * instead we rely on sync_buffer() doing the unplug for us.
334 */ 334 */
335 if (commit_transaction->t_synchronous_commit) 335 if (commit_transaction->t_synchronous_commit)
336 write_op = WRITE_SYNC_PLUG; 336 write_op = WRITE_SYNC;
337 spin_lock(&commit_transaction->t_handle_lock); 337 spin_lock(&commit_transaction->t_handle_lock);
338 while (commit_transaction->t_updates) { 338 while (commit_transaction->t_updates) {
339 DEFINE_WAIT(wait); 339 DEFINE_WAIT(wait);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f3ad1598b20..3da1cc4346d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -137,9 +137,9 @@ static int journal_submit_commit_record(journal_t *journal,
137 if (journal->j_flags & JBD2_BARRIER && 137 if (journal->j_flags & JBD2_BARRIER &&
138 !JBD2_HAS_INCOMPAT_FEATURE(journal, 138 !JBD2_HAS_INCOMPAT_FEATURE(journal,
139 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) 139 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
140 ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); 140 ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
141 else 141 else
142 ret = submit_bh(WRITE_SYNC_PLUG, bh); 142 ret = submit_bh(WRITE_SYNC, bh);
143 143
144 *cbh = bh; 144 *cbh = bh;
145 return ret; 145 return ret;
@@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
369 * instead we rely on sync_buffer() doing the unplug for us. 369 * instead we rely on sync_buffer() doing the unplug for us.
370 */ 370 */
371 if (commit_transaction->t_synchronous_commit) 371 if (commit_transaction->t_synchronous_commit)
372 write_op = WRITE_SYNC_PLUG; 372 write_op = WRITE_SYNC;
373 trace_jbd2_commit_locking(journal, commit_transaction); 373 trace_jbd2_commit_locking(journal, commit_transaction);
374 stats.run.rs_wait = commit_transaction->t_max_wait; 374 stats.run.rs_wait = commit_transaction->t_max_wait;
375 stats.run.rs_locked = jiffies; 375 stats.run.rs_locked = jiffies;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9978803ceed..eddbb373209 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -352,7 +352,6 @@ const struct address_space_operations jfs_aops = {
352 .readpages = jfs_readpages, 352 .readpages = jfs_readpages,
353 .writepage = jfs_writepage, 353 .writepage = jfs_writepage,
354 .writepages = jfs_writepages, 354 .writepages = jfs_writepages,
355 .sync_page = block_sync_page,
356 .write_begin = jfs_write_begin, 355 .write_begin = jfs_write_begin,
357 .write_end = nobh_write_end, 356 .write_end = nobh_write_end,
358 .bmap = jfs_bmap, 357 .bmap = jfs_bmap,
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 48b44bd8267..6740d34cd82 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -583,7 +583,6 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
583const struct address_space_operations jfs_metapage_aops = { 583const struct address_space_operations jfs_metapage_aops = {
584 .readpage = metapage_readpage, 584 .readpage = metapage_readpage,
585 .writepage = metapage_writepage, 585 .writepage = metapage_writepage,
586 .sync_page = block_sync_page,
587 .releasepage = metapage_releasepage, 586 .releasepage = metapage_releasepage,
588 .invalidatepage = metapage_invalidatepage, 587 .invalidatepage = metapage_invalidatepage,
589 .set_page_dirty = __set_page_dirty_nobuffers, 588 .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 723bc5bca09..1adc8d455f0 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -39,7 +39,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
39 bio.bi_end_io = request_complete; 39 bio.bi_end_io = request_complete;
40 40
41 submit_bio(rw, &bio); 41 submit_bio(rw, &bio);
42 generic_unplug_device(bdev_get_queue(bdev));
43 wait_for_completion(&complete); 42 wait_for_completion(&complete);
44 return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; 43 return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
45} 44}
@@ -168,7 +167,6 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
168 } 167 }
169 len = PAGE_ALIGN(len); 168 len = PAGE_ALIGN(len);
170 __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 169 __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
171 generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
172} 170}
173 171
174 172
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ae0b83f476a..adcdc0a4e18 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -399,7 +399,6 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
399static const struct address_space_operations minix_aops = { 399static const struct address_space_operations minix_aops = {
400 .readpage = minix_readpage, 400 .readpage = minix_readpage,
401 .writepage = minix_writepage, 401 .writepage = minix_writepage,
402 .sync_page = block_sync_page,
403 .write_begin = minix_write_begin, 402 .write_begin = minix_write_begin,
404 .write_end = generic_write_end, 403 .write_end = generic_write_end,
405 .bmap = minix_bmap 404 .bmap = minix_bmap
diff --git a/fs/mpage.c b/fs/mpage.c
index d78455a81ec..0afc809e46e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -364,6 +364,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
364 sector_t last_block_in_bio = 0; 364 sector_t last_block_in_bio = 0;
365 struct buffer_head map_bh; 365 struct buffer_head map_bh;
366 unsigned long first_logical_block = 0; 366 unsigned long first_logical_block = 0;
367 struct blk_plug plug;
368
369 blk_start_plug(&plug);
367 370
368 map_bh.b_state = 0; 371 map_bh.b_state = 0;
369 map_bh.b_size = 0; 372 map_bh.b_size = 0;
@@ -385,6 +388,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
385 BUG_ON(!list_empty(pages)); 388 BUG_ON(!list_empty(pages));
386 if (bio) 389 if (bio)
387 mpage_bio_submit(READ, bio); 390 mpage_bio_submit(READ, bio);
391 blk_finish_plug(&plug);
388 return 0; 392 return 0;
389} 393}
390EXPORT_SYMBOL(mpage_readpages); 394EXPORT_SYMBOL(mpage_readpages);
@@ -666,8 +670,11 @@ int
666mpage_writepages(struct address_space *mapping, 670mpage_writepages(struct address_space *mapping,
667 struct writeback_control *wbc, get_block_t get_block) 671 struct writeback_control *wbc, get_block_t get_block)
668{ 672{
673 struct blk_plug plug;
669 int ret; 674 int ret;
670 675
676 blk_start_plug(&plug);
677
671 if (!get_block) 678 if (!get_block)
672 ret = generic_writepages(mapping, wbc); 679 ret = generic_writepages(mapping, wbc);
673 else { 680 else {
@@ -682,6 +689,7 @@ mpage_writepages(struct address_space *mapping,
682 if (mpd.bio) 689 if (mpd.bio)
683 mpage_bio_submit(WRITE, mpd.bio); 690 mpage_bio_submit(WRITE, mpd.bio);
684 } 691 }
692 blk_finish_plug(&plug);
685 return ret; 693 return ret;
686} 694}
687EXPORT_SYMBOL(mpage_writepages); 695EXPORT_SYMBOL(mpage_writepages);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 85f7baa15f5..609cd223eea 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,15 +34,10 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37
38static const struct address_space_operations def_btnode_aops = {
39 .sync_page = block_sync_page,
40};
41
42void nilfs_btnode_cache_init(struct address_space *btnc, 37void nilfs_btnode_cache_init(struct address_space *btnc,
43 struct backing_dev_info *bdi) 38 struct backing_dev_info *bdi)
44{ 39{
45 nilfs_mapping_init(btnc, bdi, &def_btnode_aops); 40 nilfs_mapping_init(btnc, bdi);
46} 41}
47 42
48void nilfs_btnode_cache_clear(struct address_space *btnc) 43void nilfs_btnode_cache_clear(struct address_space *btnc)
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index caf9a6a3fb5..1c2a3e23f8b 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -49,7 +49,6 @@
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = { 51static const struct address_space_operations def_gcinode_aops = {
52 .sync_page = block_sync_page,
53}; 52};
54 53
55/* 54/*
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2fd440d8d6b..c89d5d1ea7c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -262,7 +262,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
262const struct address_space_operations nilfs_aops = { 262const struct address_space_operations nilfs_aops = {
263 .writepage = nilfs_writepage, 263 .writepage = nilfs_writepage,
264 .readpage = nilfs_readpage, 264 .readpage = nilfs_readpage,
265 .sync_page = block_sync_page,
266 .writepages = nilfs_writepages, 265 .writepages = nilfs_writepages,
267 .set_page_dirty = nilfs_set_page_dirty, 266 .set_page_dirty = nilfs_set_page_dirty,
268 .readpages = nilfs_readpages, 267 .readpages = nilfs_readpages,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a0babd2bff6..a649b05f706 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -399,7 +399,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
399 399
400static const struct address_space_operations def_mdt_aops = { 400static const struct address_space_operations def_mdt_aops = {
401 .writepage = nilfs_mdt_write_page, 401 .writepage = nilfs_mdt_write_page,
402 .sync_page = block_sync_page,
403}; 402};
404 403
405static const struct inode_operations def_mdt_iops; 404static const struct inode_operations def_mdt_iops;
@@ -438,10 +437,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
438 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); 437 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
439} 438}
440 439
441static const struct address_space_operations shadow_map_aops = {
442 .sync_page = block_sync_page,
443};
444
445/** 440/**
446 * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file 441 * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
447 * @inode: inode of the metadata file 442 * @inode: inode of the metadata file
@@ -455,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
455 450
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 453 nilfs_mapping_init(&shadow->frozen_data, bdi);
459 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
461 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
462 return 0; 457 return 0;
463} 458}
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a585b35fd6b..4d2a1ee0eb4 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -493,15 +493,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
493} 493}
494 494
495void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
496 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi)
497 const struct address_space_operations *aops)
498{ 497{
499 mapping->host = NULL; 498 mapping->host = NULL;
500 mapping->flags = 0; 499 mapping->flags = 0;
501 mapping_set_gfp_mask(mapping, GFP_NOFS); 500 mapping_set_gfp_mask(mapping, GFP_NOFS);
502 mapping->assoc_mapping = NULL; 501 mapping->assoc_mapping = NULL;
503 mapping->backing_dev_info = bdi; 502 mapping->backing_dev_info = bdi;
504 mapping->a_ops = aops; 503 mapping->a_ops = NULL;
505} 504}
506 505
507/* 506/*
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 2a00953ebd5..f06b79ad749 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -62,8 +62,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
65 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi);
66 const struct address_space_operations *aops);
67unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
68unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 67unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
69 sector_t start_blk, 68 sector_t start_blk,
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 0f83e93935b..2853ff20f85 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -509,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
509 * Last BIO is always sent through the following 509 * Last BIO is always sent through the following
510 * submission. 510 * submission.
511 */ 511 */
512 rw |= REQ_SYNC | REQ_UNPLUG; 512 rw |= REQ_SYNC;
513 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); 513 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
514 } 514 }
515 515
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index c3c2c7ac902..0b1e885b8cf 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1543,8 +1543,6 @@ err_out:
1543 */ 1543 */
1544const struct address_space_operations ntfs_aops = { 1544const struct address_space_operations ntfs_aops = {
1545 .readpage = ntfs_readpage, /* Fill page with data. */ 1545 .readpage = ntfs_readpage, /* Fill page with data. */
1546 .sync_page = block_sync_page, /* Currently, just unplugs the
1547 disk request queue. */
1548#ifdef NTFS_RW 1546#ifdef NTFS_RW
1549 .writepage = ntfs_writepage, /* Write dirty page to disk. */ 1547 .writepage = ntfs_writepage, /* Write dirty page to disk. */
1550#endif /* NTFS_RW */ 1548#endif /* NTFS_RW */
@@ -1560,8 +1558,6 @@ const struct address_space_operations ntfs_aops = {
1560 */ 1558 */
1561const struct address_space_operations ntfs_mst_aops = { 1559const struct address_space_operations ntfs_mst_aops = {
1562 .readpage = ntfs_readpage, /* Fill page with data. */ 1560 .readpage = ntfs_readpage, /* Fill page with data. */
1563 .sync_page = block_sync_page, /* Currently, just unplugs the
1564 disk request queue. */
1565#ifdef NTFS_RW 1561#ifdef NTFS_RW
1566 .writepage = ntfs_writepage, /* Write dirty page to disk. */ 1562 .writepage = ntfs_writepage, /* Write dirty page to disk. */
1567 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty 1563 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6551c7cbad9..ef9ed854255 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -698,8 +698,7 @@ lock_retry_remap:
698 "uptodate! Unplugging the disk queue " 698 "uptodate! Unplugging the disk queue "
699 "and rescheduling."); 699 "and rescheduling.");
700 get_bh(tbh); 700 get_bh(tbh);
701 blk_run_address_space(mapping); 701 io_schedule();
702 schedule();
703 put_bh(tbh); 702 put_bh(tbh);
704 if (unlikely(!buffer_uptodate(tbh))) 703 if (unlikely(!buffer_uptodate(tbh)))
705 goto read_err; 704 goto read_err;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1fbb0e20131..daea0359e97 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2043,7 +2043,6 @@ const struct address_space_operations ocfs2_aops = {
2043 .write_begin = ocfs2_write_begin, 2043 .write_begin = ocfs2_write_begin,
2044 .write_end = ocfs2_write_end, 2044 .write_end = ocfs2_write_end,
2045 .bmap = ocfs2_bmap, 2045 .bmap = ocfs2_bmap,
2046 .sync_page = block_sync_page,
2047 .direct_IO = ocfs2_direct_IO, 2046 .direct_IO = ocfs2_direct_IO,
2048 .invalidatepage = ocfs2_invalidatepage, 2047 .invalidatepage = ocfs2_invalidatepage,
2049 .releasepage = ocfs2_releasepage, 2048 .releasepage = ocfs2_releasepage,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e863d8f..1adab287bd2 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -367,11 +367,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
367static void o2hb_wait_on_io(struct o2hb_region *reg, 367static void o2hb_wait_on_io(struct o2hb_region *reg,
368 struct o2hb_bio_wait_ctxt *wc) 368 struct o2hb_bio_wait_ctxt *wc)
369{ 369{
370 struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
371
372 blk_run_address_space(mapping);
373 o2hb_bio_wait_dec(wc, 1); 370 o2hb_bio_wait_dec(wc, 1);
374
375 wait_for_completion(&wc->wc_io_complete); 371 wait_for_completion(&wc->wc_io_complete);
376} 372}
377 373
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 8a6d34fa668..d738a7e493d 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,7 +372,6 @@ const struct address_space_operations omfs_aops = {
372 .readpages = omfs_readpages, 372 .readpages = omfs_readpages,
373 .writepage = omfs_writepage, 373 .writepage = omfs_writepage,
374 .writepages = omfs_writepages, 374 .writepages = omfs_writepages,
375 .sync_page = block_sync_page,
376 .write_begin = omfs_write_begin, 375 .write_begin = omfs_write_begin,
377 .write_end = generic_write_end, 376 .write_end = generic_write_end,
378 .bmap = omfs_bmap, 377 .bmap = omfs_bmap,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index e63b4171d58..2b0646613f5 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -335,7 +335,6 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
335static const struct address_space_operations qnx4_aops = { 335static const struct address_space_operations qnx4_aops = {
336 .readpage = qnx4_readpage, 336 .readpage = qnx4_readpage,
337 .writepage = qnx4_writepage, 337 .writepage = qnx4_writepage,
338 .sync_page = block_sync_page,
339 .write_begin = qnx4_write_begin, 338 .write_begin = qnx4_write_begin,
340 .write_end = generic_write_end, 339 .write_end = generic_write_end,
341 .bmap = qnx4_bmap 340 .bmap = qnx4_bmap
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e..03674675f88 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3212,7 +3212,6 @@ const struct address_space_operations reiserfs_address_space_operations = {
3212 .readpages = reiserfs_readpages, 3212 .readpages = reiserfs_readpages,
3213 .releasepage = reiserfs_releasepage, 3213 .releasepage = reiserfs_releasepage,
3214 .invalidatepage = reiserfs_invalidatepage, 3214 .invalidatepage = reiserfs_invalidatepage,
3215 .sync_page = block_sync_page,
3216 .write_begin = reiserfs_write_begin, 3215 .write_begin = reiserfs_write_begin,
3217 .write_end = reiserfs_write_end, 3216 .write_end = reiserfs_write_end,
3218 .bmap = reiserfs_aop_bmap, 3217 .bmap = reiserfs_aop_bmap,
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 9ca66276315..fa8d43c92bb 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -488,7 +488,6 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
488const struct address_space_operations sysv_aops = { 488const struct address_space_operations sysv_aops = {
489 .readpage = sysv_readpage, 489 .readpage = sysv_readpage,
490 .writepage = sysv_writepage, 490 .writepage = sysv_writepage,
491 .sync_page = block_sync_page,
492 .write_begin = sysv_write_begin, 491 .write_begin = sysv_write_begin,
493 .write_end = generic_write_end, 492 .write_end = generic_write_end,
494 .bmap = sysv_bmap 493 .bmap = sysv_bmap
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6e11c2975dc..81368d4d4a2 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1979,7 +1979,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1979 */ 1979 */
1980 c->bdi.name = "ubifs", 1980 c->bdi.name = "ubifs",
1981 c->bdi.capabilities = BDI_CAP_MAP_COPY; 1981 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1982 c->bdi.unplug_io_fn = default_unplug_io_fn;
1983 err = bdi_init(&c->bdi); 1982 err = bdi_init(&c->bdi);
1984 if (err) 1983 if (err)
1985 goto out_close; 1984 goto out_close;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 89c78486cbb..94e4553491c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -98,7 +98,6 @@ static int udf_adinicb_write_end(struct file *file,
98const struct address_space_operations udf_adinicb_aops = { 98const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 99 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 100 .writepage = udf_adinicb_writepage,
101 .sync_page = block_sync_page,
102 .write_begin = simple_write_begin, 101 .write_begin = simple_write_begin,
103 .write_end = udf_adinicb_write_end, 102 .write_end = udf_adinicb_write_end,
104}; 103};
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c6a2e782b97..fa96fc0fe12 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -133,7 +133,6 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
133const struct address_space_operations udf_aops = { 133const struct address_space_operations udf_aops = {
134 .readpage = udf_readpage, 134 .readpage = udf_readpage,
135 .writepage = udf_writepage, 135 .writepage = udf_writepage,
136 .sync_page = block_sync_page,
137 .write_begin = udf_write_begin, 136 .write_begin = udf_write_begin,
138 .write_end = generic_write_end, 137 .write_end = generic_write_end,
139 .bmap = udf_bmap, 138 .bmap = udf_bmap,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 2b251f2093a..83b28444eb1 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -588,7 +588,6 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
588const struct address_space_operations ufs_aops = { 588const struct address_space_operations ufs_aops = {
589 .readpage = ufs_readpage, 589 .readpage = ufs_readpage,
590 .writepage = ufs_writepage, 590 .writepage = ufs_writepage,
591 .sync_page = block_sync_page,
592 .write_begin = ufs_write_begin, 591 .write_begin = ufs_write_begin,
593 .write_end = generic_write_end, 592 .write_end = generic_write_end,
594 .bmap = ufs_bmap 593 .bmap = ufs_bmap
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index a58f9155fc9..ff0e79276f2 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -481,7 +481,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
481 break; 481 break;
482 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) 482 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
483 ufs_sync_inode (inode); 483 ufs_sync_inode (inode);
484 blk_run_address_space(inode->i_mapping); 484 blk_flush_plug(current);
485 yield(); 485 yield();
486 } 486 }
487 487
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index ec7bbb5645b..6bbb0ee3325 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio(
413 if (xfs_ioend_new_eof(ioend)) 413 if (xfs_ioend_new_eof(ioend))
414 xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); 414 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
415 415
416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
417 WRITE_SYNC_PLUG : WRITE, bio);
418} 417}
419 418
420STATIC struct bio * 419STATIC struct bio *
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
1495 .readpages = xfs_vm_readpages, 1494 .readpages = xfs_vm_readpages,
1496 .writepage = xfs_vm_writepage, 1495 .writepage = xfs_vm_writepage,
1497 .writepages = xfs_vm_writepages, 1496 .writepages = xfs_vm_writepages,
1498 .sync_page = block_sync_page,
1499 .releasepage = xfs_vm_releasepage, 1497 .releasepage = xfs_vm_releasepage,
1500 .invalidatepage = xfs_vm_invalidatepage, 1498 .invalidatepage = xfs_vm_invalidatepage,
1501 .write_begin = xfs_vm_write_begin, 1499 .write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378d..4f8f53c4d42 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -991,7 +991,7 @@ xfs_buf_lock(
991 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 991 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
992 xfs_log_force(bp->b_target->bt_mount, 0); 992 xfs_log_force(bp->b_target->bt_mount, 0);
993 if (atomic_read(&bp->b_io_remaining)) 993 if (atomic_read(&bp->b_io_remaining))
994 blk_run_address_space(bp->b_target->bt_mapping); 994 blk_flush_plug(current);
995 down(&bp->b_sema); 995 down(&bp->b_sema);
996 XB_SET_OWNER(bp); 996 XB_SET_OWNER(bp);
997 997
@@ -1035,9 +1035,7 @@ xfs_buf_wait_unpin(
1035 set_current_state(TASK_UNINTERRUPTIBLE); 1035 set_current_state(TASK_UNINTERRUPTIBLE);
1036 if (atomic_read(&bp->b_pin_count) == 0) 1036 if (atomic_read(&bp->b_pin_count) == 0)
1037 break; 1037 break;
1038 if (atomic_read(&bp->b_io_remaining)) 1038 io_schedule();
1039 blk_run_address_space(bp->b_target->bt_mapping);
1040 schedule();
1041 } 1039 }
1042 remove_wait_queue(&bp->b_waiters, &wait); 1040 remove_wait_queue(&bp->b_waiters, &wait);
1043 set_current_state(TASK_RUNNING); 1041 set_current_state(TASK_RUNNING);
@@ -1443,7 +1441,7 @@ xfs_buf_iowait(
1443 trace_xfs_buf_iowait(bp, _RET_IP_); 1441 trace_xfs_buf_iowait(bp, _RET_IP_);
1444 1442
1445 if (atomic_read(&bp->b_io_remaining)) 1443 if (atomic_read(&bp->b_io_remaining))
1446 blk_run_address_space(bp->b_target->bt_mapping); 1444 blk_flush_plug(current);
1447 wait_for_completion(&bp->b_iowait); 1445 wait_for_completion(&bp->b_iowait);
1448 1446
1449 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1447 trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1667,7 +1665,6 @@ xfs_mapping_buftarg(
1667 struct inode *inode; 1665 struct inode *inode;
1668 struct address_space *mapping; 1666 struct address_space *mapping;
1669 static const struct address_space_operations mapping_aops = { 1667 static const struct address_space_operations mapping_aops = {
1670 .sync_page = block_sync_page,
1671 .migratepage = fail_migrate_page, 1668 .migratepage = fail_migrate_page,
1672 }; 1669 };
1673 1670
@@ -1948,7 +1945,7 @@ xfsbufd(
1948 count++; 1945 count++;
1949 } 1946 }
1950 if (count) 1947 if (count)
1951 blk_run_address_space(target->bt_mapping); 1948 blk_flush_plug(current);
1952 1949
1953 } while (!kthread_should_stop()); 1950 } while (!kthread_should_stop());
1954 1951
@@ -1996,7 +1993,7 @@ xfs_flush_buftarg(
1996 1993
1997 if (wait) { 1994 if (wait) {
1998 /* Expedite and wait for IO to complete. */ 1995 /* Expedite and wait for IO to complete. */
1999 blk_run_address_space(target->bt_mapping); 1996 blk_flush_plug(current);
2000 while (!list_empty(&wait_list)) { 1997 while (!list_empty(&wait_list)) {
2001 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1998 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2002 1999
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 4ce34fa937d..96f4094b706 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -66,8 +66,6 @@ struct backing_dev_info {
66 unsigned int capabilities; /* Device capabilities */ 66 unsigned int capabilities; /* Device capabilities */
67 congested_fn *congested_fn; /* Function pointer if device is md/dm */ 67 congested_fn *congested_fn; /* Function pointer if device is md/dm */
68 void *congested_data; /* Pointer to aux data for congested func */ 68 void *congested_data; /* Pointer to aux data for congested func */
69 void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
70 void *unplug_io_data;
71 69
72 char *name; 70 char *name;
73 71
@@ -251,7 +249,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
251 249
252extern struct backing_dev_info default_backing_dev_info; 250extern struct backing_dev_info default_backing_dev_info;
253extern struct backing_dev_info noop_backing_dev_info; 251extern struct backing_dev_info noop_backing_dev_info;
254void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page);
255 252
256int writeback_in_progress(struct backing_dev_info *bdi); 253int writeback_in_progress(struct backing_dev_info *bdi);
257 254
@@ -336,17 +333,4 @@ static inline int bdi_sched_wait(void *word)
336 return 0; 333 return 0;
337} 334}
338 335
339static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
340 struct page *page)
341{
342 if (bdi && bdi->unplug_io_fn)
343 bdi->unplug_io_fn(bdi, page);
344}
345
346static inline void blk_run_address_space(struct address_space *mapping)
347{
348 if (mapping)
349 blk_run_backing_dev(mapping->backing_dev_info, NULL);
350}
351
352#endif /* _LINUX_BACKING_DEV_H */ 336#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dddedfc0af8..be50d9e70a7 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -128,7 +128,6 @@ enum rq_flag_bits {
128 __REQ_NOIDLE, /* don't anticipate more IO after this one */ 128 __REQ_NOIDLE, /* don't anticipate more IO after this one */
129 129
130 /* bio only flags */ 130 /* bio only flags */
131 __REQ_UNPLUG, /* unplug the immediately after submission */
132 __REQ_RAHEAD, /* read ahead, can fail anytime */ 131 __REQ_RAHEAD, /* read ahead, can fail anytime */
133 __REQ_THROTTLED, /* This bio has already been subjected to 132 __REQ_THROTTLED, /* This bio has already been subjected to
134 * throttling rules. Don't do it again. */ 133 * throttling rules. Don't do it again. */
@@ -152,6 +151,7 @@ enum rq_flag_bits {
152 __REQ_IO_STAT, /* account I/O stat */ 151 __REQ_IO_STAT, /* account I/O stat */
153 __REQ_MIXED_MERGE, /* merge of different types, fail separately */ 152 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
154 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ 153 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
154 __REQ_ON_PLUG, /* on plug list */
155 __REQ_NR_BITS, /* stops here */ 155 __REQ_NR_BITS, /* stops here */
156}; 156};
157 157
@@ -171,7 +171,6 @@ enum rq_flag_bits {
171 REQ_NOIDLE | REQ_FLUSH | REQ_FUA) 171 REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
172#define REQ_CLONE_MASK REQ_COMMON_MASK 172#define REQ_CLONE_MASK REQ_COMMON_MASK
173 173
174#define REQ_UNPLUG (1 << __REQ_UNPLUG)
175#define REQ_RAHEAD (1 << __REQ_RAHEAD) 174#define REQ_RAHEAD (1 << __REQ_RAHEAD)
176#define REQ_THROTTLED (1 << __REQ_THROTTLED) 175#define REQ_THROTTLED (1 << __REQ_THROTTLED)
177 176
@@ -193,5 +192,6 @@ enum rq_flag_bits {
193#define REQ_IO_STAT (1 << __REQ_IO_STAT) 192#define REQ_IO_STAT (1 << __REQ_IO_STAT)
194#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) 193#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
195#define REQ_SECURE (1 << __REQ_SECURE) 194#define REQ_SECURE (1 << __REQ_SECURE)
195#define REQ_ON_PLUG (1 << __REQ_ON_PLUG)
196 196
197#endif /* __LINUX_BLK_TYPES_H */ 197#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 13b75ca6218..91fa428fa2c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
196typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 196typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
197typedef int (prep_rq_fn) (struct request_queue *, struct request *); 197typedef int (prep_rq_fn) (struct request_queue *, struct request *);
198typedef void (unprep_rq_fn) (struct request_queue *, struct request *); 198typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
199typedef void (unplug_fn) (struct request_queue *);
200 199
201struct bio_vec; 200struct bio_vec;
202struct bvec_merge_data { 201struct bvec_merge_data {
@@ -279,7 +278,6 @@ struct request_queue
279 make_request_fn *make_request_fn; 278 make_request_fn *make_request_fn;
280 prep_rq_fn *prep_rq_fn; 279 prep_rq_fn *prep_rq_fn;
281 unprep_rq_fn *unprep_rq_fn; 280 unprep_rq_fn *unprep_rq_fn;
282 unplug_fn *unplug_fn;
283 merge_bvec_fn *merge_bvec_fn; 281 merge_bvec_fn *merge_bvec_fn;
284 softirq_done_fn *softirq_done_fn; 282 softirq_done_fn *softirq_done_fn;
285 rq_timed_out_fn *rq_timed_out_fn; 283 rq_timed_out_fn *rq_timed_out_fn;
@@ -293,12 +291,9 @@ struct request_queue
293 struct request *boundary_rq; 291 struct request *boundary_rq;
294 292
295 /* 293 /*
296 * Auto-unplugging state 294 * Delayed queue handling
297 */ 295 */
298 struct timer_list unplug_timer; 296 struct delayed_work delay_work;
299 int unplug_thresh; /* After this many requests */
300 unsigned long unplug_delay; /* After this many jiffies */
301 struct work_struct unplug_work;
302 297
303 struct backing_dev_info backing_dev_info; 298 struct backing_dev_info backing_dev_info;
304 299
@@ -394,14 +389,13 @@ struct request_queue
394#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ 389#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
395#define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 390#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
396#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 391#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
397#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 392#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */
398#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 393#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */
399#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 394#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */
400#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 395#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */
401#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ 396#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */
402#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 397#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */
403#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ 398#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */
404#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
405#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ 399#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
406#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ 400#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
407#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ 401#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */
@@ -479,7 +473,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
479 __clear_bit(flag, &q->queue_flags); 473 __clear_bit(flag, &q->queue_flags);
480} 474}
481 475
482#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
483#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 476#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
484#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 477#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
485#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 478#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
@@ -674,9 +667,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
674extern void blk_rq_unprep_clone(struct request *rq); 667extern void blk_rq_unprep_clone(struct request *rq);
675extern int blk_insert_cloned_request(struct request_queue *q, 668extern int blk_insert_cloned_request(struct request_queue *q,
676 struct request *rq); 669 struct request *rq);
677extern void blk_plug_device(struct request_queue *); 670extern void blk_delay_queue(struct request_queue *, unsigned long);
678extern void blk_plug_device_unlocked(struct request_queue *);
679extern int blk_remove_plug(struct request_queue *);
680extern void blk_recount_segments(struct request_queue *, struct bio *); 671extern void blk_recount_segments(struct request_queue *, struct bio *);
681extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, 672extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
682 unsigned int, void __user *); 673 unsigned int, void __user *);
@@ -720,7 +711,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
720 struct request *, int); 711 struct request *, int);
721extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 712extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
722 struct request *, int, rq_end_io_fn *); 713 struct request *, int, rq_end_io_fn *);
723extern void blk_unplug(struct request_queue *q);
724 714
725static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 715static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
726{ 716{
@@ -857,7 +847,6 @@ extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bd
857 847
858extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 848extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
859extern void blk_dump_rq_flags(struct request *, char *); 849extern void blk_dump_rq_flags(struct request *, char *);
860extern void generic_unplug_device(struct request_queue *);
861extern long nr_blockdev_pages(void); 850extern long nr_blockdev_pages(void);
862 851
863int blk_get_queue(struct request_queue *); 852int blk_get_queue(struct request_queue *);
@@ -865,6 +854,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
865struct request_queue *blk_alloc_queue_node(gfp_t, int); 854struct request_queue *blk_alloc_queue_node(gfp_t, int);
866extern void blk_put_queue(struct request_queue *); 855extern void blk_put_queue(struct request_queue *);
867 856
857struct blk_plug {
858 unsigned long magic;
859 struct list_head list;
860 unsigned int should_sort;
861};
862
863extern void blk_start_plug(struct blk_plug *);
864extern void blk_finish_plug(struct blk_plug *);
865extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
866
867static inline void blk_flush_plug(struct task_struct *tsk)
868{
869 struct blk_plug *plug = tsk->plug;
870
871 if (unlikely(plug))
872 __blk_flush_plug(tsk, plug);
873}
874
875static inline bool blk_needs_flush_plug(struct task_struct *tsk)
876{
877 struct blk_plug *plug = tsk->plug;
878
879 return plug && !list_empty(&plug->list);
880}
881
868/* 882/*
869 * tag stuff 883 * tag stuff
870 */ 884 */
@@ -1283,6 +1297,23 @@ static inline long nr_blockdev_pages(void)
1283 return 0; 1297 return 0;
1284} 1298}
1285 1299
1300static inline void blk_start_plug(struct list_head *list)
1301{
1302}
1303
1304static inline void blk_finish_plug(struct list_head *list)
1305{
1306}
1307
1308static inline void blk_flush_plug(struct task_struct *tsk)
1309{
1310}
1311
1312static inline bool blk_needs_flush_plug(struct task_struct *tsk)
1313{
1314 return false;
1315}
1316
1286#endif /* CONFIG_BLOCK */ 1317#endif /* CONFIG_BLOCK */
1287 1318
1288#endif 1319#endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 68d1fe7b877..f5df23561b9 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -219,7 +219,6 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size);
219int block_commit_write(struct page *page, unsigned from, unsigned to); 219int block_commit_write(struct page *page, unsigned from, unsigned to);
220int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, 220int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
221 get_block_t get_block); 221 get_block_t get_block);
222void block_sync_page(struct page *);
223sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); 222sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
224int block_truncate_page(struct address_space *, loff_t, get_block_t *); 223int block_truncate_page(struct address_space *, loff_t, get_block_t *);
225int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, 224int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 272496d1fae..e2768834f39 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -286,11 +286,6 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback
286int dm_table_complete(struct dm_table *t); 286int dm_table_complete(struct dm_table *t);
287 287
288/* 288/*
289 * Unplug all devices in a table.
290 */
291void dm_table_unplug_all(struct dm_table *t);
292
293/*
294 * Table reference counting. 289 * Table reference counting.
295 */ 290 */
296struct dm_table *dm_get_live_table(struct mapped_device *md); 291struct dm_table *dm_get_live_table(struct mapped_device *md);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 39b68edb388..ec6f72b8447 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -20,7 +20,6 @@ typedef void (elevator_bio_merged_fn) (struct request_queue *,
20typedef int (elevator_dispatch_fn) (struct request_queue *, int); 20typedef int (elevator_dispatch_fn) (struct request_queue *, int);
21 21
22typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); 22typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
23typedef int (elevator_queue_empty_fn) (struct request_queue *);
24typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); 23typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
25typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); 24typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
26typedef int (elevator_may_queue_fn) (struct request_queue *, int); 25typedef int (elevator_may_queue_fn) (struct request_queue *, int);
@@ -46,7 +45,6 @@ struct elevator_ops
46 elevator_activate_req_fn *elevator_activate_req_fn; 45 elevator_activate_req_fn *elevator_activate_req_fn;
47 elevator_deactivate_req_fn *elevator_deactivate_req_fn; 46 elevator_deactivate_req_fn *elevator_deactivate_req_fn;
48 47
49 elevator_queue_empty_fn *elevator_queue_empty_fn;
50 elevator_completed_req_fn *elevator_completed_req_fn; 48 elevator_completed_req_fn *elevator_completed_req_fn;
51 49
52 elevator_request_list_fn *elevator_former_req_fn; 50 elevator_request_list_fn *elevator_former_req_fn;
@@ -101,17 +99,17 @@ struct elevator_queue
101 */ 99 */
102extern void elv_dispatch_sort(struct request_queue *, struct request *); 100extern void elv_dispatch_sort(struct request_queue *, struct request *);
103extern void elv_dispatch_add_tail(struct request_queue *, struct request *); 101extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
104extern void elv_add_request(struct request_queue *, struct request *, int, int); 102extern void elv_add_request(struct request_queue *, struct request *, int);
105extern void __elv_add_request(struct request_queue *, struct request *, int, int); 103extern void __elv_add_request(struct request_queue *, struct request *, int);
106extern void elv_insert(struct request_queue *, struct request *, int); 104extern void elv_insert(struct request_queue *, struct request *, int);
107extern int elv_merge(struct request_queue *, struct request **, struct bio *); 105extern int elv_merge(struct request_queue *, struct request **, struct bio *);
106extern int elv_try_merge(struct request *, struct bio *);
108extern void elv_merge_requests(struct request_queue *, struct request *, 107extern void elv_merge_requests(struct request_queue *, struct request *,
109 struct request *); 108 struct request *);
110extern void elv_merged_request(struct request_queue *, struct request *, int); 109extern void elv_merged_request(struct request_queue *, struct request *, int);
111extern void elv_bio_merged(struct request_queue *q, struct request *, 110extern void elv_bio_merged(struct request_queue *q, struct request *,
112 struct bio *); 111 struct bio *);
113extern void elv_requeue_request(struct request_queue *, struct request *); 112extern void elv_requeue_request(struct request_queue *, struct request *);
114extern int elv_queue_empty(struct request_queue *);
115extern struct request *elv_former_request(struct request_queue *, struct request *); 113extern struct request *elv_former_request(struct request_queue *, struct request *);
116extern struct request *elv_latter_request(struct request_queue *, struct request *); 114extern struct request *elv_latter_request(struct request_queue *, struct request *);
117extern int elv_register_queue(struct request_queue *q); 115extern int elv_register_queue(struct request_queue *q);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e38b50a4b9d..b95199daa5e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -135,16 +135,10 @@ struct inodes_stat_t {
135 * block layer could (in theory) choose to ignore this 135 * block layer could (in theory) choose to ignore this
136 * request if it runs into resource problems. 136 * request if it runs into resource problems.
137 * WRITE A normal async write. Device will be plugged. 137 * WRITE A normal async write. Device will be plugged.
138 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down 138 * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down
139 * the hint that someone will be waiting on this IO 139 * the hint that someone will be waiting on this IO
140 * shortly. The device must still be unplugged explicitly, 140 * shortly. The write equivalent of READ_SYNC.
141 * WRITE_SYNC_PLUG does not do this as we could be 141 * WRITE_ODIRECT Special case write for O_DIRECT only.
142 * submitting more writes before we actually wait on any
143 * of them.
144 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
145 * immediately after submission. The write equivalent
146 * of READ_SYNC.
147 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
148 * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. 142 * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
149 * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on 143 * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
150 * non-volatile media on completion. 144 * non-volatile media on completion.
@@ -160,18 +154,14 @@ struct inodes_stat_t {
160#define WRITE RW_MASK 154#define WRITE RW_MASK
161#define READA RWA_MASK 155#define READA RWA_MASK
162 156
163#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) 157#define READ_SYNC (READ | REQ_SYNC)
164#define READ_META (READ | REQ_META) 158#define READ_META (READ | REQ_META)
165#define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) 159#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
166#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) 160#define WRITE_ODIRECT (WRITE | REQ_SYNC)
167#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC)
168#define WRITE_META (WRITE | REQ_META) 161#define WRITE_META (WRITE | REQ_META)
169#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ 162#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
170 REQ_FLUSH) 163#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
171#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ 164#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
172 REQ_FUA)
173#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
174 REQ_FLUSH | REQ_FUA)
175 165
176#define SEL_IN 1 166#define SEL_IN 1
177#define SEL_OUT 2 167#define SEL_OUT 2
@@ -583,7 +573,6 @@ typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
583struct address_space_operations { 573struct address_space_operations {
584 int (*writepage)(struct page *page, struct writeback_control *wbc); 574 int (*writepage)(struct page *page, struct writeback_control *wbc);
585 int (*readpage)(struct file *, struct page *); 575 int (*readpage)(struct file *, struct page *);
586 void (*sync_page)(struct page *);
587 576
588 /* Write back some dirty pages from this mapping. */ 577 /* Write back some dirty pages from this mapping. */
589 int (*writepages)(struct address_space *, struct writeback_control *); 578 int (*writepages)(struct address_space *, struct writeback_control *);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9c66e994540..e112b8db2f3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -298,7 +298,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
298 298
299extern void __lock_page(struct page *page); 299extern void __lock_page(struct page *page);
300extern int __lock_page_killable(struct page *page); 300extern int __lock_page_killable(struct page *page);
301extern void __lock_page_nosync(struct page *page);
302extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, 301extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
303 unsigned int flags); 302 unsigned int flags);
304extern void unlock_page(struct page *page); 303extern void unlock_page(struct page *page);
@@ -342,17 +341,6 @@ static inline int lock_page_killable(struct page *page)
342} 341}
343 342
344/* 343/*
345 * lock_page_nosync should only be used if we can't pin the page's inode.
346 * Doesn't play quite so well with block device plugging.
347 */
348static inline void lock_page_nosync(struct page *page)
349{
350 might_sleep();
351 if (!trylock_page(page))
352 __lock_page_nosync(page);
353}
354
355/*
356 * lock_page_or_retry - Lock the page, unless this would block and the 344 * lock_page_or_retry - Lock the page, unless this would block and the
357 * caller indicated that it can handle a retry. 345 * caller indicated that it can handle a retry.
358 */ 346 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777d8a5ed06..96ac2264374 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,6 +99,7 @@ struct robust_list_head;
99struct bio_list; 99struct bio_list;
100struct fs_struct; 100struct fs_struct;
101struct perf_event_context; 101struct perf_event_context;
102struct blk_plug;
102 103
103/* 104/*
104 * List of flags we want to share for kernel threads, 105 * List of flags we want to share for kernel threads,
@@ -1429,6 +1430,11 @@ struct task_struct {
1429/* stacked block device info */ 1430/* stacked block device info */
1430 struct bio_list *bio_list; 1431 struct bio_list *bio_list;
1431 1432
1433#ifdef CONFIG_BLOCK
1434/* stack plugging */
1435 struct blk_plug *plug;
1436#endif
1437
1432/* VM state */ 1438/* VM state */
1433 struct reclaim_state *reclaim_state; 1439 struct reclaim_state *reclaim_state;
1434 1440
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d559325d91..9ee321833b2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -299,8 +299,6 @@ extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
299 struct page **pagep, swp_entry_t *ent); 299 struct page **pagep, swp_entry_t *ent);
300#endif 300#endif
301 301
302extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
303
304#ifdef CONFIG_SWAP 302#ifdef CONFIG_SWAP
305/* linux/mm/page_io.c */ 303/* linux/mm/page_io.c */
306extern int swap_readpage(struct page *); 304extern int swap_readpage(struct page *);
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b..6a488ad2dce 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
908 profile_task_exit(tsk); 908 profile_task_exit(tsk);
909 909
910 WARN_ON(atomic_read(&tsk->fs_excl)); 910 WARN_ON(atomic_read(&tsk->fs_excl));
911 WARN_ON(blk_needs_flush_plug(tsk));
911 912
912 if (unlikely(in_interrupt())) 913 if (unlikely(in_interrupt()))
913 panic("Aiee, killing interrupt handler!"); 914 panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e429152dd..027c80e5162 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1204 * Clear TID on mm_release()? 1204 * Clear TID on mm_release()?
1205 */ 1205 */
1206 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1206 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1207#ifdef CONFIG_BLOCK
1208 p->plug = NULL;
1209#endif
1207#ifdef CONFIG_FUTEX 1210#ifdef CONFIG_FUTEX
1208 p->robust_list = NULL; 1211 p->robust_list = NULL;
1209#ifdef CONFIG_COMPAT 1212#ifdef CONFIG_COMPAT
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index 83bbc7c02df..d09dd10c5a5 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -28,7 +28,7 @@
28static int submit(int rw, struct block_device *bdev, sector_t sector, 28static int submit(int rw, struct block_device *bdev, sector_t sector,
29 struct page *page, struct bio **bio_chain) 29 struct page *page, struct bio **bio_chain)
30{ 30{
31 const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; 31 const int bio_rw = rw | REQ_SYNC;
32 struct bio *bio; 32 struct bio *bio;
33 33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7b..ca098bf4cc6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3978,6 +3978,16 @@ need_resched_nonpreemptible:
3978 switch_count = &prev->nvcsw; 3978 switch_count = &prev->nvcsw;
3979 } 3979 }
3980 3980
3981 /*
3982 * If we are going to sleep and we have plugged IO queued, make
3983 * sure to submit it to avoid deadlocks.
3984 */
3985 if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
3986 raw_spin_unlock(&rq->lock);
3987 blk_flush_plug(prev);
3988 raw_spin_lock(&rq->lock);
3989 }
3990
3981 pre_schedule(rq, prev); 3991 pre_schedule(rq, prev);
3982 3992
3983 if (unlikely(!rq->nr_running)) 3993 if (unlikely(!rq->nr_running))
@@ -5333,6 +5343,7 @@ void __sched io_schedule(void)
5333 5343
5334 delayacct_blkio_start(); 5344 delayacct_blkio_start();
5335 atomic_inc(&rq->nr_iowait); 5345 atomic_inc(&rq->nr_iowait);
5346 blk_flush_plug(current);
5336 current->in_iowait = 1; 5347 current->in_iowait = 1;
5337 schedule(); 5348 schedule();
5338 current->in_iowait = 0; 5349 current->in_iowait = 0;
@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)
5348 5359
5349 delayacct_blkio_start(); 5360 delayacct_blkio_start();
5350 atomic_inc(&rq->nr_iowait); 5361 atomic_inc(&rq->nr_iowait);
5362 blk_flush_plug(current);
5351 current->in_iowait = 1; 5363 current->in_iowait = 1;
5352 ret = schedule_timeout(timeout); 5364 ret = schedule_timeout(timeout);
5353 current->in_iowait = 0; 5365 current->in_iowait = 0;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d3022..c91e139a652 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -14,17 +14,11 @@
14 14
15static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 15static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
16 16
17void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
18{
19}
20EXPORT_SYMBOL(default_unplug_io_fn);
21
22struct backing_dev_info default_backing_dev_info = { 17struct backing_dev_info default_backing_dev_info = {
23 .name = "default", 18 .name = "default",
24 .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, 19 .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
25 .state = 0, 20 .state = 0,
26 .capabilities = BDI_CAP_MAP_COPY, 21 .capabilities = BDI_CAP_MAP_COPY,
27 .unplug_io_fn = default_unplug_io_fn,
28}; 22};
29EXPORT_SYMBOL_GPL(default_backing_dev_info); 23EXPORT_SYMBOL_GPL(default_backing_dev_info);
30 24
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d35468..f9a29c87a2c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -155,45 +155,15 @@ void remove_from_page_cache(struct page *page)
155} 155}
156EXPORT_SYMBOL(remove_from_page_cache); 156EXPORT_SYMBOL(remove_from_page_cache);
157 157
158static int sync_page(void *word) 158static int sleep_on_page(void *word)
159{ 159{
160 struct address_space *mapping;
161 struct page *page;
162
163 page = container_of((unsigned long *)word, struct page, flags);
164
165 /*
166 * page_mapping() is being called without PG_locked held.
167 * Some knowledge of the state and use of the page is used to
168 * reduce the requirements down to a memory barrier.
169 * The danger here is of a stale page_mapping() return value
170 * indicating a struct address_space different from the one it's
171 * associated with when it is associated with one.
172 * After smp_mb(), it's either the correct page_mapping() for
173 * the page, or an old page_mapping() and the page's own
174 * page_mapping() has gone NULL.
175 * The ->sync_page() address_space operation must tolerate
176 * page_mapping() going NULL. By an amazing coincidence,
177 * this comes about because none of the users of the page
178 * in the ->sync_page() methods make essential use of the
179 * page_mapping(), merely passing the page down to the backing
180 * device's unplug functions when it's non-NULL, which in turn
181 * ignore it for all cases but swap, where only page_private(page) is
182 * of interest. When page_mapping() does go NULL, the entire
183 * call stack gracefully ignores the page and returns.
184 * -- wli
185 */
186 smp_mb();
187 mapping = page_mapping(page);
188 if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
189 mapping->a_ops->sync_page(page);
190 io_schedule(); 160 io_schedule();
191 return 0; 161 return 0;
192} 162}
193 163
194static int sync_page_killable(void *word) 164static int sleep_on_page_killable(void *word)
195{ 165{
196 sync_page(word); 166 sleep_on_page(word);
197 return fatal_signal_pending(current) ? -EINTR : 0; 167 return fatal_signal_pending(current) ? -EINTR : 0;
198} 168}
199 169
@@ -479,12 +449,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
479EXPORT_SYMBOL(__page_cache_alloc); 449EXPORT_SYMBOL(__page_cache_alloc);
480#endif 450#endif
481 451
482static int __sleep_on_page_lock(void *word)
483{
484 io_schedule();
485 return 0;
486}
487
488/* 452/*
489 * In order to wait for pages to become available there must be 453 * In order to wait for pages to become available there must be
490 * waitqueues associated with pages. By using a hash table of 454 * waitqueues associated with pages. By using a hash table of
@@ -512,7 +476,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
512 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 476 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
513 477
514 if (test_bit(bit_nr, &page->flags)) 478 if (test_bit(bit_nr, &page->flags))
515 __wait_on_bit(page_waitqueue(page), &wait, sync_page, 479 __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
516 TASK_UNINTERRUPTIBLE); 480 TASK_UNINTERRUPTIBLE);
517} 481}
518EXPORT_SYMBOL(wait_on_page_bit); 482EXPORT_SYMBOL(wait_on_page_bit);
@@ -576,17 +540,12 @@ EXPORT_SYMBOL(end_page_writeback);
576/** 540/**
577 * __lock_page - get a lock on the page, assuming we need to sleep to get it 541 * __lock_page - get a lock on the page, assuming we need to sleep to get it
578 * @page: the page to lock 542 * @page: the page to lock
579 *
580 * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
581 * random driver's requestfn sets TASK_RUNNING, we could busywait. However
582 * chances are that on the second loop, the block layer's plug list is empty,
583 * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
584 */ 543 */
585void __lock_page(struct page *page) 544void __lock_page(struct page *page)
586{ 545{
587 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); 546 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
588 547
589 __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page, 548 __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
590 TASK_UNINTERRUPTIBLE); 549 TASK_UNINTERRUPTIBLE);
591} 550}
592EXPORT_SYMBOL(__lock_page); 551EXPORT_SYMBOL(__lock_page);
@@ -596,24 +555,10 @@ int __lock_page_killable(struct page *page)
596 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); 555 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
597 556
598 return __wait_on_bit_lock(page_waitqueue(page), &wait, 557 return __wait_on_bit_lock(page_waitqueue(page), &wait,
599 sync_page_killable, TASK_KILLABLE); 558 sleep_on_page_killable, TASK_KILLABLE);
600} 559}
601EXPORT_SYMBOL_GPL(__lock_page_killable); 560EXPORT_SYMBOL_GPL(__lock_page_killable);
602 561
603/**
604 * __lock_page_nosync - get a lock on the page, without calling sync_page()
605 * @page: the page to lock
606 *
607 * Variant of lock_page that does not require the caller to hold a reference
608 * on the page's mapping.
609 */
610void __lock_page_nosync(struct page *page)
611{
612 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
613 __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
614 TASK_UNINTERRUPTIBLE);
615}
616
617int __lock_page_or_retry(struct page *page, struct mm_struct *mm, 562int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
618 unsigned int flags) 563 unsigned int flags)
619{ 564{
@@ -1298,12 +1243,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1298 unsigned long seg = 0; 1243 unsigned long seg = 0;
1299 size_t count; 1244 size_t count;
1300 loff_t *ppos = &iocb->ki_pos; 1245 loff_t *ppos = &iocb->ki_pos;
1246 struct blk_plug plug;
1301 1247
1302 count = 0; 1248 count = 0;
1303 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1249 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1304 if (retval) 1250 if (retval)
1305 return retval; 1251 return retval;
1306 1252
1253 blk_start_plug(&plug);
1254
1307 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1255 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
1308 if (filp->f_flags & O_DIRECT) { 1256 if (filp->f_flags & O_DIRECT) {
1309 loff_t size; 1257 loff_t size;
@@ -1376,6 +1324,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1376 break; 1324 break;
1377 } 1325 }
1378out: 1326out:
1327 blk_finish_plug(&plug);
1379 return retval; 1328 return retval;
1380} 1329}
1381EXPORT_SYMBOL(generic_file_aio_read); 1330EXPORT_SYMBOL(generic_file_aio_read);
@@ -2487,11 +2436,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2487{ 2436{
2488 struct file *file = iocb->ki_filp; 2437 struct file *file = iocb->ki_filp;
2489 struct inode *inode = file->f_mapping->host; 2438 struct inode *inode = file->f_mapping->host;
2439 struct blk_plug plug;
2490 ssize_t ret; 2440 ssize_t ret;
2491 2441
2492 BUG_ON(iocb->ki_pos != pos); 2442 BUG_ON(iocb->ki_pos != pos);
2493 2443
2494 mutex_lock(&inode->i_mutex); 2444 mutex_lock(&inode->i_mutex);
2445 blk_start_plug(&plug);
2495 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 2446 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
2496 mutex_unlock(&inode->i_mutex); 2447 mutex_unlock(&inode->i_mutex);
2497 2448
@@ -2502,6 +2453,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2502 if (err < 0 && ret > 0) 2453 if (err < 0 && ret > 0)
2503 ret = err; 2454 ret = err;
2504 } 2455 }
2456 blk_finish_plug(&plug);
2505 return ret; 2457 return ret;
2506} 2458}
2507EXPORT_SYMBOL(generic_file_aio_write); 2459EXPORT_SYMBOL(generic_file_aio_write);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0207c2f6f8b..bfba796d374 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
945 collect_procs(ppage, &tokill); 945 collect_procs(ppage, &tokill);
946 946
947 if (hpage != ppage) 947 if (hpage != ppage)
948 lock_page_nosync(ppage); 948 lock_page(ppage);
949 949
950 ret = try_to_unmap(ppage, ttu); 950 ret = try_to_unmap(ppage, ttu);
951 if (ret != SWAP_SUCCESS) 951 if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
1038 * Check "just unpoisoned", "filter hit", and 1038 * Check "just unpoisoned", "filter hit", and
1039 * "race with other subpage." 1039 * "race with other subpage."
1040 */ 1040 */
1041 lock_page_nosync(hpage); 1041 lock_page(hpage);
1042 if (!PageHWPoison(hpage) 1042 if (!PageHWPoison(hpage)
1043 || (hwpoison_filter(p) && TestClearPageHWPoison(p)) 1043 || (hwpoison_filter(p) && TestClearPageHWPoison(p))
1044 || (p != hpage && TestSetPageHWPoison(hpage))) { 1044 || (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
1088 * It's very difficult to mess with pages currently under IO 1088 * It's very difficult to mess with pages currently under IO
1089 * and in many cases impossible, so we just avoid it here. 1089 * and in many cases impossible, so we just avoid it here.
1090 */ 1090 */
1091 lock_page_nosync(hpage); 1091 lock_page(hpage);
1092 1092
1093 /* 1093 /*
1094 * unpoison always clear PG_hwpoison inside page lock 1094 * unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
1231 return 0; 1231 return 0;
1232 } 1232 }
1233 1233
1234 lock_page_nosync(page); 1234 lock_page(page);
1235 /* 1235 /*
1236 * This test is racy because PG_hwpoison is set outside of page lock. 1236 * This test is racy because PG_hwpoison is set outside of page lock.
1237 * That's acceptable because that won't trigger kernel panic. Instead, 1237 * That's acceptable because that won't trigger kernel panic. Instead,
diff --git a/mm/nommu.c b/mm/nommu.c
index f59e1424d3d..fb6cbd6abe1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
1842} 1842}
1843EXPORT_SYMBOL(remap_vmalloc_range); 1843EXPORT_SYMBOL(remap_vmalloc_range);
1844 1844
1845void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1846{
1847}
1848
1849unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, 1845unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1850 unsigned long len, unsigned long pgoff, unsigned long flags) 1846 unsigned long len, unsigned long pgoff, unsigned long flags)
1851{ 1847{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2cb01f6ec5d..cc0ede169e4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1239,7 +1239,7 @@ int set_page_dirty_lock(struct page *page)
1239{ 1239{
1240 int ret; 1240 int ret;
1241 1241
1242 lock_page_nosync(page); 1242 lock_page(page);
1243 ret = set_page_dirty(page); 1243 ret = set_page_dirty(page);
1244 unlock_page(page); 1244 unlock_page(page);
1245 return ret; 1245 return ret;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2dee975bf46..dc76b4d0611 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
106 goto out; 106 goto out;
107 } 107 }
108 if (wbc->sync_mode == WB_SYNC_ALL) 108 if (wbc->sync_mode == WB_SYNC_ALL)
109 rw |= REQ_SYNC | REQ_UNPLUG; 109 rw |= REQ_SYNC;
110 count_vm_event(PSWPOUT); 110 count_vm_event(PSWPOUT);
111 set_page_writeback(page); 111 set_page_writeback(page);
112 unlock_page(page); 112 unlock_page(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a291a2..2c0cc489e28 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -109,9 +109,12 @@ EXPORT_SYMBOL(read_cache_pages);
109static int read_pages(struct address_space *mapping, struct file *filp, 109static int read_pages(struct address_space *mapping, struct file *filp,
110 struct list_head *pages, unsigned nr_pages) 110 struct list_head *pages, unsigned nr_pages)
111{ 111{
112 struct blk_plug plug;
112 unsigned page_idx; 113 unsigned page_idx;
113 int ret; 114 int ret;
114 115
116 blk_start_plug(&plug);
117
115 if (mapping->a_ops->readpages) { 118 if (mapping->a_ops->readpages) {
116 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); 119 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
117 /* Clean up the remaining pages */ 120 /* Clean up the remaining pages */
@@ -129,7 +132,10 @@ static int read_pages(struct address_space *mapping, struct file *filp,
129 page_cache_release(page); 132 page_cache_release(page);
130 } 133 }
131 ret = 0; 134 ret = 0;
135
132out: 136out:
137 blk_finish_plug(&plug);
138
133 return ret; 139 return ret;
134} 140}
135 141
@@ -554,17 +560,5 @@ page_cache_async_readahead(struct address_space *mapping,
554 560
555 /* do read-ahead */ 561 /* do read-ahead */
556 ondemand_readahead(mapping, ra, filp, true, offset, req_size); 562 ondemand_readahead(mapping, ra, filp, true, offset, req_size);
557
558#ifdef CONFIG_BLOCK
559 /*
560 * Normally the current page is !uptodate and lock_page() will be
561 * immediately called to implicitly unplug the device. However this
562 * is not always true for RAID conifgurations, where data arrives
563 * not strictly in their submission order. In this case we need to
564 * explicitly kick off the IO.
565 */
566 if (PageUptodate(page))
567 blk_run_backing_dev(mapping->backing_dev_info, NULL);
568#endif
569} 563}
570EXPORT_SYMBOL_GPL(page_cache_async_readahead); 564EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c99060..24d23f5bedf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
224static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 224static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
225 .ra_pages = 0, /* No readahead */ 225 .ra_pages = 0, /* No readahead */
226 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, 226 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
227 .unplug_io_fn = default_unplug_io_fn,
228}; 227};
229 228
230static LIST_HEAD(shmem_swaplist); 229static LIST_HEAD(shmem_swaplist);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5c8cfabbc9b..46680461785 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,12 +24,10 @@
24 24
25/* 25/*
26 * swapper_space is a fiction, retained to simplify the path through 26 * swapper_space is a fiction, retained to simplify the path through
27 * vmscan's shrink_page_list, to make sync_page look nicer, and to allow 27 * vmscan's shrink_page_list.
28 * future use of radix_tree tags in the swap cache.
29 */ 28 */
30static const struct address_space_operations swap_aops = { 29static const struct address_space_operations swap_aops = {
31 .writepage = swap_writepage, 30 .writepage = swap_writepage,
32 .sync_page = block_sync_page,
33 .set_page_dirty = __set_page_dirty_nobuffers, 31 .set_page_dirty = __set_page_dirty_nobuffers,
34 .migratepage = migrate_page, 32 .migratepage = migrate_page,
35}; 33};
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
37static struct backing_dev_info swap_backing_dev_info = { 35static struct backing_dev_info swap_backing_dev_info = {
38 .name = "swap", 36 .name = "swap",
39 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, 37 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
40 .unplug_io_fn = swap_unplug_io_fn,
41}; 38};
42 39
43struct address_space swapper_space = { 40struct address_space swapper_space = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0341c5700e3..64d627ab624 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
95} 95}
96 96
97/* 97/*
98 * We need this because the bdev->unplug_fn can sleep and we cannot
99 * hold swap_lock while calling the unplug_fn. And swap_lock
100 * cannot be turned into a mutex.
101 */
102static DECLARE_RWSEM(swap_unplug_sem);
103
104void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
105{
106 swp_entry_t entry;
107
108 down_read(&swap_unplug_sem);
109 entry.val = page_private(page);
110 if (PageSwapCache(page)) {
111 struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
112 struct backing_dev_info *bdi;
113
114 /*
115 * If the page is removed from swapcache from under us (with a
116 * racy try_to_unuse/swapoff) we need an additional reference
117 * count to avoid reading garbage from page_private(page) above.
118 * If the WARN_ON triggers during a swapoff it maybe the race
119 * condition and it's harmless. However if it triggers without
120 * swapoff it signals a problem.
121 */
122 WARN_ON(page_count(page) <= 1);
123
124 bdi = bdev->bd_inode->i_mapping->backing_dev_info;
125 blk_run_backing_dev(bdi, page);
126 }
127 up_read(&swap_unplug_sem);
128}
129
130/*
131 * swapon tell device that all the old swap contents can be discarded, 98 * swapon tell device that all the old swap contents can be discarded,
132 * to allow the swap device to optimize its wear-levelling. 99 * to allow the swap device to optimize its wear-levelling.
133 */ 100 */
@@ -1643,10 +1610,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1643 goto out_dput; 1610 goto out_dput;
1644 } 1611 }
1645 1612
1646 /* wait for any unplug function to finish */
1647 down_write(&swap_unplug_sem);
1648 up_write(&swap_unplug_sem);
1649
1650 destroy_swap_extents(p); 1613 destroy_swap_extents(p);
1651 if (p->flags & SWP_CONTINUED) 1614 if (p->flags & SWP_CONTINUED)
1652 free_swap_count_continuations(p); 1615 free_swap_count_continuations(p);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6771ea70bfe..951cac21c2e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
358static void handle_write_error(struct address_space *mapping, 358static void handle_write_error(struct address_space *mapping,
359 struct page *page, int error) 359 struct page *page, int error)
360{ 360{
361 lock_page_nosync(page); 361 lock_page(page);
362 if (page_mapping(page) == mapping) 362 if (page_mapping(page) == mapping)
363 mapping_set_error(mapping, error); 363 mapping_set_error(mapping, error);
364 unlock_page(page); 364 unlock_page(page);