aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo2012-06-26 17:05:44 -0500
committerJens Axboe2012-06-26 17:42:49 -0500
commita051661ca6d134c18599498b185b667859d4339b (patch)
tree9d840030874aed9b97a58051bf9568455126e8e8 /block/blk-cgroup.h
parent5b788ce3e2acac9bf109743b1281d77347cf2101 (diff)
downloadkernel-common-a051661ca6d134c18599498b185b667859d4339b.tar.gz
kernel-common-a051661ca6d134c18599498b185b667859d4339b.tar.xz
kernel-common-a051661ca6d134c18599498b185b667859d4339b.zip
blkcg: implement per-blkg request allocation
Currently, request_queue has one request_list to allocate requests from regardless of blkcg of the IO being issued. When the unified request pool is used up, cfq proportional IO limits become meaningless - whoever grabs the next request being freed wins the race regardless of the configured weights. This can be easily demonstrated by creating a blkio cgroup w/ very low weight, put a program which can issue a lot of random direct IOs there and running a sequential IO from a different cgroup. As soon as the request pool is used up, the sequential IO bandwidth crashes. This patch implements per-blkg request_list. Each blkg has its own request_list and any IO allocates its request from the matching blkg making blkcgs completely isolated in terms of request allocation. * Root blkcg uses the request_list embedded in each request_queue, which was renamed to @q->root_rl from @q->rq. While making blkcg rl handling a bit harier, this enables avoiding most overhead for root blkcg. * Queue fullness is properly per request_list but bdi isn't blkcg aware yet, so congestion state currently just follows the root blkcg. As writeback isn't aware of blkcg yet, this works okay for async congestion but readahead may get the wrong signals. It's better than blkcg completely collapsing with shared request_list but needs to be improved with future changes. * After this change, each block cgroup gets a full request pool making resource consumption of each cgroup higher. This makes allowing non-root users to create cgroups less desirable; however, note that allowing non-root users to directly manage cgroups is already severely broken regardless of this patch - each block cgroup consumes kernel memory and skews IO weight (IO weights are not hierarchical). v2: queue-sysfs.txt updated and patch description udpated as suggested by Vivek. v3: blk_get_rl() wasn't checking error return from blkg_lookup_create() and may cause oops on lookup failure. Fix it by falling back to root_rl on blkg lookup failures. This problem was spotted by Rakesh Iyer <rni@google.com>. v4: Updated to accomodate 458f27a982 "block: Avoid missed wakeup in request waitqueue". blk_drain_queue() now wakes up waiters on all blkg->rl on the target queue. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.h')
-rw-r--r--block/blk-cgroup.h102
1 files changed, 102 insertions, 0 deletions
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index e74cce1fbac..24597309e23 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -17,6 +17,7 @@
17#include <linux/u64_stats_sync.h> 17#include <linux/u64_stats_sync.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/radix-tree.h> 19#include <linux/radix-tree.h>
20#include <linux/blkdev.h>
20 21
21/* Max limits for throttle policy */ 22/* Max limits for throttle policy */
22#define THROTL_IOPS_MAX UINT_MAX 23#define THROTL_IOPS_MAX UINT_MAX
@@ -93,6 +94,8 @@ struct blkcg_gq {
93 struct list_head q_node; 94 struct list_head q_node;
94 struct hlist_node blkcg_node; 95 struct hlist_node blkcg_node;
95 struct blkcg *blkcg; 96 struct blkcg *blkcg;
97 /* request allocation list for this blkcg-q pair */
98 struct request_list rl;
96 /* reference count */ 99 /* reference count */
97 int refcnt; 100 int refcnt;
98 101
@@ -251,6 +254,95 @@ static inline void blkg_put(struct blkcg_gq *blkg)
251} 254}
252 255
253/** 256/**
257 * blk_get_rl - get request_list to use
258 * @q: request_queue of interest
259 * @bio: bio which will be attached to the allocated request (may be %NULL)
260 *
261 * The caller wants to allocate a request from @q to use for @bio. Find
262 * the request_list to use and obtain a reference on it. Should be called
263 * under queue_lock. This function is guaranteed to return non-%NULL
264 * request_list.
265 */
266static inline struct request_list *blk_get_rl(struct request_queue *q,
267 struct bio *bio)
268{
269 struct blkcg *blkcg;
270 struct blkcg_gq *blkg;
271
272 rcu_read_lock();
273
274 blkcg = bio_blkcg(bio);
275
276 /* bypass blkg lookup and use @q->root_rl directly for root */
277 if (blkcg == &blkcg_root)
278 goto root_rl;
279
280 /*
281 * Try to use blkg->rl. blkg lookup may fail under memory pressure
282 * or if either the blkcg or queue is going away. Fall back to
283 * root_rl in such cases.
284 */
285 blkg = blkg_lookup_create(blkcg, q);
286 if (unlikely(IS_ERR(blkg)))
287 goto root_rl;
288
289 blkg_get(blkg);
290 rcu_read_unlock();
291 return &blkg->rl;
292root_rl:
293 rcu_read_unlock();
294 return &q->root_rl;
295}
296
297/**
298 * blk_put_rl - put request_list
299 * @rl: request_list to put
300 *
301 * Put the reference acquired by blk_get_rl(). Should be called under
302 * queue_lock.
303 */
304static inline void blk_put_rl(struct request_list *rl)
305{
306 /* root_rl may not have blkg set */
307 if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
308 blkg_put(rl->blkg);
309}
310
311/**
312 * blk_rq_set_rl - associate a request with a request_list
313 * @rq: request of interest
314 * @rl: target request_list
315 *
316 * Associate @rq with @rl so that accounting and freeing can know the
317 * request_list @rq came from.
318 */
319static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
320{
321 rq->rl = rl;
322}
323
324/**
325 * blk_rq_rl - return the request_list a request came from
326 * @rq: request of interest
327 *
328 * Return the request_list @rq is allocated from.
329 */
330static inline struct request_list *blk_rq_rl(struct request *rq)
331{
332 return rq->rl;
333}
334
335struct request_list *__blk_queue_next_rl(struct request_list *rl,
336 struct request_queue *q);
337/**
338 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
339 *
340 * Should be used under queue_lock.
341 */
342#define blk_queue_for_each_rl(rl, q) \
343 for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
344
345/**
254 * blkg_stat_add - add a value to a blkg_stat 346 * blkg_stat_add - add a value to a blkg_stat
255 * @stat: target blkg_stat 347 * @stat: target blkg_stat
256 * @val: value to add 348 * @val: value to add
@@ -392,6 +484,7 @@ static inline void blkcg_deactivate_policy(struct request_queue *q,
392 484
393static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } 485static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
394static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } 486static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
487
395static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, 488static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
396 struct blkcg_policy *pol) { return NULL; } 489 struct blkcg_policy *pol) { return NULL; }
397static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } 490static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -399,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
399static inline void blkg_get(struct blkcg_gq *blkg) { } 492static inline void blkg_get(struct blkcg_gq *blkg) { }
400static inline void blkg_put(struct blkcg_gq *blkg) { } 493static inline void blkg_put(struct blkcg_gq *blkg) { }
401 494
495static inline struct request_list *blk_get_rl(struct request_queue *q,
496 struct bio *bio) { return &q->root_rl; }
497static inline void blk_put_rl(struct request_list *rl) { }
498static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
499static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
500
501#define blk_queue_for_each_rl(rl, q) \
502 for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
503
402#endif /* CONFIG_BLK_CGROUP */ 504#endif /* CONFIG_BLK_CGROUP */
403#endif /* _BLK_CGROUP_H */ 505#endif /* _BLK_CGROUP_H */