1 diff -uprN linux-2.6.28.orig/block/bfq-iosched.c linux-2.6.28.new/block/bfq-iosched.c
2 --- linux-2.6.28.orig/block/bfq-iosched.c 2011-06-10 22:22:20.927729846 +0200
3 +++ linux-2.6.28.new/block/bfq-iosched.c 2011-06-10 22:21:37.968026248 +0200
5 * complexity derives from the one introduced with EEVDF in [3].
7 * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling
8 - * with Deterministic Guarantees on Bandwidth Distribution,'' to appear
9 - * on IEEE Transactions on Computer.
10 + * with Deterministic Guarantees on Bandwidth Distribution,'',
11 + * IEEE Transactions on Computers, May 2010.
13 - * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq.pdf
14 + * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf
16 * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
17 * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
18 @@ -354,6 +354,7 @@ static void bfq_add_rq_rb(struct request
19 struct bfq_entity *entity = &bfqq->entity;
20 struct bfq_data *bfqd = bfqq->bfqd;
21 struct request *__alias, *next_rq;
22 + unsigned long old_raising_coeff = bfqq->raising_coeff;
24 bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq));
25 bfqq->queued[rq_is_sync(rq)]++;
26 @@ -377,26 +378,34 @@ static void bfq_add_rq_rb(struct request
27 entity->budget = max_t(unsigned long, bfqq->max_budget,
28 bfq_serv_to_charge(next_rq, bfqq));
30 + if (! bfqd->low_latency)
34 * If the queue is not being boosted and has been idle
35 - * for enough time, start a boosting period
36 + * for enough time, start a weight-raising period
38 - if (bfqd->low_latency && bfqq->high_weight_budget == 0) {
39 - if (bfqq->last_activation_time + BFQ_MIN_ACT_INTERVAL <
40 - jiffies_to_msecs(jiffies)) {
41 - bfqq->high_weight_budget = BFQ_BOOST_BUDGET;
42 - entity->ioprio_changed = 1;
43 - bfq_log_bfqq(bfqd, bfqq,
44 - "wboost starting at %lu msec",
45 - bfqq->last_activation_time);
47 - bfqq->last_activation_time =
48 - jiffies_to_msecs(jiffies);
49 + if(old_raising_coeff == 1 &&
50 + bfqq->last_rais_start_finish +
51 + bfqd->bfq_raising_min_idle_time < jiffies) {
52 + bfqq->raising_coeff = bfqd->bfq_raising_coeff;
53 + entity->ioprio_changed = 1;
54 + bfq_log_bfqq(bfqd, bfqq,
55 + "wrais starting at %lu msec",
56 + bfqq->last_rais_start_finish);
60 bfq_add_bfqq_busy(bfqd, bfqq);
62 bfq_updated_next_req(bfqd, bfqq);
64 + if (! bfqd->low_latency)
67 + if(old_raising_coeff == 1 ||
68 + (bfqd->bfq_raising_max_softrt_rate > 0 &&
69 + bfqq->soft_rt_next_start < jiffies))
70 + bfqq->last_rais_start_finish = jiffies;
73 static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq)
74 @@ -605,12 +614,15 @@ static void bfq_arm_slice_timer(struct b
76 sl = bfqd->bfq_slice_idle;
77 if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) &&
78 - bfqq->entity.service > bfq_max_budget(bfqd) / 8)
79 + bfqq->entity.service > bfq_max_budget(bfqd) / 8 &&
80 + bfqq->raising_coeff == 1)
81 sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
83 + else if (bfqq->raising_coeff > 1)
85 bfqd->last_idling_start = ktime_get();
86 mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
87 - bfq_log(bfqd, "arm idle: %lu ms", sl);
88 + bfq_log(bfqd, "arm idle: %lu/%lu ms",
89 + jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
93 @@ -626,7 +638,7 @@ static void bfq_set_budget_timeout(struc
95 bfq_clear_bfqq_budget_new(bfqq);
96 bfqq->budget_timeout = jiffies +
97 - bfqd->bfq_timeout[!!bfq_bfqq_sync(bfqq)] *
98 + bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
99 (bfqq->entity.weight / bfqq->entity.orig_weight);
102 @@ -997,6 +1009,18 @@ static void bfq_bfqq_expire(struct bfq_d
103 bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
104 bfq_bfqq_charge_full_budget(bfqq);
106 + if (bfqd->low_latency && bfqq->raising_coeff == 1)
107 + bfqq->last_rais_start_finish = jiffies;
109 + if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) {
110 + if(reason != BFQ_BFQQ_BUDGET_TIMEOUT)
111 + bfqq->soft_rt_next_start =
113 + HZ * bfqq->entity.service /
114 + bfqd->bfq_raising_max_softrt_rate;
116 + bfqq->soft_rt_next_start = -1; /* infinity */
118 bfq_log_bfqq(bfqd, bfqq,
119 "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow,
120 bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
121 @@ -1128,31 +1152,36 @@ static int __bfq_dispatch_requests(struc
122 bfq_bfqq_served(bfqq, service_to_charge);
123 bfq_dispatch_insert(bfqd->queue, rq);
125 - if (bfqq->high_weight_budget > 0) { /* queue is being boosted */
126 + if (bfqq->raising_coeff > 1) { /* queue is being boosted */
127 struct bfq_entity *entity = &bfqq->entity;
129 - bfq_log_bfqq(bfqd, bfqq, "busy period dur %llu msec, "
130 - "old highwbudg %lu",
131 - jiffies_to_msecs(jiffies) -
132 - bfqq->last_activation_time,
133 - bfqq->high_weight_budget);
134 + bfq_log_bfqq(bfqd, bfqq,
135 + "raising period dur %llu/%lu msec, "
136 + "old raising coeff %lu, w %lu(%lu)",
137 + jiffies - bfqq->last_rais_start_finish,
138 + bfqd->bfq_raising_max_time,
139 + bfqq->raising_coeff,
140 + bfqq->entity.weight, bfqq->entity.orig_weight);
142 + BUG_ON(entity->weight !=
143 + entity->orig_weight * bfqq->raising_coeff);
144 + if(entity->ioprio_changed)
145 + bfq_log_bfqq(bfqd, bfqq,
146 + "WARN: pending prio change");
148 - * Decrease the budget for weight boosting by
149 - * the just received service, or, if too much
150 - * time has elapsed from the beginning of this
151 - * boosting period, stop it
152 + * If too much time has elapsed from the beginning
153 + * of this weight-raising period, stop it
155 - if (jiffies_to_msecs(jiffies) -
156 - bfqq->last_activation_time <= BFQ_BOOST_TIMEOUT
158 - bfqq->high_weight_budget > service_to_charge)
159 - bfqq->high_weight_budget -= service_to_charge;
161 - bfqq->high_weight_budget = 0;
162 - entity->ioprio_changed = 1;
163 - __bfq_entity_update_weight_prio(
164 - bfq_entity_service_tree(entity),
166 + if (jiffies - bfqq->last_rais_start_finish >
167 + bfqd->bfq_raising_max_time) {
168 + bfqq->raising_coeff = 1;
169 + bfqq->last_rais_start_finish = jiffies;
171 + entity->ioprio_changed = 1;
172 + __bfq_entity_update_weight_prio(
173 + bfq_entity_service_tree(entity),
178 bfq_log_bfqq(bfqd, bfqq, "dispatched %lu sec req (%llu), "
179 @@ -1457,8 +1486,9 @@ retry:
180 bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
181 bfqq->pid = current->pid;
183 - bfqq->last_activation_time = 0;
184 - bfqq->high_weight_budget = 0;
185 + bfqq->raising_coeff = 1;
186 + bfqq->last_rais_start_finish = 0;
187 + bfqq->soft_rt_next_start = -1;
189 bfq_log_bfqq(bfqd, bfqq, "allocated");
191 @@ -1584,7 +1614,9 @@ static void bfq_update_idle_window(struc
192 enable_idle = bfq_bfqq_idle_window(bfqq);
194 if (atomic_read(&cic->ioc->nr_tasks) == 0 ||
195 - bfqd->bfq_slice_idle == 0 || (bfqd->hw_tag && BFQQ_SEEKY(bfqq)))
196 + bfqd->bfq_slice_idle == 0 ||
197 + (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
198 + bfqq->raising_coeff == 1))
200 else if (bfq_sample_valid(cic->ttime_samples)) {
201 if (cic->ttime_mean > bfqd->bfq_slice_idle)
202 @@ -1717,8 +1749,10 @@ static void bfq_completed_request(struct
204 if (bfq_may_expire_for_budg_timeout(bfqq))
205 bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
206 - else if (sync && bfqd->rq_in_driver == 0 &&
207 - RB_EMPTY_ROOT(&bfqq->sort_list))
209 + (bfqd->rq_in_driver == 0 ||
210 + bfqq->raising_coeff > 1)
211 + && RB_EMPTY_ROOT(&bfqq->sort_list))
212 bfq_arm_slice_timer(bfqd);
215 @@ -2065,12 +2099,18 @@ static void *bfq_init_queue(struct reque
216 bfqd->bfq_back_max = bfq_back_max;
217 bfqd->bfq_back_penalty = bfq_back_penalty;
218 bfqd->bfq_slice_idle = bfq_slice_idle;
219 + bfqd->bfq_class_idle_last_service = 0;
220 bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
221 bfqd->bfq_timeout[ASYNC] = bfq_timeout_async;
222 bfqd->bfq_timeout[SYNC] = bfq_timeout_sync;
224 bfqd->low_latency = true;
226 + bfqd->bfq_raising_coeff = 20;
227 + bfqd->bfq_raising_max_time = msecs_to_jiffies(8000);
228 + bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000);
229 + bfqd->bfq_raising_max_softrt_rate = 7000;
234 @@ -2117,6 +2157,27 @@ static ssize_t bfq_var_store(unsigned lo
238 +static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
240 + struct bfq_queue *bfqq;
241 + struct bfq_data *bfqd = e->elevator_data;
242 + ssize_t num_char = 0;
244 + num_char += sprintf(page + num_char, "Active:\n");
245 + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
246 + num_char += sprintf(page + num_char, "pid%d: weight %hu\n",
248 + bfqq->entity.weight);
250 + num_char += sprintf(page + num_char, "Idle:\n");
251 + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
252 + num_char += sprintf(page + num_char, "pid%d: weight %hu\n",
254 + bfqq->entity.weight);
259 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
260 static ssize_t __FUNC(struct elevator_queue *e, char *page) \
262 @@ -2137,6 +2198,12 @@ SHOW_FUNCTION(bfq_max_budget_async_rq_sh
263 SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[SYNC], 1);
264 SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[ASYNC], 1);
265 SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
266 +SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0);
267 +SHOW_FUNCTION(bfq_raising_max_time_show, bfqd->bfq_raising_max_time, 1);
268 +SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time,
270 +SHOW_FUNCTION(bfq_raising_max_softrt_rate_show,
271 + bfqd->bfq_raising_max_softrt_rate, 0);
274 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
275 @@ -2169,8 +2236,23 @@ STORE_FUNCTION(bfq_max_budget_async_rq_s
277 STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[ASYNC], 0,
279 +STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1,
281 +STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0,
283 +STORE_FUNCTION(bfq_raising_min_idle_time_store,
284 + &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1);
285 +STORE_FUNCTION(bfq_raising_max_softrt_rate_store,
286 + &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0);
287 #undef STORE_FUNCTION
289 +/* do nothing for the moment */
290 +static ssize_t bfq_weights_store(struct elevator_queue *e,
291 + const char *page, size_t count)
296 static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
298 u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[SYNC]);
299 @@ -2249,6 +2331,11 @@ static struct elv_fs_entry bfq_attrs[] =
300 BFQ_ATTR(timeout_sync),
301 BFQ_ATTR(timeout_async),
302 BFQ_ATTR(low_latency),
303 + BFQ_ATTR(raising_coeff),
304 + BFQ_ATTR(raising_max_time),
305 + BFQ_ATTR(raising_min_idle_time),
306 + BFQ_ATTR(raising_max_softrt_rate),
311 diff -uprN linux-2.6.28.orig/block/bfq-sched.c linux-2.6.28.new/block/bfq-sched.c
312 --- linux-2.6.28.orig/block/bfq-sched.c 2011-06-10 22:22:20.931729447 +0200
313 +++ linux-2.6.28.new/block/bfq-sched.c 2011-06-10 22:21:37.972025851 +0200
315 for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
317 static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
320 + struct bfq_data *bfqd);
322 static int bfq_update_next_active(struct bfq_sched_data *sd)
324 @@ -34,7 +35,7 @@ static int bfq_update_next_active(struct
325 * next from this subtree. By now we worry more about
326 * correctness than about performance...
328 - next_active = bfq_lookup_next_entity(sd, 0);
329 + next_active = bfq_lookup_next_entity(sd, 0, NULL);
330 sd->next_active = next_active;
332 if (next_active != NULL) {
333 @@ -134,9 +135,8 @@ static inline void bfq_calc_finish(struc
336 bfq_log_bfqq(bfqq->bfqd, bfqq,
337 - "calc_finish: serv %lu, w %lu, hi-budg %lu",
338 - service, entity->weight,
339 - bfqq->high_weight_budget);
340 + "calc_finish: serv %lu, w %lu",
341 + service, entity->weight);
342 bfq_log_bfqq(bfqq->bfqd, bfqq,
343 "calc_finish: start %llu, finish %llu, delta %llu",
344 entity->start, entity->finish,
345 @@ -518,19 +518,8 @@ __bfq_entity_update_weight_prio(struct b
346 struct bfq_service_tree *new_st = old_st;
348 if (entity->ioprio_changed) {
349 - int new_boost_coeff = 1;
350 struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
352 - if (bfqq != NULL) {
354 - bfqq->high_weight_budget * BFQ_BOOST_COEFF /
356 - bfq_log_bfqq(bfqq->bfqd, bfqq,
357 - "update_w_prio: wght %lu, hi-budg %lu, coef %d",
358 - entity->weight, bfqq->high_weight_budget,
362 BUG_ON(old_st->wsum < entity->weight);
363 old_st->wsum -= entity->weight;
365 @@ -557,7 +546,8 @@ __bfq_entity_update_weight_prio(struct b
366 * when entity->finish <= old_st->vtime).
368 new_st = bfq_entity_service_tree(entity);
369 - entity->weight = entity->orig_weight * new_boost_coeff;
370 + entity->weight = entity->orig_weight *
371 + (bfqq != NULL ? bfqq->raising_coeff : 1);
372 new_st->wsum += entity->weight;
374 if (new_st != old_st)
375 @@ -893,20 +883,30 @@ static struct bfq_entity *__bfq_lookup_n
378 static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
381 + struct bfq_data *bfqd)
383 struct bfq_service_tree *st = sd->service_tree;
384 struct bfq_entity *entity;
388 BUG_ON(sd->active_entity != NULL);
390 - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++, st++) {
391 - entity = __bfq_lookup_next_entity(st);
392 + if (bfqd != NULL &&
393 + jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
394 + entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1);
395 + if (entity != NULL) {
396 + i = BFQ_IOPRIO_CLASSES - 1;
397 + bfqd->bfq_class_idle_last_service = jiffies;
398 + sd->next_active = entity;
401 + for (; i < BFQ_IOPRIO_CLASSES; i++) {
402 + entity = __bfq_lookup_next_entity(st + i);
403 if (entity != NULL) {
405 bfq_check_next_active(sd, entity);
406 - bfq_active_extract(st, entity);
407 + bfq_active_extract(st + i, entity);
408 sd->active_entity = entity;
409 sd->next_active = NULL;
411 @@ -933,7 +933,7 @@ static struct bfq_queue *bfq_get_next_qu
413 sd = &bfqd->root_group->sched_data;
414 for (; sd != NULL; sd = entity->my_sched_data) {
415 - entity = bfq_lookup_next_entity(sd, 1);
416 + entity = bfq_lookup_next_entity(sd, 1, bfqd);
417 BUG_ON(entity == NULL);
420 diff -uprN linux-2.6.28.orig/block/bfq.h linux-2.6.28.new/block/bfq.h
421 --- linux-2.6.28.orig/block/bfq.h 2011-06-10 22:22:20.931729447 +0200
422 +++ linux-2.6.28.new/block/bfq.h 2011-06-10 22:21:37.972025851 +0200
425 - * BFQ-v1-r1 for 2.6.35: data structures and common functions prototypes.
426 + * BFQ-v2 for 2.6.37: data structures and common functions prototypes.
428 * Based on ideas and code from CFQ:
429 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
433 #define BFQ_IOPRIO_CLASSES 3
434 +#define BFQ_CL_IDLE_TIMEOUT HZ/5
436 #define BFQ_MIN_WEIGHT 1
437 #define BFQ_MAX_WEIGHT 1000
439 #define BFQ_DEFAULT_GRP_IOPRIO 0
440 #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
442 -/* Constants used in weight boosting (in its turn used to reduce latencies): */
443 -/* max factor by which the weight of a boosted queue is multiplied */
444 -#define BFQ_BOOST_COEFF 10
445 -/* max number of sectors that can be served during a boosting period */
446 -#define BFQ_BOOST_BUDGET 49152
447 -/* max duration of a boosting period, msec */
448 -#define BFQ_BOOST_TIMEOUT 6000
449 -/* min idle period after which boosting may be reactivated for a queue, msec */
450 -#define BFQ_MIN_ACT_INTERVAL 20000
455 @@ -216,6 +207,13 @@ struct bfq_group;
456 * they are charged for the whole allocated budget, to try
457 * to preserve a behavior reasonably fair among them, but
458 * without service-domain guarantees).
459 + * @bfq_raising_coeff: maximum factor by which the weight of a
460 + * weight-raised queue is multiplied
461 + * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies)
462 + * @bfq_raising_min_idle_time: minimum idle period after which weight-raising
463 + * may be reactivated for a queue (in jiffies)
464 + * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue,
465 + * in sectors per second
467 * All the fields are protected by the @queue lock.
469 @@ -260,12 +258,19 @@ struct bfq_data {
470 unsigned int bfq_back_penalty;
471 unsigned int bfq_back_max;
472 unsigned int bfq_slice_idle;
473 + u64 bfq_class_idle_last_service;
475 unsigned int bfq_user_max_budget;
476 unsigned int bfq_max_budget_async_rq;
477 unsigned int bfq_timeout[2];
481 + /* parameters of the low_latency heuristics */
482 + unsigned int bfq_raising_coeff;
483 + unsigned int bfq_raising_max_time;
484 + unsigned int bfq_raising_min_idle_time;
485 + unsigned int bfq_raising_max_softrt_rate;
489 @@ -291,7 +296,7 @@ struct bfq_data {
490 * @seek_mean: mean seek distance
491 * @last_request_pos: position of the last request enqueued
492 * @pid: pid of the process owning the queue, used for logging purposes.
493 - * @last_activation_time: time of the last (idle -> backlogged) transition
494 + * @last_rais_start_finish: time of the last (idle -> weight-raised) transition attempt
495 * @high_weight_budget: number of sectors left to serve with boosted weight
497 * A bfq_queue is a leaf request queue; it can be associated to an io_context
498 @@ -333,8 +338,9 @@ struct bfq_queue {
502 - u64 last_activation_time;
503 - unsigned long high_weight_budget;
504 + /* weight-raising fields */
505 + u64 last_rais_start_finish, soft_rt_next_start;
506 + unsigned int raising_coeff;
509 enum bfqq_state_flags {