Loading 0001-BFQ-v9-20181101.patch→0001-BFQ-v9-20181206.patch +205 −57 Original line number Diff line number Diff line Loading @@ -1615,10 +1615,10 @@ index 000000000000..fb7bb8f08b75 +} diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c new file mode 100644 index 000000000000..b904c9b0c654 index 000000000000..4c21d77c6405 --- /dev/null +++ b/block/bfq-mq-iosched.c @@ -0,0 +1,6475 @@ @@ -0,0 +1,6510 @@ +/* + * Budget Fair Queueing (BFQ) I/O scheduler. + * Loading Loading @@ -2202,7 +2202,8 @@ index 000000000000..b904c9b0c654 +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) +{ +#ifdef BFQ_GROUP_IOSCHED_ENABLED + bfq_log(bfqd, "num_active_groups %u", bfqd->num_active_groups); + bfq_log(bfqd, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); +#endif + + /* Loading @@ -2214,7 +2215,7 @@ index 000000000000..b904c9b0c654 + bfqd->queue_weights_tree.rb_node->rb_right) +#ifdef BFQ_GROUP_IOSCHED_ENABLED + ) || + (bfqd->num_active_groups > 0 + (bfqd->num_groups_with_pending_reqs > 0 +#endif + ); +} Loading Loading @@ -2411,6 +2412,7 @@ index 000000000000..b904c9b0c654 + */ + + if (sd->next_in_service || sd->in_service_entity) { + BUG_ON(!entity->in_groups_with_pending_reqs); + /* + * entity is still active, because either + * next_in_service or in_service_entity is not Loading @@ -2424,10 +2426,25 @@ index 000000000000..b904c9b0c654 + */ + break; + } + BUG_ON(!bfqd->num_active_groups); + bfqd->num_active_groups--; + bfq_log_bfqq(bfqd, bfqq, "num_active_groups %u", + bfqd->num_active_groups); + + BUG_ON(!bfqd->num_groups_with_pending_reqs && + entity->in_groups_with_pending_reqs); + /* + * The decrement of num_groups_with_pending_reqs is + * not performed immediately upon the deactivation of + * entity, but it is delayed to when it also happens + * that the first leaf descendant bfqq of entity gets + * all its pending requests completed. The following + * instructions perform this delayed decrement, if + * needed. See the comments on + * num_groups_with_pending_reqs for details. + */ + if (entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = false; + bfqd->num_groups_with_pending_reqs--; + } + bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); + } +} + Loading Loading @@ -5504,27 +5521,44 @@ index 000000000000..b904c9b0c654 + * fact, if there are active groups, then, for condition (i) + * to become false, it is enough that an active group contains + * more active processes or sub-groups than some other active + * group. We address this issue with the following bi-modal + * behavior, implemented in the function + * group. More precisely, for condition (i) to hold because of + * such a group, it is not even necessary that the group is + * (still) active: it is sufficient that, even if the group + * has become inactive, some of its descendant processes still + * have some request already dispatched but still waiting for + * completion. In fact, requests have still to be guaranteed + * their share of the throughput even after being + * dispatched. In this respect, it is easy to show that, if a + * group frequently becomes inactive while still having + * in-flight requests, and if, when this happens, the group is + * not considered in the calculation of whether the scenario + * is asymmetric, then the group may fail to be guaranteed its + * fair share of the throughput (basically because idling may + * not be performed for the descendant processes of the group, + * but it had to be). We address this issue with the + * following bi-modal behavior, implemented in the function + * bfq_symmetric_scenario(). + * + * If there are active groups, then the scenario is tagged as + * If there are groups with requests waiting for completion + * (as commented above, some of these groups may even be + * already inactive), then the scenario is tagged as + * asymmetric, conservatively, without checking any of the + * conditions (i) and (ii). So the device is idled for bfqq. + * This behavior matches also the fact that groups are created + * exactly if controlling I/O (to preserve bandwidth and + * latency guarantees) is a primary concern. + * + * On the opposite end, if there are no active groups, then + * only condition (i) is actually controlled, i.e., provided + * that condition (i) holds, idling is not performed, + * regardless of whether condition (ii) holds. In other words, + * only if condition (i) does not hold, then idling is + * allowed, and the device tends to be prevented from queueing + * many requests, possibly of several processes. Since there + * are no active groups, then, to control condition (i) it is + * enough to check whether all active queues have the same + * weight. + * exactly if controlling I/O is a primary concern (to + * preserve bandwidth and latency guarantees). + * + * On the opposite end, if there are no groups with requests + * waiting for completion, then only condition (i) is actually + * controlled, i.e., provided that condition (i) holds, idling + * is not performed, regardless of whether condition (ii) + * holds. In other words, only if condition (i) does not hold, + * then idling is allowed, and the device tends to be + * prevented from queueing many requests, possibly of several + * processes. Since there are no groups with requests waiting + * for completion, then, to control condition (i) it is enough + * to check just whether all the queues with requests waiting + * for completion also have the same weight. + * + * Not checking condition (ii) evidently exposes bfqq to the + * risk of getting less throughput than its fair share. Loading Loading @@ -5582,10 +5616,11 @@ index 000000000000..b904c9b0c654 + * bfqq is weight-raised is checked explicitly here. More + * precisely, the compound condition below takes into account + * also the fact that, even if bfqq is being weight-raised, + * the scenario is still symmetric if all active queues happen + * to be weight-raised. Actually, we should be even more + * precise here, and differentiate between interactive weight + * raising and soft real-time weight raising. + * the scenario is still symmetric if all queues with requests + * waiting for completion happen to be + * weight-raised. Actually, we should be even more precise + * here, and differentiate between interactive weight raising + * and soft real-time weight raising. + * + * As a side note, it is worth considering that the above + * device-idling countermeasures may however fail in the Loading Loading @@ -7615,7 +7650,7 @@ index 000000000000..b904c9b0c654 + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + + bfqd->queue_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; + bfqd->num_groups_with_pending_reqs = 0; + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); Loading Loading @@ -8096,10 +8131,10 @@ index 000000000000..b904c9b0c654 +MODULE_DESCRIPTION("MQ Budget Fair Queueing I/O Scheduler"); diff --git a/block/bfq-mq.h b/block/bfq-mq.h new file mode 100644 index 000000000000..511f251ac5aa index 000000000000..83ae44d9419f --- /dev/null +++ b/block/bfq-mq.h @@ -0,0 +1,1020 @@ @@ -0,0 +1,1067 @@ +/* + * BFQ v9: data structures and common functions prototypes. + * Loading Loading @@ -8296,6 +8331,9 @@ index 000000000000..511f251ac5aa + + /* flag, set to request a weight, ioprio or ioprio_class change */ + int prio_changed; + + /* flag, set if the entity is counted in groups_with_pending_reqs */ + bool in_groups_with_pending_reqs; +}; + +struct bfq_group; Loading Loading @@ -8543,10 +8581,54 @@ index 000000000000..511f251ac5aa + * bfq_weights_tree_[add|remove] for further details). + */ + struct rb_root queue_weights_tree; + + /* + * number of groups with requests still waiting for completion + */ + unsigned int num_active_groups; + * Number of groups with at least one descendant process that + * has at least one request waiting for completion. Note that + * this accounts for also requests already dispatched, but not + * yet completed. Therefore this number of groups may differ + * (be larger) than the number of active groups, as a group is + * considered active only if its corresponding entity has + * descendant queues with at least one request queued. This + * number is used to decide whether a scenario is symmetric. + * For a detailed explanation see comments on the computation + * of the variable asymmetric_scenario in the function + * bfq_better_to_idle(). + * + * However, it is hard to compute this number exactly, for + * groups with multiple descendant processes. Consider a group + * that is inactive, i.e., that has no descendant process with + * pending I/O inside BFQ queues. Then suppose that + * num_groups_with_pending_reqs is still accounting for this + * group, because the group has descendant processes with some + * I/O request still in flight. num_groups_with_pending_reqs + * should be decremented when the in-flight request of the + * last descendant process is finally completed (assuming that + * nothing else has changed for the group in the meantime, in + * terms of composition of the group and active/inactive state of child + * groups and processes). To accomplish this, an additional + * pending-request counter must be added to entities, and must + * be updated correctly. To avoid this additional field and operations, + * we resort to the following tradeoff between simplicity and + * accuracy: for an inactive group that is still counted in + * num_groups_with_pending_reqs, we decrement + * num_groups_with_pending_reqs when the first descendant + * process of the group remains with no request waiting for + * completion. + * + * Even this simpler decrement strategy requires a little + * carefulness: to avoid multiple decrements, we flag a group, + * more precisely an entity representing a group, as still + * counted in num_groups_with_pending_reqs when it becomes + * inactive. Then, when the first descendant queue of the + * entity remains with no request waiting for completion, + * num_groups_with_pending_reqs is decremented, and this flag + * is reset. After this flag is reset for the entity, + * num_groups_with_pending_reqs won't be decremented any + * longer in case a new descendant queue of the entity remains + * with no request waiting for completion. + */ + unsigned int num_groups_with_pending_reqs; + + /* + * Number of bfq_queues containing requests (including the Loading Loading @@ -9122,10 +9204,10 @@ index 000000000000..511f251ac5aa +#endif /* _BFQ_H */ diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 index 000000000000..285ae9cbc5bb index 000000000000..80aa980ee8b1 --- /dev/null +++ b/block/bfq-sched.c @@ -0,0 +1,2076 @@ @@ -0,0 +1,2078 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * Loading Loading @@ -10323,9 +10405,12 @@ index 000000000000..285ae9cbc5bb + struct bfq_data *bfqd = bfqg->bfqd; + + BUG_ON(!bfqd); + bfqd->num_active_groups++; + bfq_log_bfqg(bfqd, bfqg, "num_active_groups %u", + bfqd->num_active_groups); + if (!entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = true; + bfqd->num_groups_with_pending_reqs++; + } + bfq_log_bfqg(bfqd, bfqg, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); + } +#endif + Loading Loading @@ -10479,15 +10564,14 @@ index 000000000000..285ae9cbc5bb +} + +/** + * __bfq_deactivate_entity - deactivate an entity from its service tree. + * @entity: the entity to deactivate. + * __bfq_deactivate_entity - update sched_data and service trees for + * entity, so as to represent entity as inactive + * @entity: the entity being deactivated. + * @ins_into_idle_tree: if false, the entity will not be put into the + * idle tree. + * + * Deactivates an entity, independently of its previous state. Must + * be invoked only if entity is on a service tree. Extracts the entity + * from that tree, and if necessary and allowed, puts it into the idle + * tree. + * If necessary and allowed, puts entity into the idle tree. NOTE: + * entity may be on no tree if in service. + */ +static bool __bfq_deactivate_entity(struct bfq_entity *entity, + bool ins_into_idle_tree) Loading Loading @@ -11204,10 +11288,10 @@ index 000000000000..285ae9cbc5bb +} diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c new file mode 100644 index 000000000000..c1abe12181f9 index 000000000000..fbc0d3985394 --- /dev/null +++ b/block/bfq-sq-iosched.c @@ -0,0 +1,5889 @@ @@ -0,0 +1,5906 @@ +/* + * Budget Fair Queueing (BFQ) I/O scheduler. + * Loading Loading @@ -11731,7 +11815,8 @@ index 000000000000..c1abe12181f9 +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) +{ +#ifdef BFQ_GROUP_IOSCHED_ENABLED + bfq_log(bfqd, "num_active_groups %u", bfqd->num_active_groups); + bfq_log(bfqd, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); +#endif + + /* Loading @@ -11743,7 +11828,7 @@ index 000000000000..c1abe12181f9 + bfqd->queue_weights_tree.rb_node->rb_right) +#ifdef BFQ_GROUP_IOSCHED_ENABLED + ) || + (bfqd->num_active_groups > 0 + (bfqd->num_groups_with_pending_reqs > 0 +#endif + ); +} Loading Loading @@ -11940,6 +12025,7 @@ index 000000000000..c1abe12181f9 + */ + + if (sd->next_in_service || sd->in_service_entity) { + BUG_ON(!entity->in_groups_with_pending_reqs); + /* + * entity is still active, because either + * next_in_service or in_service_entity is not Loading @@ -11953,10 +12039,25 @@ index 000000000000..c1abe12181f9 + */ + break; + } + BUG_ON(!bfqd->num_active_groups); + bfqd->num_active_groups--; + bfq_log_bfqq(bfqd, bfqq, "num_active_groups %u", + bfqd->num_active_groups); + + BUG_ON(!bfqd->num_groups_with_pending_reqs && + entity->in_groups_with_pending_reqs); + /* + * The decrement of num_groups_with_pending_reqs is + * not performed immediately upon the deactivation of + * entity, but it is delayed to when it also happens + * that the first leaf descendant bfqq of entity gets + * all its pending requests completed. The following + * instructions perform this delayed decrement, if + * needed. See the comments on + * num_groups_with_pending_reqs for details. + */ + if (entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = false; + bfqd->num_groups_with_pending_reqs--; + } + bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); + } +} + Loading Loading @@ -16633,7 +16734,7 @@ index 000000000000..c1abe12181f9 + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + + bfqd->queue_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; + bfqd->num_groups_with_pending_reqs = 0; + + INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); + Loading Loading @@ -17099,10 +17200,10 @@ index 000000000000..c1abe12181f9 +MODULE_LICENSE("GPL"); diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 index 000000000000..623aabfe67b1 index 000000000000..6d0b1b9d9a76 --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,1017 @@ @@ -0,0 +1,1064 @@ +/* + * BFQ v9: data structures and common functions prototypes. + * Loading Loading @@ -17306,6 +17407,9 @@ index 000000000000..623aabfe67b1 + + /* flag, set to request a weight, ioprio or ioprio_class change */ + int prio_changed; + + /* flag, set if the entity is counted in groups_with_pending_reqs */ + bool in_groups_with_pending_reqs; +}; + +struct bfq_group; Loading Loading @@ -17550,10 +17654,54 @@ index 000000000000..623aabfe67b1 + * bfq_weights_tree_[add|remove] for further details). + */ + struct rb_root queue_weights_tree; + + /* + * number of groups with requests still waiting for completion + */ + unsigned int num_active_groups; + * Number of groups with at least one descendant process that + * has at least one request waiting for completion. Note that + * this accounts for also requests already dispatched, but not + * yet completed. Therefore this number of groups may differ + * (be larger) than the number of active groups, as a group is + * considered active only if its corresponding entity has + * descendant queues with at least one request queued. This + * number is used to decide whether a scenario is symmetric. + * For a detailed explanation see comments on the computation + * of the variable asymmetric_scenario in the function + * bfq_better_to_idle(). + * + * However, it is hard to compute this number exactly, for + * groups with multiple descendant processes. Consider a group + * that is inactive, i.e., that has no descendant process with + * pending I/O inside BFQ queues. Then suppose that + * num_groups_with_pending_reqs is still accounting for this + * group, because the group has descendant processes with some + * I/O request still in flight. num_groups_with_pending_reqs + * should be decremented when the in-flight request of the + * last descendant process is finally completed (assuming that + * nothing else has changed for the group in the meantime, in + * terms of composition of the group and active/inactive state of child + * groups and processes). To accomplish this, an additional + * pending-request counter must be added to entities, and must + * be updated correctly. To avoid this additional field and operations, + * we resort to the following tradeoff between simplicity and + * accuracy: for an inactive group that is still counted in + * num_groups_with_pending_reqs, we decrement + * num_groups_with_pending_reqs when the first descendant + * process of the group remains with no request waiting for + * completion. + * + * Even this simpler decrement strategy requires a little + * carefulness: to avoid multiple decrements, we flag a group, + * more precisely an entity representing a group, as still + * counted in num_groups_with_pending_reqs when it becomes + * inactive. Then, when the first descendant queue of the + * entity remains with no request waiting for completion, + * num_groups_with_pending_reqs is decremented, and this flag + * is reset. After this flag is reset for the entity, + * num_groups_with_pending_reqs won't be decremented any + * longer in case a new descendant queue of the entity remains + * with no request waiting for completion. + */ + unsigned int num_groups_with_pending_reqs; + + /* + * Number of bfq_queues containing requests (including the PKGBUILD +4 −4 Original line number Diff line number Diff line Loading @@ -12,11 +12,11 @@ _basekernel=4.19 _basever=419 _aufs=20181119 _bfq=v9 _bfqdate=20181101 _bfqdate=20181206 _sub=8 _commit= pkgver=${_basekernel}.${_sub} pkgrel=1 pkgrel=2 arch=('i686' 'x86_64') url="http://www.kernel.org/" license=('GPL2') Loading @@ -40,7 +40,7 @@ source=("https://www.kernel.org/pub/linux/kernel/v4.x/linux-${_basekernel}.tar.x 'tmpfs-idr.patch' 'vfs-ino.patch' #"0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/Algodev-github/bfq-mq/compare/0adb328...698937e.patch" 0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/sirlucjan/kernel-patches/raw/master/4.19/bfq-sq-mq/4.19-bfq-sq-mq-v9r1-2K181101-rc1.patch 0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/sirlucjan/kernel-patches/raw/master/4.19/bfq-sq-mq/4.19-bfq-sq-mq-v9r1-2K181206-rc2.patch # ARCH Patches '0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch' # MANJARO Patches Loading Loading @@ -79,7 +79,7 @@ sha256sums=('0c68f5655528aed4f99dae71a5b259edc93239fa899e2df79c055275c21749a1' '37c07a2dd5249ce9277a370cf60cbebb24dc1e92b845ce419de63453d5e0b685' 'a50226860ed658251eb74014daad773cb0a8700ed7c5b81548ee4f77e8d6d4de' '7f861935faf7ebd2d528052a363f0356c9b5239e32a68b4ec23dcf95ee91e708' '3ac265b7be567e628c073d64bd9a9090360c9d98e9c7b9f60ca206a86882932e' 'bd5fa32445dcd1127ddaca3d3189436557e195090998d5c9b6d1d1a801b56978' '37b86ca3de148a34258e3176dbf41488d9dbd19e93adbd22a062b3c41332ce85' '94afbc6a9cb0709f6cd71879bae66454ec26d37c83f49f58e4de28d47678e66b' '8dc7285a797c77e917aab1c05847370b71725389b9718c58b4565b40eed80d85' Loading Loading
0001-BFQ-v9-20181101.patch→0001-BFQ-v9-20181206.patch +205 −57 Original line number Diff line number Diff line Loading @@ -1615,10 +1615,10 @@ index 000000000000..fb7bb8f08b75 +} diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c new file mode 100644 index 000000000000..b904c9b0c654 index 000000000000..4c21d77c6405 --- /dev/null +++ b/block/bfq-mq-iosched.c @@ -0,0 +1,6475 @@ @@ -0,0 +1,6510 @@ +/* + * Budget Fair Queueing (BFQ) I/O scheduler. + * Loading Loading @@ -2202,7 +2202,8 @@ index 000000000000..b904c9b0c654 +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) +{ +#ifdef BFQ_GROUP_IOSCHED_ENABLED + bfq_log(bfqd, "num_active_groups %u", bfqd->num_active_groups); + bfq_log(bfqd, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); +#endif + + /* Loading @@ -2214,7 +2215,7 @@ index 000000000000..b904c9b0c654 + bfqd->queue_weights_tree.rb_node->rb_right) +#ifdef BFQ_GROUP_IOSCHED_ENABLED + ) || + (bfqd->num_active_groups > 0 + (bfqd->num_groups_with_pending_reqs > 0 +#endif + ); +} Loading Loading @@ -2411,6 +2412,7 @@ index 000000000000..b904c9b0c654 + */ + + if (sd->next_in_service || sd->in_service_entity) { + BUG_ON(!entity->in_groups_with_pending_reqs); + /* + * entity is still active, because either + * next_in_service or in_service_entity is not Loading @@ -2424,10 +2426,25 @@ index 000000000000..b904c9b0c654 + */ + break; + } + BUG_ON(!bfqd->num_active_groups); + bfqd->num_active_groups--; + bfq_log_bfqq(bfqd, bfqq, "num_active_groups %u", + bfqd->num_active_groups); + + BUG_ON(!bfqd->num_groups_with_pending_reqs && + entity->in_groups_with_pending_reqs); + /* + * The decrement of num_groups_with_pending_reqs is + * not performed immediately upon the deactivation of + * entity, but it is delayed to when it also happens + * that the first leaf descendant bfqq of entity gets + * all its pending requests completed. The following + * instructions perform this delayed decrement, if + * needed. See the comments on + * num_groups_with_pending_reqs for details. + */ + if (entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = false; + bfqd->num_groups_with_pending_reqs--; + } + bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); + } +} + Loading Loading @@ -5504,27 +5521,44 @@ index 000000000000..b904c9b0c654 + * fact, if there are active groups, then, for condition (i) + * to become false, it is enough that an active group contains + * more active processes or sub-groups than some other active + * group. We address this issue with the following bi-modal + * behavior, implemented in the function + * group. More precisely, for condition (i) to hold because of + * such a group, it is not even necessary that the group is + * (still) active: it is sufficient that, even if the group + * has become inactive, some of its descendant processes still + * have some request already dispatched but still waiting for + * completion. In fact, requests have still to be guaranteed + * their share of the throughput even after being + * dispatched. In this respect, it is easy to show that, if a + * group frequently becomes inactive while still having + * in-flight requests, and if, when this happens, the group is + * not considered in the calculation of whether the scenario + * is asymmetric, then the group may fail to be guaranteed its + * fair share of the throughput (basically because idling may + * not be performed for the descendant processes of the group, + * but it had to be). We address this issue with the + * following bi-modal behavior, implemented in the function + * bfq_symmetric_scenario(). + * + * If there are active groups, then the scenario is tagged as + * If there are groups with requests waiting for completion + * (as commented above, some of these groups may even be + * already inactive), then the scenario is tagged as + * asymmetric, conservatively, without checking any of the + * conditions (i) and (ii). So the device is idled for bfqq. + * This behavior matches also the fact that groups are created + * exactly if controlling I/O (to preserve bandwidth and + * latency guarantees) is a primary concern. + * + * On the opposite end, if there are no active groups, then + * only condition (i) is actually controlled, i.e., provided + * that condition (i) holds, idling is not performed, + * regardless of whether condition (ii) holds. In other words, + * only if condition (i) does not hold, then idling is + * allowed, and the device tends to be prevented from queueing + * many requests, possibly of several processes. Since there + * are no active groups, then, to control condition (i) it is + * enough to check whether all active queues have the same + * weight. + * exactly if controlling I/O is a primary concern (to + * preserve bandwidth and latency guarantees). + * + * On the opposite end, if there are no groups with requests + * waiting for completion, then only condition (i) is actually + * controlled, i.e., provided that condition (i) holds, idling + * is not performed, regardless of whether condition (ii) + * holds. In other words, only if condition (i) does not hold, + * then idling is allowed, and the device tends to be + * prevented from queueing many requests, possibly of several + * processes. Since there are no groups with requests waiting + * for completion, then, to control condition (i) it is enough + * to check just whether all the queues with requests waiting + * for completion also have the same weight. + * + * Not checking condition (ii) evidently exposes bfqq to the + * risk of getting less throughput than its fair share. Loading Loading @@ -5582,10 +5616,11 @@ index 000000000000..b904c9b0c654 + * bfqq is weight-raised is checked explicitly here. More + * precisely, the compound condition below takes into account + * also the fact that, even if bfqq is being weight-raised, + * the scenario is still symmetric if all active queues happen + * to be weight-raised. Actually, we should be even more + * precise here, and differentiate between interactive weight + * raising and soft real-time weight raising. + * the scenario is still symmetric if all queues with requests + * waiting for completion happen to be + * weight-raised. Actually, we should be even more precise + * here, and differentiate between interactive weight raising + * and soft real-time weight raising. + * + * As a side note, it is worth considering that the above + * device-idling countermeasures may however fail in the Loading Loading @@ -7615,7 +7650,7 @@ index 000000000000..b904c9b0c654 + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + + bfqd->queue_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; + bfqd->num_groups_with_pending_reqs = 0; + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); Loading Loading @@ -8096,10 +8131,10 @@ index 000000000000..b904c9b0c654 +MODULE_DESCRIPTION("MQ Budget Fair Queueing I/O Scheduler"); diff --git a/block/bfq-mq.h b/block/bfq-mq.h new file mode 100644 index 000000000000..511f251ac5aa index 000000000000..83ae44d9419f --- /dev/null +++ b/block/bfq-mq.h @@ -0,0 +1,1020 @@ @@ -0,0 +1,1067 @@ +/* + * BFQ v9: data structures and common functions prototypes. + * Loading Loading @@ -8296,6 +8331,9 @@ index 000000000000..511f251ac5aa + + /* flag, set to request a weight, ioprio or ioprio_class change */ + int prio_changed; + + /* flag, set if the entity is counted in groups_with_pending_reqs */ + bool in_groups_with_pending_reqs; +}; + +struct bfq_group; Loading Loading @@ -8543,10 +8581,54 @@ index 000000000000..511f251ac5aa + * bfq_weights_tree_[add|remove] for further details). + */ + struct rb_root queue_weights_tree; + + /* + * number of groups with requests still waiting for completion + */ + unsigned int num_active_groups; + * Number of groups with at least one descendant process that + * has at least one request waiting for completion. Note that + * this accounts for also requests already dispatched, but not + * yet completed. Therefore this number of groups may differ + * (be larger) than the number of active groups, as a group is + * considered active only if its corresponding entity has + * descendant queues with at least one request queued. This + * number is used to decide whether a scenario is symmetric. + * For a detailed explanation see comments on the computation + * of the variable asymmetric_scenario in the function + * bfq_better_to_idle(). + * + * However, it is hard to compute this number exactly, for + * groups with multiple descendant processes. Consider a group + * that is inactive, i.e., that has no descendant process with + * pending I/O inside BFQ queues. Then suppose that + * num_groups_with_pending_reqs is still accounting for this + * group, because the group has descendant processes with some + * I/O request still in flight. num_groups_with_pending_reqs + * should be decremented when the in-flight request of the + * last descendant process is finally completed (assuming that + * nothing else has changed for the group in the meantime, in + * terms of composition of the group and active/inactive state of child + * groups and processes). To accomplish this, an additional + * pending-request counter must be added to entities, and must + * be updated correctly. To avoid this additional field and operations, + * we resort to the following tradeoff between simplicity and + * accuracy: for an inactive group that is still counted in + * num_groups_with_pending_reqs, we decrement + * num_groups_with_pending_reqs when the first descendant + * process of the group remains with no request waiting for + * completion. + * + * Even this simpler decrement strategy requires a little + * carefulness: to avoid multiple decrements, we flag a group, + * more precisely an entity representing a group, as still + * counted in num_groups_with_pending_reqs when it becomes + * inactive. Then, when the first descendant queue of the + * entity remains with no request waiting for completion, + * num_groups_with_pending_reqs is decremented, and this flag + * is reset. After this flag is reset for the entity, + * num_groups_with_pending_reqs won't be decremented any + * longer in case a new descendant queue of the entity remains + * with no request waiting for completion. + */ + unsigned int num_groups_with_pending_reqs; + + /* + * Number of bfq_queues containing requests (including the Loading Loading @@ -9122,10 +9204,10 @@ index 000000000000..511f251ac5aa +#endif /* _BFQ_H */ diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 index 000000000000..285ae9cbc5bb index 000000000000..80aa980ee8b1 --- /dev/null +++ b/block/bfq-sched.c @@ -0,0 +1,2076 @@ @@ -0,0 +1,2078 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * Loading Loading @@ -10323,9 +10405,12 @@ index 000000000000..285ae9cbc5bb + struct bfq_data *bfqd = bfqg->bfqd; + + BUG_ON(!bfqd); + bfqd->num_active_groups++; + bfq_log_bfqg(bfqd, bfqg, "num_active_groups %u", + bfqd->num_active_groups); + if (!entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = true; + bfqd->num_groups_with_pending_reqs++; + } + bfq_log_bfqg(bfqd, bfqg, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); + } +#endif + Loading Loading @@ -10479,15 +10564,14 @@ index 000000000000..285ae9cbc5bb +} + +/** + * __bfq_deactivate_entity - deactivate an entity from its service tree. + * @entity: the entity to deactivate. + * __bfq_deactivate_entity - update sched_data and service trees for + * entity, so as to represent entity as inactive + * @entity: the entity being deactivated. + * @ins_into_idle_tree: if false, the entity will not be put into the + * idle tree. + * + * Deactivates an entity, independently of its previous state. Must + * be invoked only if entity is on a service tree. Extracts the entity + * from that tree, and if necessary and allowed, puts it into the idle + * tree. + * If necessary and allowed, puts entity into the idle tree. NOTE: + * entity may be on no tree if in service. + */ +static bool __bfq_deactivate_entity(struct bfq_entity *entity, + bool ins_into_idle_tree) Loading Loading @@ -11204,10 +11288,10 @@ index 000000000000..285ae9cbc5bb +} diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c new file mode 100644 index 000000000000..c1abe12181f9 index 000000000000..fbc0d3985394 --- /dev/null +++ b/block/bfq-sq-iosched.c @@ -0,0 +1,5889 @@ @@ -0,0 +1,5906 @@ +/* + * Budget Fair Queueing (BFQ) I/O scheduler. + * Loading Loading @@ -11731,7 +11815,8 @@ index 000000000000..c1abe12181f9 +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) +{ +#ifdef BFQ_GROUP_IOSCHED_ENABLED + bfq_log(bfqd, "num_active_groups %u", bfqd->num_active_groups); + bfq_log(bfqd, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); +#endif + + /* Loading @@ -11743,7 +11828,7 @@ index 000000000000..c1abe12181f9 + bfqd->queue_weights_tree.rb_node->rb_right) +#ifdef BFQ_GROUP_IOSCHED_ENABLED + ) || + (bfqd->num_active_groups > 0 + (bfqd->num_groups_with_pending_reqs > 0 +#endif + ); +} Loading Loading @@ -11940,6 +12025,7 @@ index 000000000000..c1abe12181f9 + */ + + if (sd->next_in_service || sd->in_service_entity) { + BUG_ON(!entity->in_groups_with_pending_reqs); + /* + * entity is still active, because either + * next_in_service or in_service_entity is not Loading @@ -11953,10 +12039,25 @@ index 000000000000..c1abe12181f9 + */ + break; + } + BUG_ON(!bfqd->num_active_groups); + bfqd->num_active_groups--; + bfq_log_bfqq(bfqd, bfqq, "num_active_groups %u", + bfqd->num_active_groups); + + BUG_ON(!bfqd->num_groups_with_pending_reqs && + entity->in_groups_with_pending_reqs); + /* + * The decrement of num_groups_with_pending_reqs is + * not performed immediately upon the deactivation of + * entity, but it is delayed to when it also happens + * that the first leaf descendant bfqq of entity gets + * all its pending requests completed. The following + * instructions perform this delayed decrement, if + * needed. See the comments on + * num_groups_with_pending_reqs for details. + */ + if (entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = false; + bfqd->num_groups_with_pending_reqs--; + } + bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); + } +} + Loading Loading @@ -16633,7 +16734,7 @@ index 000000000000..c1abe12181f9 + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + + bfqd->queue_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; + bfqd->num_groups_with_pending_reqs = 0; + + INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); + Loading Loading @@ -17099,10 +17200,10 @@ index 000000000000..c1abe12181f9 +MODULE_LICENSE("GPL"); diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 index 000000000000..623aabfe67b1 index 000000000000..6d0b1b9d9a76 --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,1017 @@ @@ -0,0 +1,1064 @@ +/* + * BFQ v9: data structures and common functions prototypes. + * Loading Loading @@ -17306,6 +17407,9 @@ index 000000000000..623aabfe67b1 + + /* flag, set to request a weight, ioprio or ioprio_class change */ + int prio_changed; + + /* flag, set if the entity is counted in groups_with_pending_reqs */ + bool in_groups_with_pending_reqs; +}; + +struct bfq_group; Loading Loading @@ -17550,10 +17654,54 @@ index 000000000000..623aabfe67b1 + * bfq_weights_tree_[add|remove] for further details). + */ + struct rb_root queue_weights_tree; + + /* + * number of groups with requests still waiting for completion + */ + unsigned int num_active_groups; + * Number of groups with at least one descendant process that + * has at least one request waiting for completion. Note that + * this accounts for also requests already dispatched, but not + * yet completed. Therefore this number of groups may differ + * (be larger) than the number of active groups, as a group is + * considered active only if its corresponding entity has + * descendant queues with at least one request queued. This + * number is used to decide whether a scenario is symmetric. + * For a detailed explanation see comments on the computation + * of the variable asymmetric_scenario in the function + * bfq_better_to_idle(). + * + * However, it is hard to compute this number exactly, for + * groups with multiple descendant processes. Consider a group + * that is inactive, i.e., that has no descendant process with + * pending I/O inside BFQ queues. Then suppose that + * num_groups_with_pending_reqs is still accounting for this + * group, because the group has descendant processes with some + * I/O request still in flight. num_groups_with_pending_reqs + * should be decremented when the in-flight request of the + * last descendant process is finally completed (assuming that + * nothing else has changed for the group in the meantime, in + * terms of composition of the group and active/inactive state of child + * groups and processes). To accomplish this, an additional + * pending-request counter must be added to entities, and must + * be updated correctly. To avoid this additional field and operations, + * we resort to the following tradeoff between simplicity and + * accuracy: for an inactive group that is still counted in + * num_groups_with_pending_reqs, we decrement + * num_groups_with_pending_reqs when the first descendant + * process of the group remains with no request waiting for + * completion. + * + * Even this simpler decrement strategy requires a little + * carefulness: to avoid multiple decrements, we flag a group, + * more precisely an entity representing a group, as still + * counted in num_groups_with_pending_reqs when it becomes + * inactive. Then, when the first descendant queue of the + * entity remains with no request waiting for completion, + * num_groups_with_pending_reqs is decremented, and this flag + * is reset. After this flag is reset for the entity, + * num_groups_with_pending_reqs won't be decremented any + * longer in case a new descendant queue of the entity remains + * with no request waiting for completion. + */ + unsigned int num_groups_with_pending_reqs; + + /* + * Number of bfq_queues containing requests (including the
PKGBUILD +4 −4 Original line number Diff line number Diff line Loading @@ -12,11 +12,11 @@ _basekernel=4.19 _basever=419 _aufs=20181119 _bfq=v9 _bfqdate=20181101 _bfqdate=20181206 _sub=8 _commit= pkgver=${_basekernel}.${_sub} pkgrel=1 pkgrel=2 arch=('i686' 'x86_64') url="http://www.kernel.org/" license=('GPL2') Loading @@ -40,7 +40,7 @@ source=("https://www.kernel.org/pub/linux/kernel/v4.x/linux-${_basekernel}.tar.x 'tmpfs-idr.patch' 'vfs-ino.patch' #"0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/Algodev-github/bfq-mq/compare/0adb328...698937e.patch" 0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/sirlucjan/kernel-patches/raw/master/4.19/bfq-sq-mq/4.19-bfq-sq-mq-v9r1-2K181101-rc1.patch 0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/sirlucjan/kernel-patches/raw/master/4.19/bfq-sq-mq/4.19-bfq-sq-mq-v9r1-2K181206-rc2.patch # ARCH Patches '0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch' # MANJARO Patches Loading Loading @@ -79,7 +79,7 @@ sha256sums=('0c68f5655528aed4f99dae71a5b259edc93239fa899e2df79c055275c21749a1' '37c07a2dd5249ce9277a370cf60cbebb24dc1e92b845ce419de63453d5e0b685' 'a50226860ed658251eb74014daad773cb0a8700ed7c5b81548ee4f77e8d6d4de' '7f861935faf7ebd2d528052a363f0356c9b5239e32a68b4ec23dcf95ee91e708' '3ac265b7be567e628c073d64bd9a9090360c9d98e9c7b9f60ca206a86882932e' 'bd5fa32445dcd1127ddaca3d3189436557e195090998d5c9b6d1d1a801b56978' '37b86ca3de148a34258e3176dbf41488d9dbd19e93adbd22a062b3c41332ce85' '94afbc6a9cb0709f6cd71879bae66454ec26d37c83f49f58e4de28d47678e66b' '8dc7285a797c77e917aab1c05847370b71725389b9718c58b4565b40eed80d85' Loading