diff --git a/0001-BFQ-v9-20181101.patch b/0001-BFQ-v9-20181206.patch
similarity index 98%
rename from 0001-BFQ-v9-20181101.patch
rename to 0001-BFQ-v9-20181206.patch
index 3d9f9cf10661e6ab0764a38ce4123331d189c525..3ad66a8723f45776be0ab237f9e2bb83b6095dd2 100644
--- a/0001-BFQ-v9-20181101.patch
+++ b/0001-BFQ-v9-20181206.patch
@@ -1615,10 +1615,10 @@ index 000000000000..fb7bb8f08b75
 +}
 diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c
 new file mode 100644
-index 000000000000..b904c9b0c654
+index 000000000000..4c21d77c6405
 --- /dev/null
 +++ b/block/bfq-mq-iosched.c
-@@ -0,0 +1,6475 @@
+@@ -0,0 +1,6510 @@
 +/*
 + * Budget Fair Queueing (BFQ) I/O scheduler.
 + *
@@ -2202,7 +2202,8 @@ index 000000000000..b904c9b0c654
 +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
 +{
 +#ifdef BFQ_GROUP_IOSCHED_ENABLED
-+	bfq_log(bfqd, "num_active_groups %u", bfqd->num_active_groups);
++	bfq_log(bfqd, "num_groups_with_pending_reqs %u",
++		bfqd->num_groups_with_pending_reqs);
 +#endif
 +
 +	/*
@@ -2214,7 +2215,7 @@ index 000000000000..b904c9b0c654
 +		 bfqd->queue_weights_tree.rb_node->rb_right)
 +#ifdef BFQ_GROUP_IOSCHED_ENABLED
 +	       ) ||
-+		(bfqd->num_active_groups > 0
++		(bfqd->num_groups_with_pending_reqs > 0
 +#endif
 +	       );
 +}
@@ -2411,6 +2412,7 @@ index 000000000000..b904c9b0c654
 +		 */
 +
 +		if (sd->next_in_service || sd->in_service_entity) {
++			BUG_ON(!entity->in_groups_with_pending_reqs);
 +			/*
 +			 * entity is still active, because either
 +			 * next_in_service or in_service_entity is not
@@ -2424,10 +2426,25 @@ index 000000000000..b904c9b0c654
 +			 */
 +			break;
 +		}
-+		BUG_ON(!bfqd->num_active_groups);
-+		bfqd->num_active_groups--;
-+		bfq_log_bfqq(bfqd, bfqq, "num_active_groups %u",
-+			     bfqd->num_active_groups);
++
++		BUG_ON(!bfqd->num_groups_with_pending_reqs &&
++		       entity->in_groups_with_pending_reqs);
++		/*
++		 * The decrement of num_groups_with_pending_reqs is
++		 * not performed immediately upon the deactivation of
++		 * entity, but it is delayed to when it also happens
++		 * that the first leaf descendant bfqq of entity gets
++		 * all its pending requests completed. The following
++		 * instructions perform this delayed decrement, if
++		 * needed. See the comments on
++		 * num_groups_with_pending_reqs for details.
++		 */
++		if (entity->in_groups_with_pending_reqs) {
++			entity->in_groups_with_pending_reqs = false;
++			bfqd->num_groups_with_pending_reqs--;
++		}
++		bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u",
++			     bfqd->num_groups_with_pending_reqs);
 +	}
 +}
 +
@@ -5504,27 +5521,44 @@ index 000000000000..b904c9b0c654
 +	 * fact, if there are active groups, then, for condition (i)
 +	 * to become false, it is enough that an active group contains
 +	 * more active processes or sub-groups than some other active
-+	 * group. We address this issue with the following bi-modal
-+	 * behavior, implemented in the function
++	 * group. More precisely, for condition (i) to hold because of
++	 * such a group, it is not even necessary that the group is
++	 * (still) active: it is sufficient that, even if the group
++	 * has become inactive, some of its descendant processes still
++	 * have some request already dispatched but still waiting for
++	 * completion. In fact, requests have still to be guaranteed
++	 * their share of the throughput even after being
++	 * dispatched. In this respect, it is easy to show that, if a
++	 * group frequently becomes inactive while still having
++	 * in-flight requests, and if, when this happens, the group is
++	 * not considered in the calculation of whether the scenario
++	 * is asymmetric, then the group may fail to be guaranteed its
++	 * fair share of the throughput (basically because idling may
++	 * not be performed for the descendant processes of the group,
++	 * but it had to be). We address this issue with the
++	 * following bi-modal behavior, implemented in the function
 +	 * bfq_symmetric_scenario().
 +	 *
-+	 * If there are active groups, then the scenario is tagged as
++	 * If there are groups with requests waiting for completion
++	 * (as commented above, some of these groups may even be
++	 * already inactive), then the scenario is tagged as
 +	 * asymmetric, conservatively, without checking any of the
 +	 * conditions (i) and (ii). So the device is idled for bfqq.
 +	 * This behavior matches also the fact that groups are created
-+	 * exactly if controlling I/O (to preserve bandwidth and
-+	 * latency guarantees) is a primary concern.
++	 * exactly if controlling I/O is a primary concern (to
++	 * preserve bandwidth and latency guarantees).
 +	 *
-+	 * On the opposite end, if there are no active groups, then
-+	 * only condition (i) is actually controlled, i.e., provided
-+	 * that condition (i) holds, idling is not performed,
-+	 * regardless of whether condition (ii) holds. In other words,
-+	 * only if condition (i) does not hold, then idling is
-+	 * allowed, and the device tends to be prevented from queueing
-+	 * many requests, possibly of several processes. Since there
-+	 * are no active groups, then, to control condition (i) it is
-+	 * enough to check whether all active queues have the same
-+	 * weight.
++	 * On the opposite end, if there are no groups with requests
++	 * waiting for completion, then only condition (i) is actually
++	 * controlled, i.e., provided that condition (i) holds, idling
++	 * is not performed, regardless of whether condition (ii)
++	 * holds. In other words, only if condition (i) does not hold,
++	 * then idling is allowed, and the device tends to be
++	 * prevented from queueing many requests, possibly of several
++	 * processes. Since there are no groups with requests waiting
++	 * for completion, then, to control condition (i) it is enough
++	 * to check just whether all the queues with requests waiting
++	 * for completion also have the same weight.
 +	 *
 +	 * Not checking condition (ii) evidently exposes bfqq to the
 +	 * risk of getting less throughput than its fair share.
@@ -5582,10 +5616,11 @@ index 000000000000..b904c9b0c654
 +	 * bfqq is weight-raised is checked explicitly here. More
 +	 * precisely, the compound condition below takes into account
 +	 * also the fact that, even if bfqq is being weight-raised,
-+	 * the scenario is still symmetric if all active queues happen
-+	 * to be weight-raised. Actually, we should be even more
-+	 * precise here, and differentiate between interactive weight
-+	 * raising and soft real-time weight raising.
++	 * the scenario is still symmetric if all queues with requests
++	 * waiting for completion happen to be
++	 * weight-raised. Actually, we should be even more precise
++	 * here, and differentiate between interactive weight raising
++	 * and soft real-time weight raising.
 +	 *
 +	 * As a side note, it is worth considering that the above
 +	 * device-idling countermeasures may however fail in the
@@ -7615,7 +7650,7 @@ index 000000000000..b904c9b0c654
 +	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
 +
 +	bfqd->queue_weights_tree = RB_ROOT;
-+	bfqd->num_active_groups = 0;
++	bfqd->num_groups_with_pending_reqs = 0;
 +
 +	INIT_LIST_HEAD(&bfqd->active_list);
 +	INIT_LIST_HEAD(&bfqd->idle_list);
@@ -8096,10 +8131,10 @@ index 000000000000..b904c9b0c654
 +MODULE_DESCRIPTION("MQ Budget Fair Queueing I/O Scheduler");
 diff --git a/block/bfq-mq.h b/block/bfq-mq.h
 new file mode 100644
-index 000000000000..511f251ac5aa
+index 000000000000..83ae44d9419f
 --- /dev/null
 +++ b/block/bfq-mq.h
-@@ -0,0 +1,1020 @@
+@@ -0,0 +1,1067 @@
 +/*
 + * BFQ v9: data structures and common functions prototypes.
 + *
@@ -8296,6 +8331,9 @@ index 000000000000..511f251ac5aa
 +
 +	/* flag, set to request a weight, ioprio or ioprio_class change */
 +	int prio_changed;
++
++	/* flag, set if the entity is counted in groups_with_pending_reqs */
++	bool in_groups_with_pending_reqs;
 +};
 +
 +struct bfq_group;
@@ -8543,10 +8581,54 @@ index 000000000000..511f251ac5aa
 +	 * bfq_weights_tree_[add|remove] for further details).
 +	 */
 +	struct rb_root queue_weights_tree;
++
 +	/*
-+	 * number of groups with requests still waiting for completion
++	 * Number of groups with at least one descendant process that
++	 * has at least one request waiting for completion. Note that
++	 * this accounts for also requests already dispatched, but not
++	 * yet completed. Therefore this number of groups may differ
++	 * (be larger) than the number of active groups, as a group is
++	 * considered active only if its corresponding entity has
++	 * descendant queues with at least one request queued. This
++	 * number is used to decide whether a scenario is symmetric.
++	 * For a detailed explanation see comments on the computation
++	 * of the variable asymmetric_scenario in the function
++	 * bfq_better_to_idle().
++	 *
++	 * However, it is hard to compute this number exactly, for
++	 * groups with multiple descendant processes. Consider a group
++	 * that is inactive, i.e., that has no descendant process with
++	 * pending I/O inside BFQ queues. Then suppose that
++	 * num_groups_with_pending_reqs is still accounting for this
++	 * group, because the group has descendant processes with some
++	 * I/O request still in flight. num_groups_with_pending_reqs
++	 * should be decremented when the in-flight request of the
++	 * last descendant process is finally completed (assuming that
++	 * nothing else has changed for the group in the meantime, in
++	 * terms of composition of the group and active/inactive state of child
++	 * groups and processes). To accomplish this, an additional
++	 * pending-request counter must be added to entities, and must
++	 * be updated correctly. To avoid this additional field and operations,
++	 * we resort to the following tradeoff between simplicity and
++	 * accuracy: for an inactive group that is still counted in
++	 * num_groups_with_pending_reqs, we decrement
++	 * num_groups_with_pending_reqs when the first descendant
++	 * process of the group remains with no request waiting for
++	 * completion.
++	 *
++	 * Even this simpler decrement strategy requires a little
++	 * carefulness: to avoid multiple decrements, we flag a group,
++	 * more precisely an entity representing a group, as still
++	 * counted in num_groups_with_pending_reqs when it becomes
++	 * inactive. Then, when the first descendant queue of the
++	 * entity remains with no request waiting for completion,
++	 * num_groups_with_pending_reqs is decremented, and this flag
++	 * is reset. After this flag is reset for the entity,
++	 * num_groups_with_pending_reqs won't be decremented any
++	 * longer in case a new descendant queue of the entity remains
++	 * with no request waiting for completion.
 +	 */
-+	unsigned int num_active_groups;
++	unsigned int num_groups_with_pending_reqs;
 +
 +	/*
 +	 * Number of bfq_queues containing requests (including the
@@ -9122,10 +9204,10 @@ index 000000000000..511f251ac5aa
 +#endif /* _BFQ_H */
 diff --git a/block/bfq-sched.c b/block/bfq-sched.c
 new file mode 100644
-index 000000000000..285ae9cbc5bb
+index 000000000000..80aa980ee8b1
 --- /dev/null
 +++ b/block/bfq-sched.c
-@@ -0,0 +1,2076 @@
+@@ -0,0 +1,2078 @@
 +/*
 + * BFQ: Hierarchical B-WF2Q+ scheduler.
 + *
@@ -10323,9 +10405,12 @@ index 000000000000..285ae9cbc5bb
 +		struct bfq_data *bfqd = bfqg->bfqd;
 +
 +		BUG_ON(!bfqd);
-+		bfqd->num_active_groups++;
-+		bfq_log_bfqg(bfqd, bfqg, "num_active_groups %u",
-+			     bfqd->num_active_groups);
++		if (!entity->in_groups_with_pending_reqs) {
++			entity->in_groups_with_pending_reqs = true;
++			bfqd->num_groups_with_pending_reqs++;
++		}
++		bfq_log_bfqg(bfqd, bfqg, "num_groups_with_pending_reqs %u",
++			     bfqd->num_groups_with_pending_reqs);
 +	}
 +#endif
 +
@@ -10479,15 +10564,14 @@ index 000000000000..285ae9cbc5bb
 +}
 +
 +/**
-+ * __bfq_deactivate_entity - deactivate an entity from its service tree.
-+ * @entity: the entity to deactivate.
++ * __bfq_deactivate_entity - update sched_data and service trees for
++ * entity, so as to represent entity as inactive
++ * @entity: the entity being deactivated.
 + * @ins_into_idle_tree: if false, the entity will not be put into the
 + *			idle tree.
 + *
-+ * Deactivates an entity, independently of its previous state. Must
-+ * be invoked only if entity is on a service tree. Extracts the entity
-+ * from that tree, and if necessary and allowed, puts it into the idle
-+ * tree.
++ * If necessary and allowed, puts entity into the idle tree. NOTE:
++ * entity may be on no tree if in service.
 + */
 +static bool __bfq_deactivate_entity(struct bfq_entity *entity,
 +				    bool ins_into_idle_tree)
@@ -11204,10 +11288,10 @@ index 000000000000..285ae9cbc5bb
 +}
 diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c
 new file mode 100644
-index 000000000000..c1abe12181f9
+index 000000000000..fbc0d3985394
 --- /dev/null
 +++ b/block/bfq-sq-iosched.c
-@@ -0,0 +1,5889 @@
+@@ -0,0 +1,5906 @@
 +/*
 + * Budget Fair Queueing (BFQ) I/O scheduler.
 + *
@@ -11731,7 +11815,8 @@ index 000000000000..c1abe12181f9
 +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
 +{
 +#ifdef BFQ_GROUP_IOSCHED_ENABLED
-+	bfq_log(bfqd, "num_active_groups %u", bfqd->num_active_groups);
++	bfq_log(bfqd, "num_groups_with_pending_reqs %u",
++		bfqd->num_groups_with_pending_reqs);
 +#endif
 +
 +	/*
@@ -11743,7 +11828,7 @@ index 000000000000..c1abe12181f9
 +		 bfqd->queue_weights_tree.rb_node->rb_right)
 +#ifdef BFQ_GROUP_IOSCHED_ENABLED
 +	       ) ||
-+		(bfqd->num_active_groups > 0
++		(bfqd->num_groups_with_pending_reqs > 0
 +#endif
 +	       );
 +}
@@ -11940,6 +12025,7 @@ index 000000000000..c1abe12181f9
 +		 */
 +
 +		if (sd->next_in_service || sd->in_service_entity) {
++			BUG_ON(!entity->in_groups_with_pending_reqs);
 +			/*
 +			 * entity is still active, because either
 +			 * next_in_service or in_service_entity is not
@@ -11953,10 +12039,25 @@ index 000000000000..c1abe12181f9
 +			 */
 +			break;
 +		}
-+		BUG_ON(!bfqd->num_active_groups);
-+		bfqd->num_active_groups--;
-+		bfq_log_bfqq(bfqd, bfqq, "num_active_groups %u",
-+			     bfqd->num_active_groups);
++
++		BUG_ON(!bfqd->num_groups_with_pending_reqs &&
++		       entity->in_groups_with_pending_reqs);
++		/*
++		 * The decrement of num_groups_with_pending_reqs is
++		 * not performed immediately upon the deactivation of
++		 * entity, but it is delayed to when it also happens
++		 * that the first leaf descendant bfqq of entity gets
++		 * all its pending requests completed. The following
++		 * instructions perform this delayed decrement, if
++		 * needed. See the comments on
++		 * num_groups_with_pending_reqs for details.
++		 */
++		if (entity->in_groups_with_pending_reqs) {
++			entity->in_groups_with_pending_reqs = false;
++			bfqd->num_groups_with_pending_reqs--;
++		}
++		bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u",
++			     bfqd->num_groups_with_pending_reqs);
 +	}
 +}
 +
@@ -16633,7 +16734,7 @@ index 000000000000..c1abe12181f9
 +	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
 +
 +	bfqd->queue_weights_tree = RB_ROOT;
-+	bfqd->num_active_groups = 0;
++	bfqd->num_groups_with_pending_reqs = 0;
 +
 +	INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
 +
@@ -17099,10 +17200,10 @@ index 000000000000..c1abe12181f9
 +MODULE_LICENSE("GPL");
 diff --git a/block/bfq.h b/block/bfq.h
 new file mode 100644
-index 000000000000..623aabfe67b1
+index 000000000000..6d0b1b9d9a76
 --- /dev/null
 +++ b/block/bfq.h
-@@ -0,0 +1,1017 @@
+@@ -0,0 +1,1064 @@
 +/*
 + * BFQ v9: data structures and common functions prototypes.
 + *
@@ -17306,6 +17407,9 @@ index 000000000000..623aabfe67b1
 +
 +	/* flag, set to request a weight, ioprio or ioprio_class change */
 +	int prio_changed;
++
++	/* flag, set if the entity is counted in groups_with_pending_reqs */
++	bool in_groups_with_pending_reqs;
 +};
 +
 +struct bfq_group;
@@ -17550,10 +17654,54 @@ index 000000000000..623aabfe67b1
 +	 * bfq_weights_tree_[add|remove] for further details).
 +	 */
 +	struct rb_root queue_weights_tree;
++
 +	/*
-+	 * number of groups with requests still waiting for completion
-+	 */
-+	unsigned int num_active_groups;
++	 * Number of groups with at least one descendant process that
++	 * has at least one request waiting for completion. Note that
++	 * this accounts for also requests already dispatched, but not
++	 * yet completed. Therefore this number of groups may differ
++	 * (be larger) than the number of active groups, as a group is
++	 * considered active only if its corresponding entity has
++	 * descendant queues with at least one request queued. This
++	 * number is used to decide whether a scenario is symmetric.
++	 * For a detailed explanation see comments on the computation
++	 * of the variable asymmetric_scenario in the function
++	 * bfq_better_to_idle().
++	 *
++	 * However, it is hard to compute this number exactly, for
++	 * groups with multiple descendant processes. Consider a group
++	 * that is inactive, i.e., that has no descendant process with
++	 * pending I/O inside BFQ queues. Then suppose that
++	 * num_groups_with_pending_reqs is still accounting for this
++	 * group, because the group has descendant processes with some
++	 * I/O request still in flight. num_groups_with_pending_reqs
++	 * should be decremented when the in-flight request of the
++	 * last descendant process is finally completed (assuming that
++	 * nothing else has changed for the group in the meantime, in
++	 * terms of composition of the group and active/inactive state of child
++	 * groups and processes). To accomplish this, an additional
++	 * pending-request counter must be added to entities, and must
++	 * be updated correctly. To avoid this additional field and operations,
++	 * we resort to the following tradeoff between simplicity and
++	 * accuracy: for an inactive group that is still counted in
++	 * num_groups_with_pending_reqs, we decrement
++	 * num_groups_with_pending_reqs when the first descendant
++	 * process of the group remains with no request waiting for
++	 * completion.
++	 *
++	 * Even this simpler decrement strategy requires a little
++	 * carefulness: to avoid multiple decrements, we flag a group,
++	 * more precisely an entity representing a group, as still
++	 * counted in num_groups_with_pending_reqs when it becomes
++	 * inactive. Then, when the first descendant queue of the
++	 * entity remains with no request waiting for completion,
++	 * num_groups_with_pending_reqs is decremented, and this flag
++	 * is reset. After this flag is reset for the entity,
++	 * num_groups_with_pending_reqs won't be decremented any
++	 * longer in case a new descendant queue of the entity remains
++	 * with no request waiting for completion.
++	 */
++	unsigned int num_groups_with_pending_reqs;
 +
 +	/*
 +	 * Number of bfq_queues containing requests (including the
diff --git a/PKGBUILD b/PKGBUILD
index b402513a5d848e1fecd4495113da9198b70e71dd..a60503ec017aa21f00c97554e0878b1b93042bb7 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -12,11 +12,11 @@ _basekernel=4.19
 _basever=419
 _aufs=20181119
 _bfq=v9
-_bfqdate=20181101
+_bfqdate=20181206
 _sub=8
 _commit=
 pkgver=${_basekernel}.${_sub}
-pkgrel=1
+pkgrel=2
 arch=('i686' 'x86_64')
 url="http://www.kernel.org/"
 license=('GPL2')
@@ -40,7 +40,7 @@ source=("https://www.kernel.org/pub/linux/kernel/v4.x/linux-${_basekernel}.tar.x
         'tmpfs-idr.patch'
         'vfs-ino.patch'
         #"0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/Algodev-github/bfq-mq/compare/0adb328...698937e.patch"
-        0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/sirlucjan/kernel-patches/raw/master/4.19/bfq-sq-mq/4.19-bfq-sq-mq-v9r1-2K181101-rc1.patch
+        0001-BFQ-${_bfq}-${_bfqdate}.patch::https://github.com/sirlucjan/kernel-patches/raw/master/4.19/bfq-sq-mq/4.19-bfq-sq-mq-v9r1-2K181206-rc2.patch
         # ARCH Patches
         '0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch'
         # MANJARO Patches
@@ -79,7 +79,7 @@ sha256sums=('0c68f5655528aed4f99dae71a5b259edc93239fa899e2df79c055275c21749a1'
             '37c07a2dd5249ce9277a370cf60cbebb24dc1e92b845ce419de63453d5e0b685'
             'a50226860ed658251eb74014daad773cb0a8700ed7c5b81548ee4f77e8d6d4de'
             '7f861935faf7ebd2d528052a363f0356c9b5239e32a68b4ec23dcf95ee91e708'
-            '3ac265b7be567e628c073d64bd9a9090360c9d98e9c7b9f60ca206a86882932e'
+            'bd5fa32445dcd1127ddaca3d3189436557e195090998d5c9b6d1d1a801b56978'
             '37b86ca3de148a34258e3176dbf41488d9dbd19e93adbd22a062b3c41332ce85'
             '94afbc6a9cb0709f6cd71879bae66454ec26d37c83f49f58e4de28d47678e66b'
             '8dc7285a797c77e917aab1c05847370b71725389b9718c58b4565b40eed80d85'
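Editorial note (not part of the patch above): the comments introduced by this update describe a delayed-decrement scheme — a group entity is flagged the first time it is counted in num_groups_with_pending_reqs, and the counter is decremented (and the flag cleared) only when a first descendant queue ends up with no request waiting for completion. The stand-alone C sketch below illustrates only that counter/flag pattern under stated assumptions; every name in it (toy_entity, toy_sched_data, toy_mark_pending, toy_clear_pending) is hypothetical and does not exist in BFQ.

/*
 * Illustrative sketch of the delayed-decrement bookkeeping described in
 * the patch comments. Only the counter/flag idea mirrors
 * bfqd->num_groups_with_pending_reqs and entity->in_groups_with_pending_reqs;
 * all identifiers here are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_entity {
	bool in_groups_with_pending_reqs;	/* already counted? */
};

struct toy_sched_data {
	unsigned int num_groups_with_pending_reqs;
};

/* Group entity gets pending I/O: count it at most once. */
static void toy_mark_pending(struct toy_sched_data *sd, struct toy_entity *e)
{
	if (!e->in_groups_with_pending_reqs) {
		e->in_groups_with_pending_reqs = true;
		sd->num_groups_with_pending_reqs++;
	}
}

/*
 * A descendant queue of the entity remains with no request waiting for
 * completion: decrement the counter, but only if the entity is still
 * flagged, so repeated calls cannot decrement twice.
 */
static void toy_clear_pending(struct toy_sched_data *sd, struct toy_entity *e)
{
	if (e->in_groups_with_pending_reqs) {
		e->in_groups_with_pending_reqs = false;
		sd->num_groups_with_pending_reqs--;
	}
}

int main(void)
{
	struct toy_sched_data sd = { 0 };
	struct toy_entity grp = { false };

	toy_mark_pending(&sd, &grp);
	toy_mark_pending(&sd, &grp);	/* no-op: already counted */
	printf("counted groups: %u\n", sd.num_groups_with_pending_reqs); /* 1 */

	toy_clear_pending(&sd, &grp);
	toy_clear_pending(&sd, &grp);	/* no-op: already uncounted */
	printf("counted groups: %u\n", sd.num_groups_with_pending_reqs); /* 0 */
	return 0;
}

The per-entity flag is what makes the decrement idempotent per "counted" period, which is exactly why the patch adds the in_groups_with_pending_reqs field to the entity structures in bfq-mq.h and bfq.h.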