diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 3c661f4226880c2077841408f5ce9b0665fa16b6..1629b07f727b8e0451c28b16868febc9972206fa 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -50,56 +50,37 @@ nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
 	return 0;
 }
 
-static int
-nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
-		      unsigned int stride, unsigned int offset, bool is_u64,
-		      u64 *res)
-{
-	u64 val, sum = 0;
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		err = nfp_abm_ctrl_stat(alink, sym, stride, offset, i,
-					is_u64, &val);
-		if (err)
-			return err;
-		sum += val;
-	}
-
-	*res = sum;
-	return 0;
-}
-
-int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
+int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val)
 {
-	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	struct nfp_cpp *cpp = abm->app->cpp;
 	u64 sym_offset;
 	int err;
 
-	sym_offset = (alink->queue_base + i) * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
-	err = __nfp_rtsym_writel(cpp, alink->abm->q_lvls, 4, 0,
-				 sym_offset, val);
+	__clear_bit(id, abm->threshold_undef);
+	if (abm->thresholds[id] == val)
+		return 0;
+
+	sym_offset = id * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
+	err = __nfp_rtsym_writel(cpp, abm->q_lvls, 4, 0, sym_offset, val);
 	if (err) {
-		nfp_err(cpp, "RED offload setting level failed on vNIC %d queue %d\n",
-			alink->id, i);
+		nfp_err(cpp,
+			"RED offload setting level failed on subqueue %d\n",
+			id);
 		return err;
 	}
 
+	abm->thresholds[id] = val;
 	return 0;
 }
 
-int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val)
+int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int queue,
+			   u32 val)
 {
-	int i, err;
+	unsigned int threshold;
 
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		err = nfp_abm_ctrl_set_q_lvl(alink, i, val);
-		if (err)
-			return err;
-	}
+	threshold = alink->queue_base + queue;
 
-	return 0;
+	return __nfp_abm_ctrl_set_q_lvl(alink->abm, threshold, val);
 }
 
 u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i)
@@ -153,42 +134,6 @@ int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
 				 i, true, &stats->overlimits);
 }
 
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats)
-{
-	u64 pkts = 0, bytes = 0;
-	int i, err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		pkts += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
-		bytes += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
-	}
-	stats->tx_pkts = pkts;
-	stats->tx_bytes = bytes;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
-				    false, &stats->backlog_bytes);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
-				    false, &stats->backlog_pkts);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &stats->drops);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &stats->overlimits);
-}
-
 int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 			       struct nfp_alink_xstats *xstats)
 {
@@ -205,22 +150,6 @@ int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 				 i, true, &xstats->ecn_marked);
 }
 
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats)
-{
-	int err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &xstats->pdrop);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &xstats->ecn_marked);
-}
-
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
 {
 	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 3d15de0ae27102b2620b5d409c352fafca9c2ab9..a5732d3bd1b7613639de466131d301d1febc22c1 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -2,10 +2,12 @@
 /* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
+#include <linux/bitmap.h>
 #include <linux/etherdevice.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
 #include "../nfpcore/nfp.h"
@@ -36,6 +38,8 @@ nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 
 	switch (type) {
+	case TC_SETUP_ROOT_QDISC:
+		return nfp_abm_setup_root(netdev, repr->app_priv, type_data);
 	case TC_SETUP_QDISC_MQ:
 		return nfp_abm_setup_tc_mq(netdev, repr->app_priv, type_data);
 	case TC_SETUP_QDISC_RED:
@@ -307,31 +311,23 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 	alink->abm = abm;
 	alink->vnic = nn;
 	alink->id = id;
-	alink->parent = TC_H_ROOT;
 	alink->total_queues = alink->vnic->max_rx_rings;
-	alink->qdiscs = kvcalloc(alink->total_queues, sizeof(*alink->qdiscs),
-				 GFP_KERNEL);
-	if (!alink->qdiscs) {
-		err = -ENOMEM;
-		goto err_free_alink;
-	}
 
 	/* This is a multi-host app, make sure MAC/PHY is up, but don't
 	 * make the MAC/PHY state follow the state of any of the ports.
 	 */
 	err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
 	if (err < 0)
-		goto err_free_qdiscs;
+		goto err_free_alink;
 
 	netif_keep_dst(nn->dp.netdev);
 
 	nfp_abm_vnic_set_mac(app->pf, abm, nn, id);
 	nfp_abm_ctrl_read_params(alink);
+	INIT_RADIX_TREE(&alink->qdiscs, GFP_KERNEL);
 
 	return 0;
 
-err_free_qdiscs:
-	kvfree(alink->qdiscs);
 err_free_alink:
 	kfree(alink);
 	return err;
@@ -342,7 +338,7 @@ static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 	struct nfp_abm_link *alink = nn->app_priv;
 
 	nfp_abm_kill_reprs(alink->abm, alink);
-	kvfree(alink->qdiscs);
+	WARN(!radix_tree_empty(&alink->qdiscs), "left over qdiscs\n");
 	kfree(alink);
 }
 
@@ -398,6 +394,7 @@ static int nfp_abm_init(struct nfp_app *app)
 	struct nfp_pf *pf = app->pf;
 	struct nfp_reprs *reprs;
 	struct nfp_abm *abm;
+	unsigned int i;
 	int err;
 
 	if (!pf->eth_tbl) {
@@ -424,15 +421,28 @@ static int nfp_abm_init(struct nfp_app *app)
 	if (err)
 		goto err_free_abm;
 
+	err = -ENOMEM;
+	abm->num_thresholds = NFP_NET_MAX_RX_RINGS;
+	abm->threshold_undef = bitmap_zalloc(abm->num_thresholds, GFP_KERNEL);
+	if (!abm->threshold_undef)
+		goto err_free_abm;
+
+	abm->thresholds = kvcalloc(abm->num_thresholds,
+				   sizeof(*abm->thresholds), GFP_KERNEL);
+	if (!abm->thresholds)
+		goto err_free_thresh_umap;
+	for (i = 0; i < NFP_NET_MAX_RX_RINGS; i++)
+		__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+
 	/* We start in legacy mode, make sure advanced queuing is disabled */
 	err = nfp_abm_ctrl_qm_disable(abm);
 	if (err)
-		goto err_free_abm;
+		goto err_free_thresh;
 
 	err = -ENOMEM;
 	reprs = nfp_reprs_alloc(pf->max_data_vnics);
 	if (!reprs)
-		goto err_free_abm;
+		goto err_free_thresh;
 	RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs);
 
 	reprs = nfp_reprs_alloc(pf->max_data_vnics);
@@ -444,6 +454,10 @@ static int nfp_abm_init(struct nfp_app *app)
 
 err_free_phys:
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+err_free_thresh:
+	kvfree(abm->thresholds);
+err_free_thresh_umap:
+	bitmap_free(abm->threshold_undef);
 err_free_abm:
 	kfree(abm);
 	app->priv = NULL;
@@ -457,6 +471,8 @@ static void nfp_abm_clean(struct nfp_app *app)
 	nfp_abm_eswitch_clean_up(abm);
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+	bitmap_free(abm->threshold_undef);
+	kvfree(abm->thresholds);
 	kfree(abm);
 	app->priv = NULL;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 3774c063e4196e31df111ebc5a22f290927154bb..240e2c8683fe81823c0f73583c7f4290c9bf7206 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -5,9 +5,15 @@
 #define __NFP_ABM_H__ 1
 
 #include <linux/bits.h>
+#include <linux/radix-tree.h>
 #include <net/devlink.h>
 #include <net/pkt_cls.h>
 
+/* Dump of 64 PRIOs and 256 REDs seems to take 850us on Xeon v4 @ 2.20GHz;
+ * 2.5ms / 400Hz seems more than sufficient for stats resolution.
+ */
+#define NFP_ABM_STATS_REFRESH_IVAL	(2500 * 1000) /* ns */
+
 #define NFP_ABM_LVL_INFINITY		S32_MAX
 
 struct nfp_app;
@@ -20,6 +26,11 @@ struct nfp_net;
  * struct nfp_abm - ABM NIC app structure
  * @app:	back pointer to nfp_app
  * @pf_id:	ID of our PF link
+ *
+ * @thresholds:		current threshold configuration
+ * @threshold_undef:	bitmap of thresholds which have not been set
+ * @num_thresholds:	number of @thresholds and bits in @threshold_undef
+ *
  * @eswitch_mode:	devlink eswitch mode, advanced functions only visible
  *			in switchdev mode
  * @q_lvls:	queue level control area
@@ -28,6 +39,11 @@ struct nfp_net;
 struct nfp_abm {
 	struct nfp_app *app;
 	unsigned int pf_id;
+
+	u32 *thresholds;
+	unsigned long *threshold_undef;
+	size_t num_thresholds;
+
 	enum devlink_eswitch_mode eswitch_mode;
 	const struct nfp_rtsym *q_lvls;
 	const struct nfp_rtsym *qm_stats;
@@ -61,16 +77,67 @@ struct nfp_alink_xstats {
 	u64 pdrop;
 };
 
+enum nfp_qdisc_type {
+	NFP_QDISC_NONE = 0,
+	NFP_QDISC_MQ,
+	NFP_QDISC_RED,
+};
+
+#define NFP_QDISC_UNTRACKED	((struct nfp_qdisc *)1UL)
+
 /**
- * struct nfp_red_qdisc - representation of single RED Qdisc
- * @handle:	handle of currently offloaded RED Qdisc
- * @stats:	statistics from last refresh
- * @xstats:	base of extended statistics
+ * struct nfp_qdisc - tracked TC Qdisc
+ * @netdev:		netdev on which Qdisc was created
+ * @type:		Qdisc type
+ * @handle:		handle of this Qdisc
+ * @parent_handle:	handle of the parent (unreliable if Qdisc was grafted)
+ * @use_cnt:		number of attachment points in the hierarchy
+ * @num_children:	current size of the @children array
+ * @children:		pointers to children
+ *
+ * @params_ok:		parameters of this Qdisc are OK for offload
+ * @offload_mark:	offload refresh state - selected for offload
+ * @offloaded:		Qdisc is currently offloaded to the HW
+ *
+ * @mq:			MQ Qdisc specific parameters and state
+ * @mq.stats:		current stats of the MQ Qdisc
+ * @mq.prev_stats:	previously reported @mq.stats
+ *
+ * @red:		RED Qdisc specific parameters and state
+ * @red.threshold:	ECN marking threshold
+ * @red.stats:		current stats of the RED Qdisc
+ * @red.prev_stats:	previously reported @red.stats
+ * @red.xstats:		extended stats for RED - current
+ * @red.prev_xstats:	extended stats for RED - previously reported
  */
-struct nfp_red_qdisc {
+struct nfp_qdisc {
+	struct net_device *netdev;
+	enum nfp_qdisc_type type;
 	u32 handle;
-	struct nfp_alink_stats stats;
-	struct nfp_alink_xstats xstats;
+	u32 parent_handle;
+	unsigned int use_cnt;
+	unsigned int num_children;
+	struct nfp_qdisc **children;
+
+	bool params_ok;
+	bool offload_mark;
+	bool offloaded;
+
+	union {
+		/* NFP_QDISC_MQ */
+		struct {
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+		} mq;
+		/* NFP_QDISC_RED */
+		struct {
+			u32 threshold;
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+			struct nfp_alink_xstats xstats;
+			struct nfp_alink_xstats prev_xstats;
+		} red;
+	};
 };
 
 /**
@@ -80,9 +147,11 @@ struct nfp_red_qdisc {
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
  * @total_queues:	number of PF queues
- * @parent:	handle of expected parent, i.e. handle of MQ, or TC_H_ROOT
- * @num_qdiscs:	number of currently used qdiscs
- * @qdiscs:	array of qdiscs
+ *
+ * @last_stats_update:	ktime of last stats update
+ *
+ * @root_qdisc:	pointer to the current root of the Qdisc hierarchy
+ * @qdiscs:	all qdiscs recorded, keyed by the major part of the handle
  */
 struct nfp_abm_link {
 	struct nfp_abm *abm;
@@ -90,11 +159,16 @@ struct nfp_abm_link {
 	unsigned int id;
 	unsigned int queue_base;
 	unsigned int total_queues;
-	u32 parent;
-	unsigned int num_qdiscs;
-	struct nfp_red_qdisc *qdiscs;
+
+	u64 last_stats_update;
+
+	struct nfp_qdisc *root_qdisc;
+	struct radix_tree_root qdiscs;
 };
 
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink);
+int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct tc_root_qopt_offload *opt);
 int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 			 struct tc_red_qopt_offload *opt);
 int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
@@ -102,15 +176,11 @@ int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
 
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
-int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val);
-int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i,
+int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val);
+int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int queue,
 			   u32 val);
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats);
 int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
 			      struct nfp_alink_stats *stats);
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats);
 int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 			       struct nfp_alink_xstats *xstats);
 u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
index bb05f9ee0401c94bbedf1e667ada55aa9c35cb1f..16c4afe3a37ffca4026dbff88b565f868ef16207 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@@ -1,242 +1,548 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2018 Netronome Systems, Inc. */
 
+#include <linux/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/pkt_sched.h>
 #include <net/red.h>
 
 #include "../nfpcore/nfp_cpp.h"
 #include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
 #include "../nfp_port.h"
 #include "main.h"
 
-static int
-__nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		     u32 handle, unsigned int qs, u32 init_val)
+static bool nfp_abm_qdisc_is_red(struct nfp_qdisc *qdisc)
 {
-	struct nfp_port *port = nfp_port_from_netdev(netdev);
-	int ret;
+	return qdisc->type == NFP_QDISC_RED;
+}
 
-	ret = nfp_abm_ctrl_set_all_q_lvls(alink, init_val);
-	memset(alink->qdiscs, 0, sizeof(*alink->qdiscs) * alink->num_qdiscs);
+static bool nfp_abm_qdisc_child_valid(struct nfp_qdisc *qdisc, unsigned int id)
+{
+	return qdisc->children[id] &&
+	       qdisc->children[id] != NFP_QDISC_UNTRACKED;
+}
 
-	alink->parent = handle;
-	alink->num_qdiscs = qs;
-	port->tc_offload_cnt = qs;
+static void *nfp_abm_qdisc_tree_deref_slot(void __rcu **slot)
+{
+	return rtnl_dereference(*slot);
+}
 
-	return ret;
+static void
+nfp_abm_stats_propagate(struct nfp_alink_stats *parent,
+			struct nfp_alink_stats *child)
+{
+	parent->tx_pkts		+= child->tx_pkts;
+	parent->tx_bytes	+= child->tx_bytes;
+	parent->backlog_pkts	+= child->backlog_pkts;
+	parent->backlog_bytes	+= child->backlog_bytes;
+	parent->overlimits	+= child->overlimits;
+	parent->drops		+= child->drops;
 }
 
 static void
-nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		   u32 handle, unsigned int qs)
+nfp_abm_stats_update_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			 unsigned int queue)
 {
-	__nfp_abm_reset_root(netdev, alink, handle, qs, NFP_ABM_LVL_INFINITY);
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	int err;
+
+	if (!qdisc->offloaded)
+		return;
+
+	err = nfp_abm_ctrl_read_q_stats(alink, queue, &qdisc->red.stats);
+	if (err)
+		nfp_err(cpp, "RED stats (%d) read failed with error %d\n",
+			queue, err);
+
+	err = nfp_abm_ctrl_read_q_xstats(alink, queue, &qdisc->red.xstats);
+	if (err)
+		nfp_err(cpp, "RED xstats (%d) read failed with error %d\n",
+			queue, err);
 }
 
-static int
-nfp_abm_red_find(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+static void
+nfp_abm_stats_update_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
 {
-	unsigned int i = TC_H_MIN(opt->parent) - 1;
+	unsigned int i;
 
-	if (opt->parent == TC_H_ROOT)
-		i = 0;
-	else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent))
-		i = TC_H_MIN(opt->parent) - 1;
-	else
-		return -EOPNOTSUPP;
+	if (qdisc->type != NFP_QDISC_MQ)
+		return;
 
-	if (i >= alink->num_qdiscs || opt->handle != alink->qdiscs[i].handle)
-		return -EOPNOTSUPP;
+	for (i = 0; i < alink->total_queues; i++)
+		if (nfp_abm_qdisc_child_valid(qdisc, i))
+			nfp_abm_stats_update_red(alink, qdisc->children[i], i);
+}
 
-	return i;
+static void __nfp_abm_stats_update(struct nfp_abm_link *alink, u64 time_now)
+{
+	alink->last_stats_update = time_now;
+	if (alink->root_qdisc)
+		nfp_abm_stats_update_mq(alink, alink->root_qdisc);
+}
+
+static void nfp_abm_stats_update(struct nfp_abm_link *alink)
+{
+	u64 now;
+
+	/* Limit the frequency of updates - stats of non-leaf qdiscs are a sum
+	 * of all their leaves, so we would read the same stat multiple times
+	 * for every dump.
+	 */
+	now = ktime_get();
+	if (now - alink->last_stats_update < NFP_ABM_STATS_REFRESH_IVAL)
+		return;
+
+	__nfp_abm_stats_update(alink, now);
 }
 
 static void
-nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
-		    u32 handle)
+nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc,
+			      unsigned int start, unsigned int end)
 {
 	unsigned int i;
 
-	for (i = 0; i < alink->num_qdiscs; i++)
-		if (handle == alink->qdiscs[i].handle)
-			break;
-	if (i == alink->num_qdiscs)
+	for (i = start; i < end; i++)
+		if (nfp_abm_qdisc_child_valid(qdisc, i)) {
+			qdisc->children[i]->use_cnt--;
+			qdisc->children[i] = NULL;
+		}
+}
+
+static void
+nfp_abm_qdisc_offload_stop(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	/* Don't complain when qdisc is getting unlinked */
+	if (qdisc->use_cnt)
+		nfp_warn(alink->abm->app->cpp, "Offload of '%08x' stopped\n",
+			 qdisc->handle);
+
+	if (!nfp_abm_qdisc_is_red(qdisc))
 		return;
 
-	if (alink->parent == TC_H_ROOT) {
-		nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-	} else {
-		nfp_abm_ctrl_set_q_lvl(alink, i, NFP_ABM_LVL_INFINITY);
-		memset(&alink->qdiscs[i], 0, sizeof(*alink->qdiscs));
+	qdisc->red.stats.backlog_pkts = 0;
+	qdisc->red.stats.backlog_bytes = 0;
+}
+
+static int
+__nfp_abm_stats_init(struct nfp_abm_link *alink,
+		     unsigned int queue, struct nfp_alink_stats *prev_stats,
+		     struct nfp_alink_xstats *prev_xstats)
+{
+	u64 backlog_pkts, backlog_bytes;
+	int err;
+
+	/* Don't touch the backlog; it can only be reset after it has
+	 * been reported back to the tc qdisc stats.
+	 */
+	backlog_pkts = prev_stats->backlog_pkts;
+	backlog_bytes = prev_stats->backlog_bytes;
+
+	err = nfp_abm_ctrl_read_q_stats(alink, queue, prev_stats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED stats init (%d) failed with error %d\n",
+			queue, err);
+		return err;
 	}
+
+	err = nfp_abm_ctrl_read_q_xstats(alink, queue, prev_xstats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED xstats init (%d) failed with error %d\n",
+			queue, err);
+		return err;
+	}
+
+	prev_stats->backlog_pkts = backlog_pkts;
+	prev_stats->backlog_bytes = backlog_bytes;
+	return 0;
 }
 
-static bool
-nfp_abm_red_check_params(struct nfp_abm_link *alink,
-			 struct tc_red_qopt_offload *opt)
+static int
+nfp_abm_stats_init(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+		   unsigned int queue)
 {
-	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	return __nfp_abm_stats_init(alink, queue,
+				    &qdisc->red.prev_stats,
+				    &qdisc->red.prev_xstats);
+}
 
-	if (!opt->set.is_ecn) {
-		nfp_warn(cpp, "RED offload failed - drop is not supported (ECN option required) (p:%08x h:%08x)\n",
-			 opt->parent, opt->handle);
-		return false;
+static void
+nfp_abm_offload_compile_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			    unsigned int queue)
+{
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_RED &&
+			      qdisc->params_ok &&
+			      qdisc->use_cnt == 1 &&
+			      !qdisc->children[0];
+
+	/* If we are starting offload, init prev_stats */
+	if (qdisc->offload_mark && !qdisc->offloaded)
+		if (nfp_abm_stats_init(alink, qdisc, queue))
+			qdisc->offload_mark = false;
+
+	if (!qdisc->offload_mark)
+		return;
+
+	nfp_abm_ctrl_set_q_lvl(alink, queue, qdisc->red.threshold);
+}
+
+static void
+nfp_abm_offload_compile_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	unsigned int i;
+
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_MQ;
+	if (!qdisc->offload_mark)
+		return;
+
+	for (i = 0; i < alink->total_queues; i++) {
+		struct nfp_qdisc *child = qdisc->children[i];
+
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
+			continue;
+
+		nfp_abm_offload_compile_red(alink, child, i);
 	}
-	if (opt->set.is_harddrop) {
-		nfp_warn(cpp, "RED offload failed - harddrop is not supported (p:%08x h:%08x)\n",
-			 opt->parent, opt->handle);
-		return false;
+}
+
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink)
+{
+	struct nfp_abm *abm = alink->abm;
+	struct radix_tree_iter iter;
+	struct nfp_qdisc *qdisc;
+	void __rcu **slot;
+	size_t i;
+
+	/* Mark all thresholds as unconfigured */
+	__bitmap_set(abm->threshold_undef,
+		     alink->queue_base, alink->total_queues);
+
+	/* Clear offload marks */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		qdisc->offload_mark = false;
 	}
-	if (opt->set.min != opt->set.max) {
-		nfp_warn(cpp, "RED offload failed - unsupported min/max parameters (p:%08x h:%08x)\n",
-			 opt->parent, opt->handle);
-		return false;
+
+	if (alink->root_qdisc)
+		nfp_abm_offload_compile_mq(alink, alink->root_qdisc);
+
+	/* Refresh offload status */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		if (!qdisc->offload_mark && qdisc->offloaded)
+			nfp_abm_qdisc_offload_stop(alink, qdisc);
+		qdisc->offloaded = qdisc->offload_mark;
 	}
-	if (opt->set.min > NFP_ABM_LVL_INFINITY) {
-		nfp_warn(cpp, "RED offload failed - threshold too large %d > %d (p:%08x h:%08x)\n",
-			 opt->set.min, NFP_ABM_LVL_INFINITY, opt->parent,
-			 opt->handle);
-		return false;
+
+	/* Reset the unconfigured thresholds */
+	for (i = 0; i < abm->num_thresholds; i++)
+		if (test_bit(i, abm->threshold_undef))
+			__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+
+	__nfp_abm_stats_update(alink, ktime_get());
+}
+
+static void
+nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct nfp_qdisc *qdisc)
+{
+	struct radix_tree_iter iter;
+	unsigned int mq_refs = 0;
+	void __rcu **slot;
+
+	if (!qdisc->use_cnt)
+		return;
+	/* MQ doesn't notify well on destruction; we need special handling of
+	 * MQ's children.
+	 */
+	if (qdisc->type == NFP_QDISC_MQ &&
+	    qdisc == alink->root_qdisc &&
+	    netdev->reg_state == NETREG_UNREGISTERING)
+		return;
+
+	/* Count refs held by MQ instances and clear pointers */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot);
+		unsigned int i;
+
+		if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev)
+			continue;
+		for (i = 0; i < mq->num_children; i++)
+			if (mq->children[i] == qdisc) {
+				mq->children[i] = NULL;
+				mq_refs++;
+			}
 	}
 
-	return true;
+	WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n",
+	     qdisc->use_cnt, mq_refs);
+}
+
+static void
+nfp_abm_qdisc_free(struct net_device *netdev, struct nfp_abm_link *alink,
+		   struct nfp_qdisc *qdisc)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+
+	if (!qdisc)
+		return;
+	nfp_abm_qdisc_clear_mq(netdev, alink, qdisc);
+	WARN_ON(radix_tree_delete(&alink->qdiscs,
+				  TC_H_MAJ(qdisc->handle)) != qdisc);
+
+	kfree(qdisc->children);
+	kfree(qdisc);
+
+	port->tc_offload_cnt--;
+}
+
+static struct nfp_qdisc *
+nfp_abm_qdisc_alloc(struct net_device *netdev, struct nfp_abm_link *alink,
+		    enum nfp_qdisc_type type, u32 parent_handle, u32 handle,
+		    unsigned int children)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	struct nfp_qdisc *qdisc;
+	int err;
+
+	qdisc = kzalloc(sizeof(*qdisc), GFP_KERNEL);
+	if (!qdisc)
+		return NULL;
+
+	qdisc->children = kcalloc(children, sizeof(void *), GFP_KERNEL);
+	if (!qdisc->children)
+		goto err_free_qdisc;
+
+	qdisc->netdev = netdev;
+	qdisc->type = type;
+	qdisc->parent_handle = parent_handle;
+	qdisc->handle = handle;
+	qdisc->num_children = children;
+
+	err = radix_tree_insert(&alink->qdiscs, TC_H_MAJ(qdisc->handle), qdisc);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"Qdisc insertion into radix tree failed: %d\n", err);
+		goto err_free_child_tbl;
+	}
+
+	port->tc_offload_cnt++;
+	return qdisc;
+
+err_free_child_tbl:
+	kfree(qdisc->children);
+err_free_qdisc:
+	kfree(qdisc);
+	return NULL;
+}
+
+static struct nfp_qdisc *
+nfp_abm_qdisc_find(struct nfp_abm_link *alink, u32 handle)
+{
+	return radix_tree_lookup(&alink->qdiscs, TC_H_MAJ(handle));
 }
 
 static int
-nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
-		    struct tc_red_qopt_offload *opt)
+nfp_abm_qdisc_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		      enum nfp_qdisc_type type, u32 parent_handle, u32 handle,
+		      unsigned int children, struct nfp_qdisc **qdisc)
+{
+	*qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (*qdisc) {
+		if (WARN_ON((*qdisc)->type != type))
+			return -EINVAL;
+		return 1;
+	}
+
+	*qdisc = nfp_abm_qdisc_alloc(netdev, alink, type, parent_handle, handle,
+				     children);
+	return *qdisc ? 0 : -ENOMEM;
+}
+
+static void
+nfp_abm_qdisc_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
+		      u32 handle)
 {
-	bool existing;
-	int i, err;
+	struct nfp_qdisc *qdisc;
 
-	i = nfp_abm_red_find(alink, opt);
-	existing = i >= 0;
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return;
+
+	/* We don't get TC_SETUP_ROOT_QDISC w/ MQ when netdev is unregistered */
+	if (alink->root_qdisc == qdisc)
+		qdisc->use_cnt--;
 
-	if (!nfp_abm_red_check_params(alink, opt)) {
-		err = -EINVAL;
-		goto err_destroy;
+	nfp_abm_qdisc_unlink_children(qdisc, 0, qdisc->num_children);
+	nfp_abm_qdisc_free(netdev, alink, qdisc);
+
+	if (alink->root_qdisc == qdisc) {
+		alink->root_qdisc = NULL;
+		/* Only the root change matters; other changes are acted upon
+		 * via the graft notification.
+		 */
+		nfp_abm_qdisc_offload_update(alink);
 	}
+}
 
-	if (existing) {
-		if (alink->parent == TC_H_ROOT)
-			err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
-		else
-			err = nfp_abm_ctrl_set_q_lvl(alink, i, opt->set.min);
-		if (err)
-			goto err_destroy;
+static int
+nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
+		    unsigned int id)
+{
+	struct nfp_qdisc *parent, *child;
+
+	parent = nfp_abm_qdisc_find(alink, handle);
+	if (!parent)
 		return 0;
-	}
 
-	if (opt->parent == TC_H_ROOT) {
-		i = 0;
-		err = __nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 1,
-					   opt->set.min);
-	} else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent)) {
-		i = TC_H_MIN(opt->parent) - 1;
-		err = nfp_abm_ctrl_set_q_lvl(alink, i, opt->set.min);
-	} else {
+	if (WARN(id >= parent->num_children,
+		 "graft child out of bounds %d >= %d\n",
+		 id, parent->num_children))
 		return -EINVAL;
-	}
-	/* Set the handle to try full clean up, in case IO failed */
-	alink->qdiscs[i].handle = opt->handle;
-	if (err)
-		goto err_destroy;
 
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink, &alink->qdiscs[i].stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i,
-						&alink->qdiscs[i].stats);
-	if (err)
-		goto err_destroy;
+	nfp_abm_qdisc_unlink_children(parent, id, id + 1);
 
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink,
-					       &alink->qdiscs[i].xstats);
+	child = nfp_abm_qdisc_find(alink, child_handle);
+	if (child)
+		child->use_cnt++;
 	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i,
-						 &alink->qdiscs[i].xstats);
-	if (err)
-		goto err_destroy;
+		child = NFP_QDISC_UNTRACKED;
+	parent->children[id] = child;
 
-	alink->qdiscs[i].stats.backlog_pkts = 0;
-	alink->qdiscs[i].stats.backlog_bytes = 0;
+	nfp_abm_qdisc_offload_update(alink);
 
 	return 0;
-err_destroy:
-	/* If the qdisc keeps on living, but we can't offload undo changes */
-	if (existing) {
-		opt->set.qstats->qlen -= alink->qdiscs[i].stats.backlog_pkts;
-		opt->set.qstats->backlog -=
-			alink->qdiscs[i].stats.backlog_bytes;
-	}
-	nfp_abm_red_destroy(netdev, alink, opt->handle);
-
-	return err;
 }
 
 static void
-nfp_abm_update_stats(struct nfp_alink_stats *new, struct nfp_alink_stats *old,
-		     struct tc_qopt_offload_stats *stats)
+nfp_abm_stats_calculate(struct nfp_alink_stats *new,
+			struct nfp_alink_stats *old,
+			struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_queue *qstats)
 {
-	_bstats_update(stats->bstats, new->tx_bytes - old->tx_bytes,
+	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
 		       new->tx_pkts - old->tx_pkts);
-	stats->qstats->qlen += new->backlog_pkts - old->backlog_pkts;
-	stats->qstats->backlog += new->backlog_bytes - old->backlog_bytes;
-	stats->qstats->overlimits += new->overlimits - old->overlimits;
-	stats->qstats->drops += new->drops - old->drops;
+	qstats->qlen += new->backlog_pkts - old->backlog_pkts;
+	qstats->backlog += new->backlog_bytes - old->backlog_bytes;
+	qstats->overlimits += new->overlimits - old->overlimits;
+	qstats->drops += new->drops - old->drops;
+}
+
+static void
+nfp_abm_stats_red_calculate(struct nfp_alink_xstats *new,
+			    struct nfp_alink_xstats *old,
+			    struct red_stats *stats)
+{
+	stats->forced_mark += new->ecn_marked - old->ecn_marked;
+	stats->pdrop += new->pdrop - old->pdrop;
 }
 
 static int
-nfp_abm_red_stats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
 {
-	struct nfp_alink_stats *prev_stats;
-	struct nfp_alink_stats stats;
-	int i, err;
+	struct nfp_qdisc *qdisc;
 
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_stats = &alink->qdiscs[i].stats;
+	nfp_abm_stats_update(alink);
 
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink, &stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-	if (err)
-		return err;
+	qdisc = nfp_abm_qdisc_find(alink, opt->handle);
+	if (!qdisc || !qdisc->offloaded)
+		return -EOPNOTSUPP;
 
-	nfp_abm_update_stats(&stats, prev_stats, &opt->stats);
+	nfp_abm_stats_red_calculate(&qdisc->red.xstats,
+				    &qdisc->red.prev_xstats,
+				    opt->xstats);
+	qdisc->red.prev_xstats = qdisc->red.xstats;
+	return 0;
+}
 
-	*prev_stats = stats;
+static int
+nfp_abm_red_stats(struct nfp_abm_link *alink, u32 handle,
+		  struct tc_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc;
 
-	return 0;
+	nfp_abm_stats_update(alink);
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+	/* If the qdisc offload has stopped, we may need to adjust the backlog
+	 * counters back, so carry on even if the qdisc is not offloaded.
+	 */
+
+	nfp_abm_stats_calculate(&qdisc->red.stats,
+				&qdisc->red.prev_stats,
+				stats->bstats, stats->qstats);
+	qdisc->red.prev_stats = qdisc->red.stats;
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
+}
+
+static bool
+nfp_abm_red_check_params(struct nfp_abm_link *alink,
+			 struct tc_red_qopt_offload *opt)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+
+	if (!opt->set.is_ecn) {
+		nfp_warn(cpp, "RED offload failed - drop is not supported (ECN option required) (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.is_harddrop) {
+		nfp_warn(cpp, "RED offload failed - harddrop is not supported (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.min != opt->set.max) {
+		nfp_warn(cpp, "RED offload failed - unsupported min/max parameters (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.min > NFP_ABM_LVL_INFINITY) {
+		nfp_warn(cpp, "RED offload failed - threshold too large %d > %d (p:%08x h:%08x)\n",
+			 opt->set.min, NFP_ABM_LVL_INFINITY, opt->parent,
+			 opt->handle);
+		return false;
+	}
+
+	return true;
 }
 
 static int
-nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		    struct tc_red_qopt_offload *opt)
 {
-	struct nfp_alink_xstats *prev_xstats;
-	struct nfp_alink_xstats xstats;
-	int i, err;
+	struct nfp_qdisc *qdisc;
+	int ret;
 
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_xstats = &alink->qdiscs[i].xstats;
+	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_RED, opt->parent,
+				    opt->handle, 1, &qdisc);
+	if (ret < 0)
+		return ret;
 
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink, &xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i, &xstats);
-	if (err)
-		return err;
+	/* If limit != 0, the child gets reset */
+	if (opt->set.limit) {
+		if (nfp_abm_qdisc_child_valid(qdisc, 0))
+			qdisc->children[0]->use_cnt--;
+		qdisc->children[0] = NULL;
+	} else {
+		/* A Qdisc just allocated without a limit will use noop_qdisc,
+		 * i.e. a black hole.
+		 */
+		if (!ret)
+			qdisc->children[0] = NFP_QDISC_UNTRACKED;
+	}
 
-	opt->xstats->forced_mark += xstats.ecn_marked - prev_xstats->ecn_marked;
-	opt->xstats->pdrop += xstats.pdrop - prev_xstats->pdrop;
+	qdisc->params_ok = nfp_abm_red_check_params(alink, opt);
+	if (qdisc->params_ok)
+		qdisc->red.threshold = opt->set.min;
 
-	*prev_xstats = xstats;
+	if (qdisc->use_cnt == 1)
+		nfp_abm_qdisc_offload_update(alink);
 
 	return 0;
 }
@@ -248,37 +554,76 @@ int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	case TC_RED_REPLACE:
 		return nfp_abm_red_replace(netdev, alink, opt);
 	case TC_RED_DESTROY:
-		nfp_abm_red_destroy(netdev, alink, opt->handle);
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
 		return 0;
 	case TC_RED_STATS:
-		return nfp_abm_red_stats(alink, opt);
+		return nfp_abm_red_stats(alink, opt->handle, &opt->stats);
 	case TC_RED_XSTATS:
 		return nfp_abm_red_xstats(alink, opt);
+	case TC_RED_GRAFT:
+		return nfp_abm_qdisc_graft(alink, opt->handle,
+					   opt->child_handle, 0);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
 static int
-nfp_abm_mq_stats(struct nfp_abm_link *alink, struct tc_mq_qopt_offload *opt)
+nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink,
+		  struct tc_mq_qopt_offload *opt)
 {
-	struct nfp_alink_stats stats;
+	struct nfp_qdisc *qdisc;
+	int ret;
+
+	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_MQ,
+				    TC_H_ROOT, opt->handle, alink->total_queues,
+				    &qdisc);
+	if (ret < 0)
+		return ret;
+
+	qdisc->params_ok = true;
+	qdisc->offloaded = true;
+	nfp_abm_qdisc_offload_update(alink);
+	return 0;
+}
+
+static int
+nfp_abm_mq_stats(struct nfp_abm_link *alink, u32 handle,
+		 struct tc_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc, *red;
 	unsigned int i;
-	int err;
 
-	for (i = 0; i < alink->num_qdiscs; i++) {
-		if (alink->qdiscs[i].handle == TC_H_UNSPEC)
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+
+	nfp_abm_stats_update(alink);
+
+	/* MQ stats are summed over the children in the core, so we need
+	 * to add up the unreported child values.
+	 */
+	memset(&qdisc->mq.stats, 0, sizeof(qdisc->mq.stats));
+	memset(&qdisc->mq.prev_stats, 0, sizeof(qdisc->mq.prev_stats));
+
+	for (i = 0; i < qdisc->num_children; i++) {
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
 			continue;
 
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-		if (err)
-			return err;
+		if (!nfp_abm_qdisc_is_red(qdisc->children[i]))
+			continue;
+		red = qdisc->children[i];
 
-		nfp_abm_update_stats(&stats, &alink->qdiscs[i].stats,
-				     &opt->stats);
+		nfp_abm_stats_propagate(&qdisc->mq.stats,
+					&red->red.stats);
+		nfp_abm_stats_propagate(&qdisc->mq.prev_stats,
+					&red->red.prev_stats);
 	}
 
-	return 0;
+	nfp_abm_stats_calculate(&qdisc->mq.stats, &qdisc->mq.prev_stats,
+				stats->bstats, stats->qstats);
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
 }
 
 int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
@@ -286,16 +631,33 @@ int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
 {
 	switch (opt->command) {
 	case TC_MQ_CREATE:
-		nfp_abm_reset_root(netdev, alink, opt->handle,
-				   alink->total_queues);
-		return 0;
+		return nfp_abm_mq_create(netdev, alink, opt);
 	case TC_MQ_DESTROY:
-		if (opt->handle == alink->parent)
-			nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
 		return 0;
 	case TC_MQ_STATS:
-		return nfp_abm_mq_stats(alink, opt);
+		return nfp_abm_mq_stats(alink, opt->handle, &opt->stats);
+	case TC_MQ_GRAFT:
+		return nfp_abm_qdisc_graft(alink, opt->handle,
+					   opt->graft_params.child_handle,
+					   opt->graft_params.queue);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
+
+int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct tc_root_qopt_offload *opt)
+{
+	if (opt->ingress)
+		return -EOPNOTSUPP;
+	if (alink->root_qdisc)
+		alink->root_qdisc->use_cnt--;
+	alink->root_qdisc = nfp_abm_qdisc_find(alink, opt->handle);
+	if (alink->root_qdisc)
+		alink->root_qdisc->use_cnt++;
+
+	nfp_abm_qdisc_offload_update(alink);
+
+	return 0;
+}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 487fa5e0e1652da8ea91253c5c3b84ddcb0a8925..97b4233120e4b03e184f18c66b7596be8aae70a4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -845,6 +845,7 @@ enum tc_setup_type {
 	TC_SETUP_QDISC_PRIO,
 	TC_SETUP_QDISC_MQ,
 	TC_SETUP_QDISC_ETF,
+	TC_SETUP_ROOT_QDISC,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index f6c0cd29dea40f007e0e444b51594a6531d8b86e..c497ada7f591e95ad12191941268b21d33412bff 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -821,12 +821,21 @@ enum tc_mq_command {
 	TC_MQ_CREATE,
 	TC_MQ_DESTROY,
 	TC_MQ_STATS,
+	TC_MQ_GRAFT,
+};
+
+struct tc_mq_opt_offload_graft_params {
+	unsigned long queue;
+	u32 child_handle;
 };
 
 struct tc_mq_qopt_offload {
 	enum tc_mq_command command;
 	u32 handle;
-	struct tc_qopt_offload_stats stats;
+	union {
+		struct tc_qopt_offload_stats stats;
+		struct tc_mq_opt_offload_graft_params graft_params;
+	};
 };
 
 enum tc_red_command {
@@ -834,12 +843,14 @@ enum tc_red_command {
 	TC_RED_DESTROY,
 	TC_RED_STATS,
 	TC_RED_XSTATS,
+	TC_RED_GRAFT,
 };
 
 struct tc_red_qopt_offload_params {
 	u32 min;
 	u32 max;
 	u32 probability;
+	u32 limit;
 	bool is_ecn;
 	bool is_harddrop;
 	struct gnet_stats_queue *qstats;
@@ -853,6 +864,7 @@ struct tc_red_qopt_offload {
 		struct tc_red_qopt_offload_params set;
 		struct tc_qopt_offload_stats stats;
 		struct red_stats *xstats;
+		u32 child_handle;
 	};
 };
 
@@ -889,4 +901,14 @@ struct tc_prio_qopt_offload {
 	};
 };
 
+enum tc_root_command {
+	TC_ROOT_GRAFT,
+};
+
+struct tc_root_qopt_offload {
+	enum tc_root_command command;
+	u32 handle;
+	bool ingress;
+};
+
 #endif
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f55bc50cd0a938a8f5562caa50c24df02bac3080..9c88cec7e8a20f9f30a9f4113e34cf96084ee393 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -860,6 +860,21 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
 }
 EXPORT_SYMBOL(qdisc_offload_graft_helper);
 
+static void qdisc_offload_graft_root(struct net_device *dev,
+				     struct Qdisc *new, struct Qdisc *old,
+				     struct netlink_ext_ack *extack)
+{
+	struct tc_root_qopt_offload graft_offload = {
+		.command	= TC_ROOT_GRAFT,
+		.handle		= new ? new->handle : 0,
+		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
+				  (old && old->flags & TCQ_F_INGRESS),
+	};
+
+	qdisc_offload_graft_helper(dev, NULL, new, old,
+				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
+}
+
 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 			 u32 portid, u32 seq, u16 flags, int event)
 {
@@ -1026,6 +1041,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		if (dev->flags & IFF_UP)
 			dev_deactivate(dev);
 
+		qdisc_offload_graft_root(dev, new, old, extack);
+
 		if (new && new->ops->attach)
 			goto skip;
 
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 1db5c1bf6ddd3a8e65875c7bb4f5591edadec0a9..203659bc3906419f6a00edca96561efb503d608d 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -193,6 +193,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 		    struct Qdisc **old, struct netlink_ext_ack *extack)
 {
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+	struct tc_mq_qopt_offload graft_offload;
 	struct net_device *dev = qdisc_dev(sch);
 
 	if (dev->flags & IFF_UP)
@@ -203,6 +204,14 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
+
+	graft_offload.handle = sch->handle;
+	graft_offload.graft_params.queue = cl - 1;
+	graft_offload.graft_params.child_handle = new ? new->handle : 0;
+	graft_offload.command = TC_MQ_GRAFT;
+
+	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
+				   TC_SETUP_QDISC_MQ, &graft_offload, extack);
 	return 0;
 }
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a1d08bdd9357f59decfaa29381f170c4351d195d..9df9942340eaaa30ed38fc3345649f287a373bee 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -166,6 +166,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
 		opt.set.probability = q->parms.max_P;
+		opt.set.limit = q->limit;
 		opt.set.is_ecn = red_use_ecn(q);
 		opt.set.is_harddrop = red_use_harddrop(q);
 		opt.set.qstats = &sch->qstats;
@@ -367,6 +368,21 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl,
 	return 0;
 }
 
+static void red_graft_offload(struct Qdisc *sch,
+			      struct Qdisc *new, struct Qdisc *old,
+			      struct netlink_ext_ack *extack)
+{
+	struct tc_red_qopt_offload graft_offload = {
+		.handle		= sch->handle,
+		.parent		= sch->parent,
+		.child_handle	= new->handle,
+		.command	= TC_RED_GRAFT,
+	};
+
+	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
+				   TC_SETUP_QDISC_RED, &graft_offload, extack);
+}
+
 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		     struct Qdisc **old, struct netlink_ext_ack *extack)
 {
@@ -376,6 +392,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		new = &noop_qdisc;
 
 	*old = qdisc_replace(sch, new, &q->qdisc);
+
+	red_graft_offload(sch, new, *old, extack);
 	return 0;
 }