diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6113f0383f4177cb6257d57e63bed..6bb2a860b15b9f08484716fd991ab8699baf25e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o en/monitor_stats.o + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8fa8fdd30b8509f73a27fe4d31b094dfceda5e5d..27e276c9bf84cb251ac23f3527eaae2851427696 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -388,10 +388,7 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; - struct mlx5e_txqsq_recover { - struct work_struct recover_work; - u64 last_recover; - } recover; + struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -682,6 +679,13 @@ struct mlx5e_rss_params { u8 hfunc; }; +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -737,6 +741,7 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif + struct devlink_health_reporter *tx_reporter; }; struct mlx5e_profile { @@ -866,6 +871,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); + static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h new file mode 100644 index 0000000000000000000000000000000000000000..74083e120ab3bde334504bfd1665a87ea0b8f0e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5E_EN_REPORTER_H +#define __MLX5E_EN_REPORTER_H + +#include <linux/mlx5/driver.h> +#include "en.h" + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 0000000000000000000000000000000000000000..9800df4909c225fabc974ba7cb188c345a5583f5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/devlink.h> +#include "reporter.h" +#include "lib/eq.h" + +#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 + +struct mlx5e_tx_err_ctx { + int (*recover)(struct mlx5e_txqsq *sq); + struct mlx5e_txqsq *sq; +}; + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return err; + } + + if (state != MLX5_RQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return -EINVAL; + } + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + return err; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + + err = mlx5e_sq_to_ready(sq, state); + if (err) + return err; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + mlx5e_activate_txqsq(sq); + + return 0; +} + +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx = {0}; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); + + devlink_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +/* state lock cannot be grabbed within this function. + * It can cause a dead lock or a read-after-free. + */ +int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->sq); +} + +static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +{ + int err; + + mutex_lock(&priv->state_lock); + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_tx_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_tx_reporter_recover_all(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer, + u32 sqn, u8 state, u8 stopped) +{ + int err, i; + int nest = 0; + char name[20]; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); + if (err) + goto buffer_error; + nest++; + + sprintf(name, "SQ 0x%x", sqn); + err = devlink_health_buffer_put_object_name(buffer, name); + if (err) + goto buffer_error; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_object_name(buffer, "HW state"); + if (err) + goto buffer_error; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_value_u8(buffer, state); + if (err) + goto buffer_error; + + devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */ + nest--; + + devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */ + nest--; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_object_name(buffer, "stopped"); + if (err) + goto buffer_error; + + err = devlink_health_buffer_nest_start(buffer, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); + if (err) + goto buffer_error; + nest++; + + err = devlink_health_buffer_put_value_u8(buffer, stopped); + if (err) + goto buffer_error; + + for (i = 0; i < nest; i++) + devlink_health_buffer_nest_end(buffer); + + return 0; + +buffer_error: + for (i = 0; i < nest; i++) + devlink_health_buffer_nest_cancel(buffer); + return err; +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_health_buffer **buffers_array, + unsigned int buffer_size, + unsigned int num_buffers) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + unsigned int buff = 0; + int i = 0, err = 0; + + if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN) + return -ENOMEM; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mutex_unlock(&priv->state_lock); + return 0; + } + + while (i < priv->channels.num * priv->channels.params.num_tc) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + u8 state; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + break; + + err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff], + sq->sqn, state, + netif_xmit_stopped(sq->txq)); + if (err) { + if (++buff == num_buffers) + break; + } else { + i++; + } + } + + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "TX", + .recover = mlx5e_tx_reporter_recover, + .diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC * + MLX5E_TX_REPORTER_PER_SQ_MAX_LEN, + .diagnose = mlx5e_tx_reporter_diagnose, + .dump_size = 0, + .dump = NULL, +}; + +#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct devlink *devlink = priv_to_devlink(mdev); + + priv->tx_reporter = + devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, + MLX5_REPORTER_TX_GRACEFUL_PERIOD, + true, priv); + return PTR_ERR_OR_ZERO(priv->tx_reporter); +} + +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +{ + devlink_health_reporter_destroy(priv->tx_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8cfd2ec7c0a209afe424eca2a7be3301cf79223f..4c4507384fed024186246dec982581b58a916666 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,6 +51,7 @@ #include "en/xdp.h" #include "lib/eq.h" #include "en/monitor_stats.h" +#include "en/reporter.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -1160,7 +1161,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static void mlx5e_sq_recover(struct work_struct *work); +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -1182,7 +1183,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) @@ -1270,15 +1271,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, return err; } -struct mlx5e_modify_sq_param { - int curr_state; - int next_state; - bool rl_update; - int rl_index; -}; - -static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p) +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) { void *in; void *sqc; @@ -1376,17 +1370,7 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, return err; } -static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) -{ - WARN_ONCE(sq->cc != sq->pc, - "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", - sq->sqn, sq->cc, sq->pc); - sq->cc = 0; - sq->dma_fifo_cc = 0; - sq->pc = 0; -} - -static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); @@ -1395,7 +1379,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) netif_tx_start_queue(sq->txq); } -static inline void netif_tx_disable_queue(struct netdev_queue *txq) +void mlx5e_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); netif_tx_stop_queue(txq); @@ -1411,7 +1395,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* prevent netif_tx_wake_queue */ napi_synchronize(&c->napi); - netif_tx_disable_queue(sq->txq); + mlx5e_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { @@ -1431,6 +1415,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_rate_limit rl = {0}; cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1440,105 +1425,15 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } -static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); - - while (time_before(jiffies, exp_time)) { - if (sq->cc == sq->pc) - return 0; - - msleep(20); - } - - netdev_err(sq->channel->netdev, - "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", - sq->sqn, sq->cc, sq->pc); - - return -ETIMEDOUT; -} - -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; - int err; - - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} - -static void mlx5e_sq_recover(struct work_struct *work) -{ - struct mlx5e_txqsq_recover *recover = - container_of(work, struct mlx5e_txqsq_recover, - recover_work); - struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, - recover); - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; - - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); - if (err) { - netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", - sq->sqn, err); - return; - } - - if (state != MLX5_RQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return; - } - - netif_tx_disable_queue(sq->txq); - - if (mlx5e_wait_for_sq_flush(sq)) - return; - - /* If the interval between two consecutive recovers per SQ is too - * short, don't recover to avoid infinite loop of ERR_CQE -> recover. - * If we reached this state, there is probably a bug that needs to be - * fixed. let's keep the queue close and let tx timeout cleanup. - */ - if (jiffies_to_msecs(jiffies - recover->last_recover) < - MLX5E_SQ_RECOVER_MIN_INTERVAL) { - netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", - sq->sqn); - return; - } - - /* At this point, no new packets will arrive from the stack as TXQ is - * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all - * pending WQEs. SQ can safely reset the SQ. - */ - if (mlx5e_sq_to_ready(sq, state)) + if (!sq->channel->priv->tx_reporter) return; - mlx5e_reset_txqsq_cc_pc(sq); - sq->stats->recover++; - recover->last_recover = jiffies; - mlx5e_activate_txqsq(sq); + mlx5e_tx_reporter_err_cqe(sq); } static int mlx5e_open_icosq(struct mlx5e_channel *c, @@ -3207,6 +3102,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; + mlx5e_tx_reporter_destroy(priv); for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } @@ -4908,6 +4804,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif + mlx5e_tx_reporter_create(priv); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 598ad7e4d5c97872c17fe4ae8387e82a2555c96e..a8e052a5ce3662c4c83e26a578d90bc62bc40074 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -514,7 +514,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); queue_work(cq->channel->priv->wq, - &sq->recover.recover_work); + &sq->recover_work); } stats->cqe_err++; }