Patchwork [net-next,8/9] net/mlx5: Issue SW reset on FW assert

login
register
mail settings
Submitter Saeed Mahameed
Date Jan. 10, 2019, 10:29 a.m.
Message ID <20190110102906.3751-9-saeedm@mellanox.com>
Download mbox | patch
Permalink /patch/696497/
State New
Headers show

Comments

Saeed Mahameed - Jan. 10, 2019, 10:29 a.m.
From: Feras Daoud <ferasda@mellanox.com>

If a FW assert is considered fatal, indicated by a new bit in the health
buffer, reset the FW. After the reset go through the normal recovery
flow. Only one PF needs to issue the reset, so an attempt is made to
prevent the 2nd function from also issuing the reset.
It's not an error if that happens; it just slows recovery.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/health.c  | 174 +++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/lib/mlx5.h    |   1 +
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |   1 +
 include/linux/mlx5/device.h                   |  10 +-
 include/linux/mlx5/driver.h                   |   1 +
 5 files changed, 177 insertions(+), 10 deletions(-)

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index afa3fe6eef8f..74de30246eee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -40,6 +40,7 @@ 
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "lib/mlx5.h"
+#include "lib/pci_vsc.h"
 
 enum {
 	MLX5_HEALTH_POLL_INTERVAL	= 2 * HZ,
@@ -68,8 +69,10 @@  enum {
 enum  {
 	MLX5_SENSOR_NO_ERR		= 0,
 	MLX5_SENSOR_PCI_COMM_ERR	= 1,
-	MLX5_SENSOR_NIC_DISABLED	= 2,
-	MLX5_SENSOR_NIC_SW_RESET	= 3,
+	MLX5_SENSOR_PCI_ERR		= 2,
+	MLX5_SENSOR_NIC_DISABLED	= 3,
+	MLX5_SENSOR_NIC_SW_RESET	= 4,
+	MLX5_SENSOR_FW_SYND_RFR		= 5,
 };
 
 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
@@ -96,34 +99,169 @@  static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
 	return (ioread32be(&h->fw_ver) == 0xffffffff);
 }
 
+static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+	u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
+	u8 synd = ioread8(&h->synd);
+
+	if (rfr && synd)
+		mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
+	return rfr && synd;
+}
+
 static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
 {
 	if (sensor_pci_not_working(dev))
 		return MLX5_SENSOR_PCI_COMM_ERR;
+	if (pci_channel_offline(dev->pdev))
+		return MLX5_SENSOR_PCI_ERR;
 	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
 		return MLX5_SENSOR_NIC_DISABLED;
 	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
 		return MLX5_SENSOR_NIC_SW_RESET;
+	if (sensor_fw_synd_rfr(dev))
+		return MLX5_SENSOR_FW_SYND_RFR;
 
 	return MLX5_SENSOR_NO_ERR;
 }
 
+static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
+{
+	int ret;
+
+	if (!mlx5_core_is_pf(dev))
+		return -EBUSY;
+
+	/* Try to lock GW access, this stage doesn't return
+	 * EBUSY because locked GW does not mean that other PF
+	 * already started the reset.
+	 */
+	ret = mlx5_vsc_gw_lock(dev);
+	if (ret == -EBUSY)
+		return -EINVAL;
+	if (ret)
+		return ret;
+
+	/* At this stage, if the return status == EBUSY, then we know
+	 * for sure that another PF started the reset, so don't allow
+	 * another reset.
+	 */
+	ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, lock);
+	if (ret)
+		mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+
+	/* Unlock GW access */
+	mlx5_vsc_gw_unlock(dev);
+
+	return ret;
+}
+
+static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
+{
+	bool supported = (ioread32be(&dev->iseg->initializing) >>
+			  MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
+	u32 fatal_error;
+
+	if (!supported)
+		return false;
+
+	/* The reset only needs to be issued by one PF. The health buffer is
+	 * shared between all functions, and will be cleared during a reset.
+	 * Check again to avoid a redundant 2nd reset. If the fatal error was
+	 * PCI related a reset won't help.
+	 */
+	fatal_error = check_fatal_sensors(dev);
+	if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
+	    fatal_error == MLX5_SENSOR_NIC_DISABLED ||
+	    fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
+		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
+		return false;
+	}
+
+	mlx5_core_warn(dev, "Issuing FW Reset\n");
+	/* Write the NIC interface field to initiate the reset, the command
+	 * interface address also resides here, don't overwrite it.
+	 */
+	mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
+
+	return true;
+}
+
+#define MLX5_CRDUMP_WAIT_MS	60000
+#define MLX5_FW_RESET_WAIT_MS	1000
 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
 {
+	unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
+	u32 fatal_error, err;
+	int lock = -EBUSY;
+
 	mutex_lock(&dev->intf_state_mutex);
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		goto unlock;
+	if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
+		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+		goto unlock;
+	}
+
+	if (force)
+		mlx5_core_dbg(dev, "start\n");
+	else
+		mlx5_core_err(dev, "start\n");
 
-	mlx5_core_err(dev, "start\n");
-	if (pci_channel_offline(dev->pdev) ||
-	    dev->priv.health.fatal_error != MLX5_SENSOR_NO_ERR || force) {
+	fatal_error = check_fatal_sensors(dev);
+
+	if (fatal_error || force) {
 		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 		mlx5_cmd_trigger_completions(dev);
 	}
 
 	mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
-	mlx5_core_err(dev, "end\n");
 
+	if (force)
+		goto err_state_done;
+
+	if (fatal_error == MLX5_SENSOR_FW_SYND_RFR) {
+		/* Get cr-dump and reset FW semaphore */
+		lock = lock_sem_sw_reset(dev, true);
+
+		/* Execute cr-dump and SW reset */
+		if (lock == -EBUSY) {
+			delay_ms = MLX5_CRDUMP_WAIT_MS;
+			goto recover_from_sw_reset;
+		}
+		err = mlx5_crdump_collect(dev);
+		if (err)
+			mlx5_core_err(dev, "Failed to collect crdump area err %d\n",
+				      err);
+		reset_fw_if_needed(dev);
+	}
+
+recover_from_sw_reset:
+	/* Recover from SW reset */
+	end = jiffies + msecs_to_jiffies(delay_ms);
+	do {
+		if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+			break;
+
+		cond_resched();
+	} while (!time_after(jiffies, end));
+
+	if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+		dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+			mlx5_get_nic_state(dev), delay_ms);
+	}
+
+	/* Release FW semaphore if you are the lock owner */
+	if (!lock)
+		lock_sem_sw_reset(dev, false);
+
+err_state_done:
+	if (force)
+		mlx5_core_dbg(dev, "end\n");
+	else
+		mlx5_core_err(dev, "end\n");
 unlock:
 	mutex_unlock(&dev->intf_state_mutex);
 }
@@ -144,6 +282,20 @@  static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 	case MLX5_NIC_IFC_NO_DRAM_NIC:
 		mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
 		break;
+
+	case MLX5_NIC_IFC_SW_RESET:
+		/* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
+		 * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
+		 *    and this is a VF), this is not recoverable by SW reset.
+		 *    Logging of this is handled elsewhere.
+		 * 2. FW reset has been issued by another function, driver can
+		 *    be reloaded to recover after the mode switches to
+		 *    MLX5_NIC_IFC_DISABLED.
+		 */
+		if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
+			mlx5_core_warn(dev, "NIC SW reset in progress\n");
+		break;
+
 	default:
 		mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
 			       nic_interface);
@@ -178,7 +330,8 @@  static void health_recover(struct work_struct *work)
 #define MLX5_RECOVERY_NO_DELAY 0
 static unsigned long get_recovery_delay(struct mlx5_core_dev *dev)
 {
-	return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR ?
+	return (dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR ||
+		dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR) ?
 	       MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY;
 }
 
@@ -198,11 +351,14 @@  static void health_care(struct work_struct *work)
 	recover_delay = msecs_to_jiffies(get_recovery_delay(dev));
 
 	spin_lock_irqsave(&health->wq_lock, flags);
-	if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
+	if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) {
+		mlx5_core_warn(dev, "Scheduling recovery work with %lums delay\n",
+			       recover_delay);
 		schedule_delayed_work(&health->recover_work, recover_delay);
-	else
+	} else {
 		dev_err(&dev->pdev->dev,
 			"new health works are not permitted at this stage\n");
+	}
 	spin_unlock_irqrestore(&health->wq_lock, flags);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 98ef3341bdf1..a5d1c1be9c00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -45,6 +45,7 @@  int mlx5_crdump_init(struct mlx5_core_dev *dev);
 void mlx5_crdump_cleanup(struct mlx5_core_dev *dev);
 bool mlx5_crdump_is_snapshot_enabled(struct mlx5_core_dev *dev);
 int mlx5_crdump_set_snapshot_enabled(struct mlx5_core_dev *dev, bool value);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev);
 
 /* TODO move to lib/events.h */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index e5350c885ab7..bf118e85aefe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -99,6 +99,7 @@  enum {
 
 enum mlx5_semaphore_space_address {
 	MLX5_SEMAPHORE_SPACE_DOMAIN     = 0xA,
+	MLX5_SEMAPHORE_SW_RESET         = 0x20,
 };
 
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8c4a820bd4c1..cac801609d76 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -507,6 +507,10 @@  struct mlx5_cmd_layout {
 	u8		status_own;
 };
 
+enum mlx5_fatal_assert_bit_offsets {
+	MLX5_RFR_OFFSET = 31,
+};
+
 struct health_buffer {
 	__be32		assert_var[5];
 	__be32		rsvd0[3];
@@ -515,12 +519,16 @@  struct health_buffer {
 	__be32		rsvd1[2];
 	__be32		fw_ver;
 	__be32		hw_id;
-	__be32		rsvd2;
+	__be32		rfr;
 	u8		irisc_index;
 	u8		synd;
 	__be16		ext_synd;
 };
 
+enum mlx5_initializing_bit_offsets {
+	MLX5_FW_RESET_SUPPORTED_OFFSET = 30,
+};
+
 enum mlx5_cmd_addr_l_sz_offset {
 	MLX5_NIC_IFC_OFFSET = 8,
 };
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 71cb52bae1ae..2ea6732c1d4d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -575,6 +575,7 @@  struct mlx5_priv {
 };
 
 enum mlx5_device_state {
+	MLX5_DEVICE_STATE_UNINITIALIZED,
 	MLX5_DEVICE_STATE_UP,
 	MLX5_DEVICE_STATE_INTERNAL_ERROR,
 };