Patchwork vfio/pci: Restore device state on PM transition

login
register
mail settings
Submitter Alex Williamson
Date Feb. 11, 2019, 8:18 p.m.
Message ID <154991625844.2837.1741477468425673800.stgit@gimli.home>
Download mbox | patch
Permalink /patch/723301/
State New
Headers show

Comments

Alex Williamson - Feb. 11, 2019, 8:18 p.m.
PCI core handles save and restore of device state around reset, but
when using pci_set_power_state() we can unintentionally trigger a soft
reset of the device, where PCI core only restores the BAR state.  If
we're using vfio-pci's idle D3 support to try to put devices into low
power when unused, this might trigger a reset when the device is woken
for use.  Also power state management by the user, or within a guest,
can put the device into D3 power state with potentially limited
ability to restore the device if it should undergo a reset.  The PCI
spec does not define the extent of a soft reset and many devices
reporting soft reset on D3->D0 transition do not undergo a PCI config
space reset.  It's therefore assumed safe to unconditionally restore
the remainder of the state if the device indicates soft reset
support, even on a user initiated wakeup.

Implement a wrapper in vfio-pci to tag devices reporting PM reset
support, save their state on transitions into D3 and restore on
transitions back to D0.

Reported-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci.c         |   71 +++++++++++++++++++++++++++++++----
 drivers/vfio/pci/vfio_pci_config.c  |    2 -
 drivers/vfio/pci/vfio_pci_private.h |    6 +++
 3 files changed, 70 insertions(+), 9 deletions(-)

Patch

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index ff60bd1ea587..84b593b9fb1f 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -209,6 +209,57 @@  static bool vfio_pci_nointx(struct pci_dev *pdev)
 	return false;
 }
 
+static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	u16 pmcsr;
+
+	if (!pdev->pm_cap)
+		return;
+
+	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
+
+	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
+}
+
+/*
+ * pci_set_power_state() wrapper handling devices which perform a soft reset on
+ * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
+ * restore when returned to D0.  Saved separately from pci_saved_state for use
+ * by PM capability emulation and separately from pci_dev internal saved state
+ * to avoid it being overwritten and consumed around other resets.
+ */
+int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	bool needs_restore = false, needs_save = false;
+	int ret;
+
+	if (vdev->needs_pm_restore) {
+		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
+			pci_save_state(pdev);
+			needs_save = true;
+		}
+
+		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
+			needs_restore = true;
+	}
+
+	ret = pci_set_power_state(pdev, state);
+
+	if (!ret) {
+		/* D3 might be unsupported via quirk, skip unless in D3 */
+		if (needs_save && pdev->current_state >= PCI_D3hot) {
+			vdev->pm_save = pci_store_saved_state(pdev);
+		} else if (needs_restore) {
+			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
+			pci_restore_state(pdev);
+		}
+	}
+
+	return ret;
+}
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -216,7 +267,7 @@  static int vfio_pci_enable(struct vfio_pci_device *vdev)
 	u16 cmd;
 	u8 msix_pos;
 
-	pci_set_power_state(pdev, PCI_D0);
+	vfio_pci_set_power_state(vdev, PCI_D0);
 
 	/* Don't allow our initial saved state to include busmaster */
 	pci_clear_master(pdev);
@@ -407,7 +458,7 @@  static void vfio_pci_disable(struct vfio_pci_device *vdev)
 	vfio_pci_try_bus_reset(vdev);
 
 	if (!disable_idle_d3)
-		pci_set_power_state(pdev, PCI_D3hot);
+		vfio_pci_set_power_state(vdev, PCI_D3hot);
 }
 
 static void vfio_pci_release(void *device_data)
@@ -1286,6 +1337,8 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 					vfio_pci_set_vga_decode(vdev, false));
 	}
 
+	vfio_pci_probe_power_state(vdev);
+
 	if (!disable_idle_d3) {
 		/*
 		 * pci-core sets the device power state to an unknown value at
@@ -1296,8 +1349,8 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		 * be able to get to D3.  Therefore first do a D0 transition
 		 * before going to D3.
 		 */
-		pci_set_power_state(pdev, PCI_D0);
-		pci_set_power_state(pdev, PCI_D3hot);
+		vfio_pci_set_power_state(vdev, PCI_D0);
+		vfio_pci_set_power_state(vdev, PCI_D3hot);
 	}
 
 	return ret;
@@ -1316,6 +1369,11 @@  static void vfio_pci_remove(struct pci_dev *pdev)
 	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
 	kfree(vdev->region);
 	mutex_destroy(&vdev->ioeventfds_lock);
+
+	if (!disable_idle_d3)
+		vfio_pci_set_power_state(vdev, PCI_D0);
+
+	kfree(vdev->pm_save);
 	kfree(vdev);
 
 	if (vfio_pci_is_vga(pdev)) {
@@ -1324,9 +1382,6 @@  static void vfio_pci_remove(struct pci_dev *pdev)
 				VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
 				VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
 	}
-
-	if (!disable_idle_d3)
-		pci_set_power_state(pdev, PCI_D0);
 }
 
 static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -1551,7 +1606,7 @@  static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 			tmp->needs_reset = false;
 
 			if (tmp != vdev && !disable_idle_d3)
-				pci_set_power_state(tmp->pdev, PCI_D3hot);
+				vfio_pci_set_power_state(tmp, PCI_D3hot);
 		}
 
 		vfio_device_put(devs.devices[i]);
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 423ea1f98441..e82b51114687 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -691,7 +691,7 @@  static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos,
 			break;
 		}
 
-		pci_set_power_state(vdev->pdev, state);
+		vfio_pci_set_power_state(vdev, state);
 	}
 
 	return count;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 8c0009f00818..1812cf22fc4f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -114,7 +114,9 @@  struct vfio_pci_device {
 	bool			has_vga;
 	bool			needs_reset;
 	bool			nointx;
+	bool			needs_pm_restore;
 	struct pci_saved_state	*pci_saved_state;
+	struct pci_saved_state	*pm_save;
 	struct vfio_pci_reflck	*reflck;
 	int			refcnt;
 	int			ioeventfds_nr;
@@ -161,6 +163,10 @@  extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
 					unsigned int type, unsigned int subtype,
 					const struct vfio_pci_regops *ops,
 					size_t size, u32 flags, void *data);
+
+extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
+				    pci_power_t state);
+
 #ifdef CONFIG_VFIO_PCI_IGD
 extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
 #else