diff mbox series

[SRU,Bionic,1/1] vfio/pci: Hide broken INTx support from user

Message ID cf1090ddb1583af3df562fd8497610f9b0b7a7c4.1530807600.git.joseph.salisbury@canonical.com
State New
Headers show
Series vfio/pci: Hide broken INTx support from user | expand

Commit Message

Joseph Salisbury July 6, 2018, 6:41 p.m. UTC
From: Alex Williamson <alex.williamson@redhat.com>

BugLink: http://bugs.launchpad.net/bugs/1779830

INTx masking has two components, the first is that we need the ability
to prevent the device from continuing to assert INTx.  This is
provided via the DisINTx bit in the command register and is the only
thing we can really probe for when testing if INTx masking is
supported.  The second component is that the device needs to indicate
if INTx is asserted via the interrupt status bit in the device status
register.  With these two features we can generically determine if one
of the devices we own is asserting INTx, signal the user, and mask the
interrupt while the user services the device.

Generally if one or both of these components is broken we resort to
APIC level interrupt masking, which requires an exclusive interrupt
since we have no way to determine the source of the interrupt in a
shared configuration.  This often makes it difficult or impossible to
configure the system for userspace use of the device, for an interrupt
mode that the user may not need.

One possible configuration of broken INTx masking is that the DisINTx
support is fully functional, but the interrupt status bit never
signals interrupt assertion.  In this case we do have the ability to
prevent the device from asserting INTx, but lack the ability to
identify the interrupt source.  For this case we can simply pretend
that the device lacks INTx support entirely, keeping DisINTx set on
the physical device, virtualizing this bit for the user, and
virtualizing the interrupt pin register to indicate no INTx support.
We already support virtualization of the DisINTx bit and already
virtualize the interrupt pin for platforms without INTx support.  By
tying these components together, setting DisINTx on open and reset,
and identifying devices broken in this particular way, we can provide
support for them w/o the handicap of APIC level INTx masking.

Intel i40e (XL710/X710) 10/20/40GbE NICs have been identified as being
broken in this specific way.  We leave the vfio-pci.nointxmask option
as a mechanism to bypass this support, enabling INTx on the device
with all the requirements of APIC level masking.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: John Ronciak <john.ronciak@intel.com>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
(cherry picked from commit 450744051d201c4d72436ebf5b04b9a06ba2cf30)
Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
---
 drivers/vfio/pci/vfio_pci.c         | 55 ++++++++++++++++++++++++++++++-------
 drivers/vfio/pci/vfio_pci_config.c  |  9 +++++-
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 3 files changed, 54 insertions(+), 11 deletions(-)

Comments

Kleber Sacilotto de Souza July 27, 2018, 9:52 a.m. UTC | #1
On 07/06/18 20:41, Joseph Salisbury wrote:
> From: Alex Williamson <alex.williamson@redhat.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1779830
> 
> INTx masking has two components, the first is that we need the ability
> to prevent the device from continuing to assert INTx.  This is
> provided via the DisINTx bit in the command register and is the only
> thing we can really probe for when testing if INTx masking is
> supported.  The second component is that the device needs to indicate
> if INTx is asserted via the interrupt status bit in the device status
> register.  With these two features we can generically determine if one
> of the devices we own is asserting INTx, signal the user, and mask the
> interrupt while the user services the device.
> 
> Generally if one or both of these components is broken we resort to
> APIC level interrupt masking, which requires an exclusive interrupt
> since we have no way to determine the source of the interrupt in a
> shared configuration.  This often makes it difficult or impossible to
> configure the system for userspace use of the device, for an interrupt
> mode that the user may not need.
> 
> One possible configuration of broken INTx masking is that the DisINTx
> support is fully functional, but the interrupt status bit never
> signals interrupt assertion.  In this case we do have the ability to
> prevent the device from asserting INTx, but lack the ability to
> identify the interrupt source.  For this case we can simply pretend
> that the device lacks INTx support entirely, keeping DisINTx set on
> the physical device, virtualizing this bit for the user, and
> virtualizing the interrupt pin register to indicate no INTx support.
> We already support virtualization of the DisINTx bit and already
> virtualize the interrupt pin for platforms without INTx support.  By
> tying these components together, setting DisINTx on open and reset,
> and identifying devices broken in this particular way, we can provide
> support for them w/o the handicap of APIC level INTx masking.
> 
> Intel i40e (XL710/X710) 10/20/40GbE NICs have been identified as being
> broken in this specific way.  We leave the vfio-pci.nointxmask option
> as a mechanism to bypass this support, enabling INTx on the device
> with all the requirements of APIC level masking.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> Cc: John Ronciak <john.ronciak@intel.com>
> Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
> (cherry picked from commit 450744051d201c4d72436ebf5b04b9a06ba2cf30)
> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
This patch is targeted for Xenial and not Bionic.

Joseph, could you please add the SRU justification to the bug report as
well?

Thanks,
Kleber


Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

> ---
>  drivers/vfio/pci/vfio_pci.c         | 55 ++++++++++++++++++++++++++++++-------
>  drivers/vfio/pci/vfio_pci_config.c  |  9 +++++-
>  drivers/vfio/pci/vfio_pci_private.h |  1 +
>  3 files changed, 54 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index b31b84f..da5108a 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -112,6 +112,35 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
>  
>  static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
>  
> +/*
> + * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> + * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.
> + * If a device implements the former but not the latter we would typically
> + * expect broken_intx_masking be set and require an exclusive interrupt.
> + * However since we do have control of the device's ability to assert INTx,
> + * we can instead pretend that the device does not implement INTx, virtualizing
> + * the pin register to report zero and maintaining DisINTx set on the host.
> + */
> +static bool vfio_pci_nointx(struct pci_dev *pdev)
> +{
> +	switch (pdev->vendor) {
> +	case PCI_VENDOR_ID_INTEL:
> +		switch (pdev->device) {
> +		/* All i40e (XL710/X710) 10/20/40GbE NICs */
> +		case 0x1572:
> +		case 0x1574:
> +		case 0x1580 ... 0x1581:
> +		case 0x1583 ... 0x1589:
> +		case 0x37d0 ... 0x37d2:
> +			return true;
> +		default:
> +			return false;
> +		}
> +	}
> +
> +	return false;
> +}
> +
>  static int vfio_pci_enable(struct vfio_pci_device *vdev)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
> @@ -135,23 +164,29 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
>  		pr_debug("%s: Couldn't store %s saved state\n",
>  			 __func__, dev_name(&pdev->dev));
>  
> -	ret = vfio_config_init(vdev);
> -	if (ret) {
> -		kfree(vdev->pci_saved_state);
> -		vdev->pci_saved_state = NULL;
> -		pci_disable_device(pdev);
> -		return ret;
> +	if (likely(!nointxmask)) {
> +		if (vfio_pci_nointx(pdev)) {
> +			dev_info(&pdev->dev, "Masking broken INTx support\n");
> +			vdev->nointx = true;
> +			pci_intx(pdev, 0);
> +		} else
> +			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
>  	}
>  
> -	if (likely(!nointxmask))
> -		vdev->pci_2_3 = pci_intx_mask_supported(pdev);
> -
>  	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
>  	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
>  		cmd &= ~PCI_COMMAND_INTX_DISABLE;
>  		pci_write_config_word(pdev, PCI_COMMAND, cmd);
>  	}
>  
> +	ret = vfio_config_init(vdev);
> +	if (ret) {
> +		kfree(vdev->pci_saved_state);
> +		vdev->pci_saved_state = NULL;
> +		pci_disable_device(pdev);
> +		return ret;
> +	}
> +
>  	msix_pos = pdev->msix_cap;
>  	if (msix_pos) {
>  		u16 flags;
> @@ -283,7 +318,7 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
>  	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
>  		u8 pin;
>  		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
> -		if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && pin)
> +		if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin)
>  			return 1;
>  
>  	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index c55c632..7356440 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -387,6 +387,7 @@ static void vfio_bar_restore(struct vfio_pci_device *vdev)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
>  	u32 *rbar = vdev->rbar;
> +	u16 cmd;
>  	int i;
>  
>  	if (pdev->is_virtfn)
> @@ -399,6 +400,12 @@ static void vfio_bar_restore(struct vfio_pci_device *vdev)
>  		pci_user_write_config_dword(pdev, i, *rbar);
>  
>  	pci_user_write_config_dword(pdev, PCI_ROM_ADDRESS, *rbar);
> +
> +	if (vdev->nointx) {
> +		pci_user_read_config_word(pdev, PCI_COMMAND, &cmd);
> +		cmd |= PCI_COMMAND_INTX_DISABLE;
> +		pci_user_write_config_word(pdev, PCI_COMMAND, cmd);
> +	}
>  }
>  
>  static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar)
> @@ -1614,7 +1621,7 @@ int vfio_config_init(struct vfio_pci_device *vdev)
>  		*(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device);
>  	}
>  
> -	if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX))
> +	if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
>  		vconfig[PCI_INTERRUPT_PIN] = 0;
>  
>  	ret = vfio_cap_init(vdev);
> diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
> index 0e7394f..ee6a3cf 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -57,6 +57,7 @@ struct vfio_pci_device {
>  	bool			bardirty;
>  	bool			has_vga;
>  	bool			needs_reset;
> +	bool			nointx;
>  	struct pci_saved_state	*pci_saved_state;
>  	int			refcnt;
>  	struct eventfd_ctx	*err_trigger;
>
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b31b84f..da5108a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -112,6 +112,35 @@  static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
 
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 
+/*
+ * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
+ * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.
+ * If a device implements the former but not the latter we would typically
+ * expect broken_intx_masking be set and require an exclusive interrupt.
+ * However since we do have control of the device's ability to assert INTx,
+ * we can instead pretend that the device does not implement INTx, virtualizing
+ * the pin register to report zero and maintaining DisINTx set on the host.
+ */
+static bool vfio_pci_nointx(struct pci_dev *pdev)
+{
+	switch (pdev->vendor) {
+	case PCI_VENDOR_ID_INTEL:
+		switch (pdev->device) {
+		/* All i40e (XL710/X710) 10/20/40GbE NICs */
+		case 0x1572:
+		case 0x1574:
+		case 0x1580 ... 0x1581:
+		case 0x1583 ... 0x1589:
+		case 0x37d0 ... 0x37d2:
+			return true;
+		default:
+			return false;
+		}
+	}
+
+	return false;
+}
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -135,23 +164,29 @@  static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		pr_debug("%s: Couldn't store %s saved state\n",
 			 __func__, dev_name(&pdev->dev));
 
-	ret = vfio_config_init(vdev);
-	if (ret) {
-		kfree(vdev->pci_saved_state);
-		vdev->pci_saved_state = NULL;
-		pci_disable_device(pdev);
-		return ret;
+	if (likely(!nointxmask)) {
+		if (vfio_pci_nointx(pdev)) {
+			dev_info(&pdev->dev, "Masking broken INTx support\n");
+			vdev->nointx = true;
+			pci_intx(pdev, 0);
+		} else
+			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
 	}
 
-	if (likely(!nointxmask))
-		vdev->pci_2_3 = pci_intx_mask_supported(pdev);
-
 	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
 	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
 		cmd &= ~PCI_COMMAND_INTX_DISABLE;
 		pci_write_config_word(pdev, PCI_COMMAND, cmd);
 	}
 
+	ret = vfio_config_init(vdev);
+	if (ret) {
+		kfree(vdev->pci_saved_state);
+		vdev->pci_saved_state = NULL;
+		pci_disable_device(pdev);
+		return ret;
+	}
+
 	msix_pos = pdev->msix_cap;
 	if (msix_pos) {
 		u16 flags;
@@ -283,7 +318,7 @@  static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
 		u8 pin;
 		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-		if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && pin)
+		if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin)
 			return 1;
 
 	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index c55c632..7356440 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -387,6 +387,7 @@  static void vfio_bar_restore(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u32 *rbar = vdev->rbar;
+	u16 cmd;
 	int i;
 
 	if (pdev->is_virtfn)
@@ -399,6 +400,12 @@  static void vfio_bar_restore(struct vfio_pci_device *vdev)
 		pci_user_write_config_dword(pdev, i, *rbar);
 
 	pci_user_write_config_dword(pdev, PCI_ROM_ADDRESS, *rbar);
+
+	if (vdev->nointx) {
+		pci_user_read_config_word(pdev, PCI_COMMAND, &cmd);
+		cmd |= PCI_COMMAND_INTX_DISABLE;
+		pci_user_write_config_word(pdev, PCI_COMMAND, cmd);
+	}
 }
 
 static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar)
@@ -1614,7 +1621,7 @@  int vfio_config_init(struct vfio_pci_device *vdev)
 		*(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device);
 	}
 
-	if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX))
+	if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
 		vconfig[PCI_INTERRUPT_PIN] = 0;
 
 	ret = vfio_cap_init(vdev);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 0e7394f..ee6a3cf 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -57,6 +57,7 @@  struct vfio_pci_device {
 	bool			bardirty;
 	bool			has_vga;
 	bool			needs_reset;
+	bool			nointx;
 	struct pci_saved_state	*pci_saved_state;
 	int			refcnt;
 	struct eventfd_ctx	*err_trigger;