diff mbox

[V10,10/12] powerpc/eeh: Support error recovery for VF PE

Message ID 1445829362-2738-11-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Wei Yang Oct. 26, 2015, 3:16 a.m. UTC
Different from PCI bus dependent PE, PE for VFs doesn't have the
primary bus, on which the PCI hotplug is implemented. The patch
supports error recovery, especially the PCI hotplug for VF's PE.
The hotplug on VF's PE is implemented based on VFs, instead of
PCI bus any more.

[gwshan: changelog and code refactoring]
Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h   |   1 +
 arch/powerpc/kernel/eeh.c        |   8 ++++
 arch/powerpc/kernel/eeh_driver.c | 100 +++++++++++++++++++++++++++++++--------
 arch/powerpc/kernel/eeh_pe.c     |   3 +-
 4 files changed, 90 insertions(+), 22 deletions(-)

Comments

Alexey Kardashevskiy Oct. 30, 2015, 5:20 a.m. UTC | #1
On 10/26/2015 02:16 PM, Wei Yang wrote:
> Different from PCI bus dependent PE, PE for VFs doesn't have the

s/Different from/Unlike/


> primary bus, on which the PCI hotplug is implemented. The patch
> supports error recovery, especially the PCI hotplug for VF's PE.

The patch adds support for error recovery of what exactly?
What is "especially" about?


> The hotplug on VF's PE is implemented based on VFs, instead of
> PCI bus any more.

Needs rephrase.

Is this patch about EEH error recovery, i.e. unplug VF, re-plug VF? Why 
does the commit log talk about PE hotplug? I thought we do VF (i.e. PCI 
device) hotplug, not PE.


>
> [gwshan: changelog and code refactoring]
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/eeh.h   |   1 +
>   arch/powerpc/kernel/eeh.c        |   8 ++++
>   arch/powerpc/kernel/eeh_driver.c | 100 +++++++++++++++++++++++++++++++--------
>   arch/powerpc/kernel/eeh_pe.c     |   3 +-
>   4 files changed, 90 insertions(+), 22 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 331c856..ea1f13c4 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -142,6 +142,7 @@ struct eeh_dev {
>   	struct pci_controller *phb;	/* Associated PHB		*/
>   	struct pci_dn *pdn;		/* Associated PCI device node	*/
>   	struct pci_dev *pdev;		/* Associated PCI device	*/
> +	int    in_error;		/* Error flag for eeh_dev	*/

Make it "bool".


>   	struct pci_dev *physfn;		/* Associated PF PORT		*/
>   	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
>   };
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index af9b597..28e4d73 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1227,6 +1227,14 @@ void eeh_remove_device(struct pci_dev *dev)
>   	 * from the parent PE during the BAR resotre.
>   	 */
>   	edev->pdev = NULL;
> +
> +	/*
> +	 * The flag "in_error" is used to trace EEH devices for VFs
> +	 * in error state or not. It's set in eeh_report_error(). If
> +	 * it's not set, eeh_report_{reset,resume}() won't be called
> +	 * for the VF EEH device.
> +	 */
> +	edev->in_error = 0;
>   	dev->dev.archdata.edev = NULL;
>   	if (!(edev->pe->state & EEH_PE_KEEP))
>   		eeh_rmv_from_parent_pe(edev);
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 89eb4bc..99868e2 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
>   	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>   	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>
> +	edev->in_error = 1;
>   	eeh_pcid_put(dev);
>   	return NULL;
>   }
> @@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>
>   	if (!driver->err_handler ||
>   	    !driver->err_handler->slot_reset ||
> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
> +	    (edev->mode & EEH_DEV_NO_HANDLER) ||
> +	    (!edev->in_error)) {
>   		eeh_pcid_put(dev);
>   		return NULL;
>   	}
> @@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata)
>

bood was_in_error = edev->in_error;
edev->in_error = false;

then use was_in_error below and there is no need to replace return with 
goto, etc -> slightly simpler code.


>   	if (!driver->err_handler ||
>   	    !driver->err_handler->resume ||
> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
> +	    (edev->mode & EEH_DEV_NO_HANDLER) ||
> +	    (!edev->in_error)) {
>   		edev->mode &= ~EEH_DEV_NO_HANDLER;
> -		eeh_pcid_put(dev);
> -		return NULL;
> +		goto out;
>   	}
>
>   	driver->err_handler->resume(dev);
>
> +out:
> +	edev->in_error = 0;
>   	eeh_pcid_put(dev);
>   	return NULL;
>   }
> @@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void *userdata)
>   	return NULL;
>   }
>
> +static void *eeh_add_virt_device(void *data, void *userdata)
> +{
> +	struct pci_driver *driver;
> +	struct eeh_dev *edev = (struct eeh_dev *)data;
> +	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> +
> +	if (!(edev->physfn)) {
> +		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
> +			__func__, edev->phb->global_number, pdn->busno,
> +			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
> +		return NULL;
> +	}
> +
> +	driver = eeh_pcid_get(dev);
> +	if (driver) {
> +		eeh_pcid_put(dev);
> +		if (driver->err_handler)
> +			return NULL;
> +	}
> +
> +	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
> +	return NULL;
> +}
> +
>   static void *eeh_rmv_device(void *data, void *userdata)
>   {
>   	struct pci_driver *driver;
>   	struct eeh_dev *edev = (struct eeh_dev *)data;
>   	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>   	int *removed = (int *)userdata;
> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>
>   	/*
>   	 * Actually, we should remove the PCI bridges as well.
> @@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>   	driver = eeh_pcid_get(dev);
>   	if (driver) {
>   		eeh_pcid_put(dev);
> -		if (driver->err_handler)
> +		if (removed && driver->err_handler)
>   			return NULL;
>   	}
>
> @@ -425,11 +455,23 @@ static void *eeh_rmv_device(void *data, void *userdata)
>   		 pci_name(dev));
>   	edev->bus = dev->bus;
>   	edev->mode |= EEH_DEV_DISCONNECTED;
> -	(*removed)++;
> +	if (removed)
> +		(*removed)++;
>
> -	pci_lock_rescan_remove();
> -	pci_stop_and_remove_bus_device(dev);
> -	pci_unlock_rescan_remove();
> +	if (edev->physfn) {
> +		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
> +		edev->pdev = NULL;
> +
> +		/*
> +		 * We have to set the VF PE number to invalid one, which is
> +		 * required to plug the VF successfully.
> +		 */
> +		pdn->pe_number = IODA_INVALID_PE;
> +	} else {
> +		pci_lock_rescan_remove();
> +		pci_stop_and_remove_bus_device(dev);
> +		pci_unlock_rescan_remove();
> +	}
>
>   	return NULL;
>   }
> @@ -548,6 +590,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>   	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>   	struct timeval tstamp;
>   	int cnt, rc, removed = 0;
> +	struct eeh_dev *edev;
>
>   	/* pcibios will clear the counter; save the value */
>   	cnt = pe->freeze_count;
> @@ -561,12 +604,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>   	 */
>   	eeh_pe_state_mark(pe, EEH_PE_KEEP);
>   	if (bus) {
> -		pci_lock_rescan_remove();
> -		pcibios_remove_pci_devices(bus);
> -		pci_unlock_rescan_remove();
> -	} else if (frozen_bus) {
> +		if (pe->type & EEH_PE_VF)
> +			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);


I believe the rule is that if one branch of "if" uses curly braces, then 
the other should have them too.


> +		else {
> +			pci_lock_rescan_remove();
> +			pcibios_remove_pci_devices(bus);
> +			pci_unlock_rescan_remove();
> +		}
> +	} else if (frozen_bus)
>   		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
> -	}
>
>   	/*
>   	 * Reset the pci controller. (Asserts RST#; resets config space).
> @@ -607,14 +653,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>   		 * PE. We should disconnect it so the binding can be
>   		 * rebuilt when adding PCI devices.
>   		 */
> +		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>   		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> -		pcibios_add_pci_devices(bus);
> +		if (pe->type & EEH_PE_VF)

Move "edev = list_first_entry(&pe->edevs, struct eeh_dev, list)" here. You 
could actually do:

eeh_add_virt_device(list_first_entry(&pe->edevs, struct eeh_dev, list), NULL);

and drop local variable @edev. Or move it to this scope. Dunno.


> +			eeh_add_virt_device(edev, NULL);
> +		else
> +			pcibios_add_pci_devices(bus);
>   	} else if (frozen_bus && removed) {
>   		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>   		ssleep(5);
>
> +		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>   		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> -		pcibios_add_pci_devices(frozen_bus);
> +		if (pe->type & EEH_PE_VF)


The same comment as above.

> +			eeh_add_virt_device(edev, NULL);
> +		else
> +			pcibios_add_pci_devices(frozen_bus);
>   	}
>   	eeh_pe_state_clear(pe, EEH_PE_KEEP);
>
> @@ -792,11 +846,15 @@ perm_error:
>   	 * the their PCI config any more.
>   	 */
>   	if (frozen_bus) {
> -		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> -
> -		pci_lock_rescan_remove();
> -		pcibios_remove_pci_devices(frozen_bus);
> -		pci_unlock_rescan_remove();
> +		if (pe->type & EEH_PE_VF) {
> +			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> +			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> +		} else {
> +			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> +			pci_lock_rescan_remove();
> +			pcibios_remove_pci_devices(frozen_bus);
> +			pci_unlock_rescan_remove();
> +		}
>   	}
>   }
>
> diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
> index 260a701..5cde950 100644
> --- a/arch/powerpc/kernel/eeh_pe.c
> +++ b/arch/powerpc/kernel/eeh_pe.c
> @@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>   	if (pe->type & EEH_PE_PHB) {
>   		bus = pe->phb->bus;
>   	} else if (pe->type & EEH_PE_BUS ||
> -		   pe->type & EEH_PE_DEVICE) {
> +		   pe->type & EEH_PE_DEVICE ||
> +		   pe->type & EEH_PE_VF) {
>   		if (pe->bus) {
>   			bus = pe->bus;
>   			goto out;
>
Wei Yang Nov. 1, 2015, 1:53 a.m. UTC | #2
On Fri, Oct 30, 2015 at 04:20:48PM +1100, Alexey Kardashevskiy wrote:
>On 10/26/2015 02:16 PM, Wei Yang wrote:
>>Different from PCI bus dependent PE, PE for VFs doesn't have the
>
>s/Different from/Unlike/
>

Will change in next version.

>
>>primary bus, on which the PCI hotplug is implemented. The patch
>>supports error recovery, especially the PCI hotplug for VF's PE.
>
>The patch adds support for error recovery of what exactly?
>What is "especially" about?
>

PFs are enumerated on PCI bus, while VFs are created by PF's driver.

In EEH recovery, it has two cases.
1. Device and driver is EEH aware, error handlers are called.
2. Device and driver is not EEH aware, un-plug the device and plug it again by
   enumerating it.

The special thing happens on the second case. For a PF, we could use the
original pci core to enumerate the bus, while for VF, we need to record the VF
which are un-plugged then plug it again.

>
>>The hotplug on VF's PE is implemented based on VFs, instead of
>>PCI bus any more.
>
>Needs rephrase.
>
>Is this patch about EEH error recovery, i.e. unplug VF, re-plug VF? Why does
>the commit log talk about PE hotplug? I thought we do VF (i.e. PCI device)
>hotplug, not PE.
>

Hmm... unlike the Bus PE for PFs, VF PE is dynamically created and released
when VFs are created and released.

>
>>
>>[gwshan: changelog and code refactoring]
>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>---
>>  arch/powerpc/include/asm/eeh.h   |   1 +
>>  arch/powerpc/kernel/eeh.c        |   8 ++++
>>  arch/powerpc/kernel/eeh_driver.c | 100 +++++++++++++++++++++++++++++++--------
>>  arch/powerpc/kernel/eeh_pe.c     |   3 +-
>>  4 files changed, 90 insertions(+), 22 deletions(-)
>>
>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>index 331c856..ea1f13c4 100644
>>--- a/arch/powerpc/include/asm/eeh.h
>>+++ b/arch/powerpc/include/asm/eeh.h
>>@@ -142,6 +142,7 @@ struct eeh_dev {
>>  	struct pci_controller *phb;	/* Associated PHB		*/
>>  	struct pci_dn *pdn;		/* Associated PCI device node	*/
>>  	struct pci_dev *pdev;		/* Associated PCI device	*/
>>+	int    in_error;		/* Error flag for eeh_dev	*/
>
>Make it "bool".
>

Will change it in next version.

>
>>  	struct pci_dev *physfn;		/* Associated PF PORT		*/
>>  	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
>>  };
>>diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>index af9b597..28e4d73 100644
>>--- a/arch/powerpc/kernel/eeh.c
>>+++ b/arch/powerpc/kernel/eeh.c
>>@@ -1227,6 +1227,14 @@ void eeh_remove_device(struct pci_dev *dev)
>>  	 * from the parent PE during the BAR resotre.
>>  	 */
>>  	edev->pdev = NULL;
>>+
>>+	/*
>>+	 * The flag "in_error" is used to trace EEH devices for VFs
>>+	 * in error state or not. It's set in eeh_report_error(). If
>>+	 * it's not set, eeh_report_{reset,resume}() won't be called
>>+	 * for the VF EEH device.
>>+	 */
>>+	edev->in_error = 0;
>>  	dev->dev.archdata.edev = NULL;
>>  	if (!(edev->pe->state & EEH_PE_KEEP))
>>  		eeh_rmv_from_parent_pe(edev);
>>diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
>>index 89eb4bc..99868e2 100644
>>--- a/arch/powerpc/kernel/eeh_driver.c
>>+++ b/arch/powerpc/kernel/eeh_driver.c
>>@@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
>>  	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>>
>>+	edev->in_error = 1;
>>  	eeh_pcid_put(dev);
>>  	return NULL;
>>  }
>>@@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>>
>>  	if (!driver->err_handler ||
>>  	    !driver->err_handler->slot_reset ||
>>-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>+	    (!edev->in_error)) {
>>  		eeh_pcid_put(dev);
>>  		return NULL;
>>  	}
>>@@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata)
>>
>
>bood was_in_error = edev->in_error;
>edev->in_error = false;
>
>then use was_in_error below and there is no need to replace return with goto,
>etc -> slightly simpler code.
>

Will change it in next version.

>
>>  	if (!driver->err_handler ||
>>  	    !driver->err_handler->resume ||
>>-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>+	    (!edev->in_error)) {
>>  		edev->mode &= ~EEH_DEV_NO_HANDLER;
>>-		eeh_pcid_put(dev);
>>-		return NULL;
>>+		goto out;
>>  	}
>>
>>  	driver->err_handler->resume(dev);
>>
>>+out:
>>+	edev->in_error = 0;
>>  	eeh_pcid_put(dev);
>>  	return NULL;
>>  }
>>@@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void *userdata)
>>  	return NULL;
>>  }
>>
>>+static void *eeh_add_virt_device(void *data, void *userdata)
>>+{
>>+	struct pci_driver *driver;
>>+	struct eeh_dev *edev = (struct eeh_dev *)data;
>>+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>+
>>+	if (!(edev->physfn)) {
>>+		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
>>+			__func__, edev->phb->global_number, pdn->busno,
>>+			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>+		return NULL;
>>+	}
>>+
>>+	driver = eeh_pcid_get(dev);
>>+	if (driver) {
>>+		eeh_pcid_put(dev);
>>+		if (driver->err_handler)
>>+			return NULL;
>>+	}
>>+
>>+	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
>>+	return NULL;
>>+}
>>+
>>  static void *eeh_rmv_device(void *data, void *userdata)
>>  {
>>  	struct pci_driver *driver;
>>  	struct eeh_dev *edev = (struct eeh_dev *)data;
>>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>  	int *removed = (int *)userdata;
>>+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>
>>  	/*
>>  	 * Actually, we should remove the PCI bridges as well.
>>@@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>  	driver = eeh_pcid_get(dev);
>>  	if (driver) {
>>  		eeh_pcid_put(dev);
>>-		if (driver->err_handler)
>>+		if (removed && driver->err_handler)
>>  			return NULL;
>>  	}
>>
>>@@ -425,11 +455,23 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>  		 pci_name(dev));
>>  	edev->bus = dev->bus;
>>  	edev->mode |= EEH_DEV_DISCONNECTED;
>>-	(*removed)++;
>>+	if (removed)
>>+		(*removed)++;
>>
>>-	pci_lock_rescan_remove();
>>-	pci_stop_and_remove_bus_device(dev);
>>-	pci_unlock_rescan_remove();
>>+	if (edev->physfn) {
>>+		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
>>+		edev->pdev = NULL;
>>+
>>+		/*
>>+		 * We have to set the VF PE number to invalid one, which is
>>+		 * required to plug the VF successfully.
>>+		 */
>>+		pdn->pe_number = IODA_INVALID_PE;
>>+	} else {
>>+		pci_lock_rescan_remove();
>>+		pci_stop_and_remove_bus_device(dev);
>>+		pci_unlock_rescan_remove();
>>+	}
>>
>>  	return NULL;
>>  }
>>@@ -548,6 +590,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>  	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>>  	struct timeval tstamp;
>>  	int cnt, rc, removed = 0;
>>+	struct eeh_dev *edev;
>>
>>  	/* pcibios will clear the counter; save the value */
>>  	cnt = pe->freeze_count;
>>@@ -561,12 +604,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>  	 */
>>  	eeh_pe_state_mark(pe, EEH_PE_KEEP);
>>  	if (bus) {
>>-		pci_lock_rescan_remove();
>>-		pcibios_remove_pci_devices(bus);
>>-		pci_unlock_rescan_remove();
>>-	} else if (frozen_bus) {
>>+		if (pe->type & EEH_PE_VF)
>>+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>
>
>I believe the rule is that if one branch of "if" uses curly braces, then the
>other should have them too.
>

Thanks for reminding, will fix it in next version.

>
>>+		else {
>>+			pci_lock_rescan_remove();
>>+			pcibios_remove_pci_devices(bus);
>>+			pci_unlock_rescan_remove();
>>+		}
>>+	} else if (frozen_bus)
>>  		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
>>-	}
>>
>>  	/*
>>  	 * Reset the pci controller. (Asserts RST#; resets config space).
>>@@ -607,14 +653,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>  		 * PE. We should disconnect it so the binding can be
>>  		 * rebuilt when adding PCI devices.
>>  		 */
>>+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>  		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>-		pcibios_add_pci_devices(bus);
>>+		if (pe->type & EEH_PE_VF)
>
>Move "edev = list_first_entry(&pe->edevs, struct eeh_dev, list)" here. You
>could actually do:
>
>eeh_add_virt_device(list_first_entry(&pe->edevs, struct eeh_dev, list), NULL);
>
>and drop local variable @edev. Or move it to this scope. Dunno.
>

Hmm... as I know, in eeh_pe_detach_dev() will remove the edev from pe's edevs
list. 

>
>>+			eeh_add_virt_device(edev, NULL);
>>+		else
>>+			pcibios_add_pci_devices(bus);
>>  	} else if (frozen_bus && removed) {
>>  		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>>  		ssleep(5);
>>
>>+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>  		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>-		pcibios_add_pci_devices(frozen_bus);
>>+		if (pe->type & EEH_PE_VF)
>
>
>The same comment as above.
>
>>+			eeh_add_virt_device(edev, NULL);
>>+		else
>>+			pcibios_add_pci_devices(frozen_bus);
>>  	}
>>  	eeh_pe_state_clear(pe, EEH_PE_KEEP);
>>
>>@@ -792,11 +846,15 @@ perm_error:
>>  	 * the their PCI config any more.
>>  	 */
>>  	if (frozen_bus) {
>>-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>-
>>-		pci_lock_rescan_remove();
>>-		pcibios_remove_pci_devices(frozen_bus);
>>-		pci_unlock_rescan_remove();
>>+		if (pe->type & EEH_PE_VF) {
>>+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>+		} else {
>>+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>+			pci_lock_rescan_remove();
>>+			pcibios_remove_pci_devices(frozen_bus);
>>+			pci_unlock_rescan_remove();
>>+		}
>>  	}
>>  }
>>
>>diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
>>index 260a701..5cde950 100644
>>--- a/arch/powerpc/kernel/eeh_pe.c
>>+++ b/arch/powerpc/kernel/eeh_pe.c
>>@@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>>  	if (pe->type & EEH_PE_PHB) {
>>  		bus = pe->phb->bus;
>>  	} else if (pe->type & EEH_PE_BUS ||
>>-		   pe->type & EEH_PE_DEVICE) {
>>+		   pe->type & EEH_PE_DEVICE ||
>>+		   pe->type & EEH_PE_VF) {
>>  		if (pe->bus) {
>>  			bus = pe->bus;
>>  			goto out;
>>
>
>
>-- 
>Alexey
>--
>To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy Nov. 1, 2015, 11:40 p.m. UTC | #3
On 11/01/2015 12:53 PM, Wei Yang wrote:
> On Fri, Oct 30, 2015 at 04:20:48PM +1100, Alexey Kardashevskiy wrote:
>> On 10/26/2015 02:16 PM, Wei Yang wrote:
>>> Different from PCI bus dependent PE, PE for VFs doesn't have the
>>
>> s/Different from/Unlike/
>>
>
> Will change in next version.
>
>>
>>> primary bus, on which the PCI hotplug is implemented. The patch
>>> supports error recovery, especially the PCI hotplug for VF's PE.
>>
>> The patch adds support for error recovery of what exactly?
>> What is "especially" about?
>>
>
> PFs are enumerated on PCI bus, while VFs are created by PF's driver.
>
> In EEH recovery, it has two cases.
> 1. Device and driver is EEH aware, error handlers are called.
> 2. Device and driver is not EEH aware, un-plug the device and plug it again by
>     enumerating it.
>
> The special thing happens on the second case. For a PF, we could use the
> original pci core to enumerate the bus, while for VF, we need to record the VF
> which are un-plugged then plug it again.


Right. This should have been the actual commit log.


>>
>>> The hotplug on VF's PE is implemented based on VFs, instead of
>>> PCI bus any more.
>>
>> Needs rephrase.
>>
>> Is this patch about EEH error recovery, i.e. unplug VF, re-plug VF? Why does
>> the commit log talk about PE hotplug? I thought we do VF (i.e. PCI device)
>> hotplug, not PE.
>>
>
> Hmm... unlike the Bus PE for PFs, VF PE is dynamically created and released
> when VFs are created and released.


Sure. PEs are created/released, not plugged/unplugged (VFs are), that was 
my point.


>
>>
>>>
>>> [gwshan: changelog and code refactoring]
>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>> Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>> ---
>>>   arch/powerpc/include/asm/eeh.h   |   1 +
>>>   arch/powerpc/kernel/eeh.c        |   8 ++++
>>>   arch/powerpc/kernel/eeh_driver.c | 100 +++++++++++++++++++++++++++++++--------
>>>   arch/powerpc/kernel/eeh_pe.c     |   3 +-
>>>   4 files changed, 90 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>> index 331c856..ea1f13c4 100644
>>> --- a/arch/powerpc/include/asm/eeh.h
>>> +++ b/arch/powerpc/include/asm/eeh.h
>>> @@ -142,6 +142,7 @@ struct eeh_dev {
>>>   	struct pci_controller *phb;	/* Associated PHB		*/
>>>   	struct pci_dn *pdn;		/* Associated PCI device node	*/
>>>   	struct pci_dev *pdev;		/* Associated PCI device	*/
>>> +	int    in_error;		/* Error flag for eeh_dev	*/
>>
>> Make it "bool".
>>
>
> Will change it in next version.
>
>>
>>>   	struct pci_dev *physfn;		/* Associated PF PORT		*/
>>>   	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
>>>   };
>>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>> index af9b597..28e4d73 100644
>>> --- a/arch/powerpc/kernel/eeh.c
>>> +++ b/arch/powerpc/kernel/eeh.c
>>> @@ -1227,6 +1227,14 @@ void eeh_remove_device(struct pci_dev *dev)
>>>   	 * from the parent PE during the BAR resotre.
>>>   	 */
>>>   	edev->pdev = NULL;
>>> +
>>> +	/*
>>> +	 * The flag "in_error" is used to trace EEH devices for VFs
>>> +	 * in error state or not. It's set in eeh_report_error(). If
>>> +	 * it's not set, eeh_report_{reset,resume}() won't be called
>>> +	 * for the VF EEH device.
>>> +	 */
>>> +	edev->in_error = 0;
>>>   	dev->dev.archdata.edev = NULL;
>>>   	if (!(edev->pe->state & EEH_PE_KEEP))
>>>   		eeh_rmv_from_parent_pe(edev);
>>> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
>>> index 89eb4bc..99868e2 100644
>>> --- a/arch/powerpc/kernel/eeh_driver.c
>>> +++ b/arch/powerpc/kernel/eeh_driver.c
>>> @@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
>>>   	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>>>   	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>>>
>>> +	edev->in_error = 1;
>>>   	eeh_pcid_put(dev);
>>>   	return NULL;
>>>   }
>>> @@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>>>
>>>   	if (!driver->err_handler ||
>>>   	    !driver->err_handler->slot_reset ||
>>> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>> +	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>> +	    (!edev->in_error)) {
>>>   		eeh_pcid_put(dev);
>>>   		return NULL;
>>>   	}
>>> @@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata)
>>>
>>
>> bood was_in_error = edev->in_error;
>> edev->in_error = false;
>>
>> then use was_in_error below and there is no need to replace return with goto,
>> etc -> slightly simpler code.
>>
>
> Will change it in next version.
>
>>
>>>   	if (!driver->err_handler ||
>>>   	    !driver->err_handler->resume ||
>>> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>> +	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>> +	    (!edev->in_error)) {
>>>   		edev->mode &= ~EEH_DEV_NO_HANDLER;
>>> -		eeh_pcid_put(dev);
>>> -		return NULL;
>>> +		goto out;
>>>   	}
>>>
>>>   	driver->err_handler->resume(dev);
>>>
>>> +out:
>>> +	edev->in_error = 0;
>>>   	eeh_pcid_put(dev);
>>>   	return NULL;
>>>   }
>>> @@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void *userdata)
>>>   	return NULL;
>>>   }
>>>
>>> +static void *eeh_add_virt_device(void *data, void *userdata)
>>> +{
>>> +	struct pci_driver *driver;
>>> +	struct eeh_dev *edev = (struct eeh_dev *)data;
>>> +	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>> +
>>> +	if (!(edev->physfn)) {
>>> +		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
>>> +			__func__, edev->phb->global_number, pdn->busno,
>>> +			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>> +		return NULL;
>>> +	}
>>> +
>>> +	driver = eeh_pcid_get(dev);
>>> +	if (driver) {
>>> +		eeh_pcid_put(dev);
>>> +		if (driver->err_handler)
>>> +			return NULL;
>>> +	}
>>> +
>>> +	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
>>> +	return NULL;
>>> +}
>>> +
>>>   static void *eeh_rmv_device(void *data, void *userdata)
>>>   {
>>>   	struct pci_driver *driver;
>>>   	struct eeh_dev *edev = (struct eeh_dev *)data;
>>>   	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>>   	int *removed = (int *)userdata;
>>> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>>
>>>   	/*
>>>   	 * Actually, we should remove the PCI bridges as well.
>>> @@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>>   	driver = eeh_pcid_get(dev);
>>>   	if (driver) {
>>>   		eeh_pcid_put(dev);
>>> -		if (driver->err_handler)
>>> +		if (removed && driver->err_handler)
>>>   			return NULL;
>>>   	}
>>>
>>> @@ -425,11 +455,23 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>>   		 pci_name(dev));
>>>   	edev->bus = dev->bus;
>>>   	edev->mode |= EEH_DEV_DISCONNECTED;
>>> -	(*removed)++;
>>> +	if (removed)
>>> +		(*removed)++;
>>>
>>> -	pci_lock_rescan_remove();
>>> -	pci_stop_and_remove_bus_device(dev);
>>> -	pci_unlock_rescan_remove();
>>> +	if (edev->physfn) {
>>> +		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
>>> +		edev->pdev = NULL;
>>> +
>>> +		/*
>>> +		 * We have to set the VF PE number to invalid one, which is
>>> +		 * required to plug the VF successfully.
>>> +		 */
>>> +		pdn->pe_number = IODA_INVALID_PE;
>>> +	} else {
>>> +		pci_lock_rescan_remove();
>>> +		pci_stop_and_remove_bus_device(dev);
>>> +		pci_unlock_rescan_remove();
>>> +	}
>>>
>>>   	return NULL;
>>>   }
>>> @@ -548,6 +590,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>>   	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>>>   	struct timeval tstamp;
>>>   	int cnt, rc, removed = 0;
>>> +	struct eeh_dev *edev;
>>>
>>>   	/* pcibios will clear the counter; save the value */
>>>   	cnt = pe->freeze_count;
>>> @@ -561,12 +604,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>>   	 */
>>>   	eeh_pe_state_mark(pe, EEH_PE_KEEP);
>>>   	if (bus) {
>>> -		pci_lock_rescan_remove();
>>> -		pcibios_remove_pci_devices(bus);
>>> -		pci_unlock_rescan_remove();
>>> -	} else if (frozen_bus) {
>>> +		if (pe->type & EEH_PE_VF)
>>> +			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>
>>
>> I believe the rule is that if one branch of "if" uses curly braces, then the
>> other should have them too.
>>
>
> Thanks for reminding, will fix it in next version.


I thought checkpatch.pl checks for it but apparently it does not.



>>
>>> +		else {
>>> +			pci_lock_rescan_remove();
>>> +			pcibios_remove_pci_devices(bus);
>>> +			pci_unlock_rescan_remove();
>>> +		}
>>> +	} else if (frozen_bus)
>>>   		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
>>> -	}
>>>
>>>   	/*
>>>   	 * Reset the pci controller. (Asserts RST#; resets config space).
>>> @@ -607,14 +653,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>>   		 * PE. We should disconnect it so the binding can be
>>>   		 * rebuilt when adding PCI devices.
>>>   		 */
>>> +		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>>   		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>> -		pcibios_add_pci_devices(bus);
>>> +		if (pe->type & EEH_PE_VF)
>>
>> Move "edev = list_first_entry(&pe->edevs, struct eeh_dev, list)" here. You
>> could actually do:
>>
>> eeh_add_virt_device(list_first_entry(&pe->edevs, struct eeh_dev, list), NULL);
>>
>> and drop local variable @edev. Or move it to this scope. Dunno.
>>
>
> Hmm... as I know, in eeh_pe_detach_dev() will remove the edev from pe's edevs
> list.
>
>>
>>> +			eeh_add_virt_device(edev, NULL);
>>> +		else
>>> +			pcibios_add_pci_devices(bus);
>>>   	} else if (frozen_bus && removed) {
>>>   		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>>>   		ssleep(5);
>>>
>>> +		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>>   		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>> -		pcibios_add_pci_devices(frozen_bus);
>>> +		if (pe->type & EEH_PE_VF)
>>
>>
>> The same comment as above.
>>
>>> +			eeh_add_virt_device(edev, NULL);
>>> +		else
>>> +			pcibios_add_pci_devices(frozen_bus);
>>>   	}
>>>   	eeh_pe_state_clear(pe, EEH_PE_KEEP);
>>>
>>> @@ -792,11 +846,15 @@ perm_error:
>>>   	 * the their PCI config any more.
>>>   	 */
>>>   	if (frozen_bus) {
>>> -		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>> -
>>> -		pci_lock_rescan_remove();
>>> -		pcibios_remove_pci_devices(frozen_bus);
>>> -		pci_unlock_rescan_remove();
>>> +		if (pe->type & EEH_PE_VF) {
>>> +			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>> +			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>> +		} else {
>>> +			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>> +			pci_lock_rescan_remove();
>>> +			pcibios_remove_pci_devices(frozen_bus);
>>> +			pci_unlock_rescan_remove();
>>> +		}
>>>   	}
>>>   }
>>>
>>> diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
>>> index 260a701..5cde950 100644
>>> --- a/arch/powerpc/kernel/eeh_pe.c
>>> +++ b/arch/powerpc/kernel/eeh_pe.c
>>> @@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>>>   	if (pe->type & EEH_PE_PHB) {
>>>   		bus = pe->phb->bus;
>>>   	} else if (pe->type & EEH_PE_BUS ||
>>> -		   pe->type & EEH_PE_DEVICE) {
>>> +		   pe->type & EEH_PE_DEVICE ||
>>> +		   pe->type & EEH_PE_VF) {
>>>   		if (pe->bus) {
>>>   			bus = pe->bus;
>>>   			goto out;
>>>
Wei Yang Nov. 2, 2015, 9:39 a.m. UTC | #4
On Mon, Nov 02, 2015 at 10:40:36AM +1100, Alexey Kardashevskiy wrote:
>On 11/01/2015 12:53 PM, Wei Yang wrote:
>>On Fri, Oct 30, 2015 at 04:20:48PM +1100, Alexey Kardashevskiy wrote:
>>>On 10/26/2015 02:16 PM, Wei Yang wrote:
>>>>Different from PCI bus dependent PE, PE for VFs doesn't have the
>>>
>>>s/Different from/Unlike/
>>>
>>
>>Will change in next version.
>>
>>>
>>>>primary bus, on which the PCI hotplug is implemented. The patch
>>>>supports error recovery, especially the PCI hotplug for VF's PE.
>>>
>>>The patch adds support for error recovery of what exactly?
>>>What is "especially" about?
>>>
>>
>>PFs are enumerated on PCI bus, while VFs are created by PF's driver.
>>
>>In EEH recovery, it has two cases.
>>1. Device and driver is EEH aware, error handlers are called.
>>2. Device and driver is not EEH aware, un-plug the device and plug it again by
>>    enumerating it.
>>
>>The special thing happens on the second case. For a PF, we could use the
>>original pci core to enumerate the bus, while for VF, we need to record the VF
>>which are un-plugged then plug it again.
>
>
>Right. This should have been the actual commit log.
>
>
>>>
>>>>The hotplug on VF's PE is implemented based on VFs, instead of
>>>>PCI bus any more.
>>>
>>>Needs rephrase.
>>>
>>>Is this patch about EEH error recovery, i.e. unplug VF, re-plug VF? Why does
>>>the commit log talk about PE hotplug? I thought we do VF (i.e. PCI device)
>>>hotplug, not PE.
>>>
>>
>>Hmm... unlike the Bus PE for PFs, VF PE is dynamically created and released
>>when VFs are created and released.
>
>
>Sure. PEs are created/released, not plugged/unplugged (VFs are), that was my
>point.
>

Thanks for the suggestion, will change it in next version.

>
>>
>>>
>>>>
>>>>[gwshan: changelog and code refactoring]
>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>---
>>>>  arch/powerpc/include/asm/eeh.h   |   1 +
>>>>  arch/powerpc/kernel/eeh.c        |   8 ++++
>>>>  arch/powerpc/kernel/eeh_driver.c | 100 +++++++++++++++++++++++++++++++--------
>>>>  arch/powerpc/kernel/eeh_pe.c     |   3 +-
>>>>  4 files changed, 90 insertions(+), 22 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>>>index 331c856..ea1f13c4 100644
>>>>--- a/arch/powerpc/include/asm/eeh.h
>>>>+++ b/arch/powerpc/include/asm/eeh.h
>>>>@@ -142,6 +142,7 @@ struct eeh_dev {
>>>>  	struct pci_controller *phb;	/* Associated PHB		*/
>>>>  	struct pci_dn *pdn;		/* Associated PCI device node	*/
>>>>  	struct pci_dev *pdev;		/* Associated PCI device	*/
>>>>+	int    in_error;		/* Error flag for eeh_dev	*/
>>>
>>>Make it "bool".
>>>
>>
>>Will change it in next version.
>>
>>>
>>>>  	struct pci_dev *physfn;		/* Associated PF PORT		*/
>>>>  	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
>>>>  };
>>>>diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>>>index af9b597..28e4d73 100644
>>>>--- a/arch/powerpc/kernel/eeh.c
>>>>+++ b/arch/powerpc/kernel/eeh.c
>>>>@@ -1227,6 +1227,14 @@ void eeh_remove_device(struct pci_dev *dev)
>>>>  	 * from the parent PE during the BAR resotre.
>>>>  	 */
>>>>  	edev->pdev = NULL;
>>>>+
>>>>+	/*
>>>>+	 * The flag "in_error" is used to trace EEH devices for VFs
>>>>+	 * in error state or not. It's set in eeh_report_error(). If
>>>>+	 * it's not set, eeh_report_{reset,resume}() won't be called
>>>>+	 * for the VF EEH device.
>>>>+	 */
>>>>+	edev->in_error = 0;
>>>>  	dev->dev.archdata.edev = NULL;
>>>>  	if (!(edev->pe->state & EEH_PE_KEEP))
>>>>  		eeh_rmv_from_parent_pe(edev);
>>>>diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
>>>>index 89eb4bc..99868e2 100644
>>>>--- a/arch/powerpc/kernel/eeh_driver.c
>>>>+++ b/arch/powerpc/kernel/eeh_driver.c
>>>>@@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
>>>>  	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>>>>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>>>>
>>>>+	edev->in_error = 1;
>>>>  	eeh_pcid_put(dev);
>>>>  	return NULL;
>>>>  }
>>>>@@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>>>>
>>>>  	if (!driver->err_handler ||
>>>>  	    !driver->err_handler->slot_reset ||
>>>>-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>>>+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>>>+	    (!edev->in_error)) {
>>>>  		eeh_pcid_put(dev);
>>>>  		return NULL;
>>>>  	}
>>>>@@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata)
>>>>
>>>
>>>bood was_in_error = edev->in_error;
>>>edev->in_error = false;
>>>
>>>then use was_in_error below and there is no need to replace return with goto,
>>>etc -> slightly simpler code.
>>>
>>
>>Will change it in next version.
>>
>>>
>>>>  	if (!driver->err_handler ||
>>>>  	    !driver->err_handler->resume ||
>>>>-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>>>+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>>>+	    (!edev->in_error)) {
>>>>  		edev->mode &= ~EEH_DEV_NO_HANDLER;
>>>>-		eeh_pcid_put(dev);
>>>>-		return NULL;
>>>>+		goto out;
>>>>  	}
>>>>
>>>>  	driver->err_handler->resume(dev);
>>>>
>>>>+out:
>>>>+	edev->in_error = 0;
>>>>  	eeh_pcid_put(dev);
>>>>  	return NULL;
>>>>  }
>>>>@@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void *userdata)
>>>>  	return NULL;
>>>>  }
>>>>
>>>>+static void *eeh_add_virt_device(void *data, void *userdata)
>>>>+{
>>>>+	struct pci_driver *driver;
>>>>+	struct eeh_dev *edev = (struct eeh_dev *)data;
>>>>+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>>>+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>>>+
>>>>+	if (!(edev->physfn)) {
>>>>+		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
>>>>+			__func__, edev->phb->global_number, pdn->busno,
>>>>+			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>>>+		return NULL;
>>>>+	}
>>>>+
>>>>+	driver = eeh_pcid_get(dev);
>>>>+	if (driver) {
>>>>+		eeh_pcid_put(dev);
>>>>+		if (driver->err_handler)
>>>>+			return NULL;
>>>>+	}
>>>>+
>>>>+	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
>>>>+	return NULL;
>>>>+}
>>>>+
>>>>  static void *eeh_rmv_device(void *data, void *userdata)
>>>>  {
>>>>  	struct pci_driver *driver;
>>>>  	struct eeh_dev *edev = (struct eeh_dev *)data;
>>>>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>>>  	int *removed = (int *)userdata;
>>>>+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>>>
>>>>  	/*
>>>>  	 * Actually, we should remove the PCI bridges as well.
>>>>@@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>>>  	driver = eeh_pcid_get(dev);
>>>>  	if (driver) {
>>>>  		eeh_pcid_put(dev);
>>>>-		if (driver->err_handler)
>>>>+		if (removed && driver->err_handler)
>>>>  			return NULL;
>>>>  	}
>>>>
>>>>@@ -425,11 +455,23 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>>>  		 pci_name(dev));
>>>>  	edev->bus = dev->bus;
>>>>  	edev->mode |= EEH_DEV_DISCONNECTED;
>>>>-	(*removed)++;
>>>>+	if (removed)
>>>>+		(*removed)++;
>>>>
>>>>-	pci_lock_rescan_remove();
>>>>-	pci_stop_and_remove_bus_device(dev);
>>>>-	pci_unlock_rescan_remove();
>>>>+	if (edev->physfn) {
>>>>+		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
>>>>+		edev->pdev = NULL;
>>>>+
>>>>+		/*
>>>>+		 * We have to set the VF PE number to invalid one, which is
>>>>+		 * required to plug the VF successfully.
>>>>+		 */
>>>>+		pdn->pe_number = IODA_INVALID_PE;
>>>>+	} else {
>>>>+		pci_lock_rescan_remove();
>>>>+		pci_stop_and_remove_bus_device(dev);
>>>>+		pci_unlock_rescan_remove();
>>>>+	}
>>>>
>>>>  	return NULL;
>>>>  }
>>>>@@ -548,6 +590,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>>>  	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>>>>  	struct timeval tstamp;
>>>>  	int cnt, rc, removed = 0;
>>>>+	struct eeh_dev *edev;
>>>>
>>>>  	/* pcibios will clear the counter; save the value */
>>>>  	cnt = pe->freeze_count;
>>>>@@ -561,12 +604,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>>>  	 */
>>>>  	eeh_pe_state_mark(pe, EEH_PE_KEEP);
>>>>  	if (bus) {
>>>>-		pci_lock_rescan_remove();
>>>>-		pcibios_remove_pci_devices(bus);
>>>>-		pci_unlock_rescan_remove();
>>>>-	} else if (frozen_bus) {
>>>>+		if (pe->type & EEH_PE_VF)
>>>>+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>>
>>>
>>>I believe the rule is that if one branch of "if" uses curly braces, then the
>>>other should have them too.
>>>
>>
>>Thanks for reminding, will fix it in next version.
>
>
>I thought checkpatch.pl checks for it but apparently it does not.
>
>
>
>>>
>>>>+		else {
>>>>+			pci_lock_rescan_remove();
>>>>+			pcibios_remove_pci_devices(bus);
>>>>+			pci_unlock_rescan_remove();
>>>>+		}
>>>>+	} else if (frozen_bus)
>>>>  		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
>>>>-	}
>>>>
>>>>  	/*
>>>>  	 * Reset the pci controller. (Asserts RST#; resets config space).
>>>>@@ -607,14 +653,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>>>  		 * PE. We should disconnect it so the binding can be
>>>>  		 * rebuilt when adding PCI devices.
>>>>  		 */
>>>>+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>>>  		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>>>-		pcibios_add_pci_devices(bus);
>>>>+		if (pe->type & EEH_PE_VF)
>>>
>>>Move "edev = list_first_entry(&pe->edevs, struct eeh_dev, list)" here. You
>>>could actually do:
>>>
>>>eeh_add_virt_device(list_first_entry(&pe->edevs, struct eeh_dev, list), NULL);
>>>
>>>and drop local variable @edev. Or move it to this scope. Dunno.
>>>
>>
>>Hmm... as I know, in eeh_pe_detach_dev() will remove the edev from pe's edevs
>>list.
>>
>>>
>>>>+			eeh_add_virt_device(edev, NULL);
>>>>+		else
>>>>+			pcibios_add_pci_devices(bus);
>>>>  	} else if (frozen_bus && removed) {
>>>>  		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>>>>  		ssleep(5);
>>>>
>>>>+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>>>  		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>>>-		pcibios_add_pci_devices(frozen_bus);
>>>>+		if (pe->type & EEH_PE_VF)
>>>
>>>
>>>The same comment as above.
>>>
>>>>+			eeh_add_virt_device(edev, NULL);
>>>>+		else
>>>>+			pcibios_add_pci_devices(frozen_bus);
>>>>  	}
>>>>  	eeh_pe_state_clear(pe, EEH_PE_KEEP);
>>>>
>>>>@@ -792,11 +846,15 @@ perm_error:
>>>>  	 * the their PCI config any more.
>>>>  	 */
>>>>  	if (frozen_bus) {
>>>>-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>>>-
>>>>-		pci_lock_rescan_remove();
>>>>-		pcibios_remove_pci_devices(frozen_bus);
>>>>-		pci_unlock_rescan_remove();
>>>>+		if (pe->type & EEH_PE_VF) {
>>>>+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>>>+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>>>+		} else {
>>>>+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>>>+			pci_lock_rescan_remove();
>>>>+			pcibios_remove_pci_devices(frozen_bus);
>>>>+			pci_unlock_rescan_remove();
>>>>+		}
>>>>  	}
>>>>  }
>>>>
>>>>diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
>>>>index 260a701..5cde950 100644
>>>>--- a/arch/powerpc/kernel/eeh_pe.c
>>>>+++ b/arch/powerpc/kernel/eeh_pe.c
>>>>@@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>>>>  	if (pe->type & EEH_PE_PHB) {
>>>>  		bus = pe->phb->bus;
>>>>  	} else if (pe->type & EEH_PE_BUS ||
>>>>-		   pe->type & EEH_PE_DEVICE) {
>>>>+		   pe->type & EEH_PE_DEVICE ||
>>>>+		   pe->type & EEH_PE_VF) {
>>>>  		if (pe->bus) {
>>>>  			bus = pe->bus;
>>>>  			goto out;
>>>>
>
>
>-- 
>Alexey
>--
>To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 331c856..ea1f13c4 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -142,6 +142,7 @@  struct eeh_dev {
 	struct pci_controller *phb;	/* Associated PHB		*/
 	struct pci_dn *pdn;		/* Associated PCI device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
+	int    in_error;		/* Error flag for eeh_dev	*/
 	struct pci_dev *physfn;		/* Associated PF PORT		*/
 	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
 };
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index af9b597..28e4d73 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1227,6 +1227,14 @@  void eeh_remove_device(struct pci_dev *dev)
 	 * from the parent PE during the BAR resotre.
 	 */
 	edev->pdev = NULL;
+
+	/*
+	 * The flag "in_error" is used to trace EEH devices for VFs
+	 * in error state or not. It's set in eeh_report_error(). If
+	 * it's not set, eeh_report_{reset,resume}() won't be called
+	 * for the VF EEH device.
+	 */
+	edev->in_error = 0;
 	dev->dev.archdata.edev = NULL;
 	if (!(edev->pe->state & EEH_PE_KEEP))
 		eeh_rmv_from_parent_pe(edev);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 89eb4bc..99868e2 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -211,6 +211,7 @@  static void *eeh_report_error(void *data, void *userdata)
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
+	edev->in_error = 1;
 	eeh_pcid_put(dev);
 	return NULL;
 }
@@ -282,7 +283,8 @@  static void *eeh_report_reset(void *data, void *userdata)
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset ||
-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
+	    (!edev->in_error)) {
 		eeh_pcid_put(dev);
 		return NULL;
 	}
@@ -339,14 +341,16 @@  static void *eeh_report_resume(void *data, void *userdata)
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume ||
-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
+	    (!edev->in_error)) {
 		edev->mode &= ~EEH_DEV_NO_HANDLER;
-		eeh_pcid_put(dev);
-		return NULL;
+		goto out;
 	}
 
 	driver->err_handler->resume(dev);
 
+out:
+	edev->in_error = 0;
 	eeh_pcid_put(dev);
 	return NULL;
 }
@@ -386,12 +390,38 @@  static void *eeh_report_failure(void *data, void *userdata)
 	return NULL;
 }
 
+static void *eeh_add_virt_device(void *data, void *userdata)
+{
+	struct pci_driver *driver;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	if (!(edev->physfn)) {
+		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
+			__func__, edev->phb->global_number, pdn->busno,
+			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+		return NULL;
+	}
+
+	driver = eeh_pcid_get(dev);
+	if (driver) {
+		eeh_pcid_put(dev);
+		if (driver->err_handler)
+			return NULL;
+	}
+
+	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
+	return NULL;
+}
+
 static void *eeh_rmv_device(void *data, void *userdata)
 {
 	struct pci_driver *driver;
 	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	int *removed = (int *)userdata;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	/*
 	 * Actually, we should remove the PCI bridges as well.
@@ -416,7 +446,7 @@  static void *eeh_rmv_device(void *data, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (driver) {
 		eeh_pcid_put(dev);
-		if (driver->err_handler)
+		if (removed && driver->err_handler)
 			return NULL;
 	}
 
@@ -425,11 +455,23 @@  static void *eeh_rmv_device(void *data, void *userdata)
 		 pci_name(dev));
 	edev->bus = dev->bus;
 	edev->mode |= EEH_DEV_DISCONNECTED;
-	(*removed)++;
+	if (removed)
+		(*removed)++;
 
-	pci_lock_rescan_remove();
-	pci_stop_and_remove_bus_device(dev);
-	pci_unlock_rescan_remove();
+	if (edev->physfn) {
+		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
+		edev->pdev = NULL;
+
+		/*
+		 * We have to set the VF PE number to invalid one, which is
+		 * required to plug the VF successfully.
+		 */
+		pdn->pe_number = IODA_INVALID_PE;
+	} else {
+		pci_lock_rescan_remove();
+		pci_stop_and_remove_bus_device(dev);
+		pci_unlock_rescan_remove();
+	}
 
 	return NULL;
 }
@@ -548,6 +590,7 @@  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
 	struct timeval tstamp;
 	int cnt, rc, removed = 0;
+	struct eeh_dev *edev;
 
 	/* pcibios will clear the counter; save the value */
 	cnt = pe->freeze_count;
@@ -561,12 +604,15 @@  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	 */
 	eeh_pe_state_mark(pe, EEH_PE_KEEP);
 	if (bus) {
-		pci_lock_rescan_remove();
-		pcibios_remove_pci_devices(bus);
-		pci_unlock_rescan_remove();
-	} else if (frozen_bus) {
+		if (pe->type & EEH_PE_VF)
+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+		else {
+			pci_lock_rescan_remove();
+			pcibios_remove_pci_devices(bus);
+			pci_unlock_rescan_remove();
+		}
+	} else if (frozen_bus)
 		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
-	}
 
 	/*
 	 * Reset the pci controller. (Asserts RST#; resets config space).
@@ -607,14 +653,22 @@  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 		 * PE. We should disconnect it so the binding can be
 		 * rebuilt when adding PCI devices.
 		 */
+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		pcibios_add_pci_devices(bus);
+		if (pe->type & EEH_PE_VF)
+			eeh_add_virt_device(edev, NULL);
+		else
+			pcibios_add_pci_devices(bus);
 	} else if (frozen_bus && removed) {
 		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
 		ssleep(5);
 
+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		pcibios_add_pci_devices(frozen_bus);
+		if (pe->type & EEH_PE_VF)
+			eeh_add_virt_device(edev, NULL);
+		else
+			pcibios_add_pci_devices(frozen_bus);
 	}
 	eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
@@ -792,11 +846,15 @@  perm_error:
 	 * the their PCI config any more.
 	 */
 	if (frozen_bus) {
-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
-		pci_lock_rescan_remove();
-		pcibios_remove_pci_devices(frozen_bus);
-		pci_unlock_rescan_remove();
+		if (pe->type & EEH_PE_VF) {
+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+		} else {
+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+			pci_lock_rescan_remove();
+			pcibios_remove_pci_devices(frozen_bus);
+			pci_unlock_rescan_remove();
+		}
 	}
 }
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 260a701..5cde950 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -914,7 +914,8 @@  struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
 	if (pe->type & EEH_PE_PHB) {
 		bus = pe->phb->bus;
 	} else if (pe->type & EEH_PE_BUS ||
-		   pe->type & EEH_PE_DEVICE) {
+		   pe->type & EEH_PE_DEVICE ||
+		   pe->type & EEH_PE_VF) {
 		if (pe->bus) {
 			bus = pe->bus;
 			goto out;