diff mbox series

[v2,1/2] PCI: hv: Fix the PCI HyperV probe failure path to release resource properly

Message ID 20200501053617.24689-1-weh@microsoft.com
State New
Headers show
Series Fix PCI HyperV device error handling | expand

Commit Message

Wei Hu May 1, 2020, 5:36 a.m. UTC
Some error cases in hv_pci_probe() were not handled. Fix these error
paths to release the resourses and clean up the state properly.

Signed-off-by: Wei Hu <weh@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

Comments

Michael Kelley (LINUX) May 4, 2020, 11:42 p.m. UTC | #1
From: Wei Hu <weh@microsoft.com> Sent: Thursday, April 30, 2020 10:36 PM
> 
> Some error cases in hv_pci_probe() were not handled. Fix these error
> paths to release the resourses and clean up the state properly.
> 
> Signed-off-by: Wei Hu <weh@microsoft.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index e15022ff63e3..e6fac0187722 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -480,6 +480,9 @@ struct hv_pcibus_device {
> 
>  	struct workqueue_struct *wq;
> 
> +	/* Highest slot of child device with resources allocated */
> +	int wslot_res_allocated;
> +
>  	/* hypercall arg, must not cross page boundary */
>  	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
> 
> @@ -2847,7 +2850,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  	struct hv_pci_dev *hpdev;
>  	struct pci_packet *pkt;
>  	size_t size_res;
> -	u32 wslot;
> +	int wslot;
>  	int ret;
> 
>  	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
> @@ -2900,6 +2903,8 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				comp_pkt.completion_status);
>  			break;
>  		}
> +
> +		hbus->wslot_res_allocated = wslot;
>  	}
> 
>  	kfree(pkt);
> @@ -2918,10 +2923,10 @@ static int hv_send_resources_released(struct hv_device *hdev)
>  	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
>  	struct pci_child_message pkt;
>  	struct hv_pci_dev *hpdev;
> -	u32 wslot;
> +	int wslot;
>  	int ret;
> 
> -	for (wslot = 0; wslot < 256; wslot++) {
> +	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
>  		hpdev = get_pcichild_wslot(hbus, wslot);
>  		if (!hpdev)
>  			continue;
> @@ -2936,8 +2941,12 @@ static int hv_send_resources_released(struct hv_device *hdev)
>  				       VM_PKT_DATA_INBAND, 0);
>  		if (ret)
>  			return ret;
> +
> +		hbus->wslot_res_allocated = wslot - 1;
>  	}
> 
> +	hbus->wslot_res_allocated = -1;
> +
>  	return 0;
>  }
> 
> @@ -3037,6 +3046,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	if (!hbus)
>  		return -ENOMEM;
>  	hbus->state = hv_pcibus_init;
> +	hbus->wslot_res_allocated = -1;
> 
>  	/*
>  	 * The PCI bus "domain" is what is called "segment" in ACPI and other
> @@ -3136,7 +3146,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> 
>  	ret = hv_pci_allocate_bridge_windows(hbus);
>  	if (ret)
> -		goto free_irq_domain;
> +		goto exit_d0;
> 
>  	ret = hv_send_resources_allocated(hdev);
>  	if (ret)
> @@ -3154,6 +3164,8 @@ static int hv_pci_probe(struct hv_device *hdev,
> 
>  free_windows:
>  	hv_pci_free_bridge_windows(hbus);
> +exit_d0:
> +	(void) hv_pci_bus_exit(hdev, true);
>  free_irq_domain:
>  	irq_domain_remove(hbus->irq_domain);
>  free_fwnode:
> --
> 2.20.1

Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Lorenzo Pieralisi May 5, 2020, 3:03 p.m. UTC | #2
On Fri, May 01, 2020 at 01:36:17PM +0800, Wei Hu wrote:
> Some error cases in hv_pci_probe() were not handled. Fix these error
> paths to release the resourses and clean up the state properly.

This patch does more than that. It adds a variable to store the
number of slots actually allocated - I presume to free only
allocated on slots on the exit path.

Two patches required I am afraid.

> Signed-off-by: Wei Hu <weh@microsoft.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index e15022ff63e3..e6fac0187722 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -480,6 +480,9 @@ struct hv_pcibus_device {
>  
>  	struct workqueue_struct *wq;
>  
> +	/* Highest slot of child device with resources allocated */
> +	int wslot_res_allocated;

I don't understand why you need an int rather than a u32.

Furthermore, I think a bitmap is more appropriate for what this
variable is used for.

> +
>  	/* hypercall arg, must not cross page boundary */
>  	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
>  
> @@ -2847,7 +2850,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  	struct hv_pci_dev *hpdev;
>  	struct pci_packet *pkt;
>  	size_t size_res;
> -	u32 wslot;
> +	int wslot;
>  	int ret;
>  
>  	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
> @@ -2900,6 +2903,8 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				comp_pkt.completion_status);
>  			break;
>  		}
> +
> +		hbus->wslot_res_allocated = wslot;
>  	}
>  
>  	kfree(pkt);
> @@ -2918,10 +2923,10 @@ static int hv_send_resources_released(struct hv_device *hdev)
>  	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
>  	struct pci_child_message pkt;
>  	struct hv_pci_dev *hpdev;
> -	u32 wslot;
> +	int wslot;
>  	int ret;
>  
> -	for (wslot = 0; wslot < 256; wslot++) {
> +	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
>  		hpdev = get_pcichild_wslot(hbus, wslot);
>  		if (!hpdev)
>  			continue;
> @@ -2936,8 +2941,12 @@ static int hv_send_resources_released(struct hv_device *hdev)
>  				       VM_PKT_DATA_INBAND, 0);
>  		if (ret)
>  			return ret;
> +
> +		hbus->wslot_res_allocated = wslot - 1;

Do you really need to set it at every loop iteration ?

Moreover, I think a bitmap is better suited for what you are doing,
given that you may skip some loop indexes on !hpdev.

>  	}
>  
> +	hbus->wslot_res_allocated = -1;
> +
>  	return 0;
>  }
>  
> @@ -3037,6 +3046,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	if (!hbus)
>  		return -ENOMEM;
>  	hbus->state = hv_pcibus_init;
> +	hbus->wslot_res_allocated = -1;
>  
>  	/*
>  	 * The PCI bus "domain" is what is called "segment" in ACPI and other
> @@ -3136,7 +3146,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  
>  	ret = hv_pci_allocate_bridge_windows(hbus);
>  	if (ret)
> -		goto free_irq_domain;
> +		goto exit_d0;
>  
>  	ret = hv_send_resources_allocated(hdev);
>  	if (ret)
> @@ -3154,6 +3164,8 @@ static int hv_pci_probe(struct hv_device *hdev,
>  
>  free_windows:
>  	hv_pci_free_bridge_windows(hbus);
> +exit_d0:
> +	(void) hv_pci_bus_exit(hdev, true);

Remove the (void) cast.

Lorenzo

>  free_irq_domain:
>  	irq_domain_remove(hbus->irq_domain);
>  free_fwnode:
> -- 
> 2.20.1
>
Wei Hu May 6, 2020, 5:36 a.m. UTC | #3
Hi Lorenzo,

Thanks for your review. Please see my comments inline. 

> -----Original Message-----
> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Tuesday, May 5, 2020 11:03 PM
> To: Wei Hu <weh@microsoft.com>
> Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> wei.liu@kernel.org; robh@kernel.org; bhelgaas@google.com; linux-
> hyperv@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Dexuan Cui <decui@microsoft.com>; Michael Kelley
> <mikelley@microsoft.com>
> Subject: Re: [PATCH v2 1/2] PCI: hv: Fix the PCI HyperV probe failure path to
> release resource properly
> 
> On Fri, May 01, 2020 at 01:36:17PM +0800, Wei Hu wrote:
> > Some error cases in hv_pci_probe() were not handled. Fix these error
> > paths to release the resourses and clean up the state properly.
> 
> This patch does more than that. It adds a variable to store the number of slots
> actually allocated - I presume to free only allocated on slots on the exit path.
> 
> Two patches required I am afraid.

Well, adding this variable is needed to make the call of "(void) hv_pci_bus_exit(hdev, true)" 
at the end to work and clean up the PCI state in the failure path. Just adding this variable
would not make any changes. So I think it may be better to put them in single patch?

> 
> > Signed-off-by: Wei Hu <weh@microsoft.com>
> > ---
> >  drivers/pci/controller/pci-hyperv.c | 20 ++++++++++++++++----
> >  1 file changed, 16 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/pci/controller/pci-hyperv.c
> > b/drivers/pci/controller/pci-hyperv.c
> > index e15022ff63e3..e6fac0187722 100644
> > --- a/drivers/pci/controller/pci-hyperv.c
> > +++ b/drivers/pci/controller/pci-hyperv.c
> > @@ -480,6 +480,9 @@ struct hv_pcibus_device {
> >
> >  	struct workqueue_struct *wq;
> >
> > +	/* Highest slot of child device with resources allocated */
> > +	int wslot_res_allocated;
> 
> I don't understand why you need an int rather than a u32.
> 
> Furthermore, I think a bitmap is more appropriate for what this variable is used
> for.

So it can use a negative value (-1 in this case) to indicated none of any resources
has been allocated. Currently value between 0-255 indicating some resources 
have been allocated. Of course I can use 0xffffffff to indicate that if it were u32. But
it wouldn't make much difference, would it?

It would take 32 bytes for total 256 child slots in bitmap, while it only takes 4 bytes 
using int. It is not in critical path so scanning from the location one by one is not a big
deal.

> 
> > +
> >  	/* hypercall arg, must not cross page boundary */
> >  	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
> >
> > @@ -2847,7 +2850,7 @@ static int hv_send_resources_allocated(struct
> hv_device *hdev)
> >  	struct hv_pci_dev *hpdev;
> >  	struct pci_packet *pkt;
> >  	size_t size_res;
> > -	u32 wslot;
> > +	int wslot;
> >  	int ret;
> >
> >  	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
> @@
> > -2900,6 +2903,8 @@ static int hv_send_resources_allocated(struct hv_device
> *hdev)
> >  				comp_pkt.completion_status);
> >  			break;
> >  		}
> > +
> > +		hbus->wslot_res_allocated = wslot;
> >  	}
> >
> >  	kfree(pkt);
> > @@ -2918,10 +2923,10 @@ static int hv_send_resources_released(struct
> hv_device *hdev)
> >  	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
> >  	struct pci_child_message pkt;
> >  	struct hv_pci_dev *hpdev;
> > -	u32 wslot;
> > +	int wslot;
> >  	int ret;
> >
> > -	for (wslot = 0; wslot < 256; wslot++) {
> > +	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
> >  		hpdev = get_pcichild_wslot(hbus, wslot);
> >  		if (!hpdev)
> >  			continue;
> > @@ -2936,8 +2941,12 @@ static int hv_send_resources_released(struct
> hv_device *hdev)
> >  				       VM_PKT_DATA_INBAND, 0);
> >  		if (ret)
> >  			return ret;
> > +
> > +		hbus->wslot_res_allocated = wslot - 1;
> 
> Do you really need to set it at every loop iteration ?
> 
> Moreover, I think a bitmap is better suited for what you are doing, given that
> you may skip some loop indexes on !hpdev.
>
The value is set in the loop whenever a resource was successfully released. It could
happen that it failed in the next iteration so the last one which had succeeded would be
recorded in this variable. It is needed  at the end of loop when this 
iteration succeeds. 

Once again since it is not in the critical path, just using an signed integer is straightforward, 
less error prone and a bit easier to maintain than bitmap in my opinion. 😊
 
> >  	}
> >
> > +	hbus->wslot_res_allocated = -1;
> > +
> >  	return 0;
> >  }
> >
> > @@ -3037,6 +3046,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> >  	if (!hbus)
> >  		return -ENOMEM;
> >  	hbus->state = hv_pcibus_init;
> > +	hbus->wslot_res_allocated = -1;
> >
> >  	/*
> >  	 * The PCI bus "domain" is what is called "segment" in ACPI and
> > other @@ -3136,7 +3146,7 @@ static int hv_pci_probe(struct hv_device
> > *hdev,
> >
> >  	ret = hv_pci_allocate_bridge_windows(hbus);
> >  	if (ret)
> > -		goto free_irq_domain;
> > +		goto exit_d0;
> >
> >  	ret = hv_send_resources_allocated(hdev);
> >  	if (ret)
> > @@ -3154,6 +3164,8 @@ static int hv_pci_probe(struct hv_device *hdev,
> >
> >  free_windows:
> >  	hv_pci_free_bridge_windows(hbus);
> > +exit_d0:
> > +	(void) hv_pci_bus_exit(hdev, true);
> 
> Remove the (void) cast.
> 

Some tools (maybe lint?) could generate error/warning messages assuming the code fails 
to check the return value without such cast. Leaving the cast in place just indicates that
the return value is deliberately ignored.  

Thanks, 
Wei
Lorenzo Pieralisi May 6, 2020, 11:09 a.m. UTC | #4
On Wed, May 06, 2020 at 05:36:46AM +0000, Wei Hu wrote:
> Hi Lorenzo,
> 
> Thanks for your review. Please see my comments inline. 
> 
> > -----Original Message-----
> > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Sent: Tuesday, May 5, 2020 11:03 PM
> > To: Wei Hu <weh@microsoft.com>
> > Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> > <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> > wei.liu@kernel.org; robh@kernel.org; bhelgaas@google.com; linux-
> > hyperv@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> > kernel@vger.kernel.org; Dexuan Cui <decui@microsoft.com>; Michael Kelley
> > <mikelley@microsoft.com>
> > Subject: Re: [PATCH v2 1/2] PCI: hv: Fix the PCI HyperV probe failure path to
> > release resource properly
> > 
> > On Fri, May 01, 2020 at 01:36:17PM +0800, Wei Hu wrote:
> > > Some error cases in hv_pci_probe() were not handled. Fix these error
> > > paths to release the resourses and clean up the state properly.
> > 
> > This patch does more than that. It adds a variable to store the number of slots
> > actually allocated - I presume to free only allocated on slots on the exit path.
> > 
> > Two patches required I am afraid.
> 
> Well, adding this variable is needed to make the call of "(void) hv_pci_bus_exit(hdev, true)" 

I don't understand why - it is not clear from the commit log and the
code, please explain it since it is not obvious.

> at the end to work and clean up the PCI state in the failure path. Just adding this variable
> would not make any changes. So I think it may be better to put them in single patch?
> 
> > 
> > > Signed-off-by: Wei Hu <weh@microsoft.com>
> > > ---
> > >  drivers/pci/controller/pci-hyperv.c | 20 ++++++++++++++++----
> > >  1 file changed, 16 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/pci/controller/pci-hyperv.c
> > > b/drivers/pci/controller/pci-hyperv.c
> > > index e15022ff63e3..e6fac0187722 100644
> > > --- a/drivers/pci/controller/pci-hyperv.c
> > > +++ b/drivers/pci/controller/pci-hyperv.c
> > > @@ -480,6 +480,9 @@ struct hv_pcibus_device {
> > >
> > >  	struct workqueue_struct *wq;
> > >
> > > +	/* Highest slot of child device with resources allocated */
> > > +	int wslot_res_allocated;
> > 
> > I don't understand why you need an int rather than a u32.
> > 
> > Furthermore, I think a bitmap is more appropriate for what this variable is used
> > for.
> 
> So it can use a negative value (-1 in this case) to indicated none of any resources
> has been allocated. Currently value between 0-255 indicating some resources 
> have been allocated. Of course I can use 0xffffffff to indicate that if it were u32. But
> it wouldn't make much difference, would it?

It is fine by me - I would not have written it this way but it does
not matter.

> It would take 32 bytes for total 256 child slots in bitmap, while it only takes 4 bytes 
> using int. It is not in critical path so scanning from the location one by one is not a big
> deal.

I suggested a bitmap for correctness - a slot number may include slots
that as far as I can read the code failed get_pcichild_slot().

It is not clear what this patch is doing in this loop, that's certain.

> > 
> > > +
> > >  	/* hypercall arg, must not cross page boundary */
> > >  	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
> > >
> > > @@ -2847,7 +2850,7 @@ static int hv_send_resources_allocated(struct
> > hv_device *hdev)
> > >  	struct hv_pci_dev *hpdev;
> > >  	struct pci_packet *pkt;
> > >  	size_t size_res;
> > > -	u32 wslot;
> > > +	int wslot;
> > >  	int ret;
> > >
> > >  	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
> > @@
> > > -2900,6 +2903,8 @@ static int hv_send_resources_allocated(struct hv_device
> > *hdev)
> > >  				comp_pkt.completion_status);
> > >  			break;
> > >  		}
> > > +
> > > +		hbus->wslot_res_allocated = wslot;
> > >  	}
> > >
> > >  	kfree(pkt);
> > > @@ -2918,10 +2923,10 @@ static int hv_send_resources_released(struct
> > hv_device *hdev)
> > >  	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
> > >  	struct pci_child_message pkt;
> > >  	struct hv_pci_dev *hpdev;
> > > -	u32 wslot;
> > > +	int wslot;
> > >  	int ret;
> > >
> > > -	for (wslot = 0; wslot < 256; wslot++) {
> > > +	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
> > >  		hpdev = get_pcichild_wslot(hbus, wslot);
> > >  		if (!hpdev)
> > >  			continue;
> > > @@ -2936,8 +2941,12 @@ static int hv_send_resources_released(struct
> > hv_device *hdev)
> > >  				       VM_PKT_DATA_INBAND, 0);
> > >  		if (ret)
> > >  			return ret;
> > > +
> > > +		hbus->wslot_res_allocated = wslot - 1;
> > 
> > Do you really need to set it at every loop iteration ?
> > 
> > Moreover, I think a bitmap is better suited for what you are doing, given that
> > you may skip some loop indexes on !hpdev.
> >
> The value is set in the loop whenever a resource was successfully released. It could
> happen that it failed in the next iteration so the last one which had succeeded would be
> recorded in this variable. It is needed  at the end of loop when this 
> iteration succeeds. 

Ok understood.

> Once again since it is not in the critical path, just using an signed integer is straightforward, 
> less error prone and a bit easier to maintain than bitmap in my opinion. 😊
>  

Less error prone, not sure, see above - it is your code so you choose
but please explain this change better, it is not obvious.

> > >  	}
> > >
> > > +	hbus->wslot_res_allocated = -1;
> > > +
> > >  	return 0;
> > >  }
> > >
> > > @@ -3037,6 +3046,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> > >  	if (!hbus)
> > >  		return -ENOMEM;
> > >  	hbus->state = hv_pcibus_init;
> > > +	hbus->wslot_res_allocated = -1;
> > >
> > >  	/*
> > >  	 * The PCI bus "domain" is what is called "segment" in ACPI and
> > > other @@ -3136,7 +3146,7 @@ static int hv_pci_probe(struct hv_device
> > > *hdev,
> > >
> > >  	ret = hv_pci_allocate_bridge_windows(hbus);
> > >  	if (ret)
> > > -		goto free_irq_domain;
> > > +		goto exit_d0;
> > >
> > >  	ret = hv_send_resources_allocated(hdev);
> > >  	if (ret)
> > > @@ -3154,6 +3164,8 @@ static int hv_pci_probe(struct hv_device *hdev,
> > >
> > >  free_windows:
> > >  	hv_pci_free_bridge_windows(hbus);
> > > +exit_d0:
> > > +	(void) hv_pci_bus_exit(hdev, true);
> > 
> > Remove the (void) cast.
> > 
> 
> Some tools (maybe lint?) could generate error/warning messages assuming the code fails 
> to check the return value without such cast. Leaving the cast in place just indicates that
> the return value is deliberately ignored.  

I understand that - the question is why it is OK to ignore it in this
specific case.

Maybe adding a wrapper around hv_pci_bus_exit() can help, I am fine with
it, just trying to help.

Lorenzo
Wei Hu May 6, 2020, 1:21 p.m. UTC | #5
Thanks for your email. See my comments inline.

> -----Original Message-----
> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Wednesday, May 6, 2020 7:10 PM
> To: Wei Hu <weh@microsoft.com>
> Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> wei.liu@kernel.org; robh@kernel.org; bhelgaas@google.com; linux-
> hyperv@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Dexuan Cui <decui@microsoft.com>; Michael Kelley
> <mikelley@microsoft.com>
> Subject: Re: [PATCH v2 1/2] PCI: hv: Fix the PCI HyperV probe failure path to
> release resource properly
> 
> On Wed, May 06, 2020 at 05:36:46AM +0000, Wei Hu wrote:
> > Hi Lorenzo,
> >
> > Thanks for your review. Please see my comments inline.
> >
> > > -----Original Message-----
> > > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > > Sent: Tuesday, May 5, 2020 11:03 PM
> > > To: Wei Hu <weh@microsoft.com>
> > > Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> > > <haiyangz@microsoft.com>; Stephen Hemminger
> > > <sthemmin@microsoft.com>; wei.liu@kernel.org; robh@kernel.org;
> > > bhelgaas@google.com; linux- hyperv@vger.kernel.org;
> > > linux-pci@vger.kernel.org; linux- kernel@vger.kernel.org; Dexuan Cui
> > > <decui@microsoft.com>; Michael Kelley <mikelley@microsoft.com>
> > > Subject: Re: [PATCH v2 1/2] PCI: hv: Fix the PCI HyperV probe
> > > failure path to release resource properly
> > >
> > > On Fri, May 01, 2020 at 01:36:17PM +0800, Wei Hu wrote:
> > > > Some error cases in hv_pci_probe() were not handled. Fix these
> > > > error paths to release the resourses and clean up the state properly.
> > >
> > > This patch does more than that. It adds a variable to store the
> > > number of slots actually allocated - I presume to free only allocated on slots
> on the exit path.
> > >
> > > Two patches required I am afraid.
> >
> > Well, adding this variable is needed to make the call of "(void)
> hv_pci_bus_exit(hdev, true)"
> 
> I don't understand why - it is not clear from the commit log and the code,
> please explain it since it is not obvious.
> 
Hv_pci_bus_exit() calls hv_send_resources_released() to release all child resources.
These child resources were allocated in hv_send_resources_allocated(). 
Hv_send_resources_allocated() could fail in the middle, leaving some child resources
allocated and rest not. Without adding this variable to record the highest slot number that
resource has been successfully allocated, calling hv_send_resources_released() could 
cause spurious resource release requests being sent to hypervisor.  

This had been fine since hv_pci_bus_exit() was never called in error path before this patch was
introduced. To add this call to clean the pci state in the error path, we need to know the starting
point in child device that resource has not been allocated. Hence this variable
is used in hv_send_resources_allocated() to record this point and in
hv_send_resource_released() to start deallocating child resources.

I can add to the commit log if you are fine with this explanation. 
 
> > at the end to work and clean up the PCI state in the failure path.
> > Just adding this variable would not make any changes. So I think it may be
> better to put them in single patch?
> >
> > >
> > > > Signed-off-by: Wei Hu <weh@microsoft.com>
> > > > ---
> > > >  drivers/pci/controller/pci-hyperv.c | 20 ++++++++++++++++----
> > > >  1 file changed, 16 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/pci/controller/pci-hyperv.c
> > > > b/drivers/pci/controller/pci-hyperv.c
> > > > index e15022ff63e3..e6fac0187722 100644
> > > > --- a/drivers/pci/controller/pci-hyperv.c
> > > > +++ b/drivers/pci/controller/pci-hyperv.c
> > > > @@ -480,6 +480,9 @@ struct hv_pcibus_device {
> > > >
> > > >  	struct workqueue_struct *wq;
> > > >
> > > > +	/* Highest slot of child device with resources allocated */
> > > > +	int wslot_res_allocated;
> > >
> > > I don't understand why you need an int rather than a u32.
> > >
> > > Furthermore, I think a bitmap is more appropriate for what this
> > > variable is used for.
> >
> > So it can use a negative value (-1 in this case) to indicated none of
> > any resources has been allocated. Currently value between 0-255
> > indicating some resources have been allocated. Of course I can use
> > 0xffffffff to indicate that if it were u32. But it wouldn't make much difference,
> would it?
> 
> It is fine by me - I would not have written it this way but it does not matter.
> 
> > It would take 32 bytes for total 256 child slots in bitmap, while it
> > only takes 4 bytes using int. It is not in critical path so scanning
> > from the location one by one is not a big deal.
> 
> I suggested a bitmap for correctness - a slot number may include slots that as
> far as I can read the code failed get_pcichild_slot().
> 
> It is not clear what this patch is doing in this loop, that's certain.
> 
Get_pcichild_wslot() just tells if a pci child device exists under this slot number. If
there is no child device, it just continue to look on the next slot number. If it does
exists, the code sends request to hypervisor for resource allocation. If it succeeds,
we update the wslot_res_allocated accordingly.

I hope this along with the explanation earlier makes it clear.

> > >
> > > > +
> > > >  	/* hypercall arg, must not cross page boundary */
> > > >  	struct hv_retarget_device_interrupt
> > > > retarget_msi_interrupt_params;
> > > >
> > > > @@ -2847,7 +2850,7 @@ static int
> > > > hv_send_resources_allocated(struct
> > > hv_device *hdev)
> > > >  	struct hv_pci_dev *hpdev;
> > > >  	struct pci_packet *pkt;
> > > >  	size_t size_res;
> > > > -	u32 wslot;
> > > > +	int wslot;
> > > >  	int ret;
> > > >
> > > >  	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
> > > @@
> > > > -2900,6 +2903,8 @@ static int hv_send_resources_allocated(struct
> > > > hv_device
> > > *hdev)
> > > >  				comp_pkt.completion_status);
> > > >  			break;
> > > >  		}
> > > > +
> > > > +		hbus->wslot_res_allocated = wslot;
> > > >  	}
> > > >
> > > >  	kfree(pkt);
> > > > @@ -2918,10 +2923,10 @@ static int
> > > > hv_send_resources_released(struct
> > > hv_device *hdev)
> > > >  	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
> > > >  	struct pci_child_message pkt;
> > > >  	struct hv_pci_dev *hpdev;
> > > > -	u32 wslot;
> > > > +	int wslot;
> > > >  	int ret;
> > > >
> > > > -	for (wslot = 0; wslot < 256; wslot++) {
> > > > +	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
> > > >  		hpdev = get_pcichild_wslot(hbus, wslot);
> > > >  		if (!hpdev)
> > > >  			continue;
> > > > @@ -2936,8 +2941,12 @@ static int
> > > > hv_send_resources_released(struct
> > > hv_device *hdev)
> > > >  				       VM_PKT_DATA_INBAND, 0);
> > > >  		if (ret)
> > > >  			return ret;
> > > > +
> > > > +		hbus->wslot_res_allocated = wslot - 1;
> > >
> > > Do you really need to set it at every loop iteration ?
> > >
> > > Moreover, I think a bitmap is better suited for what you are doing,
> > > given that you may skip some loop indexes on !hpdev.
> > >
> > The value is set in the loop whenever a resource was successfully
> > released. It could happen that it failed in the next iteration so the
> > last one which had succeeded would be recorded in this variable. It is
> > needed  at the end of loop when this iteration succeeds.
> 
> Ok understood.
> 
> > Once again since it is not in the critical path, just using an signed
> > integer is straightforward, less error prone and a bit easier to
> > maintain than bitmap in my opinion. 😊
> >
> 
> Less error prone, not sure, see above - it is your code so you choose but please
> explain this change better, it is not obvious.

I hope the explanations earlier make it a little better to understand.

> 
> > > >  	}
> > > >
> > > > +	hbus->wslot_res_allocated = -1;
> > > > +
> > > >  	return 0;
> > > >  }
> > > >
> > > > @@ -3037,6 +3046,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> > > >  	if (!hbus)
> > > >  		return -ENOMEM;
> > > >  	hbus->state = hv_pcibus_init;
> > > > +	hbus->wslot_res_allocated = -1;
> > > >
> > > >  	/*
> > > >  	 * The PCI bus "domain" is what is called "segment" in ACPI and
> > > > other @@ -3136,7 +3146,7 @@ static int hv_pci_probe(struct
> > > > hv_device *hdev,
> > > >
> > > >  	ret = hv_pci_allocate_bridge_windows(hbus);
> > > >  	if (ret)
> > > > -		goto free_irq_domain;
> > > > +		goto exit_d0;
> > > >
> > > >  	ret = hv_send_resources_allocated(hdev);
> > > >  	if (ret)
> > > > @@ -3154,6 +3164,8 @@ static int hv_pci_probe(struct hv_device
> > > > *hdev,
> > > >
> > > >  free_windows:
> > > >  	hv_pci_free_bridge_windows(hbus);
> > > > +exit_d0:
> > > > +	(void) hv_pci_bus_exit(hdev, true);
> > >
> > > Remove the (void) cast.
> > >
> >
> > Some tools (maybe lint?) could generate error/warning messages
> > assuming the code fails to check the return value without such cast.
> > Leaving the cast in place just indicates that the return value is deliberately
> ignored.
> 
> I understand that - the question is why it is OK to ignore it in this specific case.
>

Since it is already in the error path, checking the return value is not necessary.
The earlier failure point is more important to be returned to the caller. 
 
> Maybe adding a wrapper around hv_pci_bus_exit() can help, I am fine with it,
> just trying to help.

Do you mean making the (void) cast in wrapper function? Or checking the return value
in the wrapper function and ignore it?  Either way I think it might not be necessary.

Thanks,
Wei
Michael Kelley (LINUX) May 6, 2020, 2:55 p.m. UTC | #6
From: Wei Hu <weh@microsoft.com>  Sent: Wednesday, May 6, 2020 6:22 AM
> > -----Original Message-----
> > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Sent: Wednesday, May 6, 2020 7:10 PM
> > To: Wei Hu <weh@microsoft.com>
> > Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> > <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> > wei.liu@kernel.org; robh@kernel.org; bhelgaas@google.com; linux-
> > hyperv@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> > kernel@vger.kernel.org; Dexuan Cui <decui@microsoft.com>; Michael Kelley
> > <mikelley@microsoft.com>
> > Subject: Re: [PATCH v2 1/2] PCI: hv: Fix the PCI HyperV probe failure path to
> > release resource properly
> >
> > On Wed, May 06, 2020 at 05:36:46AM +0000, Wei Hu wrote:
> > > Hi Lorenzo,
> > >
> > > Thanks for your review. Please see my comments inline.
> > >
> > > > -----Original Message-----
> > > > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > > > Sent: Tuesday, May 5, 2020 11:03 PM
> > > > To: Wei Hu <weh@microsoft.com>
> > > > Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> > > > <haiyangz@microsoft.com>; Stephen Hemminger
> > > > <sthemmin@microsoft.com>; wei.liu@kernel.org; robh@kernel.org;
> > > > bhelgaas@google.com; linux- hyperv@vger.kernel.org;
> > > > linux-pci@vger.kernel.org; linux- kernel@vger.kernel.org; Dexuan Cui
> > > > <decui@microsoft.com>; Michael Kelley <mikelley@microsoft.com>
> > > > Subject: Re: [PATCH v2 1/2] PCI: hv: Fix the PCI HyperV probe
> > > > failure path to release resource properly
> > > >
> > > > On Fri, May 01, 2020 at 01:36:17PM +0800, Wei Hu wrote:
> > > > > Some error cases in hv_pci_probe() were not handled. Fix these
> > > > > error paths to release the resourses and clean up the state properly.
> > > >
> > > > This patch does more than that. It adds a variable to store the
> > > > number of slots actually allocated - I presume to free only allocated on slots
> > on the exit path.
> > > >
> > > > Two patches required I am afraid.
> > >
> > > Well, adding this variable is needed to make the call of "(void)
> > hv_pci_bus_exit(hdev, true)"
> >
> > I don't understand why - it is not clear from the commit log and the code,
> > please explain it since it is not obvious.
> >
> Hv_pci_bus_exit() calls hv_send_resources_released() to release all child resources.
> These child resources were allocated in hv_send_resources_allocated().
> Hv_send_resources_allocated() could fail in the middle, leaving some child resources
> allocated and rest not. Without adding this variable to record the highest slot number that
> resource has been successfully allocated, calling hv_send_resources_released() could
> cause spurious resource release requests being sent to hypervisor.
> 
> This had been fine since hv_pci_bus_exit() was never called in error path before this patch
> was
> introduced. To add this call to clean the pci state in the error path, we need to know the
> starting
> point in child device that resource has not been allocated. Hence this variable
> is used in hv_send_resources_allocated() to record this point and in
> hv_send_resource_released() to start deallocating child resources.
> 
> I can add to the commit log if you are fine with this explanation.
> 

FWIW, I think of this patch as follows:

In some error cases in hv_pci_probe(), allocated resources are not
freed.  Fix this by adding a field to keep track of the high water mark
for slots that have resources allocated to them.  In case of an error, this
high water mark is used to know which slots have resources that
must be released.  Since slots are numbered starting with zero, a
value of -1 indicates no slots have been allocated resources.  There
may be unused slots in the range between slot 0 and the high
water mark slot, but these slots are already ignored by the existing code
in the allocate and release loops with the call to get_pcichild_wslot().

Michael
Lorenzo Pieralisi May 6, 2020, 3:21 p.m. UTC | #7
On Wed, May 06, 2020 at 02:55:17PM +0000, Michael Kelley wrote:

[...]

> > Hv_pci_bus_exit() calls hv_send_resources_released() to release all child resources.
> > These child resources were allocated in hv_send_resources_allocated().
> > Hv_send_resources_allocated() could fail in the middle, leaving some child resources
> > allocated and rest not. Without adding this variable to record the highest slot number that
> > resource has been successfully allocated, calling hv_send_resources_released() could
> > cause spurious resource release requests being sent to hypervisor.
> > 
> > This had been fine since hv_pci_bus_exit() was never called in error path before this patch
> > was
> > introduced. To add this call to clean the pci state in the error path, we need to know the
> > starting
> > point in child device that resource has not been allocated. Hence this variable
> > is used in hv_send_resources_allocated() to record this point and in
> > hv_send_resource_released() to start deallocating child resources.
> > 
> > I can add to the commit log if you are fine with this explanation.
> > 
> 
> FWIW, I think of this patch as follows:
> 
> In some error cases in hv_pci_probe(), allocated resources are not
> freed.  Fix this by adding a field to keep track of the high water mark
> for slots that have resources allocated to them.  In case of an error, this
> high water mark is used to know which slots have resources that
> must be released.  Since slots are numbered starting with zero, a
> value of -1 indicates no slots have been allocated resources.  There
> may be unused slots in the range between slot 0 and the high
> water mark slot, but these slots are already ignored by the existing code
> in the allocate and release loops with the call to get_pcichild_wslot().

That's much clearer now - please add these bits of info to the commit
log, it is essential that developers can find an explanation for a
change like this one there IMO.

Overall the code changes are fine, I am not a big fan of the (void)
cast (I don't think error codes should be ignored) but it is acceptable,
in this context.

Thank you for taking some time to review the code together.

Lorenzo
diff mbox series

Patch

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index e15022ff63e3..e6fac0187722 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -480,6 +480,9 @@  struct hv_pcibus_device {
 
 	struct workqueue_struct *wq;
 
+	/* Highest slot of child device with resources allocated */
+	int wslot_res_allocated;
+
 	/* hypercall arg, must not cross page boundary */
 	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
 
@@ -2847,7 +2850,7 @@  static int hv_send_resources_allocated(struct hv_device *hdev)
 	struct hv_pci_dev *hpdev;
 	struct pci_packet *pkt;
 	size_t size_res;
-	u32 wslot;
+	int wslot;
 	int ret;
 
 	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
@@ -2900,6 +2903,8 @@  static int hv_send_resources_allocated(struct hv_device *hdev)
 				comp_pkt.completion_status);
 			break;
 		}
+
+		hbus->wslot_res_allocated = wslot;
 	}
 
 	kfree(pkt);
@@ -2918,10 +2923,10 @@  static int hv_send_resources_released(struct hv_device *hdev)
 	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
 	struct pci_child_message pkt;
 	struct hv_pci_dev *hpdev;
-	u32 wslot;
+	int wslot;
 	int ret;
 
-	for (wslot = 0; wslot < 256; wslot++) {
+	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
 		hpdev = get_pcichild_wslot(hbus, wslot);
 		if (!hpdev)
 			continue;
@@ -2936,8 +2941,12 @@  static int hv_send_resources_released(struct hv_device *hdev)
 				       VM_PKT_DATA_INBAND, 0);
 		if (ret)
 			return ret;
+
+		hbus->wslot_res_allocated = wslot - 1;
 	}
 
+	hbus->wslot_res_allocated = -1;
+
 	return 0;
 }
 
@@ -3037,6 +3046,7 @@  static int hv_pci_probe(struct hv_device *hdev,
 	if (!hbus)
 		return -ENOMEM;
 	hbus->state = hv_pcibus_init;
+	hbus->wslot_res_allocated = -1;
 
 	/*
 	 * The PCI bus "domain" is what is called "segment" in ACPI and other
@@ -3136,7 +3146,7 @@  static int hv_pci_probe(struct hv_device *hdev,
 
 	ret = hv_pci_allocate_bridge_windows(hbus);
 	if (ret)
-		goto free_irq_domain;
+		goto exit_d0;
 
 	ret = hv_send_resources_allocated(hdev);
 	if (ret)
@@ -3154,6 +3164,8 @@  static int hv_pci_probe(struct hv_device *hdev,
 
 free_windows:
 	hv_pci_free_bridge_windows(hbus);
+exit_d0:
+	(void) hv_pci_bus_exit(hdev, true);
 free_irq_domain:
 	irq_domain_remove(hbus->irq_domain);
 free_fwnode: