diff mbox

[3/4,RFC] fsl/msi: Add MSI bank allocation for kernel owned devices

Message ID 1425359866-31049-3-git-send-email-Bharat.Bhushan@freescale.com (mailing list archive)
State RFC
Delegated to: Scott Wood
Headers show

Commit Message

Bharat Bhushan March 3, 2015, 5:17 a.m. UTC
With this patch a "context" can allocate a MSI bank and use the
allocated MSI-bank for the devices in that "context".

kernel/host "context" is "NULL", So all devices owned by kernel
will share a MSI bank allocated with "context = NULL.

This patch is in direction to have separate MSI bank for kernel
context and userspace/VM context. We do not want two software
context (kernel and VMs) to share a MSI bank for safe/reliable
interrupts with full isolation. Follow up patch will add interface
to allocate a MSI bank for userspace/VM context.

NOTE: This RFC patch allows only one MSI bank to be allocated for
kernel context. Which seems to be sufficient to me. But if we see this
is limiting some real usecase scanerio then this limitation can be
removed

One issue which still need to addressed is when to free kernel
context allocated MSI bank? Say all MSI capable devices are assigned
to VM/userspace then there is no need to have any MSI bank reserved
for kernel context.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
---
 arch/powerpc/sysdev/fsl_msi.c | 88 ++++++++++++++++++++++++++++++++++++++-----
 arch/powerpc/sysdev/fsl_msi.h |  4 ++
 2 files changed, 83 insertions(+), 9 deletions(-)

Comments

Scott Wood March 11, 2015, 11:22 p.m. UTC | #1
On Tue, 2015-03-03 at 10:47 +0530, Bharat Bhushan wrote:
> With this patch a "context" can allocate a MSI bank and use the
> allocated MSI-bank for the devices in that "context".
> 
> kernel/host "context" is "NULL", So all devices owned by kernel
> will share a MSI bank allocated with "context = NULL.
> 
> This patch is in direction to have separate MSI bank for kernel
> context and userspace/VM context. We do not want two software
> context (kernel and VMs) to share a MSI bank for safe/reliable
> interrupts with full isolation. Follow up patch will add interface
> to allocate a MSI bank for userspace/VM context.
> 
> NOTE: This RFC patch allows only one MSI bank to be allocated for
> kernel context. Which seems to be sufficient to me. But if we see this
> is limiting some real usecase scanerio then this limitation can be
> removed
> 
> One issue which still need to addressed is when to free kernel
> context allocated MSI bank? Say all MSI capable devices are assigned
> to VM/userspace then there is no need to have any MSI bank reserved
> for kernel context.
> 
> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
> ---
>  arch/powerpc/sysdev/fsl_msi.c | 88 ++++++++++++++++++++++++++++++++++++++-----
>  arch/powerpc/sysdev/fsl_msi.h |  4 ++
>  2 files changed, 83 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
> index 32ba1e3..027aeeb 100644
> --- a/arch/powerpc/sysdev/fsl_msi.c
> +++ b/arch/powerpc/sysdev/fsl_msi.c
> @@ -142,6 +142,79 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
>  	return;
>  }
>  
> +/*
> + * Allocate a MSI Bank for the requested "context".
> + * NULL "context" means that this request is to allocate
> + * MSI bank for kernel owned devices. And currently we
> + * assume that one MSI bank is sufficient for kernel.
> + */
> +static struct fsl_msi *fsl_msi_allocate_msi_bank(void *context)
> +{
> +	struct fsl_msi *msi_data;
> +
> +	/* Kernel context (NULL) can reserve only one msi bank */
> +	if (!context) {
> +		list_for_each_entry(msi_data, &msi_head, list) {
> +			if ((msi_data->reserved == MSI_RESERVED) &&
> +			    (msi_data->context == NULL))
> +				return NULL;
> +		}

Unnecessary parentheses

Is there any reason why the kernel bank needs to participate in this
mechanism at all?  Set it aside at MSI driver init, and don't put it on
the list.  I know you've previously said that you want to support
configs where the kernel doesn't get any banks, but that can just be a
boot option that tells the MSI code to not set aside a bank for the
kernel.  It would simplify the code.

With the patchset as is, how would one indicate whether kernel devices
should get a bank?  Specifically, when the kernel does have an
MSI-capable device but we'd prefer to use legacy interrupts to keep MSIs
available to VFIO.

> +	}
> +
> +	list_for_each_entry(msi_data, &msi_head, list) {
> +		if (msi_data->reserved == MSI_FREE) {
> +			msi_data->reserved = MSI_RESERVED;
> +			msi_data->context = context;
> +			return msi_data;
> +		}
> +	}

What prevents races from parallel callers?

> +/* FIXME: Assumption that host kernel will allocate only one MSI bank */

It's not a FIXME if we think the limitation is not burdensome.

> + __attribute__ ((unused)) static int fsl_msi_free_msi_bank(void *context)

static int __maybe_unused fsl_msi_free_msi_bank(void *context)

Why are you adding this function then removing it in the next patch?

> +	/* If no MSI bank allocated for kernel owned device, allocate one */
> +	msi_data = fsl_msi_allocate_msi_bank(NULL);
> +	if (msi_data)
> +		return msi_data;
> +
> +	return NULL;

return fsl_msi_allocate_msi_bank(NULL);


> +}
> +
>  static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
>  				struct msi_msg *msg,
>  				struct fsl_msi *fsl_msi_data)
> @@ -174,7 +247,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
>  	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>  	struct device_node *np;
>  	phandle phandle = 0;
> -	int rc, hwirq = -ENOMEM;
> +	int rc = -ENODEV, hwirq = -ENOMEM;

Initialize rc when you detect the error instead of here (it'd be OK to
initialize it to zero here, to mitigate potential uninitialized value
bugs).

>  	unsigned int virq;
>  	struct msi_desc *entry;
>  	struct msi_msg msg;
> @@ -231,15 +304,12 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
>  		if (specific_msi_bank) {
>  			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
>  		} else {
> -			/*
> -			 * Loop over all the MSI devices until we find one that has an
> -			 * available interrupt.
> -			 */
> -			list_for_each_entry(msi_data, &msi_head, list) {
> -				hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
> -				if (hwirq >= 0)
> -					break;
> +			msi_data = fsl_msi_get_reserved_msi_bank(pdev);
> +			if (!msi_data) {
> +				dev_err(&pdev->dev, "No MSI Bank allocated\n");
> +				goto out_free;

Is this really an error?  Seems like dev_info() would be more
appropriate to indicate the absence of a resource where you can fall
back to another option.

> diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
> index 9b0ab84..c69702b 100644
> --- a/arch/powerpc/sysdev/fsl_msi.h
> +++ b/arch/powerpc/sysdev/fsl_msi.h
> @@ -46,6 +46,10 @@ struct fsl_msi {
>  	struct list_head list;          /* support multiple MSI banks */
>  
>  	phandle phandle;
> +#define MSI_FREE		0
> +#define MSI_RESERVED		1
> +        int reserved;
> +        void *context;

Whitespace

How about just "bool reserved"?  Or if the kernel bank is kept off the
list, just check context for NULL.

-Scott
Bharat Bhushan March 12, 2015, 3:46 p.m. UTC | #2
> -----Original Message-----

> From: Wood Scott-B07421

> Sent: Thursday, March 12, 2015 4:53 AM

> To: Bhushan Bharat-R65777

> Cc: linuxppc-dev@ozlabs.org

> Subject: Re: [PATCH 3/4 RFC] fsl/msi: Add MSI bank allocation for kernel

> owned devices

> 

> On Tue, 2015-03-03 at 10:47 +0530, Bharat Bhushan wrote:

> > With this patch a "context" can allocate a MSI bank and use the

> > allocated MSI-bank for the devices in that "context".

> >

> > kernel/host "context" is "NULL", So all devices owned by kernel will

> > share a MSI bank allocated with "context = NULL.

> >

> > This patch is in direction to have separate MSI bank for kernel

> > context and userspace/VM context. We do not want two software context

> > (kernel and VMs) to share a MSI bank for safe/reliable interrupts with

> > full isolation. Follow up patch will add interface to allocate a MSI

> > bank for userspace/VM context.

> >

> > NOTE: This RFC patch allows only one MSI bank to be allocated for

> > kernel context. Which seems to be sufficient to me. But if we see this

> > is limiting some real usecase scanerio then this limitation can be

> > removed

> >

> > One issue which still need to addressed is when to free kernel context

> > allocated MSI bank? Say all MSI capable devices are assigned to

> > VM/userspace then there is no need to have any MSI bank reserved for

> > kernel context.

> >

> > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>

> > ---

> >  arch/powerpc/sysdev/fsl_msi.c | 88

> > ++++++++++++++++++++++++++++++++++++++-----

> >  arch/powerpc/sysdev/fsl_msi.h |  4 ++

> >  2 files changed, 83 insertions(+), 9 deletions(-)

> >

> > diff --git a/arch/powerpc/sysdev/fsl_msi.c

> > b/arch/powerpc/sysdev/fsl_msi.c index 32ba1e3..027aeeb 100644

> > --- a/arch/powerpc/sysdev/fsl_msi.c

> > +++ b/arch/powerpc/sysdev/fsl_msi.c

> > @@ -142,6 +142,79 @@ static void fsl_teardown_msi_irqs(struct pci_dev

> *pdev)

> >  	return;

> >  }

> >

> > +/*

> > + * Allocate a MSI Bank for the requested "context".

> > + * NULL "context" means that this request is to allocate

> > + * MSI bank for kernel owned devices. And currently we

> > + * assume that one MSI bank is sufficient for kernel.

> > + */

> > +static struct fsl_msi *fsl_msi_allocate_msi_bank(void *context) {

> > +	struct fsl_msi *msi_data;

> > +

> > +	/* Kernel context (NULL) can reserve only one msi bank */

> > +	if (!context) {

> > +		list_for_each_entry(msi_data, &msi_head, list) {

> > +			if ((msi_data->reserved == MSI_RESERVED) &&

> > +			    (msi_data->context == NULL))

> > +				return NULL;

> > +		}

> 

> Unnecessary parentheses

> 

> Is there any reason why the kernel bank needs to participate in this

> mechanism at all?  Set it aside at MSI driver init, and don't put it on

> the list.  I know you've previously said that you want to support configs

> where the kernel doesn't get any banks, but that can just be a boot

> option that tells the MSI code to not set aside a bank for the kernel.

> It would simplify the code.


I think we cannot completely remove the MSI bank from kernel practically. So I can make the kernel msi bank out of list.

> 

> With the patchset as is, how would one indicate whether kernel devices

> should get a bank?


For kernel owned device, in fsl_setup_msi_irqs() we check if there is a reserved MSI bank, if not then reserve a msi bank. If reserve fails then setup_msi_irq() fails. I think there is no fallback to Legacy interrupt.

>  Specifically, when the kernel does have an MSI-

> capable device but we'd prefer to use legacy interrupts to keep MSIs

> available to VFIO.


Do we want this?

> 

> > +	}

> > +

> > +	list_for_each_entry(msi_data, &msi_head, list) {

> > +		if (msi_data->reserved == MSI_FREE) {

> > +			msi_data->reserved = MSI_RESERVED;

> > +			msi_data->context = context;

> > +			return msi_data;

> > +		}

> > +	}

> 

> What prevents races from parallel callers?


Will add a lock.

> 

> > +/* FIXME: Assumption that host kernel will allocate only one MSI bank

> > +*/

> 

> It's not a FIXME if we think the limitation is not burdensome.


Ok

> 

> > + __attribute__ ((unused)) static int fsl_msi_free_msi_bank(void

> > + *context)

> 

> static int __maybe_unused fsl_msi_free_msi_bank(void *context)

> 

> Why are you adding this function then removing it in the next patch?


Will fix this

> 

> > +	/* If no MSI bank allocated for kernel owned device, allocate one

> */

> > +	msi_data = fsl_msi_allocate_msi_bank(NULL);

> > +	if (msi_data)

> > +		return msi_data;

> > +

> > +	return NULL;

> 

> return fsl_msi_allocate_msi_bank(NULL);


Ok

> 

> 

> > +}

> > +

> >  static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,

> >  				struct msi_msg *msg,

> >  				struct fsl_msi *fsl_msi_data)

> > @@ -174,7 +247,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev,

> int nvec, int type)

> >  	struct pci_controller *hose = pci_bus_to_host(pdev->bus);

> >  	struct device_node *np;

> >  	phandle phandle = 0;

> > -	int rc, hwirq = -ENOMEM;

> > +	int rc = -ENODEV, hwirq = -ENOMEM;

> 

> Initialize rc when you detect the error instead of here (it'd be OK to

> initialize it to zero here, to mitigate potential uninitialized value

> bugs).


Ok

> 

> >  	unsigned int virq;

> >  	struct msi_desc *entry;

> >  	struct msi_msg msg;

> > @@ -231,15 +304,12 @@ static int fsl_setup_msi_irqs(struct pci_dev

> *pdev, int nvec, int type)

> >  		if (specific_msi_bank) {

> >  			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);

> >  		} else {

> > -			/*

> > -			 * Loop over all the MSI devices until we find one that

> has an

> > -			 * available interrupt.

> > -			 */

> > -			list_for_each_entry(msi_data, &msi_head, list) {

> > -				hwirq = msi_bitmap_alloc_hwirqs(&msi_data-

> >bitmap, 1);

> > -				if (hwirq >= 0)

> > -					break;

> > +			msi_data = fsl_msi_get_reserved_msi_bank(pdev);

> > +			if (!msi_data) {

> > +				dev_err(&pdev->dev, "No MSI Bank allocated\n");

> > +				goto out_free;

> 

> Is this really an error?  Seems like dev_info() would be more appropriate

> to indicate the absence of a resource where you can fall back to another

> option.


There is no fallback in fsl_setup_msi_irqs(), We have to error out from fsl_setup_msi_irqs(), no?

> 

> > diff --git a/arch/powerpc/sysdev/fsl_msi.h

> > b/arch/powerpc/sysdev/fsl_msi.h index 9b0ab84..c69702b 100644

> > --- a/arch/powerpc/sysdev/fsl_msi.h

> > +++ b/arch/powerpc/sysdev/fsl_msi.h

> > @@ -46,6 +46,10 @@ struct fsl_msi {

> >  	struct list_head list;          /* support multiple MSI banks */

> >

> >  	phandle phandle;

> > +#define MSI_FREE		0

> > +#define MSI_RESERVED		1

> > +        int reserved;

> > +        void *context;

> 

> Whitespace

> 

> How about just "bool reserved"?  Or if the kernel bank is kept off the

> list, just check context for NULL.


I will try to make kernel bank out of list and see how much this simplifies the code.

Thanks
-Bharat

> 

> -Scott

>
Scott Wood March 13, 2015, 10:37 p.m. UTC | #3
On Thu, 2015-03-12 at 10:46 -0500, Bhushan Bharat-R65777 wrote:
> 
> 
> > -----Original Message-----
> > From: Wood Scott-B07421
> > Sent: Thursday, March 12, 2015 4:53 AM
> > To: Bhushan Bharat-R65777
> > Cc: linuxppc-dev@ozlabs.org
> > Subject: Re: [PATCH 3/4 RFC] fsl/msi: Add MSI bank allocation for kernel
> > owned devices
> > 
> > With the patchset as is, how would one indicate whether kernel devices
> > should get a bank?
> 
> For kernel owned device, in fsl_setup_msi_irqs() we check if there is
> a reserved MSI bank, if not then reserve a msi bank. If reserve fails
> then setup_msi_irq() fails. I think there is no fallback to Legacy
> interrupt.

If enabling MSI fails, it's up to the driver to fall back to legacy
interrupts (grep drivers for pci_enable_msi for examples).

> >  Specifically, when the kernel does have an MSI-
> > capable device but we'd prefer to use legacy interrupts to keep MSIs
> > available to VFIO.
> 
> Do we want this?

You'd previously raised concern about wanting to give all MSI banks to
VFIO.  The kernel might still have PCIe devices that are not
performance-critical.  That said, I'm not going to nack the patchset if
it requires the kernel to allocate a bank for itself.

> > > @@ -231,15 +304,12 @@ static int fsl_setup_msi_irqs(struct pci_dev
> > *pdev, int nvec, int type)
> > >  		if (specific_msi_bank) {
> > >  			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
> > >  		} else {
> > > -			/*
> > > -			 * Loop over all the MSI devices until we find one that
> > has an
> > > -			 * available interrupt.
> > > -			 */
> > > -			list_for_each_entry(msi_data, &msi_head, list) {
> > > -				hwirq = msi_bitmap_alloc_hwirqs(&msi_data-
> > >bitmap, 1);
> > > -				if (hwirq >= 0)
> > > -					break;
> > > +			msi_data = fsl_msi_get_reserved_msi_bank(pdev);
> > > +			if (!msi_data) {
> > > +				dev_err(&pdev->dev, "No MSI Bank allocated\n");
> > > +				goto out_free;
> > 
> > Is this really an error?  Seems like dev_info() would be more appropriate
> > to indicate the absence of a resource where you can fall back to another
> > option.
> 
> There is no fallback in fsl_setup_msi_irqs(), We have to error out from fsl_setup_msi_irqs(), no?

I meant as far as the user is concerned, not whether you return an error from the function.

-Scott
diff mbox

Patch

diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 32ba1e3..027aeeb 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -142,6 +142,79 @@  static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
 	return;
 }
 
+/*
+ * Allocate a MSI Bank for the requested "context".
+ * NULL "context" means that this request is to allocate
+ * MSI bank for kernel owned devices. And currently we
+ * assume that one MSI bank is sufficient for kernel.
+ */
+static struct fsl_msi *fsl_msi_allocate_msi_bank(void *context)
+{
+	struct fsl_msi *msi_data;
+
+	/* Kernel context (NULL) can reserve only one msi bank */
+	if (!context) {
+		list_for_each_entry(msi_data, &msi_head, list) {
+			if ((msi_data->reserved == MSI_RESERVED) &&
+			    (msi_data->context == NULL))
+				return NULL;
+		}
+	}
+
+	list_for_each_entry(msi_data, &msi_head, list) {
+		if (msi_data->reserved == MSI_FREE) {
+			msi_data->reserved = MSI_RESERVED;
+			msi_data->context = context;
+			return msi_data;
+		}
+	}
+
+	return NULL;
+}
+
+/* FIXME: Assumption that host kernel will allocate only one MSI bank */
+ __attribute__ ((unused)) static int fsl_msi_free_msi_bank(void *context)
+{
+	struct fsl_msi *msi_data;
+
+	list_for_each_entry(msi_data, &msi_head, list) {
+		if ((msi_data->reserved == MSI_RESERVED) &&
+		     (msi_data->context == context)) {
+			msi_data->reserved = MSI_FREE;
+			msi_data->context = NULL;
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+/*  This API returns the allocated MSI bank of "context"
+ *  to which "pdev" device belongs.
+ *  All kernel owned devices have NULL context. All devices
+ *  in same "context" will share the allocated MSI bank.
+ *
+ *  Note: If no MSI bank allocated to kernel context then
+ *  we allocate a MSI bank here.
+ */
+static struct fsl_msi *fsl_msi_get_reserved_msi_bank(struct pci_dev *pdev)
+{
+	struct fsl_msi *msi_data = NULL;
+	void *context = NULL;
+
+	list_for_each_entry(msi_data, &msi_head, list) {
+		if ((msi_data->reserved == MSI_RESERVED) &&
+		    (msi_data->context == context))
+			return msi_data;
+	}
+
+	/* If no MSI bank allocated for kernel owned device, allocate one */
+	msi_data = fsl_msi_allocate_msi_bank(NULL);
+	if (msi_data)
+		return msi_data;
+
+	return NULL;
+}
+
 static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
 				struct msi_msg *msg,
 				struct fsl_msi *fsl_msi_data)
@@ -174,7 +247,7 @@  static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 	struct device_node *np;
 	phandle phandle = 0;
-	int rc, hwirq = -ENOMEM;
+	int rc = -ENODEV, hwirq = -ENOMEM;
 	unsigned int virq;
 	struct msi_desc *entry;
 	struct msi_msg msg;
@@ -231,15 +304,12 @@  static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 		if (specific_msi_bank) {
 			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
 		} else {
-			/*
-			 * Loop over all the MSI devices until we find one that has an
-			 * available interrupt.
-			 */
-			list_for_each_entry(msi_data, &msi_head, list) {
-				hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
-				if (hwirq >= 0)
-					break;
+			msi_data = fsl_msi_get_reserved_msi_bank(pdev);
+			if (!msi_data) {
+				dev_err(&pdev->dev, "No MSI Bank allocated\n");
+				goto out_free;
 			}
+			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
 		}
 
 		if (hwirq < 0) {
diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
index 9b0ab84..c69702b 100644
--- a/arch/powerpc/sysdev/fsl_msi.h
+++ b/arch/powerpc/sysdev/fsl_msi.h
@@ -46,6 +46,10 @@  struct fsl_msi {
 	struct list_head list;          /* support multiple MSI banks */
 
 	phandle phandle;
+#define MSI_FREE		0
+#define MSI_RESERVED		1
+        int reserved;
+        void *context;
 };
 
 #endif /* _POWERPC_SYSDEV_FSL_MSI_H */