diff mbox

pci: rewrite pci_call_probe() to use workqueue instead of work_on_cpu()

Message ID 20120823213529.GH14962@google.com
State Accepted
Headers show

Commit Message

Tejun Heo Aug. 23, 2012, 9:35 p.m. UTC
pci_call_probe() uses work_on_cpu(), which creates and tears down a
full kthread on each call, to invoke ->probe() on node local CPU for
allocation affinity.

The same goal can easily be achieved using a work item.  This patch
rewrites pci_call_probe() so that it uses a work item instead of
work_on_cpu().

Note that the function is restructured for simplicity.  This adds
get/put_online_cpus() pair for devices without node but the overhead
of doing so isn't anything material at this level.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
I tested both paths but had to simulate device on foreign node.

Thanks.

 drivers/pci/pci-driver.c |   48 +++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Bjorn Helgaas Sept. 14, 2012, 6:13 p.m. UTC | #1
On Thu, Aug 23, 2012 at 3:35 PM, Tejun Heo <tj@kernel.org> wrote:
> pci_call_probe() uses work_on_cpu(), which creates and tears down a
> full kthread on each call, to invoke ->probe() on node local CPU for
> allocation affinity.
>
> The same goal can easily be achieved using a work item.  This patch
> rewrites pci_call_probe() so that it uses a work item instead of
> work_on_cpu().
>
> Note that the function is restructured for simplicity.  This adds
> get/put_online_cpus() pair for devices without node but the overhead
> of doing so isn't anything material at this level.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>

I applied this to my "next" branch and pushed it.  Thanks!

> ---
> I tested both paths but had to simulate device on foreign node.
>
> Thanks.
>
>  drivers/pci/pci-driver.c |   48 +++++++++++++++++++++++++++--------------------
>  1 file changed, 28 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 5270f1a..554c15a 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -274,11 +274,14 @@ struct drv_dev_and_id {
>         struct pci_driver *drv;
>         struct pci_dev *dev;
>         const struct pci_device_id *id;
> +       struct work_struct work;
> +       int error;
>  };
>
> -static long local_pci_probe(void *_ddi)
> +static void pci_probe_fn(struct work_struct *work)
>  {
> -       struct drv_dev_and_id *ddi = _ddi;
> +       struct drv_dev_and_id *ddi = container_of(work, struct drv_dev_and_id,
> +                                                 work);
>         struct device *dev = &ddi->dev->dev;
>         int rc;
>
> @@ -298,33 +301,38 @@ static long local_pci_probe(void *_ddi)
>                 pm_runtime_set_suspended(dev);
>                 pm_runtime_put_noidle(dev);
>         }
> -       return rc;
> +
> +       ddi->error = rc;
>  }
>
>  static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
>                           const struct pci_device_id *id)
>  {
> -       int error, node;
> +       int node;
> +       int cpu = nr_cpu_ids;
>         struct drv_dev_and_id ddi = { drv, dev, id };
>
> -       /* Execute driver initialization on node where the device's
> -          bus is attached to.  This way the driver likely allocates
> -          its local memory on the right node without any need to
> -          change it. */
> -       node = dev_to_node(&dev->dev);
> -       if (node >= 0) {
> -               int cpu;
> +       /*
> +        * Execute driver initialization on node where the device's bus is
> +        * attached to.  This way the driver likely allocates its local
> +        * memory on the right node without any need to change it.
> +        */
> +       get_online_cpus();
>
> -               get_online_cpus();
> +       node = dev_to_node(&dev->dev);
> +       if (node >= 0)
>                 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
> -               if (cpu < nr_cpu_ids)
> -                       error = work_on_cpu(cpu, local_pci_probe, &ddi);
> -               else
> -                       error = local_pci_probe(&ddi);
> -               put_online_cpus();
> -       } else
> -               error = local_pci_probe(&ddi);
> -       return error;
> +
> +       if (cpu < nr_cpu_ids) {
> +               INIT_WORK_ONSTACK(&ddi.work, pci_probe_fn);
> +               schedule_work_on(cpu, &ddi.work);
> +               flush_work(&ddi.work);
> +       } else {
> +               pci_probe_fn(&ddi.work);
> +       }
> +
> +       put_online_cpus();
> +       return ddi.error;
>  }
>
>  /**
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo Sept. 17, 2012, 9:41 p.m. UTC | #2
On Fri, Sep 14, 2012 at 12:13:21PM -0600, Bjorn Helgaas wrote:
> On Thu, Aug 23, 2012 at 3:35 PM, Tejun Heo <tj@kernel.org> wrote:
> > pci_call_probe() uses work_on_cpu(), which creates and tears down a
> > full kthread on each call, to invoke ->probe() on node local CPU for
> > allocation affinity.
> >
> > The same goal can easily be achieved using a work item.  This patch
> > rewrites pci_call_probe() so that it uses a work item instead of
> > work_on_cpu().
> >
> > Note that the function is restructured for simplicity.  This adds
> > get/put_online_cpus() pair for devices without node but the overhead
> > of doing so isn't anything material at this level.
> >
> > Signed-off-by: Tejun Heo <tj@kernel.org>
> 
> I applied this to my "next" branch and pushed it.  Thanks!

Can you please drop / revert this one?  It seems we need work_on_cpu()
after all and I'm gonna reimplement it in sane way.

Sorry about the trouble.
Bjorn Helgaas Sept. 17, 2012, 9:45 p.m. UTC | #3
On Mon, Sep 17, 2012 at 3:41 PM, Tejun Heo <tj@kernel.org> wrote:
> On Fri, Sep 14, 2012 at 12:13:21PM -0600, Bjorn Helgaas wrote:
>> On Thu, Aug 23, 2012 at 3:35 PM, Tejun Heo <tj@kernel.org> wrote:
>> > pci_call_probe() uses work_on_cpu(), which creates and tears down a
>> > full kthread on each call, to invoke ->probe() on node local CPU for
>> > allocation affinity.
>> >
>> > The same goal can easily be achieved using a work item.  This patch
>> > rewrites pci_call_probe() so that it uses a work item instead of
>> > work_on_cpu().
>> >
>> > Note that the function is restructured for simplicity.  This adds
>> > get/put_online_cpus() pair for devices without node but the overhead
>> > of doing so isn't anything material at this level.
>> >
>> > Signed-off-by: Tejun Heo <tj@kernel.org>
>>
>> I applied this to my "next" branch and pushed it.  Thanks!
>
> Can you please drop / revert this one?  It seems we need work_on_cpu()
> after all and I'm gonna reimplement it in sane way.

I dropped it.  Thanks for the heads-up.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 5270f1a..554c15a 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -274,11 +274,14 @@  struct drv_dev_and_id {
 	struct pci_driver *drv;
 	struct pci_dev *dev;
 	const struct pci_device_id *id;
+	struct work_struct work;
+	int error;
 };
 
-static long local_pci_probe(void *_ddi)
+static void pci_probe_fn(struct work_struct *work)
 {
-	struct drv_dev_and_id *ddi = _ddi;
+	struct drv_dev_and_id *ddi = container_of(work, struct drv_dev_and_id,
+						  work);
 	struct device *dev = &ddi->dev->dev;
 	int rc;
 
@@ -298,33 +301,38 @@  static long local_pci_probe(void *_ddi)
 		pm_runtime_set_suspended(dev);
 		pm_runtime_put_noidle(dev);
 	}
-	return rc;
+
+	ddi->error = rc;
 }
 
 static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
 			  const struct pci_device_id *id)
 {
-	int error, node;
+	int node;
+	int cpu = nr_cpu_ids;
 	struct drv_dev_and_id ddi = { drv, dev, id };
 
-	/* Execute driver initialization on node where the device's
-	   bus is attached to.  This way the driver likely allocates
-	   its local memory on the right node without any need to
-	   change it. */
-	node = dev_to_node(&dev->dev);
-	if (node >= 0) {
-		int cpu;
+	/*
+	 * Execute driver initialization on node where the device's bus is
+	 * attached to.  This way the driver likely allocates its local
+	 * memory on the right node without any need to change it.
+	 */
+	get_online_cpus();
 
-		get_online_cpus();
+	node = dev_to_node(&dev->dev);
+	if (node >= 0)
 		cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
-		if (cpu < nr_cpu_ids)
-			error = work_on_cpu(cpu, local_pci_probe, &ddi);
-		else
-			error = local_pci_probe(&ddi);
-		put_online_cpus();
-	} else
-		error = local_pci_probe(&ddi);
-	return error;
+
+	if (cpu < nr_cpu_ids) {
+		INIT_WORK_ONSTACK(&ddi.work, pci_probe_fn);
+		schedule_work_on(cpu, &ddi.work);
+		flush_work(&ddi.work);
+	} else {
+		pci_probe_fn(&ddi.work);
+	}
+
+	put_online_cpus();
+	return ddi.error;
 }
 
 /**