diff mbox

[-v3] PCI, pciehp: make every slot have its own workqueue to avoid deadlock

Message ID 1352711830-9772-1-git-send-email-wangyijing@huawei.com
State Superseded
Headers show

Commit Message

Yijing Wang Nov. 12, 2012, 9:17 a.m. UTC
Currently, pciehp use global pciehp_wq to handle hotplug event from hardware.
Hot remove path will be blocked if a hotplug slot connected a IO-BOX(composed of PCIe
Switch and some slots which support hotplug). The hot removed work was queued into
pciehp_wq. But in the hot-remove path, pciehp driver would flush pciehp_wq when
the pcie port(support pciehp) was removed. In this case the hot-remove path blocked.
This patch remove the global pciehp_wq and create a new workqueue for every slot to
avoid above problem.

-+-[0000:40]-+-00.0-[0000:41]--
 |           +-01.0-[0000:42]--+-00.0  Intel Corporation 82576 Gigabit Network Connection
 |           |                 \-00.1  Intel Corporation 82576 Gigabit Network Connection
 |           +-03.0-[0000:43]----00.0  LSI Logic / Symbios Logic SAS1064ET PCI-Express Fusion-MPT SAS
 |           +-04.0-[0000:44]--
 |           +-05.0-[0000:45]--
 |           +-07.0-[0000:46-4f]----00.0-[0000:47-4f]--+-04.0-[0000:48-49]----00.0-[0000:49]--
 |           |(hotplug slot)                           +-08.0-[0000:4a]--
 |           |                                         +-09.0-[0000:4b]--
 |           |                                         +-10.0-[0000:4c]--
 |           |                                         +-11.0-[0000:4d]--
 |           |                                         +-14.0-[0000:4e]--
 |           |                                         \-15.0-[0000:4f]--+-00.0  Intel Corporation 82576 Gigabit Network Connection
 |           |                                         (hotplug slot)    \-00.1  Intel Corporation 82576 Gigabit Network Connection


Signed-off-by: Yijing Wang <wangyijing@huawei.com>
---
 drivers/pci/hotplug/pciehp.h      |    2 +-
 drivers/pci/hotplug/pciehp_core.c |   11 ++---------
 drivers/pci/hotplug/pciehp_ctrl.c |    8 ++++----
 drivers/pci/hotplug/pciehp_hpc.c  |   11 ++++++++++-
 4 files changed, 17 insertions(+), 15 deletions(-)

Comments

Kenji Kaneshige Nov. 12, 2012, 10:12 a.m. UTC | #1
Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>

> -----Original Message-----
> From: Yijing Wang [mailto:wangyijing@huawei.com]
> Sent: Monday, November 12, 2012 6:17 PM
> To: Bjorn Helgaas; Kaneshige, Kenji/金重 憲治; Yinghai Lu; Rafael; Rusty Russell; Mauro Carvalho Chehab; Oliver Neukum;
> jiang.liu@huawei.com
> Cc: linux-pci@vger.kernel.org; Hanjun Guo; Yijing Wang
> Subject: [PATCH -v3] PCI, pciehp: make every slot have its own workqueue to avoid deadlock
> 
> Currently, pciehp use global pciehp_wq to handle hotplug event from hardware.
> Hot remove path will be blocked if a hotplug slot connected a IO-BOX(composed of PCIe
> Switch and some slots which support hotplug). The hot removed work was queued into
> pciehp_wq. But in the hot-remove path, pciehp driver would flush pciehp_wq when
> the pcie port(support pciehp) was removed. In this case the hot-remove path blocked.
> This patch remove the global pciehp_wq and create a new workqueue for every slot to
> avoid above problem.
> 
> -+-[0000:40]-+-00.0-[0000:41]--
>  |           +-01.0-[0000:42]--+-00.0  Intel Corporation 82576 Gigabit Network Connection
>  |           |                 \-00.1  Intel Corporation 82576 Gigabit Network Connection
>  |           +-03.0-[0000:43]----00.0  LSI Logic / Symbios Logic SAS1064ET PCI-Express Fusion-MPT SAS
>  |           +-04.0-[0000:44]--
>  |           +-05.0-[0000:45]--
>  |           +-07.0-[0000:46-4f]----00.0-[0000:47-4f]--+-04.0-[0000:48-49]----00.0-[0000:49]--
>  |           |(hotplug slot)                           +-08.0-[0000:4a]--
>  |           |                                         +-09.0-[0000:4b]--
>  |           |                                         +-10.0-[0000:4c]--
>  |           |                                         +-11.0-[0000:4d]--
>  |           |                                         +-14.0-[0000:4e]--
>  |           |                                         \-15.0-[0000:4f]--+-00.0  Intel Corporation 82576 Gigabit
> Network Connection
>  |           |                                         (hotplug slot)    \-00.1  Intel Corporation 82576 Gigabit
> Network Connection
> 
> 
> Signed-off-by: Yijing Wang <wangyijing@huawei.com>
> ---
>  drivers/pci/hotplug/pciehp.h      |    2 +-
>  drivers/pci/hotplug/pciehp_core.c |   11 ++---------
>  drivers/pci/hotplug/pciehp_ctrl.c |    8 ++++----
>  drivers/pci/hotplug/pciehp_hpc.c  |   11 ++++++++++-
>  4 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
> index 26ffd3e..2c113de 100644
> --- a/drivers/pci/hotplug/pciehp.h
> +++ b/drivers/pci/hotplug/pciehp.h
> @@ -44,7 +44,6 @@ extern bool pciehp_poll_mode;
>  extern int pciehp_poll_time;
>  extern bool pciehp_debug;
>  extern bool pciehp_force;
> -extern struct workqueue_struct *pciehp_wq;
> 
>  #define dbg(format, arg...)						\
>  do {									\
> @@ -78,6 +77,7 @@ struct slot {
>  	struct hotplug_slot *hotplug_slot;
>  	struct delayed_work work;	/* work for button event */
>  	struct mutex lock;
> +	struct workqueue_struct *wq;
>  };
> 
>  struct event_info {
> diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
> index 916bf4f..0914211 100644
> --- a/drivers/pci/hotplug/pciehp_core.c
> +++ b/drivers/pci/hotplug/pciehp_core.c
> @@ -42,7 +42,6 @@ bool pciehp_debug;
>  bool pciehp_poll_mode;
>  int pciehp_poll_time;
>  bool pciehp_force;
> -struct workqueue_struct *pciehp_wq;
> 
>  #define DRIVER_VERSION	"0.4"
>  #define DRIVER_AUTHOR	"Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy
> <dely.l.sy@intel.com>"
> @@ -340,18 +339,13 @@ static int __init pcied_init(void)
>  {
>  	int retval = 0;
> 
> -	pciehp_wq = alloc_workqueue("pciehp", 0, 0);
> -	if (!pciehp_wq)
> -		return -ENOMEM;
> -
>  	pciehp_firmware_init();
>  	retval = pcie_port_service_register(&hpdriver_portdrv);
>   	dbg("pcie_port_service_register = %d\n", retval);
>    	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
> - 	if (retval) {
> -		destroy_workqueue(pciehp_wq);
> +	if (retval)
>  		dbg("Failure to register service\n");
> -	}
> +
>  	return retval;
>  }
> 
> @@ -359,7 +353,6 @@ static void __exit pcied_cleanup(void)
>  {
>  	dbg("unload_pciehpd()\n");
>  	pcie_port_service_unregister(&hpdriver_portdrv);
> -	destroy_workqueue(pciehp_wq);
>  	info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
>  }
> 
> diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
> index 27f4429..38f0186 100644
> --- a/drivers/pci/hotplug/pciehp_ctrl.c
> +++ b/drivers/pci/hotplug/pciehp_ctrl.c
> @@ -49,7 +49,7 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
>  	info->p_slot = p_slot;
>  	INIT_WORK(&info->work, interrupt_event_handler);
> 
> -	queue_work(pciehp_wq, &info->work);
> +	queue_work(p_slot->wq, &info->work);
> 
>  	return 0;
>  }
> @@ -344,7 +344,7 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
>  		kfree(info);
>  		goto out;
>  	}
> -	queue_work(pciehp_wq, &info->work);
> +	queue_work(p_slot->wq, &info->work);
>   out:
>  	mutex_unlock(&p_slot->lock);
>  }
> @@ -377,7 +377,7 @@ static void handle_button_press_event(struct slot *p_slot)
>  		if (ATTN_LED(ctrl))
>  			pciehp_set_attention_status(p_slot, 0);
> 
> -		queue_delayed_work(pciehp_wq, &p_slot->work, 5*HZ);
> +		queue_delayed_work(p_slot->wq, &p_slot->work, 5*HZ);
>  		break;
>  	case BLINKINGOFF_STATE:
>  	case BLINKINGON_STATE:
> @@ -439,7 +439,7 @@ static void handle_surprise_event(struct slot *p_slot)
>  	else
>  		p_slot->state = POWERON_STATE;
> 
> -	queue_work(pciehp_wq, &info->work);
> +	queue_work(p_slot->wq, &info->work);
>  }
> 
>  static void interrupt_event_handler(struct work_struct *work)
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
> index 13b2eaf..b34418d 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -773,23 +773,32 @@ static void pcie_shutdown_notification(struct controller *ctrl)
>  static int pcie_init_slot(struct controller *ctrl)
>  {
>  	struct slot *slot;
> +	char name[32];
> 
>  	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
>  	if (!slot)
>  		return -ENOMEM;
> +
> +	snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl));
> +	slot->wq = create_singlethread_workqueue(name);
> +	if (!slot->wq)
> +		goto abort;
> 
>  	slot->ctrl = ctrl;
>  	mutex_init(&slot->lock);
>  	INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
>  	ctrl->slot = slot;
>  	return 0;
> +abort:
> +	kfree(slot);
> +	return -ENOMEM;
>  }
> 
>  static void pcie_cleanup_slot(struct controller *ctrl)
>  {
>  	struct slot *slot = ctrl->slot;
>  	cancel_delayed_work(&slot->work);
> -	flush_workqueue(pciehp_wq);
> +	destroy_workqueue(slot->wq);
>  	kfree(slot);
>  }
> 
> --
> 1.7.1
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Nov. 13, 2012, 12:17 a.m. UTC | #2
On Mon, Nov 12, 2012 at 2:17 AM, Yijing Wang <wangyijing@huawei.com> wrote:
> Currently, pciehp use global pciehp_wq to handle hotplug event from hardware.
> Hot remove path will be blocked if a hotplug slot connected a IO-BOX(composed of PCIe
> Switch and some slots which support hotplug). The hot removed work was queued into
> pciehp_wq. But in the hot-remove path, pciehp driver would flush pciehp_wq when
> the pcie port(support pciehp) was removed. In this case the hot-remove path blocked.
> This patch remove the global pciehp_wq and create a new workqueue for every slot to
> avoid above problem.

Can you include the actual call path that leads to the deadlock?  I
assume it's related to a PCIe device removal that happens at about the
same time as the pcie_port_service_unregister(&hpdriver_portdrv)?

I just want to make sure that the new per-slot workqueue arrangement
doesn't lead to any case where the slot workqueue item depends on
something removed by the pcied_cleanup() as the module exits.

> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -773,23 +773,32 @@ static void pcie_shutdown_notification(struct controller *ctrl)
>  static int pcie_init_slot(struct controller *ctrl)
>  {
>         struct slot *slot;
> +       char name[32];
>
>         slot = kzalloc(sizeof(*slot), GFP_KERNEL);
>         if (!slot)
>                 return -ENOMEM;
> +
> +       snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl));
> +       slot->wq = create_singlethread_workqueue(name);

You're using create_singlethread_workqueue() instead of the previous
alloc_workqueue().  This is a slight change in semantics since we
previously called "alloc_workqueue(name, 0, 0)" and
create_singlethread_workqueue() calls "alloc_workqueue(name,
WQ_UNBOUND | WQ_MEM_RECLAIM, 1)."

I don't know whether this is necessary in order to have per-slot
workqueues or not.  If not, please split it into a separate patch to
make it more visible.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kenji Kaneshige Nov. 13, 2012, 2:35 a.m. UTC | #3
> -----Original Message-----
> From: Bjorn Helgaas [mailto:bhelgaas@google.com]
> Sent: Tuesday, November 13, 2012 9:18 AM
> To: Yijing Wang
> Cc: Kaneshige, Kenji/金重 憲治; Yinghai Lu; Rafael; Rusty Russell; Mauro Carvalho Chehab; Oliver Neukum;
> jiang.liu@huawei.com; linux-pci@vger.kernel.org; Hanjun Guo
> Subject: Re: [PATCH -v3] PCI, pciehp: make every slot have its own workqueue to avoid deadlock
> 
> On Mon, Nov 12, 2012 at 2:17 AM, Yijing Wang <wangyijing@huawei.com> wrote:
> > Currently, pciehp use global pciehp_wq to handle hotplug event from hardware.
> > Hot remove path will be blocked if a hotplug slot connected a IO-BOX(composed of PCIe
> > Switch and some slots which support hotplug). The hot removed work was queued into
> > pciehp_wq. But in the hot-remove path, pciehp driver would flush pciehp_wq when
> > the pcie port(support pciehp) was removed. In this case the hot-remove path blocked.
> > This patch remove the global pciehp_wq and create a new workqueue for every slot to
> > avoid above problem.
> 
> Can you include the actual call path that leads to the deadlock?  I
> assume it's related to a PCIe device removal that happens at about the
> same time as the pcie_port_service_unregister(&hpdriver_portdrv)?
> 
> I just want to make sure that the new per-slot workqueue arrangement
> doesn't lead to any case where the slot workqueue item depends on
> something removed by the pcied_cleanup() as the module exits.

Just for your information, here is my understanding of the problem.

====
- Your hardware has nested PCIe hotplug slots like below.

    --<slot A>--<Slot B>

- Hot-removal request (attention button event) comes to <slot A>
- Pciehp driver queue the hot-removal work
- This hot-removal work try to remove <slot B>
- To remove <slot B>, pciehp flush the pciehp_wq, but it never finishes
  because hot-removal work is in progress. ===> deadlock
====

> 
> > --- a/drivers/pci/hotplug/pciehp_hpc.c
> > +++ b/drivers/pci/hotplug/pciehp_hpc.c
> > @@ -773,23 +773,32 @@ static void pcie_shutdown_notification(struct controller *ctrl)
> >  static int pcie_init_slot(struct controller *ctrl)
> >  {
> >         struct slot *slot;
> > +       char name[32];
> >
> >         slot = kzalloc(sizeof(*slot), GFP_KERNEL);
> >         if (!slot)
> >                 return -ENOMEM;
> > +
> > +       snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl));
> > +       slot->wq = create_singlethread_workqueue(name);
> 
> You're using create_singlethread_workqueue() instead of the previous
> alloc_workqueue().  This is a slight change in semantics since we
> previously called "alloc_workqueue(name, 0, 0)" and
> create_singlethread_workqueue() calls "alloc_workqueue(name,
> WQ_UNBOUND | WQ_MEM_RECLAIM, 1)."
> 
> I don't know whether this is necessary in order to have per-slot
> workqueues or not.  If not, please split it into a separate patch to
> make it more visible.

Good catch.
I think we should use 0 for max_active.
Hot-add/remove operation take a long time. So if we use 1 for max_active,
we cannot handle events during hot-add/remove operation. As a result, for
example, push button event and cancel event that are issued during
hot-add/remove operation might not be handled properly, I think.

Any comments, Yijing?

Regards,
Kenji Kaneshige

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yijing Wang Nov. 13, 2012, 2:53 a.m. UTC | #4
On 2012/11/13 10:35, Kaneshige, Kenji wrote:
>> -----Original Message-----
>> From: Bjorn Helgaas [mailto:bhelgaas@google.com]
>> Sent: Tuesday, November 13, 2012 9:18 AM
>> To: Yijing Wang
>> Cc: Kaneshige, Kenji/金重 憲治; Yinghai Lu; Rafael; Rusty Russell; Mauro Carvalho Chehab; Oliver Neukum;
>> jiang.liu@huawei.com; linux-pci@vger.kernel.org; Hanjun Guo
>> Subject: Re: [PATCH -v3] PCI, pciehp: make every slot have its own workqueue to avoid deadlock
>>
>> On Mon, Nov 12, 2012 at 2:17 AM, Yijing Wang <wangyijing@huawei.com> wrote:
>>> Currently, pciehp use global pciehp_wq to handle hotplug event from hardware.
>>> Hot remove path will be blocked if a hotplug slot connected a IO-BOX(composed of PCIe
>>> Switch and some slots which support hotplug). The hot removed work was queued into
>>> pciehp_wq. But in the hot-remove path, pciehp driver would flush pciehp_wq when
>>> the pcie port(support pciehp) was removed. In this case the hot-remove path blocked.
>>> This patch remove the global pciehp_wq and create a new workqueue for every slot to
>>> avoid above problem.
>>
>> Can you include the actual call path that leads to the deadlock?  I
>> assume it's related to a PCIe device removal that happens at about the
>> same time as the pcie_port_service_unregister(&hpdriver_portdrv)?
>>
>> I just want to make sure that the new per-slot workqueue arrangement
>> doesn't lead to any case where the slot workqueue item depends on
>> something removed by the pcied_cleanup() as the module exits.
> 
> Just for your information, here is my understanding of the problem.
> 
> ====
> - Your hardware has nested PCIe hotplug slots like below.
> 
>     --<slot A>--<Slot B>
> 
> - Hot-removal request (attention button event) comes to <slot A>
> - Pciehp driver queue the hot-removal work
> - This hot-removal work try to remove <slot B>
> - To remove <slot B>, pciehp flush the pciehp_wq, but it never finishes
>   because hot-removal work is in progress. ===> deadlock
> ====
> 

Yes, the deadlock situation as Kaneshige's description above, I will add deadlock call path
description into patch.

>>
>>> --- a/drivers/pci/hotplug/pciehp_hpc.c
>>> +++ b/drivers/pci/hotplug/pciehp_hpc.c
>>> @@ -773,23 +773,32 @@ static void pcie_shutdown_notification(struct controller *ctrl)
>>>  static int pcie_init_slot(struct controller *ctrl)
>>>  {
>>>         struct slot *slot;
>>> +       char name[32];
>>>
>>>         slot = kzalloc(sizeof(*slot), GFP_KERNEL);
>>>         if (!slot)
>>>                 return -ENOMEM;
>>> +
>>> +       snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl));
>>> +       slot->wq = create_singlethread_workqueue(name);
>>
>> You're using create_singlethread_workqueue() instead of the previous
>> alloc_workqueue().  This is a slight change in semantics since we
>> previously called "alloc_workqueue(name, 0, 0)" and
>> create_singlethread_workqueue() calls "alloc_workqueue(name,
>> WQ_UNBOUND | WQ_MEM_RECLAIM, 1)."
>>
>> I don't know whether this is necessary in order to have per-slot
>> workqueues or not.  If not, please split it into a separate patch to
>> make it more visible.
> 
> Good catch.
> I think we should use 0 for max_active.
> Hot-add/remove operation take a long time. So if we use 1 for max_active,
> we cannot handle events during hot-add/remove operation. As a result, for
> example, push button event and cancel event that are issued during
> hot-add/remove operation might not be handled properly, I think.
> 
> Any comments, Yijing?

This is an issue, I will test this case in my hot-plug machine and confirm it.

Thanks!
Yijing

> 
> Regards,
> Kenji Kaneshige
> 
> 
> .
>
Yijing Wang Nov. 13, 2012, 8 a.m. UTC | #5
On 2012/11/13 10:53, Yijing Wang wrote:
> On 2012/11/13 10:35, Kaneshige, Kenji wrote:
>>> -----Original Message-----
>>> From: Bjorn Helgaas [mailto:bhelgaas@google.com]
>>> Sent: Tuesday, November 13, 2012 9:18 AM
>>> To: Yijing Wang
>>> Cc: Kaneshige, Kenji/金重 憲治; Yinghai Lu; Rafael; Rusty Russell; Mauro Carvalho Chehab; Oliver Neukum;
>>> jiang.liu@huawei.com; linux-pci@vger.kernel.org; Hanjun Guo
>>> Subject: Re: [PATCH -v3] PCI, pciehp: make every slot have its own workqueue to avoid deadlock
>>>
>>> On Mon, Nov 12, 2012 at 2:17 AM, Yijing Wang <wangyijing@huawei.com> wrote:
>>>> Currently, pciehp use global pciehp_wq to handle hotplug event from hardware.
>>>> Hot remove path will be blocked if a hotplug slot connected a IO-BOX(composed of PCIe
>>>> Switch and some slots which support hotplug). The hot removed work was queued into
>>>> pciehp_wq. But in the hot-remove path, pciehp driver would flush pciehp_wq when
>>>> the pcie port(support pciehp) was removed. In this case the hot-remove path blocked.
>>>> This patch remove the global pciehp_wq and create a new workqueue for every slot to
>>>> avoid above problem.
>>>
>>> Can you include the actual call path that leads to the deadlock?  I
>>> assume it's related to a PCIe device removal that happens at about the
>>> same time as the pcie_port_service_unregister(&hpdriver_portdrv)?
>>>
>>> I just want to make sure that the new per-slot workqueue arrangement
>>> doesn't lead to any case where the slot workqueue item depends on
>>> something removed by the pcied_cleanup() as the module exits.
>>
>> Just for your information, here is my understanding of the problem.
>>
>> ====
>> - Your hardware has nested PCIe hotplug slots like below.
>>
>>     --<slot A>--<Slot B>
>>
>> - Hot-removal request (attention button event) comes to <slot A>
>> - Pciehp driver queue the hot-removal work
>> - This hot-removal work try to remove <slot B>
>> - To remove <slot B>, pciehp flush the pciehp_wq, but it never finishes
>>   because hot-removal work is in progress. ===> deadlock
>> ====
>>
> 
> Yes, the deadlock situation as Kaneshige's description above, I will add deadlock call path
> description into patch.
> 
>>>
>>>> --- a/drivers/pci/hotplug/pciehp_hpc.c
>>>> +++ b/drivers/pci/hotplug/pciehp_hpc.c
>>>> @@ -773,23 +773,32 @@ static void pcie_shutdown_notification(struct controller *ctrl)
>>>>  static int pcie_init_slot(struct controller *ctrl)
>>>>  {
>>>>         struct slot *slot;
>>>> +       char name[32];
>>>>
>>>>         slot = kzalloc(sizeof(*slot), GFP_KERNEL);
>>>>         if (!slot)
>>>>                 return -ENOMEM;
>>>> +
>>>> +       snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl));
>>>> +       slot->wq = create_singlethread_workqueue(name);
>>>
>>> You're using create_singlethread_workqueue() instead of the previous
>>> alloc_workqueue().  This is a slight change in semantics since we
>>> previously called "alloc_workqueue(name, 0, 0)" and
>>> create_singlethread_workqueue() calls "alloc_workqueue(name,
>>> WQ_UNBOUND | WQ_MEM_RECLAIM, 1)."
>>>
>>> I don't know whether this is necessary in order to have per-slot
>>> workqueues or not.  If not, please split it into a separate patch to
>>> make it more visible.
>>
>> Good catch.
>> I think we should use 0 for max_active.
>> Hot-add/remove operation take a long time. So if we use 1 for max_active,
>> we cannot handle events during hot-add/remove operation. As a result, for
>> example, push button event and cancel event that are issued during
>> hot-add/remove operation might not be handled properly, I think.
>>
>> Any comments, Yijing?
> 
> This is an issue, I will test this case in my hot-plug machine and confirm it.

Hi Kaneshige,
   I tested this v3 patch in my hot plug machine and confirmed this problem. As you said,
there were some unexpected problem when push button event and cancel event that are issued during
hot-add/remove operation.

Before applied v3 patch the new push button event will be ignored during hot-add/remove operation

pciehp 0000:47:14.0:pcie24: Button pressed on Slot(244)
pciehp 0000:47:14.0:pcie24: PCI slot #244 - powering on due to button press.
pci 0000:4e:00.0: [1077:2432] type 00 class 0x0c0400
pci 0000:4e:00.0: reg 10: [io  0x0000-0x00ff]
pci 0000:4e:00.0: reg 14: [mem 0x00000000-0x00003fff 64bit]
..........................................................
pcieport 0000:47:14.0:   bridge window [io  0x9000-0x9fff]
pcieport 0000:47:14.0:   bridge window [mem 0x80000000-0x800fffff]
qla2xxx 0000:4e:00.0: enabling device (0100 -> 0102)
qla2xxx [0000:4e:00.0]-001d: : Found an ISP2432 irq 77 iobase 0xc000000080080000.
scsi22 : qla2xxx
pciehp 0000:47:14.0:pcie24: Button pressed on Slot(244)
pciehp 0000:47:14.0:pcie24: Button ignore on Slot(244)

After applied v3 patch the new push button event will not be ignored, and another hot-add/remove operation will start.

pciehp 0000:47:14.0:pcie24: Button pressed on Slot(244)
pciehp 0000:47:14.0:pcie24: PCI slot #244 - powering on due to button press.
pci 0000:4e:00.0: [1077:2432] type 00 class 0x0c0400
............................................................
qla2xxx [0000:4e:00.0]-001d: : Found an ISP2432 irq 77 iobase 0xc000000080080000.
scsi24 : qla2xxx
pciehp 0000:47:14.0:pcie24: Button pressed on Slot(244)
qla2xxx [0000:4e:00.0]-00fb:24: QLogic QLE2462 - PCI-Express Dual Channel 4Gb Fibre Channel HBA.
qla2xxx [0000:4e:00.0]-00fc:24: ISP2432: PCIe (2.5GT/s x4) @ 0000:4e:00.0 hdma+ host#=24 fw=5.03.02 (496).
qla2xxx 0000:4e:00.1: enabling device (0100 -> 0102)
qla2xxx [0000:4e:00.1]-001d: : Found an ISP2432 irq 82 iobase 0xc000000080084000.
scsi25 : qla2xxx
qla2xxx [0000:4e:00.1]-00fb:25: QLogic QLE2462 - PCI-Express Dual Channel 4Gb Fibre Channel HBA.
qla2xxx [0000:4e:00.1]-00fc:25: ISP2432: PCIe (2.5GT/s x4) @ 0000:4e:00.1 hdma+ host#=25 fw=5.03.02 (496).
pciehp 0000:47:14.0:pcie24: PCI slot #244 - powering off due to button press.
qla2xxx [0000:4e:00.0]-8038:24: Cable is unplugged...
qla2xxx [0000:4e:00.1]-8038:25: Cable is unplugged...

So, I will use alloc_workqueue(name, 0, 0) for every slot like the original.

> 
> Thanks!
> Yijing
> 
>>
>> Regards,
>> Kenji Kaneshige
>>
>>
>> .
>>
> 
>
diff mbox

Patch

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 26ffd3e..2c113de 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -44,7 +44,6 @@  extern bool pciehp_poll_mode;
 extern int pciehp_poll_time;
 extern bool pciehp_debug;
 extern bool pciehp_force;
-extern struct workqueue_struct *pciehp_wq;
 
 #define dbg(format, arg...)						\
 do {									\
@@ -78,6 +77,7 @@  struct slot {
 	struct hotplug_slot *hotplug_slot;
 	struct delayed_work work;	/* work for button event */
 	struct mutex lock;
+	struct workqueue_struct *wq;
 };
 
 struct event_info {
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 916bf4f..0914211 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -42,7 +42,6 @@  bool pciehp_debug;
 bool pciehp_poll_mode;
 int pciehp_poll_time;
 bool pciehp_force;
-struct workqueue_struct *pciehp_wq;
 
 #define DRIVER_VERSION	"0.4"
 #define DRIVER_AUTHOR	"Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
@@ -340,18 +339,13 @@  static int __init pcied_init(void)
 {
 	int retval = 0;
 
-	pciehp_wq = alloc_workqueue("pciehp", 0, 0);
-	if (!pciehp_wq)
-		return -ENOMEM;
-
 	pciehp_firmware_init();
 	retval = pcie_port_service_register(&hpdriver_portdrv);
  	dbg("pcie_port_service_register = %d\n", retval);
   	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
- 	if (retval) {
-		destroy_workqueue(pciehp_wq);
+	if (retval)
 		dbg("Failure to register service\n");
-	}
+
 	return retval;
 }
 
@@ -359,7 +353,6 @@  static void __exit pcied_cleanup(void)
 {
 	dbg("unload_pciehpd()\n");
 	pcie_port_service_unregister(&hpdriver_portdrv);
-	destroy_workqueue(pciehp_wq);
 	info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
 }
 
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 27f4429..38f0186 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -49,7 +49,7 @@  static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 	info->p_slot = p_slot;
 	INIT_WORK(&info->work, interrupt_event_handler);
 
-	queue_work(pciehp_wq, &info->work);
+	queue_work(p_slot->wq, &info->work);
 
 	return 0;
 }
@@ -344,7 +344,7 @@  void pciehp_queue_pushbutton_work(struct work_struct *work)
 		kfree(info);
 		goto out;
 	}
-	queue_work(pciehp_wq, &info->work);
+	queue_work(p_slot->wq, &info->work);
  out:
 	mutex_unlock(&p_slot->lock);
 }
@@ -377,7 +377,7 @@  static void handle_button_press_event(struct slot *p_slot)
 		if (ATTN_LED(ctrl))
 			pciehp_set_attention_status(p_slot, 0);
 
-		queue_delayed_work(pciehp_wq, &p_slot->work, 5*HZ);
+		queue_delayed_work(p_slot->wq, &p_slot->work, 5*HZ);
 		break;
 	case BLINKINGOFF_STATE:
 	case BLINKINGON_STATE:
@@ -439,7 +439,7 @@  static void handle_surprise_event(struct slot *p_slot)
 	else
 		p_slot->state = POWERON_STATE;
 
-	queue_work(pciehp_wq, &info->work);
+	queue_work(p_slot->wq, &info->work);
 }
 
 static void interrupt_event_handler(struct work_struct *work)
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 13b2eaf..b34418d 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -773,23 +773,32 @@  static void pcie_shutdown_notification(struct controller *ctrl)
 static int pcie_init_slot(struct controller *ctrl)
 {
 	struct slot *slot;
+	char name[32];
 
 	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
 	if (!slot)
 		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl));
+	slot->wq = create_singlethread_workqueue(name);
+	if (!slot->wq)
+		goto abort;
 
 	slot->ctrl = ctrl;
 	mutex_init(&slot->lock);
 	INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
 	ctrl->slot = slot;
 	return 0;
+abort:
+	kfree(slot);
+	return -ENOMEM;
 }
 
 static void pcie_cleanup_slot(struct controller *ctrl)
 {
 	struct slot *slot = ctrl->slot;
 	cancel_delayed_work(&slot->work);
-	flush_workqueue(pciehp_wq);
+	destroy_workqueue(slot->wq);
 	kfree(slot);
 }