diff mbox

[2/3] workqueue: Add an API to create a singlethread workqueue attached to the current task's cgroup

Message ID 1274227491.2370.110.camel@w-sridhar.beaverton.ibm.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Sridhar Samudrala May 19, 2010, 12:04 a.m. UTC
Add a new kernel API to create a singlethread workqueue and attach it's
task to current task's cgroup and cpumask.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>

	

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Michael S. Tsirkin May 27, 2010, 9:14 a.m. UTC | #1
On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote:
> Add a new kernel API to create a singlethread workqueue and attach it's
> task to current task's cgroup and cpumask.
> 
> Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>

Could someone familiar with workqueue code please comment on whether
this patch is suitable for 2.6.35?

It is needed to fix the case where vhost user might cause a kernel
thread to consume more CPU than allowed by the cgroup.
Should I merge it through the vhost tree?
Ack for this?

Thanks!

> diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
> index 9466e86..6d6f301 100644
> --- a/include/linux/workqueue.h
> +++ b/include/linux/workqueue.h
> @@ -211,6 +211,7 @@ __create_workqueue_key(const char *name, int singlethread,
>  #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
>  #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
>  
> +extern struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name); 
>  extern void destroy_workqueue(struct workqueue_struct *wq);
>  
>  extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 5bfb213..6ba226e 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -35,6 +35,7 @@
>  #include <linux/lockdep.h>
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/workqueue.h>
> +#include <linux/cgroup.h>
>  
>  /*
>   * The per-CPU workqueue (if single thread, we always use the first
> @@ -193,6 +194,45 @@ static const struct cpumask *cpu_singlethread_map __read_mostly;
>   */
>  static cpumask_var_t cpu_populated_map __read_mostly;
>  
> +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq)
> +{
> +	return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread;
> +}
> +
> +/* Create a singlethread workqueue and attach it's task to the current task's
> + * cgroup and set it's cpumask to the current task's cpumask.
> + */
> +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
> +{
> +	struct workqueue_struct *wq;
> +	struct task_struct *task;
> +	cpumask_var_t mask;
> +
> +	wq = create_singlethread_workqueue(name);
> +	if (!wq)
> +		goto out;
> +
> +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> +		goto err;
> +			
> +	if (sched_getaffinity(current->pid, mask))
> +		goto err;
> +
> +	task = get_singlethread_wq_task(wq);
> +	if (sched_setaffinity(task->pid, mask))
> +		goto err;
> +
> +	if (cgroup_attach_task_current_cg(task))
> +		goto err;
> +out:	
> +	return wq;
> +err:
> +	destroy_workqueue(wq);
> +	wq = NULL;
> +	goto out;
> +}
> +EXPORT_SYMBOL_GPL(create_singlethread_workqueue_in_current_cg);
> +
>  /* If it's single threaded, it isn't in the list of workqueues. */
>  static inline int is_wq_single_threaded(struct workqueue_struct *wq)
>  {
> 	
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Oleg Nesterov May 27, 2010, 12:44 p.m. UTC | #2
On 05/27, Michael S. Tsirkin wrote:
>
> On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote:
> > Add a new kernel API to create a singlethread workqueue and attach it's
> > task to current task's cgroup and cpumask.
> >
> > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
>
> Could someone familiar with workqueue code please comment on whether
> this patch is suitable for 2.6.35?
>
> It is needed to fix the case where vhost user might cause a kernel
> thread to consume more CPU than allowed by the cgroup.
> Should I merge it through the vhost tree?
> Ack for this?

I don't understand the reasons for this patch, but this doesn't matter.

I don't really see any need to change workqueue.c,

> > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq)
> > +{
> > +	return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread;
> > +}

(Not sure this trivial static helper with the single caller makes sense, but
 see below)

> > +/* Create a singlethread workqueue and attach it's task to the current task's
> > + * cgroup and set it's cpumask to the current task's cpumask.
> > + */
> > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
> > +{
> > +	struct workqueue_struct *wq;
> > +	struct task_struct *task;
> > +	cpumask_var_t mask;
> > +
> > +	wq = create_singlethread_workqueue(name);
> > +	if (!wq)
> > +		goto out;
> > +
> > +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> > +		goto err;
> > +			
> > +	if (sched_getaffinity(current->pid, mask))
> > +		goto err;
> > +
> > +	task = get_singlethread_wq_task(wq);
> > +	if (sched_setaffinity(task->pid, mask))
> > +		goto err;
> > +
> > +	if (cgroup_attach_task_current_cg(task))
> > +		goto err;
> > +out:	
> > +	return wq;
> > +err:
> > +	destroy_workqueue(wq);
> > +	wq = NULL;
> > +	goto out;
> > +}

Instead, cgroup.c (or whoever needs this) can do

	struct move_struct {
		struct work_struct work;
		int ret;
	};

	static void move_func(struct work_struct *work)
	{
		struct move_struct *move = container_of(...);

		if (cgroup_attach_task_current_cg(current))
			ret = -EANY;
	}

	static struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
	{
		struct workqueue_struct *wq;
		struct move_struct move = {
			.work = __WORK_INITIALIZER(move_func);
		};

		wq = create_singlethread_workqueue(name);
		if (!wq)
			return NULL;

		queue_work(&move.work);
		flush_work(&move.work);

		if (move.ret) {
			destroy_workqueue(wq);
			wq = NULL;
		}

		return wq;
	}

Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and
use it like the patch does.

But, imho, create_singlethread_workqueue_in_current_cg() does not belong
to  workqueue.c.

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin May 27, 2010, 1:12 p.m. UTC | #3
On Thu, May 27, 2010 at 02:44:48PM +0200, Oleg Nesterov wrote:
> On 05/27, Michael S. Tsirkin wrote:
> >
> > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote:
> > > Add a new kernel API to create a singlethread workqueue and attach it's
> > > task to current task's cgroup and cpumask.
> > >
> > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
> >
> > Could someone familiar with workqueue code please comment on whether
> > this patch is suitable for 2.6.35?
> >
> > It is needed to fix the case where vhost user might cause a kernel
> > thread to consume more CPU than allowed by the cgroup.
> > Should I merge it through the vhost tree?
> > Ack for this?
> 
> I don't understand the reasons for this patch, but this doesn't matter.

Depending on userspace application, driver can create a lot of work
for a workqueue to handle. By making the workqueue thread
belong in a cgroup, we make it possible to the CPU and other
resources thus consumed.
Oleg Nesterov May 27, 2010, 1:48 p.m. UTC | #4
On 05/27, Michael S. Tsirkin wrote:
>
> On Thu, May 27, 2010 at 02:44:48PM +0200, Oleg Nesterov wrote:
> > On 05/27, Michael S. Tsirkin wrote:
> > >
> > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote:
> > > > Add a new kernel API to create a singlethread workqueue and attach it's
> > > > task to current task's cgroup and cpumask.
> > > >
> > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
> > >
> > > Could someone familiar with workqueue code please comment on whether
> > > this patch is suitable for 2.6.35?
> > >
> > > It is needed to fix the case where vhost user might cause a kernel
> > > thread to consume more CPU than allowed by the cgroup.
> > > Should I merge it through the vhost tree?
> > > Ack for this?
> >
> > I don't understand the reasons for this patch, but this doesn't matter.
>
> Depending on userspace application, driver can create a lot of work
> for a workqueue to handle. By making the workqueue thread
> belong in a cgroup, we make it possible to the CPU and other
> resources thus consumed.

OK, I see, thanks for your explanation.


in case I wasn't clear... I didn't mean I dislike this idea, only the
the implementation of create_singlethread_workqueue_in_current_cg(),
it doesn't belong to workqueue.c imho.

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo May 27, 2010, 4:15 p.m. UTC | #5
Hello,

On 05/27/2010 03:12 PM, Michael S. Tsirkin wrote:
>> I don't understand the reasons for this patch, but this doesn't matter.
> 
> Depending on userspace application, driver can create a lot of work
> for a workqueue to handle. By making the workqueue thread
> belong in a cgroup, we make it possible to the CPU and other
> resources thus consumed.

Hmmm.... I don't really get it.  The unit of scheduling in workqueue
is a work.  Unless you're gonna convert every driver to use this
special kind of workqueue (and what happens when multiple tasks from
different cgroups share the driver?), I can't see how this is gonna be
useful.  If you really wanna impose cgroup control on workqueue items,
you'll have to do it per work item which might lead to the problem of
priority inversion.  Can you please describe what you're trying to do
in more detail?

Thank you.
Sridhar Samudrala May 27, 2010, 4:24 p.m. UTC | #6
On Thu, 2010-05-27 at 14:44 +0200, Oleg Nesterov wrote:
> On 05/27, Michael S. Tsirkin wrote:
> >
> > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote:
> > > Add a new kernel API to create a singlethread workqueue and attach it's
> > > task to current task's cgroup and cpumask.
> > >
> > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
> >
> > Could someone familiar with workqueue code please comment on whether
> > this patch is suitable for 2.6.35?
> >
> > It is needed to fix the case where vhost user might cause a kernel
> > thread to consume more CPU than allowed by the cgroup.
> > Should I merge it through the vhost tree?
> > Ack for this?
> 
> I don't understand the reasons for this patch, but this doesn't matter.
> 
> I don't really see any need to change workqueue.c,
> 
> > > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq)
> > > +{
> > > +	return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread;
> > > +}
> 
> (Not sure this trivial static helper with the single caller makes sense, but
>  see below)
> 
> > > +/* Create a singlethread workqueue and attach it's task to the current task's
> > > + * cgroup and set it's cpumask to the current task's cpumask.
> > > + */
> > > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
> > > +{
> > > +	struct workqueue_struct *wq;
> > > +	struct task_struct *task;
> > > +	cpumask_var_t mask;
> > > +
> > > +	wq = create_singlethread_workqueue(name);
> > > +	if (!wq)
> > > +		goto out;
> > > +
> > > +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> > > +		goto err;
> > > +			
> > > +	if (sched_getaffinity(current->pid, mask))
> > > +		goto err;
> > > +
> > > +	task = get_singlethread_wq_task(wq);
> > > +	if (sched_setaffinity(task->pid, mask))
> > > +		goto err;
> > > +
> > > +	if (cgroup_attach_task_current_cg(task))
> > > +		goto err;
> > > +out:	
> > > +	return wq;
> > > +err:
> > > +	destroy_workqueue(wq);
> > > +	wq = NULL;
> > > +	goto out;
> > > +}
> 
> Instead, cgroup.c (or whoever needs this) can do
> 
> 	struct move_struct {
> 		struct work_struct work;
> 		int ret;
> 	};
> 
> 	static void move_func(struct work_struct *work)
> 	{
> 		struct move_struct *move = container_of(...);
> 
> 		if (cgroup_attach_task_current_cg(current))

We are trying to attach the task associated with workqueue to the
current task's cgroup. So what we need is 
   cgroup_attach_task_current_cg(wq->task);

However there is no interface currently that exports the task associated
with a workqueue. It is hidden in cpu_workqueue_struct and is only 
accessible within workqueue.c.


> 			ret = -EANY;
> 	}
> 
> 	static struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
> 	{
> 		struct workqueue_struct *wq;
> 		struct move_struct move = {
> 			.work = __WORK_INITIALIZER(move_func);
> 		};
> 
> 		wq = create_singlethread_workqueue(name);
> 		if (!wq)
> 			return NULL;
> 
> 		queue_work(&move.work);
> 		flush_work(&move.work);
> 
> 		if (move.ret) {
> 			destroy_workqueue(wq);
> 			wq = NULL;
> 		}
> 
> 		return wq;
> 	}
> 
> Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and
> use it like the patch does.
This requires that struct cpu_workqueue_struct and struct
workqueue_struct are made externally visible by moving them to
kernel/workqueue.h.

Instead what about adding the simple helper get_singlethread_wq_task()
in workqueue.c and exporting it.
I can add create_singlethread_workqueue_in_current_cg() to cgroup.c
using this helper routine.

Thanks
Sridhar


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin May 27, 2010, 4:39 p.m. UTC | #7
On Thu, May 27, 2010 at 06:15:54PM +0200, Tejun Heo wrote:
> Hello,
> 
> On 05/27/2010 03:12 PM, Michael S. Tsirkin wrote:
> >> I don't understand the reasons for this patch, but this doesn't matter.
> > 
> > Depending on userspace application, driver can create a lot of work
> > for a workqueue to handle. By making the workqueue thread
> > belong in a cgroup, we make it possible to the CPU and other
> > resources thus consumed.
> 
> Hmmm.... I don't really get it.  The unit of scheduling in workqueue
> is a work.

Yes. However, we use cgroups to limit when the workqueue itself is scheduled.
This affects all of work done on this workqueue, so it's a bit
of a blunt intrument. Thus we are not trying to apply this
to all drivers, we intend to start with vhost-net.

> Unless you're gonna convert every driver to use this
> special kind of workqueue (and what happens when multiple tasks from
> different cgroups share the driver?),

We'll then create a workqueue per task. Each workqueue will have the
right cgroup. But we are not trying to selve the problem for
every driver.

> I can't see how this is gonna be
> useful.  If you really wanna impose cgroup control on workqueue items,
> you'll have to do it per work item which might lead to the problem of
> priority inversion.

Exactly. cgroup is per-workqueue not per work item.
If driver wants to let administrators control priority
for different kinds of items separately, driver will have
to submit them to separate workqueues.

>  Can you please describe what you're trying to do
> in more detail?
> 
> Thank you.

vhost-net driver is under control from userspace,
it queues potentially a lot of work into the workqueue, which
might load the system beyond the cgroup limits.
And staying within cgroups limits is important for virtualization
where vhost is used.


> -- 
> tejun
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin May 27, 2010, 4:41 p.m. UTC | #8
On Thu, May 27, 2010 at 09:24:18AM -0700, Sridhar Samudrala wrote:
> On Thu, 2010-05-27 at 14:44 +0200, Oleg Nesterov wrote:
> > On 05/27, Michael S. Tsirkin wrote:
> > >
> > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote:
> > > > Add a new kernel API to create a singlethread workqueue and attach it's
> > > > task to current task's cgroup and cpumask.
> > > >
> > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
> > >
> > > Could someone familiar with workqueue code please comment on whether
> > > this patch is suitable for 2.6.35?
> > >
> > > It is needed to fix the case where vhost user might cause a kernel
> > > thread to consume more CPU than allowed by the cgroup.
> > > Should I merge it through the vhost tree?
> > > Ack for this?
> > 
> > I don't understand the reasons for this patch, but this doesn't matter.
> > 
> > I don't really see any need to change workqueue.c,
> > 
> > > > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq)
> > > > +{
> > > > +	return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread;
> > > > +}
> > 
> > (Not sure this trivial static helper with the single caller makes sense, but
> >  see below)
> > 
> > > > +/* Create a singlethread workqueue and attach it's task to the current task's
> > > > + * cgroup and set it's cpumask to the current task's cpumask.
> > > > + */
> > > > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
> > > > +{
> > > > +	struct workqueue_struct *wq;
> > > > +	struct task_struct *task;
> > > > +	cpumask_var_t mask;
> > > > +
> > > > +	wq = create_singlethread_workqueue(name);
> > > > +	if (!wq)
> > > > +		goto out;
> > > > +
> > > > +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> > > > +		goto err;
> > > > +			
> > > > +	if (sched_getaffinity(current->pid, mask))
> > > > +		goto err;
> > > > +
> > > > +	task = get_singlethread_wq_task(wq);
> > > > +	if (sched_setaffinity(task->pid, mask))
> > > > +		goto err;
> > > > +
> > > > +	if (cgroup_attach_task_current_cg(task))
> > > > +		goto err;
> > > > +out:	
> > > > +	return wq;
> > > > +err:
> > > > +	destroy_workqueue(wq);
> > > > +	wq = NULL;
> > > > +	goto out;
> > > > +}
> > 
> > Instead, cgroup.c (or whoever needs this) can do
> > 
> > 	struct move_struct {
> > 		struct work_struct work;
> > 		int ret;
> > 	};
> > 
> > 	static void move_func(struct work_struct *work)
> > 	{
> > 		struct move_struct *move = container_of(...);
> > 
> > 		if (cgroup_attach_task_current_cg(current))
> 
> We are trying to attach the task associated with workqueue to the
> current task's cgroup. So what we need is 
>    cgroup_attach_task_current_cg(wq->task);
> 
> However there is no interface currently that exports the task associated
> with a workqueue. It is hidden in cpu_workqueue_struct and is only 
> accessible within workqueue.c.
> 
> 
> > 			ret = -EANY;
> > 	}
> > 
> > 	static struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
> > 	{
> > 		struct workqueue_struct *wq;
> > 		struct move_struct move = {
> > 			.work = __WORK_INITIALIZER(move_func);
> > 		};
> > 
> > 		wq = create_singlethread_workqueue(name);
> > 		if (!wq)
> > 			return NULL;
> > 
> > 		queue_work(&move.work);
> > 		flush_work(&move.work);
> > 
> > 		if (move.ret) {
> > 			destroy_workqueue(wq);
> > 			wq = NULL;
> > 		}
> > 
> > 		return wq;
> > 	}
> > 
> > Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and
> > use it like the patch does.
> This requires that struct cpu_workqueue_struct and struct
> workqueue_struct are made externally visible by moving them to
> kernel/workqueue.h.
> 
> Instead what about adding the simple helper get_singlethread_wq_task()
> in workqueue.c and exporting it.
> I can add create_singlethread_workqueue_in_current_cg() to cgroup.c
> using this helper routine.

Or to our driver, if that's more palatable.

> Thanks
> Sridhar
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo May 27, 2010, 4:56 p.m. UTC | #9
Hello,

On 05/27/2010 06:39 PM, Michael S. Tsirkin wrote:
>> Unless you're gonna convert every driver to use this
>> special kind of workqueue (and what happens when multiple tasks from
>> different cgroups share the driver?),
> 
> We'll then create a workqueue per task. Each workqueue will have the
> right cgroup. But we are not trying to selve the problem for
> every driver.

Ah... I see.  You're gonna use multiple workqueues.  Once concern that
I have is that this is abuse of workqueue interface to certain level
and depends on the implementation detail of workqueue rather than its
intended usage model.  stop_machine() was a similar case and in the
end it was better served by a different mechanism built on kthread
directly (cpu_stop).  Wouldn't it be cleaner to use kthread directly
for your case too?  You're basically trying to use workqueue as a
frontend to kthread, so...

Thanks.
Oleg Nesterov May 27, 2010, 5:30 p.m. UTC | #10
What I am actually worried about is Tejun's rework, I am not sure
cmwq has the "this thread services that wq" property...

On 05/27, Sridhar Samudrala wrote:
>
> On Thu, 2010-05-27 at 14:44 +0200, Oleg Nesterov wrote:
> >
> > Instead, cgroup.c (or whoever needs this) can do
> >
> > 	struct move_struct {
> > 		struct work_struct work;
> > 		int ret;
> > 	};
> >
> > 	static void move_func(struct work_struct *work)
> > 	{
> > 		struct move_struct *move = container_of(...);
> >
> > 		if (cgroup_attach_task_current_cg(current))
>
> We are trying to attach the task associated with workqueue to the
> current task's cgroup. So what we need is
>    cgroup_attach_task_current_cg(wq->task);

I do not see cgroup_attach_task_current_cg() in Linus's tree and thus I do
not now what exactly it does, and of course the code above is only template.

But I think this is easy. Just add "struct cgroup *cgrp" into move_struct
and then move_func() can do cgroup_attach_task(move->cgrp, current) ?

> > Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and
> > use it like the patch does.
> This requires that struct cpu_workqueue_struct and struct
> workqueue_struct are made externally visible by moving them to
> kernel/workqueue.h.

no, no,

> Instead what about adding the simple helper get_singlethread_wq_task()
> in workqueue.c and exporting it.

Indeed, this is what I meant.

But. I disagree with get_singlethread_wq_task(). If we add this helper,
it should work with the multi-threaded wq's too, and needs the "int cpu"
parameter, ignored when is_wq_single_threaded().

So. Either rename wq_per_cpu() and export it (once again, I do not
mean we should move the body to workqueue.h!), or create the new
helper which just calls wq_per_cpu().

> I can add create_singlethread_workqueue_in_current_cg() to cgroup.c
> using this helper routine.

Imho, this is better.

But please note that it is possible to do without any changes in
workqueue.[ch] afaics, see above.

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin May 27, 2010, 5:32 p.m. UTC | #11
On Thu, May 27, 2010 at 06:56:20PM +0200, Tejun Heo wrote:
> Hello,
> 
> On 05/27/2010 06:39 PM, Michael S. Tsirkin wrote:
> >> Unless you're gonna convert every driver to use this
> >> special kind of workqueue (and what happens when multiple tasks from
> >> different cgroups share the driver?),
> > 
> > We'll then create a workqueue per task. Each workqueue will have the
> > right cgroup. But we are not trying to selve the problem for
> > every driver.
> 
> Ah... I see.  You're gonna use multiple workqueues.  Once concern that
> I have is that this is abuse of workqueue interface to certain level
> and depends on the implementation detail of workqueue rather than its
> intended usage model.

Well, this is why I proposed adding a new API for creating
workqueue within workqueue.c, rather than exposing the task
and attaching it to cgroups in our driver: so that workqueue
maintainers can fix the implementation if it ever changes.

And after all, it's an internal API, we can always change
it later if we need.

> stop_machine() was a similar case and in the
> end it was better served by a different mechanism built on kthread
> directly (cpu_stop).  Wouldn't it be cleaner to use kthread directly
> for your case too?  You're basically trying to use workqueue as a
> frontend to kthread, so...
> 
> Thanks.

Well, yes but we are using APIs like flush_work etc. These are very
handy.  It seems much easier than rolling our own queue on top of kthread.

Makes sense?

> -- 
> tejun
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo May 27, 2010, 9:20 p.m. UTC | #12
Hello, Michael.

On 05/27/2010 07:32 PM, Michael S. Tsirkin wrote:
> Well, this is why I proposed adding a new API for creating
> workqueue within workqueue.c, rather than exposing the task
> and attaching it to cgroups in our driver: so that workqueue
> maintainers can fix the implementation if it ever changes.
> 
> And after all, it's an internal API, we can always change
> it later if we need.
...
> Well, yes but we are using APIs like flush_work etc. These are very
> handy.  It seems much easier than rolling our own queue on top of kthread.

The thing is that this kind of one-off usage becomes problemetic when
you're trying to change the implementation detail.  All current
workqueue users don't care which thread they run on and they shouldn't
as each work owns the context only for the duration the work is
executing.  If this sort of fundamental guidelines are followed, the
implementation can be improved in pretty much transparent way but when
you start depending on specific implementation details, things become
messy pretty quickly.

If this type of usage were more common, adding proper way to account
work usage according to cgroups would make sense but that's not the
case here and I removed the only exception case recently while trying
to implement cmwq and if this is added.  So, this would be the only
one which makes such extra assumptions in the whole kernel.  One way
or the other, workqueue needs to be improved and I don't really think
adding the single exception at this point is a good idea.

The thing I realized after stop_machine conversion was that there was
no reason to use workqueue there at all.  There already are more than
enough not-too-difficult synchronization constructs and if you're
using a thread for dedicated purposes, code complexity isn't that
different either way.  Plus, it would also be clearer that dedicated
threads are required there for what reason.  So, I strongly suggest
using a kthread.  If there are issues which are noticeably difficult
to solve with kthread, we can definitely talk about that and think
about things again.

Thank you.
Michael S. Tsirkin May 28, 2010, 3:08 p.m. UTC | #13
On Thu, May 27, 2010 at 11:20:22PM +0200, Tejun Heo wrote:
> Hello, Michael.
> 
> On 05/27/2010 07:32 PM, Michael S. Tsirkin wrote:
> > Well, this is why I proposed adding a new API for creating
> > workqueue within workqueue.c, rather than exposing the task
> > and attaching it to cgroups in our driver: so that workqueue
> > maintainers can fix the implementation if it ever changes.
> > 
> > And after all, it's an internal API, we can always change
> > it later if we need.
> ...
> > Well, yes but we are using APIs like flush_work etc. These are very
> > handy.  It seems much easier than rolling our own queue on top of kthread.
> 
> The thing is that this kind of one-off usage becomes problemetic when
> you're trying to change the implementation detail.  All current
> workqueue users don't care which thread they run on and they shouldn't
> as each work owns the context only for the duration the work is
> executing.  If this sort of fundamental guidelines are followed, the
> implementation can be improved in pretty much transparent way but when
> you start depending on specific implementation details, things become
> messy pretty quickly.
> 
> If this type of usage were more common, adding proper way to account
> work usage according to cgroups would make sense but that's not the
> case here and I removed the only exception case recently while trying
> to implement cmwq and if this is added.  So, this would be the only
> one which makes such extra assumptions in the whole kernel.  One way
> or the other, workqueue needs to be improved and I don't really think
> adding the single exception at this point is a good idea.
> 
> The thing I realized after stop_machine conversion was that there was
> no reason to use workqueue there at all.  There already are more than
> enough not-too-difficult synchronization constructs and if you're
> using a thread for dedicated purposes, code complexity isn't that
> different either way.  Plus, it would also be clearer that dedicated
> threads are required there for what reason.  So, I strongly suggest
> using a kthread.  If there are issues which are noticeably difficult
> to solve with kthread, we can definitely talk about that and think
> about things again.
> 
> Thank you.

Well, we have create_singlethread_workqueue, right?
This is not very different ... is it?

Just copying structures and code from workqueue.c,
adding vhost_ in front of it will definitely work:
there is nothing magic about the workqueue library.
But this just involves cut and paste which might be best avoided.
One final idea before we go the cut and paste way: how about
'create_workqueue_from_task' that would get a thread and have workqueue
run there?

> -- 
> tejun
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo May 28, 2010, 3:54 p.m. UTC | #14
Hello,

On 05/28/2010 05:08 PM, Michael S. Tsirkin wrote:
> Well, we have create_singlethread_workqueue, right?
> This is not very different ... is it?
> 
> Just copying structures and code from workqueue.c,
> adding vhost_ in front of it will definitely work:

Sure it will, but you'll probably be able to get away with much less.

> there is nothing magic about the workqueue library.
> But this just involves cut and paste which might be best avoided.

What I'm saying is that some magic needs to be added to workqueue and
if you add this single(!) exception, it will have to be backed out
pretty soon, so it would be better to do it properly now.

> One final idea before we go the cut and paste way: how about
> 'create_workqueue_from_task' that would get a thread and have workqueue
> run there?

You can currently depend on that implementation detail but it's not
the workqueue interface is meant to do.  The single threadedness is
there as execution ordering and concurrency specification and it
doesn't (or rather won't) necessarily mean that a specific single
thread is bound to certain workqueue.

Can you please direct me to have a look at the code.  I'll be happy to
do the conversion for you.

Thanks.
Michael S. Tsirkin May 30, 2010, 11:29 a.m. UTC | #15
On Fri, May 28, 2010 at 05:54:42PM +0200, Tejun Heo wrote:
> Hello,
> 
> On 05/28/2010 05:08 PM, Michael S. Tsirkin wrote:
> > Well, we have create_singlethread_workqueue, right?
> > This is not very different ... is it?
> > 
> > Just copying structures and code from workqueue.c,
> > adding vhost_ in front of it will definitely work:
> 
> Sure it will, but you'll probably be able to get away with much less.
> 
> > there is nothing magic about the workqueue library.
> > But this just involves cut and paste which might be best avoided.
> 
> What I'm saying is that some magic needs to be added to workqueue and
> if you add this single(!) exception, it will have to be backed out
> pretty soon, so it would be better to do it properly now.
> 
> > One final idea before we go the cut and paste way: how about
> > 'create_workqueue_from_task' that would get a thread and have workqueue
> > run there?
> 
> You can currently depend on that implementation detail but it's not
> the workqueue interface is meant to do.  The single threadedness is
> there as execution ordering and concurrency specification and it
> doesn't (or rather won't) necessarily mean that a specific single
> thread is bound to certain workqueue.
> 
> Can you please direct me to have a look at the code.  I'll be happy to
> do the conversion for you.

Great, thanks! The code in question is in drivers/vhost/vhost.c
It is used from drivers/vhost/net.c

On top of this, we have patchset from Sridhar Samudrala,
titled '0/3 Make vhost multi-threaded and associate each thread to its
guest's cgroup':

cgroups: Add an API to attach a task to current task's cgroup
workqueue: Add an API to create a singlethread workqueue attached to the
current task's cgroup
vhost: make it more scalable by creating a vhost thread per device

I have bounced the last three your way.


> Thanks.
> 
> -- 
> tejun
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9466e86..6d6f301 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -211,6 +211,7 @@  __create_workqueue_key(const char *name, int singlethread,
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
 
+extern struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name); 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
 extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5bfb213..6ba226e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -35,6 +35,7 @@ 
 #include <linux/lockdep.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
+#include <linux/cgroup.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -193,6 +194,45 @@  static const struct cpumask *cpu_singlethread_map __read_mostly;
  */
 static cpumask_var_t cpu_populated_map __read_mostly;
 
+static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq)
+{
+	return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread;
+}
+
+/* Create a singlethread workqueue and attach it's task to the current task's
+ * cgroup and set it's cpumask to the current task's cpumask.
+ */
+struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name)
+{
+	struct workqueue_struct *wq;
+	struct task_struct *task;
+	cpumask_var_t mask;
+
+	wq = create_singlethread_workqueue(name);
+	if (!wq)
+		goto out;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		goto err;
+			
+	if (sched_getaffinity(current->pid, mask))
+		goto err;
+
+	task = get_singlethread_wq_task(wq);
+	if (sched_setaffinity(task->pid, mask))
+		goto err;
+
+	if (cgroup_attach_task_current_cg(task))
+		goto err;
+out:	
+	return wq;
+err:
+	destroy_workqueue(wq);
+	wq = NULL;
+	goto out;
+}
+EXPORT_SYMBOL_GPL(create_singlethread_workqueue_in_current_cg);
+
 /* If it's single threaded, it isn't in the list of workqueues. */
 static inline int is_wq_single_threaded(struct workqueue_struct *wq)
 {