diff mbox

Do we need asynchronous pm_runtime_get()? (was: Re: bisected regression ...)

Message ID 201208051726.22729.rjw@sisk.pl
State Not Applicable
Headers show

Commit Message

Rafael J. Wysocki Aug. 5, 2012, 3:26 p.m. UTC
On Sunday, August 05, 2012, Alan Stern wrote:
> On Sat, 4 Aug 2012, Rafael J. Wysocki wrote:
> 
> > On Saturday, August 04, 2012, Alan Stern wrote:
> > > On Sat, 4 Aug 2012, Rafael J. Wysocki wrote:
> > > 
> > > > > That wasn't what he meant.  What if the code needs to run in the _same_
> > > > > context as the caller?  For example, with a spinlock held.
> > > > 
> > > > I see.  I think it wouldn't be unreasonable to require that func should take
> > > > all of the necessary locks by itself.
> > > 
> > > But then if func was called directly, because the device was at full
> > > power, it would deadlock trying to acquire a lock the caller already
> > > holds.
> > 
> > I wonder why the caller may want to take any locks beforehand?
> 
> Who knows?  :-)
> 
> The best answer may be for the caller not to hold any locks.  In the
> runtime-PM document's example driver, the lock would be dropped before
> the resume-and-call.
> 
> > > Do you have any better suggestions?
> > 
> > Hmm.  What about pm_runtime_get_and_call(dev, func_sync, func_async), where
> > func_sync() is to be called if the device is already active and func_async()
> > is to be called if it has to be resumed from the workqueue?
> 
> That's a little better but not much.  It would require storing two 
> function pointers in the dev->power structure.

I don't really think we'll need to store func_sync in dev->power.  At least
I don't see a reason to do that at the moment.

I also think that func_sync() would have to be called if runtime PM is
disabled for the given device, so that callers don't have to check that
condition themselves.

What about the appended experimental patch?

Rafael


---
 drivers/base/power/runtime.c |   82 +++++++++++++++++++++++++++++++++++++++----
 include/linux/pm.h           |    1 
 include/linux/pm_runtime.h   |   14 +++++++
 3 files changed, 90 insertions(+), 7 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Ming Lei Aug. 6, 2012, 1:30 p.m. UTC | #1
On Sun, Aug 5, 2012 at 11:26 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:

> ---
>  drivers/base/power/runtime.c |   82 +++++++++++++++++++++++++++++++++++++++----
>  include/linux/pm.h           |    1
>  include/linux/pm_runtime.h   |   14 +++++++
>  3 files changed, 90 insertions(+), 7 deletions(-)
>
> Index: linux/drivers/base/power/runtime.c
> ===================================================================
> --- linux.orig/drivers/base/power/runtime.c
> +++ linux/drivers/base/power/runtime.c
> @@ -484,6 +484,15 @@ static int rpm_suspend(struct device *de
>         goto out;
>  }
>
> +void rpm_queue_up_resume(struct device *dev)
> +{
> +       dev->power.request = RPM_REQ_RESUME;
> +       if (!dev->power.request_pending) {
> +               dev->power.request_pending = true;
> +               queue_work(pm_wq, &dev->power.work);
> +       }
> +}
> +
>  /**
>   * rpm_resume - Carry out runtime resume of given device.
>   * @dev: Device to resume.
> @@ -524,7 +533,9 @@ static int rpm_resume(struct device *dev
>          * rather than cancelling it now only to restart it again in the near
>          * future.
>          */
> -       dev->power.request = RPM_REQ_NONE;
> +       if (dev->power.request != RPM_REQ_RESUME || !dev->power.func)
> +               dev->power.request = RPM_REQ_NONE;
> +
>         if (!dev->power.timer_autosuspends)
>                 pm_runtime_deactivate_timer(dev);
>
> @@ -533,6 +544,12 @@ static int rpm_resume(struct device *dev
>                 goto out;
>         }
>
> +       if (dev->power.func && (rpmflags & RPM_ASYNC)) {
> +               rpm_queue_up_resume(dev);
> +               retval = 0;
> +               goto out;
> +       }
> +
>         if (dev->power.runtime_status == RPM_RESUMING
>             || dev->power.runtime_status == RPM_SUSPENDING) {
>                 DEFINE_WAIT(wait);
> @@ -591,11 +608,7 @@ static int rpm_resume(struct device *dev
>
>         /* Carry out an asynchronous or a synchronous resume. */
>         if (rpmflags & RPM_ASYNC) {
> -               dev->power.request = RPM_REQ_RESUME;
> -               if (!dev->power.request_pending) {
> -                       dev->power.request_pending = true;
> -                       queue_work(pm_wq, &dev->power.work);
> -               }
> +               rpm_queue_up_resume(dev);
>                 retval = 0;
>                 goto out;
>         }
> @@ -691,6 +704,7 @@ static int rpm_resume(struct device *dev
>  static void pm_runtime_work(struct work_struct *work)
>  {
>         struct device *dev = container_of(work, struct device, power.work);
> +       void (*func)(struct device *) = NULL;
>         enum rpm_request req;
>
>         spin_lock_irq(&dev->power.lock);
> @@ -715,12 +729,24 @@ static void pm_runtime_work(struct work_
>                 rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO);
>                 break;
>         case RPM_REQ_RESUME:
> -               rpm_resume(dev, RPM_NOWAIT);
> +               func = dev->power.func;
> +               if (func) {
> +                       dev->power.func = NULL;
> +                       pm_runtime_get_noresume(dev);
> +                       rpm_resume(dev, 0);
> +               } else {
> +                       rpm_resume(dev, RPM_NOWAIT);
> +               }
>                 break;
>         }
>
>   out:
>         spin_unlock_irq(&dev->power.lock);
> +
> +       if (func) {
> +               func(dev);
> +               pm_runtime_put(dev);
> +       }
>  }
>
>  /**
> @@ -877,6 +903,48 @@ int __pm_runtime_resume(struct device *d
>  }
>  EXPORT_SYMBOL_GPL(__pm_runtime_resume);
>
> +int pm_runtime_get_and_call(struct device *dev, void (*func)(struct device *),
> +                            void (*func_async)(struct device *))
> +{
> +       unsigned long flags;
> +       int ret;
> +
> +       atomic_inc(&dev->power.usage_count);
> +
> +       spin_lock_irqsave(&dev->power.lock, flags);
> +
> +       ret = dev->power.runtime_error;
> +       if (ret) {
> +               func = NULL;
> +               goto out;
> +       }
> +
> +       if (dev->power.runtime_status != RPM_ACTIVE
> +           && dev->power.disable_depth == 0)
> +               func = NULL;

Looks the above is a bit odd, and your motivation is to call 'func'
for a suspended and runtime-PM enabled device in irq context, isn't it?

> +
> +       if (!func && func_async) {
> +               if (dev->power.func) {
> +                       ret = -EAGAIN;
> +                       goto out;
> +               } else {
> +                       dev->power.func = func_async;
> +               }
> +       }
> +
> +       rpm_resume(dev, RPM_ASYNC);
> +
> + out:
> +       spin_unlock_irqrestore(&dev->power.lock, flags);
> +
> +       if (func) {
> +               func(dev);

Maybe the return value should be passed to caller. Also the race between
'func' and its .runtime_resume callback should be stated in comment.

In fact, maybe it is better to call 'func' always first, then call
' rpm_resume(dev, RPM_ASYNC);', otherwise the driver may
be confused about the order between 'func' and its .runtime_resume
callback.

> +               return 1;
> +       }
> +
> +       return ret;
> +}
> +
>  /**
>   * __pm_runtime_set_status - Set runtime PM status of a device.
>   * @dev: Device to handle.
> Index: linux/include/linux/pm.h
> ===================================================================
> --- linux.orig/include/linux/pm.h
> +++ linux/include/linux/pm.h
> @@ -547,6 +547,7 @@ struct dev_pm_info {
>         unsigned long           suspended_jiffies;
>         unsigned long           accounting_timestamp;
>         struct dev_pm_qos_request *pq_req;
> +       void                    (*func)(struct device *);

Another way is to define 'func' as 'runtime_pre_resume'
in 'struct dev_pm_ops', and there are some advantages about this way:

        - save one pointer in 'struct devices, since most of devices
don't need the 'func'
        - well documents on 'runtime_pre_resume'
        - caller of pm_runtime_get_and_call may be happier, maybe just
        pm_runtime_get or *_aync is enough.


Thanks,
Alan Stern Aug. 6, 2012, 2:47 p.m. UTC | #2
On Mon, 6 Aug 2012, Ming Lei wrote:

> Maybe the return value should be passed to caller. Also the race between
> 'func' and its .runtime_resume callback should be stated in comment.
> 
> In fact, maybe it is better to call 'func' always first, then call
> ' rpm_resume(dev, RPM_ASYNC);', otherwise the driver may
> be confused about the order between 'func' and its .runtime_resume
> callback.

> Another way is to define 'func' as 'runtime_pre_resume'
> in 'struct dev_pm_ops', and there are some advantages about this way:
> 
>         - save one pointer in 'struct devices, since most of devices
> don't need the 'func'
>         - well documents on 'runtime_pre_resume'
>         - caller of pm_runtime_get_and_call may be happier, maybe just
>         pm_runtime_get or *_aync is enough.

No, no, you have completely misunderstood the whole point of this 
change.

The idea is for "func" to be called at a time when it is known that the 
device is at full power.  That means it _has_ to be called after the 
runtime_resume callback returns.

Also, "func" should not be stored in dev_pm_ops because it won't be a 
read-only value.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alan Stern Aug. 6, 2012, 3:48 p.m. UTC | #3
On Sun, 5 Aug 2012, Rafael J. Wysocki wrote:

> I don't really think we'll need to store func_sync in dev->power.  At least
> I don't see a reason to do that at the moment.

You're right; I wasn't thinking straight.

> I also think that func_sync() would have to be called if runtime PM is
> disabled for the given device, so that callers don't have to check that
> condition themselves.

Yes.

> What about the appended experimental patch?

For now, I think it would be best to start with a single func argument.  
If it turns out that anyone really needs to have two separate
arguments, the single-argument form can be reimplemented on top of the
two-argument form easily enough.

> @@ -484,6 +484,15 @@ static int rpm_suspend(struct device *de
>  	goto out;
>  }
>  
> +void rpm_queue_up_resume(struct device *dev)
> +{
> +	dev->power.request = RPM_REQ_RESUME;
> +	if (!dev->power.request_pending) {
> +		dev->power.request_pending = true;
> +		queue_work(pm_wq, &dev->power.work);
> +	}
> +}
> +
>  /**
>   * rpm_resume - Carry out runtime resume of given device.
>   * @dev: Device to resume.
> @@ -524,7 +533,9 @@ static int rpm_resume(struct device *dev
>  	 * rather than cancelling it now only to restart it again in the near
>  	 * future.
>  	 */
> -	dev->power.request = RPM_REQ_NONE;
> +	if (dev->power.request != RPM_REQ_RESUME || !dev->power.func)
> +		dev->power.request = RPM_REQ_NONE;
> +
>  	if (!dev->power.timer_autosuspends)
>  		pm_runtime_deactivate_timer(dev);
>  
> @@ -533,6 +544,12 @@ static int rpm_resume(struct device *dev
>  		goto out;
>  	}
>  
> +	if (dev->power.func && (rpmflags & RPM_ASYNC)) {
> +		rpm_queue_up_resume(dev);
> +		retval = 0;
> +		goto out;
> +	}
> +
>  	if (dev->power.runtime_status == RPM_RESUMING
>  	    || dev->power.runtime_status == RPM_SUSPENDING) {
>  		DEFINE_WAIT(wait);

All those changes (and some of the following ones) are symptoms of a
basic mistake in this approach.  The idea of this new feature is to
call "func" as soon as we know the device is at full power, no matter
how it got there.  That means we should call it near the end of
rpm_resume() (just before the rpm_idle() call), not from within
pm_runtime_work().

Doing it this way will be more efficient and (I think) will remove
some races.

> @@ -877,6 +903,48 @@ int __pm_runtime_resume(struct device *d
>  }
>  EXPORT_SYMBOL_GPL(__pm_runtime_resume);
>  
> +int pm_runtime_get_and_call(struct device *dev, void (*func)(struct device *),
> +			     void (*func_async)(struct device *))
> +{
> +	unsigned long flags;
> +	int ret;
> +
> +	atomic_inc(&dev->power.usage_count);
> +
> +	spin_lock_irqsave(&dev->power.lock, flags);
> +
> +	ret = dev->power.runtime_error;
> +	if (ret) {
> +		func = NULL;
> +		goto out;
> +	}
> +
> +	if (dev->power.runtime_status != RPM_ACTIVE
> +	    && dev->power.disable_depth == 0)
> +		func = NULL;
> +
> +	if (!func && func_async) {
> +		if (dev->power.func) {
> +			ret = -EAGAIN;
> +			goto out;
> +		} else {
> +			dev->power.func = func_async;
> +		}
> +	}

The logic here is kind of hard to follow.  It will be simpler when
there's only one "func":

	If the status is RPM_ACTIVE or disable_depth > 0 then
	call "func" directly.

	Otherwise save "func" in dev.power and do an async resume.

Some more things:

In __pm_runtime_set_status(), if power.func is set then I think we
should call it if the new status is ACTIVE.  Likwise at the end of
rpm_suspend(), if the suspend fails.  There should be an invariant:
Whenever the status is RPM_ACTIVE, power.func must be NULL.

pm_runtime_barrier() should always clear power.func, even if the 
rpm_resume() call fails.

The documentation should explain that in some ways, "func" is like a
workqueue callback routine: Subsystems and drivers have to be careful
to make sure that it can't be invoked after the device is unbound.  A
safe way to do this is to call pm_runtime_barrier() from within the
driver's .remove() routine, after making sure that
pm_runtime_get_and_call() won't be invoked any more.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 7, 2012, 1:35 a.m. UTC | #4
On Mon, Aug 6, 2012 at 10:47 PM, Alan Stern <stern@rowland.harvard.edu> wrote:
> No, no, you have completely misunderstood the whole point of this
> change.

Sorry, you are right. And the callback should be renamed as
'.runtime_post_resume', which should do something I/O related in
theory just after device becomes active.

>
> The idea is for "func" to be called at a time when it is known that the
> device is at full power.  That means it _has_ to be called after the
> runtime_resume callback returns.

Yes, I agree, but I don't think it may make .runtime_post_resume
not doable, do I?

>
> Also, "func" should not be stored in dev_pm_ops because it won't be a
> read-only value.

Sorry, could you explain it in detail that why the 'func'
has to switch to multiple functions dynamically? I understand
one function is enough and sometimes it can be bypassed with
one flag(such as, ignore_post_resume is introduced in dev_pm_info)
set.  Also, the driver can store the device specific states
in its own device instance to deal with different situations in the callback.

If the idea is doable, we can save one pointer in 'struct device',
since the 'func' may not be used by more than 90% devices, also
have document benefit, even may simplify implementation of the
mechanism.


Thanks,
Rafael J. Wysocki Aug. 7, 2012, 11:23 a.m. UTC | #5
On Tuesday, August 07, 2012, Ming Lei wrote:
> On Mon, Aug 6, 2012 at 10:47 PM, Alan Stern <stern@rowland.harvard.edu> wrote:
> > No, no, you have completely misunderstood the whole point of this
> > change.
> 
> Sorry, you are right. And the callback should be renamed as
> '.runtime_post_resume', which should do something I/O related in
> theory just after device becomes active.
> 
> >
> > The idea is for "func" to be called at a time when it is known that the
> > device is at full power.  That means it _has_ to be called after the
> > runtime_resume callback returns.
> 
> Yes, I agree, but I don't think it may make .runtime_post_resume
> not doable, do I?

No more device PM callbacks, please.

Besides, callbacks in struct dev_pm_ops are not only for drivers.

> > Also, "func" should not be stored in dev_pm_ops because it won't be a
> > read-only value.
> 
> Sorry, could you explain it in detail that why the 'func'
> has to switch to multiple functions dynamically? I understand
> one function is enough and sometimes it can be bypassed with
> one flag(such as, ignore_post_resume is introduced in dev_pm_info)
> set.  Also, the driver can store the device specific states
> in its own device instance to deal with different situations in the callback.
> 
> If the idea is doable, we can save one pointer in 'struct device',
> since the 'func' may not be used by more than 90% devices, also
> have document benefit, even may simplify implementation of the
> mechanism.

And how many struct device objects there are for the one extra pointer to
matter, let alone the fact that you want to replace it by one extra pointer
somewhere else?

Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 7, 2012, 3:14 p.m. UTC | #6
On Tue, Aug 7, 2012 at 7:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>> Yes, I agree, but I don't think it may make .runtime_post_resume
>> not doable, do I?
>
> No more device PM callbacks, please.

IMO, what the patch is doing is to introduce one callback which
is just called after .runtime_resume is completed, and you want
to move something out of previous .runtime_resume and do it
in another new callback to speedup resume, so it should be
reasonable to introduce the .runtime_post_resume callback in logic.

Also, the 'func' should be per driver, not per device since only one
'func' is enough for all same kind of devices driven by one same
driver.

> Besides, callbacks in struct dev_pm_ops are not only for drivers.

All the current 3 runtime callbacks are for devices. If you mean
they can be defined in bus/power_domain/device_type, .runtime_post_resume
still can be defined there too.

>
>> > Also, "func" should not be stored in dev_pm_ops because it won't be a
>> > read-only value.
>>
>> Sorry, could you explain it in detail that why the 'func'
>> has to switch to multiple functions dynamically? I understand
>> one function is enough and sometimes it can be bypassed with
>> one flag(such as, ignore_post_resume is introduced in dev_pm_info)
>> set.  Also, the driver can store the device specific states
>> in its own device instance to deal with different situations in the callback.
>>
>> If the idea is doable, we can save one pointer in 'struct device',
>> since the 'func' may not be used by more than 90% devices, also
>> have document benefit, even may simplify implementation of the
>> mechanism.
>
> And how many struct device objects there are for the one extra pointer to
> matter, let alone the fact that you want to replace it by one extra pointer
> somewhere else?

For example, in the pandaboard(one omap4 based small system), follows
the count of device instances:

         [root@root]#dmesg | grep device_add | wc -l
        471

The above is just a simple configuration(no graphics, no video/video, only
console enabled) on Pandaboard.

If the callback may be defined in dev_pm_info, not only memory can be saved,
also there are other advantages described before.


Thanks,
Alan Stern Aug. 7, 2012, 3:42 p.m. UTC | #7
On Tue, 7 Aug 2012, Ming Lei wrote:

> On Tue, Aug 7, 2012 at 7:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >> Yes, I agree, but I don't think it may make .runtime_post_resume
> >> not doable, do I?
> >
> > No more device PM callbacks, please.
> 
> IMO, what the patch is doing is to introduce one callback which
> is just called after .runtime_resume is completed, and you want
> to move something out of previous .runtime_resume and do it
> in another new callback to speedup resume, so it should be
> reasonable to introduce the .runtime_post_resume callback in logic.

No, that's really not what the patch is doing.

The idea behind the new API is that "func" will be called as soon as we
know the device is at full power.  That could be after the next runtime
resume or it could be right away.  This is a one-time call; it should
not be made _every_ time the device resumes.

> Also, the 'func' should be per driver, not per device since only one
> 'func' is enough for all same kind of devices driven by one same
> driver.

But what if the subsystem defines its own dev_pm_info structure?  Then
the driver's dev_pm_info will be ignored by the runtime PM core.  All
the subsystems would have to be changed.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 7, 2012, 4:30 p.m. UTC | #8
On Tue, Aug 7, 2012 at 11:42 PM, Alan Stern <stern@rowland.harvard.edu> wrote:
>
> No, that's really not what the patch is doing.
>
> The idea behind the new API is that "func" will be called as soon as we
> know the device is at full power.  That could be after the next runtime
> resume or it could be right away.  This is a one-time call; it should

IMO, in the both two cases, the 'func' should be very similar with
.runtime_post_resume from view of the caller because the caller
don't know what the power state of the device is now, so they may
always think the 'func' should do something similar done in
.runtime_post_resume.

Also .runtime_post_resume always knows the device's power state
is active, which is same with 'func'. In fact, it doesn't matter if the active
state is the 1st time or other times, does it?

> not be made _every_ time the device resumes.

Suppose the device is always resumed in the path(such as irq context),
the callback is still called every time.

If the .runtime_post_resume is to be a one-time call, that is easy to do it.
Also I am wondering why the callback shouldn't be called after resume
in sync context, and it may simplify implementation if the two contexts
(sync vs. async) are kept consistent.

>
>> Also, the 'func' should be per driver, not per device since only one
>> 'func' is enough for all same kind of devices driven by one same
>> driver.
>
> But what if the subsystem defines its own dev_pm_info structure?  Then
> the driver's dev_pm_info will be ignored by the runtime PM core.  All
> the subsystems would have to be changed.

Suppose .runtime_post_resume is introduced, the priority of
dev_pm_info for .runtime_post_resume callback can be changed to
adapt to the situation.


Thanks,
Rafael J. Wysocki Aug. 7, 2012, 8:45 p.m. UTC | #9
On Tuesday, August 07, 2012, Ming Lei wrote:
> On Tue, Aug 7, 2012 at 7:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >> Yes, I agree, but I don't think it may make .runtime_post_resume
> >> not doable, do I?
> >
> > No more device PM callbacks, please.
> 
> IMO, what the patch is doing is to introduce one callback which
> is just called after .runtime_resume is completed,

No, it is not a callback.  It is a function to be run _once_ when the device is
known to be active.  It is not a member of a data type or anything like this.

It's kind of disappointing that you don't see a difference between that and a
callback.

> and you want to move something out of previous .runtime_resume

No, I don't.  Where did you get that idea from?

> and do it in another new callback to speedup resume,

No, not to speed up resume.  The idea is to allow drivers to run something
when the resume is complete, so that they don't have to implement a "resume
detection" logic or use .runtime_resume() to run things that don't belong
there.

> so it should be reasonable to introduce the .runtime_post_resume callback in
> logic.

No.  This doesn't have anything to do with callbacks!

If you want a new callback, you should specify what the role of this callback
is, otherwise it is not well defined.  I this case, though, what the role of
func() is depends on the caller and most likely every driver would use it
for something different.  So no, I don't see how it can be a callback.

> Also, the 'func' should be per driver, not per device since only one
> 'func' is enough for all same kind of devices driven by one same
> driver.

It isn't per device!  It is per _caller_.  The fact that the pointer is
stored _temporarily_ in struct device doesn't mean that it is per device
and that it is a callback.  From the struct device point of view it is _data_,
not a member function.

> > Besides, callbacks in struct dev_pm_ops are not only for drivers.
> 
> All the current 3 runtime callbacks are for devices. If you mean
> they can be defined in bus/power_domain/device_type, .runtime_post_resume
> still can be defined there too.

No, it wouldn't make _any_ _sense_ there, because its role there cannot be
defined in any sane way.

> >> > Also, "func" should not be stored in dev_pm_ops because it won't be a
> >> > read-only value.
> >>
> >> Sorry, could you explain it in detail that why the 'func'
> >> has to switch to multiple functions dynamically? I understand
> >> one function is enough and sometimes it can be bypassed with
> >> one flag(such as, ignore_post_resume is introduced in dev_pm_info)
> >> set.  Also, the driver can store the device specific states
> >> in its own device instance to deal with different situations in the callback.
> >>
> >> If the idea is doable, we can save one pointer in 'struct device',
> >> since the 'func' may not be used by more than 90% devices, also
> >> have document benefit, even may simplify implementation of the
> >> mechanism.
> >
> > And how many struct device objects there are for the one extra pointer to
> > matter, let alone the fact that you want to replace it by one extra pointer
> > somewhere else?
> 
> For example, in the pandaboard(one omap4 based small system), follows
> the count of device instances:
> 
>          [root@root]#dmesg | grep device_add | wc -l
>         471
> 
> The above is just a simple configuration(no graphics, no video/video, only
> console enabled) on Pandaboard.
> 
> If the callback may be defined in dev_pm_info,

I guess you mean struct dev_pm_ops, right?

> not only memory can be saved, also there are other advantages described
> before.

So now please count how many struct dev_pm_ops objects there are on that system
and compute the differece.  And please note that drivers that don't use
struct dev_pm_ops for power management will do that in the future.

Also please note that the caller of pm_runtime_get_and_call() need not be
a driver, at least in theory.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki Aug. 7, 2012, 8:57 p.m. UTC | #10
On Tuesday, August 07, 2012, Ming Lei wrote:
> On Tue, Aug 7, 2012 at 11:42 PM, Alan Stern <stern@rowland.harvard.edu> wrote:
> >
> > No, that's really not what the patch is doing.
> >
> > The idea behind the new API is that "func" will be called as soon as we
> > know the device is at full power.  That could be after the next runtime
> > resume or it could be right away.  This is a one-time call; it should
> 
> IMO, in the both two cases, the 'func' should be very similar with
> .runtime_post_resume from view of the caller because the caller
> don't know what the power state of the device is now, so they may
> always think the 'func' should do something similar done in
> .runtime_post_resume.
> 
> Also .runtime_post_resume always knows the device's power state
> is active, which is same with 'func'. In fact, it doesn't matter if the active
> state is the 1st time or other times, does it?

What Alan wanted to say, I think, was that .runtime_post_resume() would have
to be always identical, where func() need not be always the same function.
Moreover, .runtime_post_resume() would _always_ be run after a device resume,
while func() is run only _once_.

> > not be made _every_ time the device resumes.
> 
> Suppose the device is always resumed in the path(such as irq context),
> the callback is still called every time.

Yes, but what if you have _two_ code paths and you want to call different
code as func() in each of them?

> If the .runtime_post_resume is to be a one-time call, that is easy to do it.

No, it isn't.

> Also I am wondering why the callback shouldn't be called after resume
> in sync context, and it may simplify implementation if the two contexts
> (sync vs. async) are kept consistent.

I have no idea what you're talking about.

We actually have a callback that is run every time a device is resumed.
It is called .runtime_idle().  Does it help, though?  No, it doesn't.

> >> Also, the 'func' should be per driver, not per device since only one
> >> 'func' is enough for all same kind of devices driven by one same
> >> driver.
> >
> > But what if the subsystem defines its own dev_pm_info structure?  Then
> > the driver's dev_pm_info will be ignored by the runtime PM core.  All
> > the subsystems would have to be changed.
> 
> Suppose .runtime_post_resume is introduced,

It is not going to be introduced, period.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 8, 2012, 2:02 a.m. UTC | #11
On Wed, Aug 8, 2012 at 4:45 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Tuesday, August 07, 2012, Ming Lei wrote:
>> On Tue, Aug 7, 2012 at 7:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>> >> Yes, I agree, but I don't think it may make .runtime_post_resume
>> >> not doable, do I?
>> >
>> > No more device PM callbacks, please.
>>
>> IMO, what the patch is doing is to introduce one callback which
>> is just called after .runtime_resume is completed,
>
> No, it is not a callback.  It is a function to be run _once_ when the device is
> known to be active.  It is not a member of a data type or anything like this.

Looks it was said by Alan, not me, :-)

"The documentation should explain that in some ways, "func" is like a
workqueue callback routine:".

See http://marc.info/?l=linux-usb&m=134426838507799&w=2

>
> It's kind of disappointing that you don't see a difference between that and a
> callback.
>
>> and you want to move something out of previous .runtime_resume
>
> No, I don't.  Where did you get that idea from?

If so, I am wondering why the 'func' can't be called in .runtime_resume
directly, and follow the below inside caller at the same time?

             if (device is active or disabled)
                      call func(device).

>
>> and do it in another new callback to speedup resume,
>
> No, not to speed up resume.  The idea is to allow drivers to run something
> when the resume is complete, so that they don't have to implement a "resume
> detection" logic or use .runtime_resume() to run things that don't belong
> there.

Looks it was said by you, :-)

"Unless your _driver_ callback is actually executed from within a PM domain
callback, for example, and something else may be waiting for it to complete,
so your data processing is adding latencies to some other threads.  I'm not
making that up, by the way, that really can happen."

See http://marc.info/?l=linux-pm&m=134394271517527&w=2

Alan also said "Okay, those are valid reasons" for the idea. Except for
this one, I don't see other obvious advantages about the patch.

>
>> so it should be reasonable to introduce the .runtime_post_resume callback in
>> logic.
>
> No.  This doesn't have anything to do with callbacks!
>
> If you want a new callback, you should specify what the role of this callback
> is, otherwise it is not well defined.  I this case, though, what the role of
> func() is depends on the caller and most likely every driver would use it
> for something different.  So no, I don't see how it can be a callback.
>
>> Also, the 'func' should be per driver, not per device since only one
>> 'func' is enough for all same kind of devices driven by one same
>> driver.
>
> It isn't per device!  It is per _caller_.  The fact that the pointer is
> stored _temporarily_ in struct device doesn't mean that it is per device
> and that it is a callback.  From the struct device point of view it is _data_,
> not a member function.

The fact is that it will become per-device one you store it in 'struct device'.

Suppose one driver may drive 10000 same devices, the same data will be
stored inside all the 10000 device instances, it is a good way to do it?

Not mention 90% devices mayn't use the _temporarily_ data at all.

>
>> > Besides, callbacks in struct dev_pm_ops are not only for drivers.
>>
>> All the current 3 runtime callbacks are for devices. If you mean
>> they can be defined in bus/power_domain/device_type, .runtime_post_resume
>> still can be defined there too.
>
> No, it wouldn't make _any_ _sense_ there, because its role there cannot be
> defined in any sane way.
>
>> >> > Also, "func" should not be stored in dev_pm_ops because it won't be a
>> >> > read-only value.
>> >>
>> >> Sorry, could you explain it in detail that why the 'func'
>> >> has to switch to multiple functions dynamically? I understand
>> >> one function is enough and sometimes it can be bypassed with
>> >> one flag(such as, ignore_post_resume is introduced in dev_pm_info)
>> >> set.  Also, the driver can store the device specific states
>> >> in its own device instance to deal with different situations in the callback.
>> >>
>> >> If the idea is doable, we can save one pointer in 'struct device',
>> >> since the 'func' may not be used by more than 90% devices, also
>> >> have document benefit, even may simplify implementation of the
>> >> mechanism.
>> >
>> > And how many struct device objects there are for the one extra pointer to
>> > matter, let alone the fact that you want to replace it by one extra pointer
>> > somewhere else?
>>
>> For example, in the pandaboard(one omap4 based small system), follows
>> the count of device instances:
>>
>>          [root@root]#dmesg | grep device_add | wc -l
>>         471
>>
>> The above is just a simple configuration(no graphics, no video/video, only
>> console enabled) on Pandaboard.
>>
>> If the callback may be defined in dev_pm_info,
>
> I guess you mean struct dev_pm_ops, right?

Sorry, it is a typo.

>
>> not only memory can be saved, also there are other advantages described
>> before.
>
> So now please count how many struct dev_pm_ops objects there are on that system
> and compute the differece.  And please note that drivers that don't use
> struct dev_pm_ops for power management will do that in the future.

Most of dev_pm_ops stays inside module image, and not in ram.
It is a bit difficult to get the count of all dev_pm_ops objects in ram
since it is defined statically.

For example, in USB subsystem, there are only 2 dev_pm_ops
objects in RAM for a normal system, but there may have hundreds of
usb devices in the system(usb_device, usb_interface, ep_device, ...).

>
> Also please note that the caller of pm_runtime_get_and_call() need not be
> a driver, at least in theory.

I admit it in theory,  :-)


Thanks,
Alan Stern Aug. 8, 2012, 6:42 p.m. UTC | #12
On Wed, 8 Aug 2012, Ming Lei wrote:

> On Wed, Aug 8, 2012 at 4:45 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > On Tuesday, August 07, 2012, Ming Lei wrote:
> >> On Tue, Aug 7, 2012 at 7:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >> >> Yes, I agree, but I don't think it may make .runtime_post_resume
> >> >> not doable, do I?
> >> >
> >> > No more device PM callbacks, please.
> >>
> >> IMO, what the patch is doing is to introduce one callback which
> >> is just called after .runtime_resume is completed,
> >
> > No, it is not a callback.  It is a function to be run _once_ when the device is
> > known to be active.  It is not a member of a data type or anything like this.
> 
> Looks it was said by Alan, not me, :-)
> 
> "The documentation should explain that in some ways, "func" is like a
> workqueue callback routine:".
> 
> See http://marc.info/?l=linux-usb&m=134426838507799&w=2

I didn't say it _was_ a callback; I said it was _like_ a callback in 
some ways.

> If so, I am wondering why the 'func' can't be called in .runtime_resume
> directly, and follow the below inside caller at the same time?
> 
>              if (device is active or disabled)
>                       call func(device).

That was my original suggestion.  Rafael pointed out that having a 
single function call to do this would make it easier for driver writers 
to get it right.

> > No, not to speed up resume.  The idea is to allow drivers to run something
> > when the resume is complete, so that they don't have to implement a "resume
> > detection" logic or use .runtime_resume() to run things that don't belong
> > there.
> 
> Looks it was said by you, :-)
> 
> "Unless your _driver_ callback is actually executed from within a PM domain
> callback, for example, and something else may be waiting for it to complete,
> so your data processing is adding latencies to some other threads.  I'm not
> making that up, by the way, that really can happen."
> 
> See http://marc.info/?l=linux-pm&m=134394271517527&w=2
> 
> Alan also said "Okay, those are valid reasons" for the idea. Except for
> this one, I don't see other obvious advantages about the patch.

Those _are_ the two advantages:

	The runtime-resume method does nothing but bring the device
	back to full power; it doesn't do any other processing;

	It's easier than calling pm_runtime_get() followed by a test
	to see whether the device is active.

> Suppose one driver may drive 10000 same devices, the same data will be
> stored inside all the 10000 device instances, it is a good way to do it?

If you've got a system with 10000 device instances, you can probably 
spare the memory needed to store these function pointers.  But you're 
right that this is a disadvantage.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki Aug. 8, 2012, 8:16 p.m. UTC | #13
On Wednesday, August 08, 2012, Alan Stern wrote:
> On Wed, 8 Aug 2012, Ming Lei wrote:
[...]
> > If so, I am wondering why the 'func' can't be called in .runtime_resume
> > directly, and follow the below inside caller at the same time?
> > 
> >              if (device is active or disabled)
> >                       call func(device).
> 
> That was my original suggestion.  Rafael pointed out that having a 
> single function call to do this would make it easier for driver writers 
> to get it right.

Not only would it be easier to get it right, in my opinion, but also in the
example above func() may be called in some places where the driver may not
want it to be called and which are very difficult to detect (like a resume
from __device_suspend() during system suspend).  Moreover, if the driver
callback is not executed directly by the PM core, but instead it is executed by
a subsystem or PM domain callback, there's no guarantee that the device _can_
be used for processing regular I/O before the driver callback returns (the
subsystem callback may still need to do something _after_ that happens).

So, this appears to be a matter of correctness too.

> > > No, not to speed up resume.  The idea is to allow drivers to run something
> > > when the resume is complete, so that they don't have to implement a "resume
> > > detection" logic or use .runtime_resume() to run things that don't belong
> > > there.
> > 
> > Looks it was said by you, :-)
> > 
> > "Unless your _driver_ callback is actually executed from within a PM domain
> > callback, for example, and something else may be waiting for it to complete,
> > so your data processing is adding latencies to some other threads.  I'm not
> > making that up, by the way, that really can happen."
> > 
> > See http://marc.info/?l=linux-pm&m=134394271517527&w=2
> > 
> > Alan also said "Okay, those are valid reasons" for the idea. Except for
> > this one, I don't see other obvious advantages about the patch.
> 
> Those _are_ the two advantages:
> 
> 	The runtime-resume method does nothing but bring the device
> 	back to full power; it doesn't do any other processing;
> 
> 	It's easier than calling pm_runtime_get() followed by a test
> 	to see whether the device is active.
> 
> > Suppose one driver may drive 10000 same devices, the same data will be
> > stored inside all the 10000 device instances, it is a good way to do it?
> 
> If you've got a system with 10000 device instances, you can probably 
> spare the memory needed to store these function pointers.  But you're 
> right that this is a disadvantage.

Yes, it is in grand general, but that also is a matter of alignment and of
the way the slab allocator works.  For example, if every struct device
object were stored at the beginning of a new memory page, then its size
wouldn't matter a lot as long as it were smaller than PAGE_SIZE.

I haven't checked the details, but I'm pretty sure that focusing on the size
alone doesn't give us the whole picture here.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki Aug. 8, 2012, 10:27 p.m. UTC | #14
On Wednesday, August 08, 2012, Ming Lei wrote:
> On Wed, Aug 8, 2012 at 4:45 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > On Tuesday, August 07, 2012, Ming Lei wrote:
> >> On Tue, Aug 7, 2012 at 7:23 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
[...]
> >
> >> and you want to move something out of previous .runtime_resume
> >
> > No, I don't.  Where did you get that idea from?
> 
> If so, I am wondering why the 'func' can't be called in .runtime_resume
> directly, and follow the below inside caller at the same time?
> 
>              if (device is active or disabled)
>                       call func(device).

This was covered in my last reply to Alan.

> >> and do it in another new callback to speedup resume,
> >
> > No, not to speed up resume.  The idea is to allow drivers to run something
> > when the resume is complete, so that they don't have to implement a "resume
> > detection" logic or use .runtime_resume() to run things that don't belong
> > there.
> 
> Looks it was said by you, :-)
> 
> "Unless your _driver_ callback is actually executed from within a PM domain
> callback, for example, and something else may be waiting for it to complete,
> so your data processing is adding latencies to some other threads.  I'm not
> making that up, by the way, that really can happen."
> 
> See http://marc.info/?l=linux-pm&m=134394271517527&w=2

We were discussing specific pseudo-code in the documentation and you
conveniently took the above out of context.  Never mind. :-)

I was trying to illustrate my point with a convincing example and I admit I
could do better.

Anyway the point was that the purpose of .runtime_resume() was not to
process random I/O.  Its purpose is to _resume_ a suspended device,
no less, no more.  Which the "so that they don't have to [...] use
.runtime_resume() to run things that don't belong there." sentence above is
about.  So I've been saying the same thing all the time and it's never been
specifically about speedup (or rather about latencies added by random I/O
processing in drivers' runtime resume callbacks).

> Alan also said "Okay, those are valid reasons" for the idea. Except for
> this one, I don't see other obvious advantages about the patch.
> 
> >
> >> so it should be reasonable to introduce the .runtime_post_resume callback in
> >> logic.
> >
> > No.  This doesn't have anything to do with callbacks!
> >
> > If you want a new callback, you should specify what the role of this callback
> > is, otherwise it is not well defined.  I this case, though, what the role of
> > func() is depends on the caller and most likely every driver would use it
> > for something different.  So no, I don't see how it can be a callback.
> >
> >> Also, the 'func' should be per driver, not per device since only one
> >> 'func' is enough for all same kind of devices driven by one same
> >> driver.
> >
> > It isn't per device!  It is per _caller_.  The fact that the pointer is
> > stored _temporarily_ in struct device doesn't mean that it is per device
> > and that it is a callback.  From the struct device point of view it is _data_,
> > not a member function.
> 
> The fact is that it will become per-device one you store it in 'struct device'.
> 
> Suppose one driver may drive 10000 same devices,

Do you have any specific example of that?  If not, then please don't make up
arguments.

> the same data will be stored inside all the 10000 device instances, it is a
> good way to do it?
> 
> Not mention 90% devices mayn't use the _temporarily_ data at all.

It may be unused just as well as an additional callback pointer in a driver
object.

[...]
> >
> > So now please count how many struct dev_pm_ops objects there are on that system
> > and compute the differece.  And please note that drivers that don't use
> > struct dev_pm_ops for power management will do that in the future.
> 
> Most of dev_pm_ops stays inside module image, and not in ram.

Care to explain?  I'm not sure I understand the above correctly.

> It is a bit difficult to get the count of all dev_pm_ops objects in ram
> since it is defined statically.

Still, they are occupying memory, aren't they?  So you really can't tell
the difference between storing pointers in device driver objects and
struct device objects.

> For example, in USB subsystem, there are only 2 dev_pm_ops
> objects in RAM for a normal system, but there may have hundreds of
> usb devices in the system(usb_device, usb_interface, ep_device, ...).

Yes, USB is kind of exceptional, but also this means that your "let's
put that pointer into struct dev_pm_ops" idea won't work for USB drivers,
precisely because they don't use struct dev_pm_ops objects.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 9, 2012, 5:55 a.m. UTC | #15
On Thu, Aug 9, 2012 at 4:16 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Wednesday, August 08, 2012, Alan Stern wrote:

To be honest, I agree on the idea:

       The runtime-resume method does nothing but bring the device
        back to full power; it doesn't do any other processing, which
        should be done in 'func' or some kind of callback.

I just think it is not good to introduce one extra field of 'func' in
dev_pm_info which is embedded into struct device in the patch,
see the reasons in the last part of the reply.

>> That was my original suggestion.  Rafael pointed out that having a
>> single function call to do this would make it easier for driver writers
>> to get it right.
>
> Not only would it be easier to get it right, in my opinion, but also in the
> example above func() may be called in some places where the driver may not
> want it to be called and which are very difficult to detect (like a resume
> from __device_suspend() during system suspend).  Moreover, if the driver

IMO, func() does some driver specific things, which PM core shouldn't pay
special attention to in theory.

> callback is not executed directly by the PM core, but instead it is executed by
> a subsystem or PM domain callback, there's no guarantee that the device _can_
> be used for processing regular I/O before the driver callback returns (the
> subsystem callback may still need to do something _after_ that happens).

driver->runtime_resume should be allowed to do I/O things after
the device has been woken up inside driver callback, shouldn't it? If it
isn't allowed, something wrong should have be reported before.

> So, this appears to be a matter of correctness too.


>> If you've got a system with 10000 device instances, you can probably
>> spare the memory needed to store these function pointers.  But you're
>> right that this is a disadvantage.
>
> Yes, it is in grand general, but that also is a matter of alignment and of
> the way the slab allocator works.  For example, if every struct device
> object were stored at the beginning of a new memory page, then its size
> wouldn't matter a lot as long as it were smaller than PAGE_SIZE.
>
> I haven't checked the details, but I'm pretty sure that focusing on the size
> alone doesn't give us the whole picture here.

The allocation by kmalloc is not page aligned, and it is 2-power
aligned, for example 16, 32, 64,..., also it is at least hardware
L1 cache size aligned.

Firstly, introduce one extra pointer in device may increase memory
consume for device allocation, also it reflects one design drawback
in the patch, see below.

More importantly, the implementation violates some software design
principle in object oriented design. The driver core takes much
object oriented idea in its design and implementation, and introduces
device / driver / bus class. One class is an abstraction of one kind of
objects or instances with same attributes, so one class may include
many objects, for example, usb_device(class) is abstraction for all usb
devices, and there may have many many usb devices in a system, but only
one usb_device structure defined.

One specific driver class is a special class since it may only have one
driver object , which is basically read only. In OO world, it might be called
static class.

Generally, one driver object serves one specific device class, instead
of one device object. For example, usb_storage_driver is a driver object,
which serves all usb mass storage devices which all belongs to usb mass
storage class).

The 'func' to be introduced is a function pointer, which should be
driver related thing and should serve one specific device class in theory,
and it shouldn't serve only one concrete device object, so it is not good
to include it into 'struct device'.

The only function pointer in struct device:

              void    (*release)(struct device *dev)

should be removed.  All its users should convert to use device_type to
define release handler for its 'device class', instead of device object.

So suggest to improve it.


Thanks,
Rafael J. Wysocki Aug. 9, 2012, 10:46 a.m. UTC | #16
On Thursday, August 09, 2012, Ming Lei wrote:
> On Thu, Aug 9, 2012 at 4:16 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > On Wednesday, August 08, 2012, Alan Stern wrote:
> 
> To be honest, I agree on the idea:
> 
>        The runtime-resume method does nothing but bring the device
>         back to full power; it doesn't do any other processing, which
>         should be done in 'func' or some kind of callback.

Good. :-)

> I just think it is not good to introduce one extra field of 'func' in
> dev_pm_info which is embedded into struct device in the patch,
> see the reasons in the last part of the reply.
> 
> >> That was my original suggestion.  Rafael pointed out that having a
> >> single function call to do this would make it easier for driver writers
> >> to get it right.
> >
> > Not only would it be easier to get it right, in my opinion, but also in the
> > example above func() may be called in some places where the driver may not
> > want it to be called and which are very difficult to detect (like a resume
> > from __device_suspend() during system suspend).  Moreover, if the driver
> 
> IMO, func() does some driver specific things, which PM core shouldn't pay
> special attention to in theory.

But also it shouldn't execute that code, right?

> > callback is not executed directly by the PM core, but instead it is executed by
> > a subsystem or PM domain callback, there's no guarantee that the device _can_
> > be used for processing regular I/O before the driver callback returns (the
> > subsystem callback may still need to do something _after_ that happens).
> 
> driver->runtime_resume should be allowed to do I/O things after
> the device has been woken up inside driver callback, shouldn't it? If it
> isn't allowed, something wrong should have be reported before.

Well, the lack of reports doesn't mean there are no bugs. :-)

People may actually see those bugs, but they don't report them or they
report them as system suspend/resume bugs, for example.

> > So, this appears to be a matter of correctness too.
> 
> 
> >> If you've got a system with 10000 device instances, you can probably
> >> spare the memory needed to store these function pointers.  But you're
> >> right that this is a disadvantage.
> >
> > Yes, it is in grand general, but that also is a matter of alignment and of
> > the way the slab allocator works.  For example, if every struct device
> > object were stored at the beginning of a new memory page, then its size
> > wouldn't matter a lot as long as it were smaller than PAGE_SIZE.
> >
> > I haven't checked the details, but I'm pretty sure that focusing on the size
> > alone doesn't give us the whole picture here.
> 
> The allocation by kmalloc is not page aligned, and it is 2-power
> aligned, for example 16, 32, 64,..., also it is at least hardware
> L1 cache size aligned.

Sure, that's why I used the conditional above.  And it doesn't mean I didn't
have the point.

> Firstly, introduce one extra pointer in device may increase memory
> consume for device allocation,

Yes, it does, which may or may not matter depending on the actual size of
struct device and the CPU cache line size on the given machine, right?

> also it reflects one design drawback in the patch, see below.
> 
> More importantly, the implementation violates some software design
> principle in object oriented design.

It doesn't violate anything and you're just ignoring what you've been told.
That makes discussing with you rather difficult, but I'll try again
nevertheless.

If you look at the actual patch I've just posted:

https://patchwork.kernel.org/patch/1299781/

you can see that power.func is never run directly.  Moreover, the pointer it
contains is only used to run a function in pm_runtime_work() and note that
pm_runtime_work() reads that pointer _twice_, because it may be changed in the
meantime by a concurrent thread.

All of this means what I've told you already at least once: power.func is
_not_ _a_ _member_ _function_.

IOW, if it were C++, power.func would still be a function pointer, not a method,
because it is _data_, although its data type happens to be "void function
taking a struct device pointer argument".  It is a data field used to pass
information to a work function, pm_runtime_work(), from a piece of code that
schedules its execution, __pm_runtime_get_and_call().  See now?

> The driver core takes much
> object oriented idea in its design and implementation, and introduces
> device / driver / bus class. One class is an abstraction of one kind of
> objects or instances with same attributes, so one class may include
> many objects, for example, usb_device(class) is abstraction for all usb
> devices, and there may have many many usb devices in a system, but only
> one usb_device structure defined.
> 
> One specific driver class is a special class since it may only have one
> driver object , which is basically read only. In OO world, it might be called
> static class.
> 
> Generally, one driver object serves one specific device class, instead
> of one device object. For example, usb_storage_driver is a driver object,
> which serves all usb mass storage devices which all belongs to usb mass
> storage class).
> 
> The 'func' to be introduced is a function pointer, which should be
> driver related thing and should serve one specific device class in theory,
> and it shouldn't serve only one concrete device object, so it is not good
> to include it into 'struct device'.

No.  You're confusing function pointers with member functions.

Yes, we use function pointers to store the addresses of member functions,
because that's what we can do in C.  Again, in C++ member functions would be
methods, but we still might use function pointers for other purposes.

Whether or not it is "elegant" or "clean" (or whatever you call it) function
pointers for other purposes is a different matter and I think it is highly
subjective.

> The only function pointer in struct device:
> 
>               void    (*release)(struct device *dev)
> 
> should be removed.  All its users should convert to use device_type to
> define release handler for its 'device class', instead of device object.
> 
> So suggest to improve it.

What I want the callers to be able to do is "run this code right now if the
device is operational or schedule the resume of it and run this code when
it's been resumed".  So, the pointer is used to tell the workqueue code
which code to run (if any) when the device has been resumed.

I don't want a "run this code every time the device is resumed" kind of thing,
which I agree that a callback would be more suitable for.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 9, 2012, 10:55 a.m. UTC | #17
On Thu, Aug 9, 2012 at 6:46 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Thursday, August 09, 2012, Ming Lei wrote:

>> driver->runtime_resume should be allowed to do I/O things after
>> the device has been woken up inside driver callback, shouldn't it? If it
>> isn't allowed, something wrong should have be reported before.
>
> Well, the lack of reports doesn't mean there are no bugs. :-)
>
> People may actually see those bugs, but they don't report them or they
> report them as system suspend/resume bugs, for example.

Also, I am still wondering why subsystem PM callback need to do
something after driver->runtime_resume completes, could you explain
it?

>> Firstly, introduce one extra pointer in device may increase memory
>> consume for device allocation,
>
> Yes, it does, which may or may not matter depending on the actual size of
> struct device and the CPU cache line size on the given machine, right?

It may double memory allocation size in some cases. And it is very possible
since there are so many device objects in system.

>
>> also it reflects one design drawback in the patch, see below.
>>
>> More importantly, the implementation violates some software design
>> principle in object oriented design.
>
> It doesn't violate anything and you're just ignoring what you've been told.
> That makes discussing with you rather difficult, but I'll try again
> nevertheless.
>
> If you look at the actual patch I've just posted:
>
> https://patchwork.kernel.org/patch/1299781/
>
> you can see that power.func is never run directly.  Moreover, the pointer it
> contains is only used to run a function in pm_runtime_work() and note that
> pm_runtime_work() reads that pointer _twice_, because it may be changed in the
> meantime by a concurrent thread.

I have explained it before, it is enough to keep the pointer read only
since driver can maintain its internal state in its specific device instance
(for example, usb_interface objects) and decide what to do in 'func'
for situations, right?



Thanks,
Rafael J. Wysocki Aug. 9, 2012, 7:41 p.m. UTC | #18
On Thursday, August 09, 2012, Ming Lei wrote:
> On Thu, Aug 9, 2012 at 6:46 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > On Thursday, August 09, 2012, Ming Lei wrote:
> 
> >> driver->runtime_resume should be allowed to do I/O things after
> >> the device has been woken up inside driver callback, shouldn't it? If it
> >> isn't allowed, something wrong should have be reported before.
> >
> > Well, the lack of reports doesn't mean there are no bugs. :-)
> >
> > People may actually see those bugs, but they don't report them or they
> > report them as system suspend/resume bugs, for example.
> 
> Also, I am still wondering why subsystem PM callback need to do
> something after driver->runtime_resume completes, could you explain
> it?

It just isn't guaranteed that the subsystem callback won't do anything
after driver->runtime_resume completion.  I agree that it isn't likely
to happen.

> >> Firstly, introduce one extra pointer in device may increase memory
> >> consume for device allocation,
> >
> > Yes, it does, which may or may not matter depending on the actual size of
> > struct device and the CPU cache line size on the given machine, right?
> 
> It may double memory allocation size in some cases. And it is very possible
> since there are so many device objects in system.

Numbers, please?  If you don't have them, it's just waving your hands.

> >> also it reflects one design drawback in the patch, see below.
> >>
> >> More importantly, the implementation violates some software design
> >> principle in object oriented design.
> >
> > It doesn't violate anything and you're just ignoring what you've been told.
> > That makes discussing with you rather difficult, but I'll try again
> > nevertheless.
> >
> > If you look at the actual patch I've just posted:
> >
> > https://patchwork.kernel.org/patch/1299781/
> >
> > you can see that power.func is never run directly.  Moreover, the pointer it
> > contains is only used to run a function in pm_runtime_work() and note that
> > pm_runtime_work() reads that pointer _twice_, because it may be changed in the
> > meantime by a concurrent thread.
> 
> I have explained it before, it is enough to keep the pointer read only
> since driver can maintain its internal state in its specific device instance
> (for example, usb_interface objects) and decide what to do in 'func'
> for situations, right?

Yes, it is.  I actually have a patch that does something similar (I'll post it
shortly).

Of course, it is based on the assumption that func() will always be the same
pointer for the given driver, which doesn't seem to be proven, but perhaps
it is sufficient.  At least I'm not aware of use cases where it wouldn't be.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei Aug. 10, 2012, 3:19 a.m. UTC | #19
On Fri, Aug 10, 2012 at 3:41 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
> It just isn't guaranteed that the subsystem callback won't do anything
> after driver->runtime_resume completion.  I agree that it isn't likely
> to happen.

In fact, the subsystem callback should make sure that don't happen, see
below comments on .runtime_resume:

 * @runtime_resume: Put the device into the fully active state in response to a
 *      wakeup event generated by hardware or at the request of software.  If
 *      necessary, put the device into the full-power state and restore its
 *      registers, so that it is fully operational.

So once driver->runtime_resume completes, the device should be fully operational
from the view of driver.

>
>> >> Firstly, introduce one extra pointer in device may increase memory
>> >> consume for device allocation,
>> >
>> > Yes, it does, which may or may not matter depending on the actual size of
>> > struct device and the CPU cache line size on the given machine, right?
>>
>> It may double memory allocation size in some cases. And it is very possible
>> since there are so many device objects in system.
>
> Numbers, please?  If you don't have them, it's just waving your hands.

It is easily observed and proved. Suppose sizeof(struct foo_dev) is 508bytes,
it will become 516bytes after your patch applies on 64bit arch, so
ksize(foo_dev_ptr)
will become 1024 and the memory consumption of the object is doubled.

>> I have explained it before, it is enough to keep the pointer read only
>> since driver can maintain its internal state in its specific device instance
>> (for example, usb_interface objects) and decide what to do in 'func'
>> for situations, right?
>
> Yes, it is.  I actually have a patch that does something similar (I'll post it
> shortly).

I have seen your patch which moves the 'func' from device into device_driver.
It is much better than before.

> Of course, it is based on the assumption that func() will always be the same
> pointer for the given driver, which doesn't seem to be proven, but perhaps
> it is sufficient.  At least I'm not aware of use cases where it wouldn't be.

Since you have moved 'func' into device_driver, and you still thought the
pointer can't be changed after it is set, so why not implement it as callback?

IMO, it is a bit weird to just store a function pointer data(not for
callback) in
driver object, but anyway, it is better than before, :-)

Thanks,
Rafael J. Wysocki Aug. 10, 2012, 8:29 p.m. UTC | #20
On Friday, August 10, 2012, Ming Lei wrote:
> On Fri, Aug 10, 2012 at 3:41 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >
> > It just isn't guaranteed that the subsystem callback won't do anything
> > after driver->runtime_resume completion.  I agree that it isn't likely
> > to happen.
> 
> In fact, the subsystem callback should make sure that don't happen, see
> below comments on .runtime_resume:
> 
>  * @runtime_resume: Put the device into the fully active state in response to a
>  *      wakeup event generated by hardware or at the request of software.  If
>  *      necessary, put the device into the full-power state and restore its
>  *      registers, so that it is fully operational.
> 
> So once driver->runtime_resume completes, the device should be fully operational
> from the view of driver.

This comment only applies literally to drivers whose callbacks are run directly
by the PM core.  If subsystems and/or PM domains are involved, the interactions
between different layers of callbacks obviously have to be taken into account.

Please note, however, that this comment doesn't say anything about processing
I/O by the callback (hint: the callback is _not_ _supposed_ to do that).

> >> >> Firstly, introduce one extra pointer in device may increase memory
> >> >> consume for device allocation,
> >> >
> >> > Yes, it does, which may or may not matter depending on the actual size of
> >> > struct device and the CPU cache line size on the given machine, right?
> >>
> >> It may double memory allocation size in some cases. And it is very possible
> >> since there are so many device objects in system.
> >
> > Numbers, please?  If you don't have them, it's just waving your hands.
> 
> It is easily observed and proved. Suppose sizeof(struct foo_dev) is 508bytes,
> it will become 516bytes after your patch applies on 64bit arch, so
> ksize(foo_dev_ptr)
> will become 1024 and the memory consumption of the object is doubled.

I meant real numbers, not made-up ones.

> >> I have explained it before, it is enough to keep the pointer read only
> >> since driver can maintain its internal state in its specific device instance
> >> (for example, usb_interface objects) and decide what to do in 'func'
> >> for situations, right?
> >
> > Yes, it is.  I actually have a patch that does something similar (I'll post it
> > shortly).
> 
> I have seen your patch which moves the 'func' from device into device_driver.
> It is much better than before.

Oh, thanks for letting me know.

> > Of course, it is based on the assumption that func() will always be the same
> > pointer for the given driver, which doesn't seem to be proven, but perhaps
> > it is sufficient.  At least I'm not aware of use cases where it wouldn't be.
> 
> Since you have moved 'func' into device_driver, and you still thought the
> pointer can't be changed after it is set, so why not implement it as callback?

I don't understand what you mean.

It _is_ a callback now in fact.

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux/drivers/base/power/runtime.c
===================================================================
--- linux.orig/drivers/base/power/runtime.c
+++ linux/drivers/base/power/runtime.c
@@ -484,6 +484,15 @@  static int rpm_suspend(struct device *de
 	goto out;
 }
 
+void rpm_queue_up_resume(struct device *dev)
+{
+	dev->power.request = RPM_REQ_RESUME;
+	if (!dev->power.request_pending) {
+		dev->power.request_pending = true;
+		queue_work(pm_wq, &dev->power.work);
+	}
+}
+
 /**
  * rpm_resume - Carry out runtime resume of given device.
  * @dev: Device to resume.
@@ -524,7 +533,9 @@  static int rpm_resume(struct device *dev
 	 * rather than cancelling it now only to restart it again in the near
 	 * future.
 	 */
-	dev->power.request = RPM_REQ_NONE;
+	if (dev->power.request != RPM_REQ_RESUME || !dev->power.func)
+		dev->power.request = RPM_REQ_NONE;
+
 	if (!dev->power.timer_autosuspends)
 		pm_runtime_deactivate_timer(dev);
 
@@ -533,6 +544,12 @@  static int rpm_resume(struct device *dev
 		goto out;
 	}
 
+	if (dev->power.func && (rpmflags & RPM_ASYNC)) {
+		rpm_queue_up_resume(dev);
+		retval = 0;
+		goto out;
+	}
+
 	if (dev->power.runtime_status == RPM_RESUMING
 	    || dev->power.runtime_status == RPM_SUSPENDING) {
 		DEFINE_WAIT(wait);
@@ -591,11 +608,7 @@  static int rpm_resume(struct device *dev
 
 	/* Carry out an asynchronous or a synchronous resume. */
 	if (rpmflags & RPM_ASYNC) {
-		dev->power.request = RPM_REQ_RESUME;
-		if (!dev->power.request_pending) {
-			dev->power.request_pending = true;
-			queue_work(pm_wq, &dev->power.work);
-		}
+		rpm_queue_up_resume(dev);
 		retval = 0;
 		goto out;
 	}
@@ -691,6 +704,7 @@  static int rpm_resume(struct device *dev
 static void pm_runtime_work(struct work_struct *work)
 {
 	struct device *dev = container_of(work, struct device, power.work);
+	void (*func)(struct device *) = NULL;
 	enum rpm_request req;
 
 	spin_lock_irq(&dev->power.lock);
@@ -715,12 +729,24 @@  static void pm_runtime_work(struct work_
 		rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO);
 		break;
 	case RPM_REQ_RESUME:
-		rpm_resume(dev, RPM_NOWAIT);
+		func = dev->power.func;
+		if (func) {
+			dev->power.func = NULL;
+			pm_runtime_get_noresume(dev);
+			rpm_resume(dev, 0);
+		} else {
+			rpm_resume(dev, RPM_NOWAIT);
+		}
 		break;
 	}
 
  out:
 	spin_unlock_irq(&dev->power.lock);
+
+	if (func) {
+		func(dev);
+		pm_runtime_put(dev);
+	}
 }
 
 /**
@@ -877,6 +903,48 @@  int __pm_runtime_resume(struct device *d
 }
 EXPORT_SYMBOL_GPL(__pm_runtime_resume);
 
+int pm_runtime_get_and_call(struct device *dev, void (*func)(struct device *),
+			     void (*func_async)(struct device *))
+{
+	unsigned long flags;
+	int ret;
+
+	atomic_inc(&dev->power.usage_count);
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	ret = dev->power.runtime_error;
+	if (ret) {
+		func = NULL;
+		goto out;
+	}
+
+	if (dev->power.runtime_status != RPM_ACTIVE
+	    && dev->power.disable_depth == 0)
+		func = NULL;
+
+	if (!func && func_async) {
+		if (dev->power.func) {
+			ret = -EAGAIN;
+			goto out;
+		} else {
+			dev->power.func = func_async;
+		}
+	}
+
+	rpm_resume(dev, RPM_ASYNC);
+
+ out:
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	if (func) {
+		func(dev);
+		return 1;
+	}
+
+	return ret;
+}
+
 /**
  * __pm_runtime_set_status - Set runtime PM status of a device.
  * @dev: Device to handle.
Index: linux/include/linux/pm.h
===================================================================
--- linux.orig/include/linux/pm.h
+++ linux/include/linux/pm.h
@@ -547,6 +547,7 @@  struct dev_pm_info {
 	unsigned long		suspended_jiffies;
 	unsigned long		accounting_timestamp;
 	struct dev_pm_qos_request *pq_req;
+	void			(*func)(struct device *);
 #endif
 	struct pm_subsys_data	*subsys_data;  /* Owned by the subsystem. */
 	struct pm_qos_constraints *constraints;
Index: linux/include/linux/pm_runtime.h
===================================================================
--- linux.orig/include/linux/pm_runtime.h
+++ linux/include/linux/pm_runtime.h
@@ -47,6 +47,9 @@  extern void pm_runtime_set_autosuspend_d
 extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
 extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
+extern int pm_runtime_get_and_call(struct device *dev,
+				   void (*func)(struct device *),
+				   void (*func_async)(struct device *));
 
 static inline bool pm_children_suspended(struct device *dev)
 {
@@ -150,6 +153,17 @@  static inline void pm_runtime_set_autosu
 static inline unsigned long pm_runtime_autosuspend_expiration(
 				struct device *dev) { return 0; }
 
+static inline int pm_runtime_get_and_call(struct device *dev,
+					  void (*func)(struct device *),
+					  void (*func_async)(struct device *))
+{
+	if (func) {
+		func(dev);
+		return 1;
+	}
+	return 0;
+}
+
 #endif /* !CONFIG_PM_RUNTIME */
 
 static inline int pm_runtime_idle(struct device *dev)