Patchwork [v4,4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

login
register
mail settings
Submitter Dongsheng Wang
Date Sept. 24, 2013, 9:28 a.m.
Message ID <1380014925-23300-4-git-send-email-dongsheng.wang@freescale.com>
Download mbox | patch
Permalink /patch/277410/
State Superseded
Headers show

Comments

Dongsheng Wang - Sept. 24, 2013, 9:28 a.m.
From: Wang Dongsheng <dongsheng.wang@freescale.com>

Add a sys interface to enable/diable pw20 state or altivec idle, and
control the wait entry time.

Enable/Disable interface:
0, disable. 1, enable.
/sys/devices/system/cpu/cpuX/pw20_state
/sys/devices/system/cpu/cpuX/altivec_idle

Set wait time interface:(Nanosecond)
/sys/devices/system/cpu/cpuX/pw20_wait_time
/sys/devices/system/cpu/cpuX/altivec_idle_wait_time
Example: Base on TBfreq is 41MHZ.
1~47(ns): TB[63]
48~95(ns): TB[62]
96~191(ns): TB[61]
192~383(ns): TB[62]
384~767(ns): TB[60]
...

Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
---
*v4:
Move code from 85xx/common.c to kernel/sysfs.c.

Remove has_pw20_altivec_idle function.

Change wait "entry_bit" to wait time.

 arch/powerpc/kernel/sysfs.c | 291 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 291 insertions(+)
Bharat Bhushan - Sept. 25, 2013, 6:22 a.m.
> -----Original Message-----
> From: Linuxppc-dev [mailto:linuxppc-dev-
> bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On Behalf Of Dongsheng
> Wang
> Sent: Tuesday, September 24, 2013 2:59 PM
> To: Wood Scott-B07421
> Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle
> 
> From: Wang Dongsheng <dongsheng.wang@freescale.com>
> 
> Add a sys interface to enable/diable pw20 state or altivec idle, and
> control the wait entry time.
> 
> Enable/Disable interface:
> 0, disable. 1, enable.
> /sys/devices/system/cpu/cpuX/pw20_state
> /sys/devices/system/cpu/cpuX/altivec_idle
> 
> Set wait time interface:(Nanosecond)
> /sys/devices/system/cpu/cpuX/pw20_wait_time
> /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> Example: Base on TBfreq is 41MHZ.
> 1~47(ns): TB[63]
> 48~95(ns): TB[62]
> 96~191(ns): TB[61]
> 192~383(ns): TB[62]
> 384~767(ns): TB[60]
> ...
> 
> Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> ---
> *v4:
> Move code from 85xx/common.c to kernel/sysfs.c.
> 
> Remove has_pw20_altivec_idle function.
> 
> Change wait "entry_bit" to wait time.
> 
>  arch/powerpc/kernel/sysfs.c | 291 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 291 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 27a90b9..23fece6 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
> 
>  #endif /* CONFIG_PPC64 */
> 
> +#ifdef CONFIG_FSL_SOC
> +#define MAX_BIT		63
> +
> +static u64 pw20_wt;
> +static u64 altivec_idle_wt;
> +
> +static unsigned int get_idle_ticks_bit(u64 ns)
> +{
> +	u64 cycle;
> +
> +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);

When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then this will always be ns, which is not correct, no? 

> +	if (!cycle)
> +		return 0;
> +
> +	return ilog2(cycle);
> +}
> +
> +static void do_show_pwrmgtcr0(void *val)
> +{
> +	u32 *value = val;
> +
> +	*value = mfspr(SPRN_PWRMGTCR0);
> +}
> +
> +static ssize_t show_pw20_state(struct device *dev,
> +				struct device_attribute *attr, char *buf)
> +{
> +	u32 value;
> +	unsigned int cpu = dev->id;
> +
> +	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +
> +	value &= PWRMGTCR0_PW20_WAIT;
> +
> +	return sprintf(buf, "%u\n", value ? 1 : 0);
> +}
> +
> +static void do_store_pw20_state(void *val)
> +{
> +	u32 *value = val;
> +	u32 pw20_state;
> +
> +	pw20_state = mfspr(SPRN_PWRMGTCR0);
> +
> +	if (*value)
> +		pw20_state |= PWRMGTCR0_PW20_WAIT;
> +	else
> +		pw20_state &= ~PWRMGTCR0_PW20_WAIT;
> +
> +	mtspr(SPRN_PWRMGTCR0, pw20_state);
> +}
> +
> +static ssize_t store_pw20_state(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 value;
> +	unsigned int cpu = dev->id;
> +
> +	if (kstrtou32(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (value > 1)
> +		return -EINVAL;
> +
> +	smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
> +
> +	return count;
> +}
> +
> +static ssize_t show_pw20_wait_time(struct device *dev,
> +				struct device_attribute *attr, char *buf)
> +{
> +	u32 value;
> +	u64 tb_cycle;
> +	u64 time;
> +
> +	unsigned int cpu = dev->id;
> +
> +	if (!pw20_wt) {
> +		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +		value = (value & PWRMGTCR0_PW20_ENT) >>
> +					PWRMGTCR0_PW20_ENT_SHIFT;
> +
> +		tb_cycle = (1 << (MAX_BIT - value)) * 2;
> +		time = tb_cycle * (1000 / tb_ticks_per_usec) - 1;

Similar to above comment.

-Bharat

> +	} else {
> +		time = pw20_wt;
> +	}
> +
> +	return sprintf(buf, "%llu\n", time);
> +}
> +
> +static void set_pw20_wait_entry_bit(void *val)
> +{
> +	u32 *value = val;
> +	u32 pw20_idle;
> +
> +	pw20_idle = mfspr(SPRN_PWRMGTCR0);
> +
> +	/* Set Automatic PW20 Core Idle Count */
> +	/* clear count */
> +	pw20_idle &= ~PWRMGTCR0_PW20_ENT;
> +
> +	/* set count */
> +	pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
> +
> +	mtspr(SPRN_PWRMGTCR0, pw20_idle);
> +}
> +
> +static ssize_t store_pw20_wait_time(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 entry_bit;
> +	u64 value;
> +
> +	unsigned int cpu = dev->id;
> +
> +	if (kstrtou64(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (!value)
> +		return -EINVAL;
> +
> +	entry_bit = get_idle_ticks_bit(value);
> +	if (entry_bit > MAX_BIT)
> +		return -EINVAL;
> +
> +	pw20_wt = value;
> +	smp_call_function_single(cpu, set_pw20_wait_entry_bit,
> +				&entry_bit, 1);
> +
> +	return count;
> +}
> +
> +static ssize_t show_altivec_idle(struct device *dev,
> +				struct device_attribute *attr, char *buf)
> +{
> +	u32 value;
> +	unsigned int cpu = dev->id;
> +
> +	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +
> +	value &= PWRMGTCR0_AV_IDLE_PD_EN;
> +
> +	return sprintf(buf, "%u\n", value ? 1 : 0);
> +}
> +
> +static void do_store_altivec_idle(void *val)
> +{
> +	u32 *value = val;
> +	u32 altivec_idle;
> +
> +	altivec_idle = mfspr(SPRN_PWRMGTCR0);
> +
> +	if (*value)
> +		altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN;
> +	else
> +		altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN;
> +
> +	mtspr(SPRN_PWRMGTCR0, altivec_idle);
> +}
> +
> +static ssize_t store_altivec_idle(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 value;
> +	unsigned int cpu = dev->id;
> +
> +	if (kstrtou32(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (value > 1)
> +		return -EINVAL;
> +
> +	smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
> +
> +	return count;
> +}
> +
> +static ssize_t show_altivec_idle_wait_time(struct device *dev,
> +				struct device_attribute *attr, char *buf)
> +{
> +	u32 value;
> +	u64 tb_cycle;
> +	u64 time;
> +
> +	unsigned int cpu = dev->id;
> +
> +	if (!altivec_idle_wt) {
> +		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +		value = (value & PWRMGTCR0_AV_IDLE_CNT) >>
> +					PWRMGTCR0_AV_IDLE_CNT_SHIFT;
> +
> +		tb_cycle = (1 << (MAX_BIT - value)) * 2;
> +		time = tb_cycle * (1000 / tb_ticks_per_usec) - 1;
> +	} else {
> +		time = altivec_idle_wt;
> +	}
> +
> +	return sprintf(buf, "%llu\n", time);
> +}
> +
> +static void set_altivec_idle_wait_entry_bit(void *val)
> +{
> +	u32 *value = val;
> +	u32 altivec_idle;
> +
> +	altivec_idle = mfspr(SPRN_PWRMGTCR0);
> +
> +	/* Set Automatic AltiVec Idle Count */
> +	/* clear count */
> +	altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT;
> +
> +	/* set count */
> +	altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
> +
> +	mtspr(SPRN_PWRMGTCR0, altivec_idle);
> +}
> +
> +static ssize_t store_altivec_idle_wait_time(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 entry_bit;
> +	u64 value;
> +
> +	unsigned int cpu = dev->id;
> +
> +	if (kstrtou64(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (!value)
> +		return -EINVAL;
> +
> +	entry_bit = get_idle_ticks_bit(value);
> +	if (entry_bit > MAX_BIT)
> +		return -EINVAL;
> +
> +	altivec_idle_wt = value;
> +	smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
> +				&entry_bit, 1);
> +
> +	return count;
> +}
> +
> +/*
> + * Enable/Disable interface:
> + * 0, disable. 1, enable.
> + */
> +static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state);
> +static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle);
> +
> +/*
> + * Set wait time interface:(Nanosecond)
> + * Example: Base on TBfreq is 41MHZ.
> + * 1~47(ns): TB[63]
> + * 48~95(ns): TB[62]
> + * 96~191(ns): TB[61]
> + * 192~383(ns): TB[62]
> + * 384~767(ns): TB[60]
> + * ...
> + */
> +static DEVICE_ATTR(pw20_wait_time, 0600,
> +			show_pw20_wait_time,
> +			store_pw20_wait_time);
> +static DEVICE_ATTR(altivec_idle_wait_time, 0600,
> +			show_altivec_idle_wait_time,
> +			store_altivec_idle_wait_time);
> +#endif
> +
>  /*
>   * Enabling PMCs will slow partition context switch times so we only do
>   * it the first time we write to the PMCs.
> @@ -407,6 +680,15 @@ static void register_cpu_online(unsigned int cpu)
>  		device_create_file(s, &dev_attr_pir);
>  #endif /* CONFIG_PPC64 */
> 
> +#ifdef CONFIG_FSL_SOC
> +	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
> +		device_create_file(s, &dev_attr_pw20_state);
> +		device_create_file(s, &dev_attr_pw20_wait_time);
> +
> +		device_create_file(s, &dev_attr_altivec_idle);
> +		device_create_file(s, &dev_attr_altivec_idle_wait_time);
> +	}
> +#endif
>  	cacheinfo_cpu_online(cpu);
>  }
> 
> @@ -479,6 +761,15 @@ static void unregister_cpu_online(unsigned int cpu)
>  		device_remove_file(s, &dev_attr_pir);
>  #endif /* CONFIG_PPC64 */
> 
> +#ifdef CONFIG_FSL_SOC
> +	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
> +		device_remove_file(s, &dev_attr_pw20_state);
> +		device_remove_file(s, &dev_attr_pw20_wait_time);
> +
> +		device_remove_file(s, &dev_attr_altivec_idle);
> +		device_remove_file(s, &dev_attr_altivec_idle_wait_time);
> +	}
> +#endif
>  	cacheinfo_cpu_offline(cpu);
>  }
> 
> --
> 1.8.0
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Wang Dongsheng-B40534 - Sept. 25, 2013, 8:10 a.m.
> -----Original Message-----
> From: Bhushan Bharat-R65777
> Sent: Wednesday, September 25, 2013 2:23 PM
> To: Wang Dongsheng-B40534; Wood Scott-B07421
> Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> 
> 
> > -----Original Message-----
> > From: Linuxppc-dev [mailto:linuxppc-dev-
> > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On Behalf Of
> > bounces+Dongsheng
> > Wang
> > Sent: Tuesday, September 24, 2013 2:59 PM
> > To: Wood Scott-B07421
> > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> >
> > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> >
> > Add a sys interface to enable/diable pw20 state or altivec idle, and
> > control the wait entry time.
> >
> > Enable/Disable interface:
> > 0, disable. 1, enable.
> > /sys/devices/system/cpu/cpuX/pw20_state
> > /sys/devices/system/cpu/cpuX/altivec_idle
> >
> > Set wait time interface:(Nanosecond)
> > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > Example: Base on TBfreq is 41MHZ.
> > 1~47(ns): TB[63]
> > 48~95(ns): TB[62]
> > 96~191(ns): TB[61]
> > 192~383(ns): TB[62]
> > 384~767(ns): TB[60]
> > ...
> >
> > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > ---
> > *v4:
> > Move code from 85xx/common.c to kernel/sysfs.c.
> >
> > Remove has_pw20_altivec_idle function.
> >
> > Change wait "entry_bit" to wait time.
> >
> >  arch/powerpc/kernel/sysfs.c | 291
> > ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 291 insertions(+)
> >
> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > index 27a90b9..23fece6 100644
> > --- a/arch/powerpc/kernel/sysfs.c
> > +++ b/arch/powerpc/kernel/sysfs.c
> > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > setup_smt_snooze_delay);
> >
> >  #endif /* CONFIG_PPC64 */
> >
> > +#ifdef CONFIG_FSL_SOC
> > +#define MAX_BIT		63
> > +
> > +static u64 pw20_wt;
> > +static u64 altivec_idle_wt;
> > +
> > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > +	u64 cycle;
> > +
> > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> 
> When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then this will
> always be ns, which is not correct, no?
> 
"1000 / tb_ticks_per_usec" means nsec_ticks_per_tb

If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get tb_ticks_per_nsec.
This should be changed to "cycle = ns * tb_ticks_per_nsec;"

But at present we do not have such a platform that timebase frequency more than 1GHz. And I think it is not need to support such a situation. Because we have no environment to test it.

If later there will be more than 1GHZ platform at that time to add this support.

Thanks.

-dongsheng
Bharat Bhushan - Sept. 25, 2013, 8:33 a.m.
> -----Original Message-----
> From: Wang Dongsheng-B40534
> Sent: Wednesday, September 25, 2013 1:40 PM
> To: Bhushan Bharat-R65777; Wood Scott-B07421
> Cc: linuxppc-dev@lists.ozlabs.org
> Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and altivec
> idle
> 
> 
> 
> > -----Original Message-----
> > From: Bhushan Bharat-R65777
> > Sent: Wednesday, September 25, 2013 2:23 PM
> > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> >
> >
> >
> > > -----Original Message-----
> > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On Behalf Of
> > > bounces+Dongsheng
> > > Wang
> > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > To: Wood Scott-B07421
> > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > > altivec idle
> > >
> > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > >
> > > Add a sys interface to enable/diable pw20 state or altivec idle, and
> > > control the wait entry time.
> > >
> > > Enable/Disable interface:
> > > 0, disable. 1, enable.
> > > /sys/devices/system/cpu/cpuX/pw20_state
> > > /sys/devices/system/cpu/cpuX/altivec_idle
> > >
> > > Set wait time interface:(Nanosecond)
> > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > Example: Base on TBfreq is 41MHZ.
> > > 1~47(ns): TB[63]
> > > 48~95(ns): TB[62]
> > > 96~191(ns): TB[61]
> > > 192~383(ns): TB[62]
> > > 384~767(ns): TB[60]
> > > ...
> > >
> > > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > ---
> > > *v4:
> > > Move code from 85xx/common.c to kernel/sysfs.c.
> > >
> > > Remove has_pw20_altivec_idle function.
> > >
> > > Change wait "entry_bit" to wait time.
> > >
> > >  arch/powerpc/kernel/sysfs.c | 291
> > > ++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 291 insertions(+)
> > >
> > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6 100644
> > > --- a/arch/powerpc/kernel/sysfs.c
> > > +++ b/arch/powerpc/kernel/sysfs.c
> > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > setup_smt_snooze_delay);
> > >
> > >  #endif /* CONFIG_PPC64 */
> > >
> > > +#ifdef CONFIG_FSL_SOC
> > > +#define MAX_BIT		63
> > > +
> > > +static u64 pw20_wt;
> > > +static u64 altivec_idle_wt;
> > > +
> > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > +	u64 cycle;
> > > +
> > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> >
> > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then this
> > will always be ns, which is not correct, no?
> >
> "1000 / tb_ticks_per_usec" means nsec_ticks_per_tb
> 
> If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000" and to
> get tb_ticks_per_nsec.
> This should be changed to "cycle = ns * tb_ticks_per_nsec;"

Yes, we need to change this to two line.

> 
> But at present we do not have such a platform that timebase frequency more than
> 1GHz. And I think it is not need to support such a situation. Because we have no
> environment to test it.
> 
> If later there will be more than 1GHZ platform at that time to add this support.

Would like to leave it to Scott, but personally I think that if there is something simple to fix then it must be fixed rather than waiting for some error to happen and then fixing.

-Bharat

> 
> Thanks.
> 
> -dongsheng
Scott Wood - Sept. 25, 2013, 5:56 p.m.
On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> 
> > -----Original Message-----
> > From: Bhushan Bharat-R65777
> > Sent: Wednesday, September 25, 2013 2:23 PM
> > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> > 
> > 
> > 
> > > -----Original Message-----
> > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On Behalf Of
> > > bounces+Dongsheng
> > > Wang
> > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > To: Wood Scott-B07421
> > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > > altivec idle
> > >
> > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > >
> > > Add a sys interface to enable/diable pw20 state or altivec idle, and
> > > control the wait entry time.
> > >
> > > Enable/Disable interface:
> > > 0, disable. 1, enable.
> > > /sys/devices/system/cpu/cpuX/pw20_state
> > > /sys/devices/system/cpu/cpuX/altivec_idle
> > >
> > > Set wait time interface:(Nanosecond)
> > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > Example: Base on TBfreq is 41MHZ.
> > > 1~47(ns): TB[63]
> > > 48~95(ns): TB[62]
> > > 96~191(ns): TB[61]
> > > 192~383(ns): TB[62]
> > > 384~767(ns): TB[60]
> > > ...
> > >
> > > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > ---
> > > *v4:
> > > Move code from 85xx/common.c to kernel/sysfs.c.
> > >
> > > Remove has_pw20_altivec_idle function.
> > >
> > > Change wait "entry_bit" to wait time.
> > >
> > >  arch/powerpc/kernel/sysfs.c | 291
> > > ++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 291 insertions(+)
> > >
> > > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > > index 27a90b9..23fece6 100644
> > > --- a/arch/powerpc/kernel/sysfs.c
> > > +++ b/arch/powerpc/kernel/sysfs.c
> > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > setup_smt_snooze_delay);
> > >
> > >  #endif /* CONFIG_PPC64 */
> > >
> > > +#ifdef CONFIG_FSL_SOC
> > > +#define MAX_BIT		63
> > > +
> > > +static u64 pw20_wt;
> > > +static u64 altivec_idle_wt;
> > > +
> > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > +	u64 cycle;
> > > +
> > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > 
> > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then this will
> > always be ns, which is not correct, no?

Actually it'll be a divide by zero in that case.

> "1000 / tb_ticks_per_usec" means nsec_ticks_per_tb
> 
> If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get tb_ticks_per_nsec.
> This should be changed to "cycle = ns * tb_ticks_per_nsec;"
> 
> But at present we do not have such a platform that timebase frequency
> more than 1GHz. And I think it is not need to support such a situation.
> Because we have no environment to test it.

You can test it by hacking a wrong timebase frequency in and seeing what
the calculation does.

Or do something like this:

	if (ns >= 10000)
		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
	else
		cycle = div_u64((u64)ns * tb_ticks_per_usec, 1000);

...which can be tested just by varying ns.

> If later there will be more than 1GHZ platform at that time to add this support.

There almost certainly won't be timebases that run that fast, but divide
by zero is a rather nasty way of responding if such a thing does happen.

-Scott
Wang Dongsheng-B40534 - Sept. 26, 2013, 2:32 a.m.
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Thursday, September 26, 2013 1:57 AM
> To: Wang Dongsheng-B40534
> Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> dev@lists.ozlabs.org
> Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> >
> > > -----Original Message-----
> > > From: Bhushan Bharat-R65777
> > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > and altivec idle
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On Behalf
> > > > bounces+Of Dongsheng
> > > > Wang
> > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > To: Wood Scott-B07421
> > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > > > altivec idle
> > > >
> > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > >
> > > > Add a sys interface to enable/diable pw20 state or altivec idle,
> > > > and control the wait entry time.
> > > >
> > > > Enable/Disable interface:
> > > > 0, disable. 1, enable.
> > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > >
> > > > Set wait time interface:(Nanosecond)
> > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > Example: Base on TBfreq is 41MHZ.
> > > > 1~47(ns): TB[63]
> > > > 48~95(ns): TB[62]
> > > > 96~191(ns): TB[61]
> > > > 192~383(ns): TB[62]
> > > > 384~767(ns): TB[60]
> > > > ...
> > > >
> > > > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > ---
> > > > *v4:
> > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > >
> > > > Remove has_pw20_altivec_idle function.
> > > >
> > > > Change wait "entry_bit" to wait time.
> > > >
> > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 291 insertions(+)
> > > >
> > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6 100644
> > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > setup_smt_snooze_delay);
> > > >
> > > >  #endif /* CONFIG_PPC64 */
> > > >
> > > > +#ifdef CONFIG_FSL_SOC
> > > > +#define MAX_BIT		63
> > > > +
> > > > +static u64 pw20_wt;
> > > > +static u64 altivec_idle_wt;
> > > > +
> > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > +	u64 cycle;
> > > > +
> > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > >
> > > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then this
> > > will always be ns, which is not correct, no?
> 
> Actually it'll be a divide by zero in that case.
> 
tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq should be more than 1MHZ.

if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will be a divide by zero.
If this condition is established, I think kernel cannot work as a normal.

So I think we need to believe that the variable is not zero. And I think TB freq should not less than 1MHZ on PPC platform, because if TB freq less than 1MHZ, the precision time will become very poor and system response time will be slower.

> > "1000 / tb_ticks_per_usec" means nsec_ticks_per_tb
> >
> > If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000"
> and to get tb_ticks_per_nsec.
> > This should be changed to "cycle = ns * tb_ticks_per_nsec;"
> >
> > But at present we do not have such a platform that timebase frequency
> > more than 1GHz. And I think it is not need to support such a situation.
> > Because we have no environment to test it.
> 
> You can test it by hacking a wrong timebase frequency in and seeing what
> the calculation does.
> 
> Or do something like this:
> 
> 	if (ns >= 10000)
> 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
> 	else
> 		cycle = div_u64((u64)ns * tb_ticks_per_usec, 1000);
> 
We cannot do this, because if (ns+500) < 1000, we cannot get the entry bit, it'll always zero bit.

We must to use per_nsec_tb_ticks, like my code 1000 / tb_ticks_per_usec.

> ...which can be tested just by varying ns.
> 
> > If later there will be more than 1GHZ platform at that time to add this
> support.
> 
Yes, I agree this point. :)

-dongsheng

> There almost certainly won't be timebases that run that fast, but divide
> by zero is a rather nasty way of responding if such a thing does happen.
> 
> -Scott
>
Bharat Bhushan - Sept. 26, 2013, 4:23 a.m.
> -----Original Message-----
> From: Wang Dongsheng-B40534
> Sent: Thursday, September 26, 2013 8:02 AM
> To: Wood Scott-B07421
> Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
> Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and altivec
> idle
> 
> 
> 
> > -----Original Message-----
> > From: Wood Scott-B07421
> > Sent: Thursday, September 26, 2013 1:57 AM
> > To: Wang Dongsheng-B40534
> > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > dev@lists.ozlabs.org
> > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> >
> > On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> > >
> > > > -----Original Message-----
> > > > From: Bhushan Bharat-R65777
> > > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > and altivec idle
> > > >
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On Behalf
> > > > > bounces+Of Dongsheng
> > > > > Wang
> > > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > > To: Wood Scott-B07421
> > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > > and altivec idle
> > > > >
> > > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > >
> > > > > Add a sys interface to enable/diable pw20 state or altivec idle,
> > > > > and control the wait entry time.
> > > > >
> > > > > Enable/Disable interface:
> > > > > 0, disable. 1, enable.
> > > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > > >
> > > > > Set wait time interface:(Nanosecond)
> > > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > > Example: Base on TBfreq is 41MHZ.
> > > > > 1~47(ns): TB[63]
> > > > > 48~95(ns): TB[62]
> > > > > 96~191(ns): TB[61]
> > > > > 192~383(ns): TB[62]
> > > > > 384~767(ns): TB[60]
> > > > > ...
> > > > >
> > > > > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > ---
> > > > > *v4:
> > > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > > >
> > > > > Remove has_pw20_altivec_idle function.
> > > > >
> > > > > Change wait "entry_bit" to wait time.
> > > > >
> > > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 291 insertions(+)
> > > > >
> > > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6 100644
> > > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > > setup_smt_snooze_delay);
> > > > >
> > > > >  #endif /* CONFIG_PPC64 */
> > > > >
> > > > > +#ifdef CONFIG_FSL_SOC
> > > > > +#define MAX_BIT		63
> > > > > +
> > > > > +static u64 pw20_wt;
> > > > > +static u64 altivec_idle_wt;
> > > > > +
> > > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > > +	u64 cycle;
> > > > > +
> > > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > >
> > > > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then
> > > > this will always be ns, which is not correct, no?
> >
> > Actually it'll be a divide by zero in that case.
> >
> tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq should be more than
> 1MHZ.
> 
> if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will be a divide by
> zero.
> If this condition is established, I think kernel cannot work as a normal.
> 
> So I think we need to believe that the variable is not zero.

We do believe it is non-zero but greater than 1000 :)

> And I think TB freq
> should not less than 1MHZ on PPC platform, because if TB freq less than 1MHZ,
> the precision time will become very poor and system response time will be
> slower.

Not sure what you are describing here related to divide by zero we are mentioning.
You are talking about if tb_ticks_per_usec is ZERO and we are talking about if (1000/tb_ticks_per_usec) will be zero.

BTW, div_u64() handle the case where divider is zero.

> 
> > > "1000 / tb_ticks_per_usec" means nsec_ticks_per_tb
> > >
> > > If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000"
> > and to get tb_ticks_per_nsec.
> > > This should be changed to "cycle = ns * tb_ticks_per_nsec;"
> > >
> > > But at present we do not have such a platform that timebase
> > > frequency more than 1GHz. And I think it is not need to support such a
> situation.
> > > Because we have no environment to test it.
> >
> > You can test it by hacking a wrong timebase frequency in and seeing
> > what the calculation does.
> >
> > Or do something like this:
> >
> > 	if (ns >= 10000)
	^^^

> > 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
> > 	else
> > 		cycle = div_u64((u64)ns * tb_ticks_per_usec, 1000);
> >
> We cannot do this, because if (ns+500) < 1000, we cannot get the entry bit,
> it'll always zero bit.

There is a if condition of ns >= 10000, so ns+500 can not be less than 1000.

> 
> We must to use per_nsec_tb_ticks, like my code 1000 / tb_ticks_per_usec.
> 
> > ...which can be tested just by varying ns.
> >
> > > If later there will be more than 1GHZ platform at that time to add
> > > this
> > support.
> >
> Yes, I agree this point. :)

One should agree with himself :)

-Bharat

> 
> -dongsheng
> 
> > There almost certainly won't be timebases that run that fast, but
> > divide by zero is a rather nasty way of responding if such a thing does
> happen.
> >
> > -Scott
> >
Wang Dongsheng-B40534 - Sept. 26, 2013, 6:18 a.m.
> -----Original Message-----
> From: Bhushan Bharat-R65777
> Sent: Thursday, September 26, 2013 12:23 PM
> To: Wang Dongsheng-B40534; Wood Scott-B07421
> Cc: linuxppc-dev@lists.ozlabs.org
> Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> 
> 
> > -----Original Message-----
> > From: Wang Dongsheng-B40534
> > Sent: Thursday, September 26, 2013 8:02 AM
> > To: Wood Scott-B07421
> > Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
> > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> >
> >
> >
> > > -----Original Message-----
> > > From: Wood Scott-B07421
> > > Sent: Thursday, September 26, 2013 1:57 AM
> > > To: Wang Dongsheng-B40534
> > > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > > dev@lists.ozlabs.org
> > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > and altivec idle
> > >
> > > On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: Bhushan Bharat-R65777
> > > > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > state and altivec idle
> > > > >
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On
> > > > > > bounces+Behalf Of Dongsheng
> > > > > > Wang
> > > > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > > > To: Wood Scott-B07421
> > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > > > and altivec idle
> > > > > >
> > > > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > >
> > > > > > Add a sys interface to enable/diable pw20 state or altivec
> > > > > > idle, and control the wait entry time.
> > > > > >
> > > > > > Enable/Disable interface:
> > > > > > 0, disable. 1, enable.
> > > > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > > > >
> > > > > > Set wait time interface:(Nanosecond)
> > > > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > > > Example: Base on TBfreq is 41MHZ.
> > > > > > 1~47(ns): TB[63]
> > > > > > 48~95(ns): TB[62]
> > > > > > 96~191(ns): TB[61]
> > > > > > 192~383(ns): TB[62]
> > > > > > 384~767(ns): TB[60]
> > > > > > ...
> > > > > >
> > > > > > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > > ---
> > > > > > *v4:
> > > > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > > > >
> > > > > > Remove has_pw20_altivec_idle function.
> > > > > >
> > > > > > Change wait "entry_bit" to wait time.
> > > > > >
> > > > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > > > >  1 file changed, 291 insertions(+)
> > > > > >
> > > > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6 100644
> > > > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > > > setup_smt_snooze_delay);
> > > > > >
> > > > > >  #endif /* CONFIG_PPC64 */
> > > > > >
> > > > > > +#ifdef CONFIG_FSL_SOC
> > > > > > +#define MAX_BIT		63
> > > > > > +
> > > > > > +static u64 pw20_wt;
> > > > > > +static u64 altivec_idle_wt;
> > > > > > +
> > > > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > > > +	u64 cycle;
> > > > > > +
> > > > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > > >
> > > > > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then
> > > > > this will always be ns, which is not correct, no?
> > >
> > > Actually it'll be a divide by zero in that case.
> > >
> > tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq should be
> > more than 1MHZ.
> >
> > if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will be a
> > divide by zero.
> > If this condition is established, I think kernel cannot work as a
> normal.
> >
> > So I think we need to believe that the variable is not zero.
> 
> We do believe it is non-zero but greater than 1000 :)
> 
> > And I think TB freq
> > should not less than 1MHZ on PPC platform, because if TB freq less
> > than 1MHZ, the precision time will become very poor and system
> > response time will be slower.
> 
> Not sure what you are describing here related to divide by zero we are
> mentioning.
> You are talking about if tb_ticks_per_usec is ZERO and we are talking
> about if (1000/tb_ticks_per_usec) will be zero.
> 
> BTW, div_u64() handle the case where divider is zero.
> 
cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
For this, I think we were discussing the two issues:

1. Scott talking about, when the tb_ticks_per_usec is a zero.
This situation is about tb_ticks_per_usec, and possible to zero. So I answered Scott.
If I misunderstand scott, please ignore it. :)

2. You are talking about 1000/tb_ticks_per_usec is a zero.
This situation is about TB freq > 1GHZ.

I will fix this issue. Like I said before,
"If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get tb_ticks_per_nsec.
This should be changed to "cycle = ns * tb_ticks_per_nsec;""

#define TB_FREQ_1GHZ	1000

If (tb_ticks_per_usec > TB_FREQ_1GHZ)
	cycle = ns * (tb_ticks_per_usec / 1000);
else
	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);

how about this? :)

> >
> > > > "1000 / tb_ticks_per_usec" means nsec_ticks_per_tb
> > > >
> > > > If timebase frequency > 1GHz, this should be "tb_ticks_per_usec /
> 1000"
> > > and to get tb_ticks_per_nsec.
> > > > This should be changed to "cycle = ns * tb_ticks_per_nsec;"
> > > >
> > > > But at present we do not have such a platform that timebase
> > > > frequency more than 1GHz. And I think it is not need to support
> > > > such a
> > situation.
> > > > Because we have no environment to test it.
> > >
> > > You can test it by hacking a wrong timebase frequency in and seeing
> > > what the calculation does.
> > >
> > > Or do something like this:
> > >
> > > 	if (ns >= 10000)
> 	^^^
> 
> > > 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
> > > 	else
> > > 		cycle = div_u64((u64)ns * tb_ticks_per_usec, 1000);
> > >
> > We cannot do this, because if (ns+500) < 1000, we cannot get the entry
> > bit, it'll always zero bit.
> 
> There is a if condition of ns >= 10000, so ns+500 can not be less than
> 1000.
> 
Sorry about that, I misread it. :)

> >
> > We must to use per_nsec_tb_ticks, like my code 1000 / tb_ticks_per_usec.
> >
> > > ...which can be tested just by varying ns.
> > >
> > > > If later there will be more than 1GHZ platform at that time to add
> > > > this
> > > support.
> > >
> > Yes, I agree this point. :)
> 
> One should agree with himself :)
> 
> -Bharat
> 
> >
> > -dongsheng
> >
> > > There almost certainly won't be timebases that run that fast, but
> > > divide by zero is a rather nasty way of responding if such a thing
> > > does
> > happen.
> > >
> > > -Scott
> > >
Scott Wood - Sept. 26, 2013, 9:37 p.m.
On Thu, 2013-09-26 at 01:18 -0500, Wang Dongsheng-B40534 wrote:
> 
> > -----Original Message-----
> > From: Bhushan Bharat-R65777
> > Sent: Thursday, September 26, 2013 12:23 PM
> > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> > 
> > 
> > 
> > > -----Original Message-----
> > > From: Wang Dongsheng-B40534
> > > Sent: Thursday, September 26, 2013 8:02 AM
> > > To: Wood Scott-B07421
> > > Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
> > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > > altivec idle
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Wood Scott-B07421
> > > > Sent: Thursday, September 26, 2013 1:57 AM
> > > > To: Wang Dongsheng-B40534
> > > > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > > > dev@lists.ozlabs.org
> > > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > and altivec idle
> > > >
> > > > On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Bhushan Bharat-R65777
> > > > > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > state and altivec idle
> > > > > >
> > > > > >
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On
> > > > > > > bounces+Behalf Of Dongsheng
> > > > > > > Wang
> > > > > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > > > > To: Wood Scott-B07421
> > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > > > > and altivec idle
> > > > > > >
> > > > > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > > >
> > > > > > > Add a sys interface to enable/diable pw20 state or altivec
> > > > > > > idle, and control the wait entry time.
> > > > > > >
> > > > > > > Enable/Disable interface:
> > > > > > > 0, disable. 1, enable.
> > > > > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > > > > >
> > > > > > > Set wait time interface:(Nanosecond)
> > > > > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > > > > Example: Base on TBfreq is 41MHZ.
> > > > > > > 1~47(ns): TB[63]
> > > > > > > 48~95(ns): TB[62]
> > > > > > > 96~191(ns): TB[61]
> > > > > > > 192~383(ns): TB[62]
> > > > > > > 384~767(ns): TB[60]
> > > > > > > ...
> > > > > > >
> > > > > > > Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > > > ---
> > > > > > > *v4:
> > > > > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > > > > >
> > > > > > > Remove has_pw20_altivec_idle function.
> > > > > > >
> > > > > > > Change wait "entry_bit" to wait time.
> > > > > > >
> > > > > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > > > > >  1 file changed, 291 insertions(+)
> > > > > > >
> > > > > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6 100644
> > > > > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > > > > setup_smt_snooze_delay);
> > > > > > >
> > > > > > >  #endif /* CONFIG_PPC64 */
> > > > > > >
> > > > > > > +#ifdef CONFIG_FSL_SOC
> > > > > > > +#define MAX_BIT		63
> > > > > > > +
> > > > > > > +static u64 pw20_wt;
> > > > > > > +static u64 altivec_idle_wt;
> > > > > > > +
> > > > > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > > > > +	u64 cycle;
> > > > > > > +
> > > > > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > > > >
> > > > > > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz) then
> > > > > > this will always be ns, which is not correct, no?
> > > >
> > > > Actually it'll be a divide by zero in that case.
> > > >
> > > tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq should be
> > > more than 1MHZ.
> > >
> > > if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will be a
> > > divide by zero.
> > > If this condition is established, I think kernel cannot work as a
> > normal.
> > >
> > > So I think we need to believe that the variable is not zero.
> > 
> > We do believe it is non-zero but greater than 1000 :)
> > 
> > > And I think TB freq
> > > should not less than 1MHZ on PPC platform, because if TB freq less
> > > than 1MHZ, the precision time will become very poor and system
> > > response time will be slower.
> > 
> > Not sure what you are describing here related to divide by zero we are
> > mentioning.
> > You are talking about if tb_ticks_per_usec is ZERO and we are talking
> > about if (1000/tb_ticks_per_usec) will be zero.
> > 
> > BTW, div_u64() handle the case where divider is zero.

How?  When I checked yesterday it looked like div_u64() mapped to a
hardware division on 64-bit targets.  In any case it's not good to rely
on such behavior.

> cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> For this, I think we were discussing the two issues:
> 
> 1. Scott talking about, when the tb_ticks_per_usec is a zero.

No I wasn't.  I was talking about when tb_ticks_per_usec > 1000, and
thus "1000 / tb_ticks_per_usec" is zero.  You said that the result would
be ns in that case.

> 2. You are talking about 1000/tb_ticks_per_usec is a zero.
> This situation is about TB freq > 1GHZ.
> 
> I will fix this issue. Like I said before,
> "If timebase frequency > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get tb_ticks_per_nsec.
> This should be changed to "cycle = ns * tb_ticks_per_nsec;""
> 
> #define TB_FREQ_1GHZ	1000
> 
> If (tb_ticks_per_usec > TB_FREQ_1GHZ)
> 	cycle = ns * (tb_ticks_per_usec / 1000);
> else
> 	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> 
> how about this? :)

I suggested an alternative to satisfy your complaint that it's hard to
test one of those if/else branches.

Plus, your version will be quite inaccurate if (e.g.) tb_ticks_per_usec
is 501, or 1999.

-Scott
Wang Dongsheng-B40534 - Sept. 27, 2013, 3:34 a.m.
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Friday, September 27, 2013 5:37 AM
> To: Wang Dongsheng-B40534
> Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> dev@lists.ozlabs.org
> Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> On Thu, 2013-09-26 at 01:18 -0500, Wang Dongsheng-B40534 wrote:
> >
> > > -----Original Message-----
> > > From: Bhushan Bharat-R65777
> > > Sent: Thursday, September 26, 2013 12:23 PM
> > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > Cc: linuxppc-dev@lists.ozlabs.org
> > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > and altivec idle
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Wang Dongsheng-B40534
> > > > Sent: Thursday, September 26, 2013 8:02 AM
> > > > To: Wood Scott-B07421
> > > > Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
> > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > and altivec idle
> > > >
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Wood Scott-B07421
> > > > > Sent: Thursday, September 26, 2013 1:57 AM
> > > > > To: Wang Dongsheng-B40534
> > > > > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > > > > dev@lists.ozlabs.org
> > > > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > state and altivec idle
> > > > >
> > > > > On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Bhushan Bharat-R65777
> > > > > > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > > > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > > state and altivec idle
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > > > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On
> > > > > > > > bounces+Behalf Of Dongsheng
> > > > > > > > Wang
> > > > > > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > > > > > To: Wood Scott-B07421
> > > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > > > state and altivec idle
> > > > > > > >
> > > > > > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > > > >
> > > > > > > > Add a sys interface to enable/diable pw20 state or altivec
> > > > > > > > idle, and control the wait entry time.
> > > > > > > >
> > > > > > > > Enable/Disable interface:
> > > > > > > > 0, disable. 1, enable.
> > > > > > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > > > > > >
> > > > > > > > Set wait time interface:(Nanosecond)
> > > > > > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > > > > > Example: Base on TBfreq is 41MHZ.
> > > > > > > > 1~47(ns): TB[63]
> > > > > > > > 48~95(ns): TB[62]
> > > > > > > > 96~191(ns): TB[61]
> > > > > > > > 192~383(ns): TB[62]
> > > > > > > > 384~767(ns): TB[60]
> > > > > > > > ...
> > > > > > > >
> > > > > > > > Signed-off-by: Wang Dongsheng
> > > > > > > > <dongsheng.wang@freescale.com>
> > > > > > > > ---
> > > > > > > > *v4:
> > > > > > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > > > > > >
> > > > > > > > Remove has_pw20_altivec_idle function.
> > > > > > > >
> > > > > > > > Change wait "entry_bit" to wait time.
> > > > > > > >
> > > > > > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > > > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > > > > > >  1 file changed, 291 insertions(+)
> > > > > > > >
> > > > > > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > > > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6
> > > > > > > > 100644
> > > > > > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > > > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > > > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > > > > > setup_smt_snooze_delay);
> > > > > > > >
> > > > > > > >  #endif /* CONFIG_PPC64 */
> > > > > > > >
> > > > > > > > +#ifdef CONFIG_FSL_SOC
> > > > > > > > +#define MAX_BIT		63
> > > > > > > > +
> > > > > > > > +static u64 pw20_wt;
> > > > > > > > +static u64 altivec_idle_wt;
> > > > > > > > +
> > > > > > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > > > > > +	u64 cycle;
> > > > > > > > +
> > > > > > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > > > > >
> > > > > > > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz)
> > > > > > > then this will always be ns, which is not correct, no?
> > > > >
> > > > > Actually it'll be a divide by zero in that case.
> > > > >
> > > > tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq should be
> > > > more than 1MHZ.
> > > >
> > > > if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will be a
> > > > divide by zero.
> > > > If this condition is established, I think kernel cannot work as a
> > > normal.
> > > >
> > > > So I think we need to believe that the variable is not zero.
> > >
> > > We do believe it is non-zero but greater than 1000 :)
> > >
> > > > And I think TB freq
> > > > should not less than 1MHZ on PPC platform, because if TB freq less
> > > > than 1MHZ, the precision time will become very poor and system
> > > > response time will be slower.
> > >
> > > Not sure what you are describing here related to divide by zero we
> > > are mentioning.
> > > You are talking about if tb_ticks_per_usec is ZERO and we are
> > > talking about if (1000/tb_ticks_per_usec) will be zero.
> > >
> > > BTW, div_u64() handle the case where divider is zero.
> 
> How?  When I checked yesterday it looked like div_u64() mapped to a
> hardware division on 64-bit targets.  In any case it's not good to rely
> on such behavior.
> 
> > cycle = div_u64(ns, 1000 / tb_ticks_per_usec); For this, I think we
> > were discussing the two issues:
> >
> > 1. Scott talking about, when the tb_ticks_per_usec is a zero.
> 
> No I wasn't.  I was talking about when tb_ticks_per_usec > 1000, and thus
> "1000 / tb_ticks_per_usec" is zero.  You said that the result would be ns
> in that case.
> 
> > 2. You are talking about 1000/tb_ticks_per_usec is a zero.
> > This situation is about TB freq > 1GHZ.
> >
> > I will fix this issue. Like I said before, "If timebase frequency >
> > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get
> tb_ticks_per_nsec.
> > This should be changed to "cycle = ns * tb_ticks_per_nsec;""
> >
> > #define TB_FREQ_1GHZ	1000
> >
> > If (tb_ticks_per_usec > TB_FREQ_1GHZ)
> > 	cycle = ns * (tb_ticks_per_usec / 1000); else
> > 	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> >
> > how about this? :)
> 
> I suggested an alternative to satisfy your complaint that it's hard to
> test one of those if/else branches.
> 
> Plus, your version will be quite inaccurate if (e.g.) tb_ticks_per_usec
> is 501, or 1999.
> 
cycle = div_u64(ns * tb_ticks_per_usec, 1000); It's look good.
But why we need:
	if (ns >= 10000)
		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
?

I think "cycle = div_u64(ns * tb_ticks_per_usec, 1000)" is good enough. :)

-dongsheng

> -Scott
>
Scott Wood - Sept. 27, 2013, 9:33 p.m.
On Thu, 2013-09-26 at 22:34 -0500, Wang Dongsheng-B40534 wrote:
> 
> > -----Original Message-----
> > From: Wood Scott-B07421
> > Sent: Friday, September 27, 2013 5:37 AM
> > To: Wang Dongsheng-B40534
> > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > dev@lists.ozlabs.org
> > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> > 
> > On Thu, 2013-09-26 at 01:18 -0500, Wang Dongsheng-B40534 wrote:
> > >
> > > > -----Original Message-----
> > > > From: Bhushan Bharat-R65777
> > > > Sent: Thursday, September 26, 2013 12:23 PM
> > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > Cc: linuxppc-dev@lists.ozlabs.org
> > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > and altivec idle
> > > >
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Wang Dongsheng-B40534
> > > > > Sent: Thursday, September 26, 2013 8:02 AM
> > > > > To: Wood Scott-B07421
> > > > > Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
> > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > > > and altivec idle
> > > > >
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Wood Scott-B07421
> > > > > > Sent: Thursday, September 26, 2013 1:57 AM
> > > > > > To: Wang Dongsheng-B40534
> > > > > > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > > > > > dev@lists.ozlabs.org
> > > > > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > state and altivec idle
> > > > > >
> > > > > > On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534 wrote:
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Bhushan Bharat-R65777
> > > > > > > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > > > > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > > > state and altivec idle
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > > > > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org] On
> > > > > > > > > bounces+Behalf Of Dongsheng
> > > > > > > > > Wang
> > > > > > > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > > > > > > To: Wood Scott-B07421
> > > > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > > > > state and altivec idle
> > > > > > > > >
> > > > > > > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > > > > >
> > > > > > > > > Add a sys interface to enable/diable pw20 state or altivec
> > > > > > > > > idle, and control the wait entry time.
> > > > > > > > >
> > > > > > > > > Enable/Disable interface:
> > > > > > > > > 0, disable. 1, enable.
> > > > > > > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > > > > > > >
> > > > > > > > > Set wait time interface:(Nanosecond)
> > > > > > > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > > > > > > Example: Base on TBfreq is 41MHZ.
> > > > > > > > > 1~47(ns): TB[63]
> > > > > > > > > 48~95(ns): TB[62]
> > > > > > > > > 96~191(ns): TB[61]
> > > > > > > > > 192~383(ns): TB[62]
> > > > > > > > > 384~767(ns): TB[60]
> > > > > > > > > ...
> > > > > > > > >
> > > > > > > > > Signed-off-by: Wang Dongsheng
> > > > > > > > > <dongsheng.wang@freescale.com>
> > > > > > > > > ---
> > > > > > > > > *v4:
> > > > > > > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > > > > > > >
> > > > > > > > > Remove has_pw20_altivec_idle function.
> > > > > > > > >
> > > > > > > > > Change wait "entry_bit" to wait time.
> > > > > > > > >
> > > > > > > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > > > > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > > > > > > >  1 file changed, 291 insertions(+)
> > > > > > > > >
> > > > > > > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > > > > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6
> > > > > > > > > 100644
> > > > > > > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > > > > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > > > > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > > > > > > setup_smt_snooze_delay);
> > > > > > > > >
> > > > > > > > >  #endif /* CONFIG_PPC64 */
> > > > > > > > >
> > > > > > > > > +#ifdef CONFIG_FSL_SOC
> > > > > > > > > +#define MAX_BIT		63
> > > > > > > > > +
> > > > > > > > > +static u64 pw20_wt;
> > > > > > > > > +static u64 altivec_idle_wt;
> > > > > > > > > +
> > > > > > > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > > > > > > +	u64 cycle;
> > > > > > > > > +
> > > > > > > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > > > > > >
> > > > > > > > When tb_ticks_per_usec  > 1000 (timebase frequency > 1GHz)
> > > > > > > > then this will always be ns, which is not correct, no?
> > > > > >
> > > > > > Actually it'll be a divide by zero in that case.
> > > > > >
> > > > > tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq should be
> > > > > more than 1MHZ.
> > > > >
> > > > > if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will be a
> > > > > divide by zero.
> > > > > If this condition is established, I think kernel cannot work as a
> > > > normal.
> > > > >
> > > > > So I think we need to believe that the variable is not zero.
> > > >
> > > > We do believe it is non-zero but greater than 1000 :)
> > > >
> > > > > And I think TB freq
> > > > > should not less than 1MHZ on PPC platform, because if TB freq less
> > > > > than 1MHZ, the precision time will become very poor and system
> > > > > response time will be slower.
> > > >
> > > > Not sure what you are describing here related to divide by zero we
> > > > are mentioning.
> > > > You are talking about if tb_ticks_per_usec is ZERO and we are
> > > > talking about if (1000/tb_ticks_per_usec) will be zero.
> > > >
> > > > BTW, div_u64() handle the case where divider is zero.
> > 
> > How?  When I checked yesterday it looked like div_u64() mapped to a
> > hardware division on 64-bit targets.  In any case it's not good to rely
> > on such behavior.
> > 
> > > cycle = div_u64(ns, 1000 / tb_ticks_per_usec); For this, I think we
> > > were discussing the two issues:
> > >
> > > 1. Scott talking about, when the tb_ticks_per_usec is a zero.
> > 
> > No I wasn't.  I was talking about when tb_ticks_per_usec > 1000, and thus
> > "1000 / tb_ticks_per_usec" is zero.  You said that the result would be ns
> > in that case.
> > 
> > > 2. You are talking about 1000/tb_ticks_per_usec is a zero.
> > > This situation is about TB freq > 1GHZ.
> > >
> > > I will fix this issue. Like I said before, "If timebase frequency >
> > > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get
> > tb_ticks_per_nsec.
> > > This should be changed to "cycle = ns * tb_ticks_per_nsec;""
> > >
> > > #define TB_FREQ_1GHZ	1000
> > >
> > > If (tb_ticks_per_usec > TB_FREQ_1GHZ)
> > > 	cycle = ns * (tb_ticks_per_usec / 1000); else
> > > 	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > >
> > > how about this? :)
> > 
> > I suggested an alternative to satisfy your complaint that it's hard to
> > test one of those if/else branches.
> > 
> > Plus, your version will be quite inaccurate if (e.g.) tb_ticks_per_usec
> > is 501, or 1999.
> > 
> cycle = div_u64(ns * tb_ticks_per_usec, 1000); It's look good.
> But why we need:
> 	if (ns >= 10000)
> 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
> ?
> 
> I think "cycle = div_u64(ns * tb_ticks_per_usec, 1000)" is good enough. :)

It's to deal with overflow if a very large value of ns is provided
(and/or tb_ticks_per_usec is larger than we expect).

-Scott
Wang Dongsheng-B40534 - Sept. 29, 2013, 6:57 a.m.
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Saturday, September 28, 2013 5:33 AM
> To: Wang Dongsheng-B40534
> Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> dev@lists.ozlabs.org
> Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> On Thu, 2013-09-26 at 22:34 -0500, Wang Dongsheng-B40534 wrote:
> >
> > > -----Original Message-----
> > > From: Wood Scott-B07421
> > > Sent: Friday, September 27, 2013 5:37 AM
> > > To: Wang Dongsheng-B40534
> > > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > > dev@lists.ozlabs.org
> > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > and altivec idle
> > >
> > > On Thu, 2013-09-26 at 01:18 -0500, Wang Dongsheng-B40534 wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: Bhushan Bharat-R65777
> > > > > Sent: Thursday, September 26, 2013 12:23 PM
> > > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > > Cc: linuxppc-dev@lists.ozlabs.org
> > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > state and altivec idle
> > > > >
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Wang Dongsheng-B40534
> > > > > > Sent: Thursday, September 26, 2013 8:02 AM
> > > > > > To: Wood Scott-B07421
> > > > > > Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
> > > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > state and altivec idle
> > > > > >
> > > > > >
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Wood Scott-B07421
> > > > > > > Sent: Thursday, September 26, 2013 1:57 AM
> > > > > > > To: Wang Dongsheng-B40534
> > > > > > > Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
> > > > > > > dev@lists.ozlabs.org
> > > > > > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20
> > > > > > > state and altivec idle
> > > > > > >
> > > > > > > On Wed, 2013-09-25 at 03:10 -0500, Wang Dongsheng-B40534
> wrote:
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Bhushan Bharat-R65777
> > > > > > > > > Sent: Wednesday, September 25, 2013 2:23 PM
> > > > > > > > > To: Wang Dongsheng-B40534; Wood Scott-B07421
> > > > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > > > > > > > Subject: RE: [PATCH v4 4/4] powerpc/85xx: add sysfs for
> > > > > > > > > pw20 state and altivec idle
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > -----Original Message-----
> > > > > > > > > > From: Linuxppc-dev [mailto:linuxppc-dev-
> > > > > > > > > > bounces+bharat.bhushan=freescale.com@lists.ozlabs.org]
> > > > > > > > > > bounces+On Behalf Of Dongsheng
> > > > > > > > > > Wang
> > > > > > > > > > Sent: Tuesday, September 24, 2013 2:59 PM
> > > > > > > > > > To: Wood Scott-B07421
> > > > > > > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wang
> > > > > > > > > > Dongsheng-B40534
> > > > > > > > > > Subject: [PATCH v4 4/4] powerpc/85xx: add sysfs for
> > > > > > > > > > pw20 state and altivec idle
> > > > > > > > > >
> > > > > > > > > > From: Wang Dongsheng <dongsheng.wang@freescale.com>
> > > > > > > > > >
> > > > > > > > > > Add a sys interface to enable/diable pw20 state or
> > > > > > > > > > altivec idle, and control the wait entry time.
> > > > > > > > > >
> > > > > > > > > > Enable/Disable interface:
> > > > > > > > > > 0, disable. 1, enable.
> > > > > > > > > > /sys/devices/system/cpu/cpuX/pw20_state
> > > > > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle
> > > > > > > > > >
> > > > > > > > > > Set wait time interface:(Nanosecond)
> > > > > > > > > > /sys/devices/system/cpu/cpuX/pw20_wait_time
> > > > > > > > > > /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> > > > > > > > > > Example: Base on TBfreq is 41MHZ.
> > > > > > > > > > 1~47(ns): TB[63]
> > > > > > > > > > 48~95(ns): TB[62]
> > > > > > > > > > 96~191(ns): TB[61]
> > > > > > > > > > 192~383(ns): TB[62]
> > > > > > > > > > 384~767(ns): TB[60]
> > > > > > > > > > ...
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Wang Dongsheng
> > > > > > > > > > <dongsheng.wang@freescale.com>
> > > > > > > > > > ---
> > > > > > > > > > *v4:
> > > > > > > > > > Move code from 85xx/common.c to kernel/sysfs.c.
> > > > > > > > > >
> > > > > > > > > > Remove has_pw20_altivec_idle function.
> > > > > > > > > >
> > > > > > > > > > Change wait "entry_bit" to wait time.
> > > > > > > > > >
> > > > > > > > > >  arch/powerpc/kernel/sysfs.c | 291
> > > > > > > > > > ++++++++++++++++++++++++++++++++++++++++++++
> > > > > > > > > >  1 file changed, 291 insertions(+)
> > > > > > > > > >
> > > > > > > > > > diff --git a/arch/powerpc/kernel/sysfs.c
> > > > > > > > > > b/arch/powerpc/kernel/sysfs.c index 27a90b9..23fece6
> > > > > > > > > > 100644
> > > > > > > > > > --- a/arch/powerpc/kernel/sysfs.c
> > > > > > > > > > +++ b/arch/powerpc/kernel/sysfs.c
> > > > > > > > > > @@ -85,6 +85,279 @@ __setup("smt-snooze-delay=",
> > > > > > > > > > setup_smt_snooze_delay);
> > > > > > > > > >
> > > > > > > > > >  #endif /* CONFIG_PPC64 */
> > > > > > > > > >
> > > > > > > > > > +#ifdef CONFIG_FSL_SOC
> > > > > > > > > > +#define MAX_BIT		63
> > > > > > > > > > +
> > > > > > > > > > +static u64 pw20_wt;
> > > > > > > > > > +static u64 altivec_idle_wt;
> > > > > > > > > > +
> > > > > > > > > > +static unsigned int get_idle_ticks_bit(u64 ns) {
> > > > > > > > > > +	u64 cycle;
> > > > > > > > > > +
> > > > > > > > > > +	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > > > > > > >
> > > > > > > > > When tb_ticks_per_usec  > 1000 (timebase frequency >
> > > > > > > > > 1GHz) then this will always be ns, which is not correct,
> no?
> > > > > > >
> > > > > > > Actually it'll be a divide by zero in that case.
> > > > > > >
> > > > > > tb_ticks_per_usec = ppc_tb_freq / 1000000; Means TB freq
> > > > > > should be more than 1MHZ.
> > > > > >
> > > > > > if ppc_tb_freq less than 1000000, the tb_ticks_per_usec will
> > > > > > be a divide by zero.
> > > > > > If this condition is established, I think kernel cannot work
> > > > > > as a
> > > > > normal.
> > > > > >
> > > > > > So I think we need to believe that the variable is not zero.
> > > > >
> > > > > We do believe it is non-zero but greater than 1000 :)
> > > > >
> > > > > > And I think TB freq
> > > > > > should not less than 1MHZ on PPC platform, because if TB freq
> > > > > > less than 1MHZ, the precision time will become very poor and
> > > > > > system response time will be slower.
> > > > >
> > > > > Not sure what you are describing here related to divide by zero
> > > > > we are mentioning.
> > > > > You are talking about if tb_ticks_per_usec is ZERO and we are
> > > > > talking about if (1000/tb_ticks_per_usec) will be zero.
> > > > >
> > > > > BTW, div_u64() handle the case where divider is zero.
> > >
> > > How?  When I checked yesterday it looked like div_u64() mapped to a
> > > hardware division on 64-bit targets.  In any case it's not good to
> > > rely on such behavior.
> > >
> > > > cycle = div_u64(ns, 1000 / tb_ticks_per_usec); For this, I think
> > > > we were discussing the two issues:
> > > >
> > > > 1. Scott talking about, when the tb_ticks_per_usec is a zero.
> > >
> > > No I wasn't.  I was talking about when tb_ticks_per_usec > 1000, and
> > > thus
> > > "1000 / tb_ticks_per_usec" is zero.  You said that the result would
> > > be ns in that case.
> > >
> > > > 2. You are talking about 1000/tb_ticks_per_usec is a zero.
> > > > This situation is about TB freq > 1GHZ.
> > > >
> > > > I will fix this issue. Like I said before, "If timebase frequency
> > > > > 1GHz, this should be "tb_ticks_per_usec / 1000" and to get
> > > tb_ticks_per_nsec.
> > > > This should be changed to "cycle = ns * tb_ticks_per_nsec;""
> > > >
> > > > #define TB_FREQ_1GHZ	1000
> > > >
> > > > If (tb_ticks_per_usec > TB_FREQ_1GHZ)
> > > > 	cycle = ns * (tb_ticks_per_usec / 1000); else
> > > > 	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
> > > >
> > > > how about this? :)
> > >
> > > I suggested an alternative to satisfy your complaint that it's hard
> > > to test one of those if/else branches.
> > >
> > > Plus, your version will be quite inaccurate if (e.g.)
> > > tb_ticks_per_usec is 501, or 1999.
> > >
> > cycle = div_u64(ns * tb_ticks_per_usec, 1000); It's look good.
> > But why we need:
> > 	if (ns >= 10000)
> > 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec; ?
> >
> > I think "cycle = div_u64(ns * tb_ticks_per_usec, 1000)" is good
> > enough. :)
> 
> It's to deal with overflow if a very large value of ns is provided
> (and/or tb_ticks_per_usec is larger than we expect).
> 
:), as I think, it's to deal with overflow. But you version cannot do deal with it.
Because ns is u64, if ns > 0xffffffff_fffffe0b, ns + 500 will overflow, And if tb_ticks_per_usec > 1000 and ns > (0xffffffff_ffffffff / 10) cycle also will overflow.

I think we need to do this:

#define U64_LOW_MASK            0xffffffffULL
#define U64_MASK                0xffffffffffffffffULL

        u32 tmp_rem;
        u64 ns_u_rem, ns_u, ns_l, ns_l_carry;
        u64 cycle;

        ns_u = ns >> 32;
        ns_l = ns & U64_LOW_MASK;

        ns_l *= tb_ticks_per_usec;
        ns_l_carry = ns_l >> 32;
        ns_u *= tb_ticks_per_usec;
        ns_u += ns_l_carry;

        ns_u = div_u64_rem(ns_u, 1000, &tmp_rem);
        ns_u_rem = tmp_rem;
        ns_l = (ns_l & U64_LOW_MASK) | ((ns_u_rem) << 32);
        ns_l = div_u64(ns_l, 1000);

        if (ns_u >> 32)
                cycle = U64_MASK;
        else
                cycle = (ns_u << 32) | (ns_l & U64_LOW_MASK);

I has already tested this code, and works good. :)

> -Scott
>
Scott Wood - Sept. 30, 2013, 11:06 p.m.
On Sun, 2013-09-29 at 01:57 -0500, Wang Dongsheng-B40534 wrote:
> 
> > -----Original Message-----
> > From: Wood Scott-B07421
> > Sent: Saturday, September 28, 2013 5:33 AM
> > To: Wang Dongsheng-B40534
> > Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> > dev@lists.ozlabs.org
> > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> > 
> > On Thu, 2013-09-26 at 22:34 -0500, Wang Dongsheng-B40534 wrote:
> > > cycle = div_u64(ns * tb_ticks_per_usec, 1000); It's look good.
> > > But why we need:
> > > 	if (ns >= 10000)
> > > 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec; ?
> > >
> > > I think "cycle = div_u64(ns * tb_ticks_per_usec, 1000)" is good
> > > enough. :)
> > 
> > It's to deal with overflow if a very large value of ns is provided
> > (and/or tb_ticks_per_usec is larger than we expect).
> > 
> :), as I think, it's to deal with overflow. But you version cannot do deal with it.
> Because ns is u64, if ns > 0xffffffff_fffffe0b, ns + 500 will overflow, And if tb_ticks_per_usec > 1000 and ns > (0xffffffff_ffffffff / 10) cycle also will overflow.

Sigh... It significantly increases the value of ns at which you'll get
overflow problems. :-)  I was more concerned with
large-but-somewhat-reasonable values of ns (e.g. than with trying to
handle every possible input.  Even without that test, getting overflow
is stretching the bounds of reasonableness (e.g. a 1 GHz timebase would
require a timeout of over 7 months to overflow), but it was low-hanging
fruit.  And the worst case is we go to pw20 sooner than the user wanted,
so let's not go too overboard.

I doubt you would see > 0xffffffff_fffffe0b except if someone is trying
to disable it by setting 0xffffffff_ffffffff even though a separate API
is provided to cleanly disable it.

> I think we need to do this:
> 
> #define U64_LOW_MASK            0xffffffffULL
> #define U64_MASK                0xffffffffffffffffULL
> 
>         u32 tmp_rem;
>         u64 ns_u_rem, ns_u, ns_l, ns_l_carry;
>         u64 cycle;
> 
>         ns_u = ns >> 32;
>         ns_l = ns & U64_LOW_MASK;
> 
>         ns_l *= tb_ticks_per_usec;
>         ns_l_carry = ns_l >> 32;
>         ns_u *= tb_ticks_per_usec;
>         ns_u += ns_l_carry;
> 
>         ns_u = div_u64_rem(ns_u, 1000, &tmp_rem);
>         ns_u_rem = tmp_rem;
>         ns_l = (ns_l & U64_LOW_MASK) | ((ns_u_rem) << 32);
>         ns_l = div_u64(ns_l, 1000);
> 
>         if (ns_u >> 32)
>                 cycle = U64_MASK;
>         else
>                 cycle = (ns_u << 32) | (ns_l & U64_LOW_MASK);
> 
> I has already tested this code, and works good. :)

Ugh.  I don't think we need to get this complicated (and I'd rather not
spend the time verifying the correctness of this).

If for some reason we did need something like this in some other context
(I don't want to see it just for pw20), I'd be more inclined to see
general 128-bit mult/divide support.

-Scott
Wang Dongsheng-B40534 - Oct. 8, 2013, 3:58 a.m.
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Tuesday, October 01, 2013 7:06 AM
> To: Wang Dongsheng-B40534
> Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> dev@lists.ozlabs.org
> Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> On Sun, 2013-09-29 at 01:57 -0500, Wang Dongsheng-B40534 wrote:
> >
> > > -----Original Message-----
> > > From: Wood Scott-B07421
> > > Sent: Saturday, September 28, 2013 5:33 AM
> > > To: Wang Dongsheng-B40534
> > > Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> > > dev@lists.ozlabs.org
> > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> > > and altivec idle
> > >
> > > On Thu, 2013-09-26 at 22:34 -0500, Wang Dongsheng-B40534 wrote:
> > > > cycle = div_u64(ns * tb_ticks_per_usec, 1000); It's look good.
> > > > But why we need:
> > > > 	if (ns >= 10000)
> > > > 		cycle = ((ns + 500) / 1000) * tb_ticks_per_usec; ?
> > > >
> > > > I think "cycle = div_u64(ns * tb_ticks_per_usec, 1000)" is good
> > > > enough. :)
> > >
> > > It's to deal with overflow if a very large value of ns is provided
> > > (and/or tb_ticks_per_usec is larger than we expect).
> > >
> > :), as I think, it's to deal with overflow. But you version cannot do
> deal with it.
> > Because ns is u64, if ns > 0xffffffff_fffffe0b, ns + 500 will overflow,
> And if tb_ticks_per_usec > 1000 and ns > (0xffffffff_ffffffff / 10) cycle
> also will overflow.
> 
> Sigh... It significantly increases the value of ns at which you'll get
> overflow problems. :-)  I was more concerned with large-but-somewhat-
> reasonable values of ns (e.g. than with trying to handle every possible
> input.  Even without that test, getting overflow is stretching the bounds
> of reasonableness (e.g. a 1 GHz timebase would require a timeout of over
> 7 months to overflow), but it was low-hanging fruit.  And the worst case
> is we go to pw20 sooner than the user wanted, so let's not go too
> overboard.
> 
> I doubt you would see > 0xffffffff_fffffe0b except if someone is trying
> to disable it by setting 0xffffffff_ffffffff even though a separate API
> is provided to cleanly disable it.
> 
> > I think we need to do this:
> >
> > #define U64_LOW_MASK            0xffffffffULL
> > #define U64_MASK                0xffffffffffffffffULL
> >
> >         u32 tmp_rem;
> >         u64 ns_u_rem, ns_u, ns_l, ns_l_carry;
> >         u64 cycle;
> >
> >         ns_u = ns >> 32;
> >         ns_l = ns & U64_LOW_MASK;
> >
> >         ns_l *= tb_ticks_per_usec;
> >         ns_l_carry = ns_l >> 32;
> >         ns_u *= tb_ticks_per_usec;
> >         ns_u += ns_l_carry;
> >
> >         ns_u = div_u64_rem(ns_u, 1000, &tmp_rem);
> >         ns_u_rem = tmp_rem;
> >         ns_l = (ns_l & U64_LOW_MASK) | ((ns_u_rem) << 32);
> >         ns_l = div_u64(ns_l, 1000);
> >
> >         if (ns_u >> 32)
> >                 cycle = U64_MASK;
> >         else
> >                 cycle = (ns_u << 32) | (ns_l & U64_LOW_MASK);
> >
> > I has already tested this code, and works good. :)
> 
> Ugh.  I don't think we need to get this complicated (and I'd rather not
> spend the time verifying the correctness of this).
> 
> If for some reason we did need something like this in some other context
> (I don't want to see it just for pw20), I'd be more inclined to see
> general 128-bit mult/divide support.
> 
I would like to use my version,:), because it can handle any situation and we do not need to restrict users.
Here is a kind of special way to get the cycle. If this 128-bit situation is more and more, at that time we go to support it.

-dongsheng

> -Scott
>
Scott Wood - Oct. 8, 2013, 2:50 p.m.
On Mon, 2013-10-07 at 22:58 -0500, Wang Dongsheng-B40534 wrote:
> 
> > -----Original Message-----
> > From: Wood Scott-B07421
> > Sent: Tuesday, October 01, 2013 7:06 AM
> > To: Wang Dongsheng-B40534
> > Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> > dev@lists.ozlabs.org
> > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> > altivec idle
> > 
> > On Sun, 2013-09-29 at 01:57 -0500, Wang Dongsheng-B40534 wrote:
> > > I think we need to do this:
> > >
> > > #define U64_LOW_MASK            0xffffffffULL
> > > #define U64_MASK                0xffffffffffffffffULL
> > >
> > >         u32 tmp_rem;
> > >         u64 ns_u_rem, ns_u, ns_l, ns_l_carry;
> > >         u64 cycle;
> > >
> > >         ns_u = ns >> 32;
> > >         ns_l = ns & U64_LOW_MASK;
> > >
> > >         ns_l *= tb_ticks_per_usec;
> > >         ns_l_carry = ns_l >> 32;
> > >         ns_u *= tb_ticks_per_usec;
> > >         ns_u += ns_l_carry;
> > >
> > >         ns_u = div_u64_rem(ns_u, 1000, &tmp_rem);
> > >         ns_u_rem = tmp_rem;
> > >         ns_l = (ns_l & U64_LOW_MASK) | ((ns_u_rem) << 32);
> > >         ns_l = div_u64(ns_l, 1000);
> > >
> > >         if (ns_u >> 32)
> > >                 cycle = U64_MASK;
> > >         else
> > >                 cycle = (ns_u << 32) | (ns_l & U64_LOW_MASK);
> > >
> > > I has already tested this code, and works good. :)
> > 
> > Ugh.  I don't think we need to get this complicated (and I'd rather not
> > spend the time verifying the correctness of this).
> > 
> > If for some reason we did need something like this in some other context
> > (I don't want to see it just for pw20), I'd be more inclined to see
> > general 128-bit mult/divide support.
> > 
> I would like to use my version,:), because it can handle any situation and we do not need to restrict users.

It also would take more time to review than I have to spend on it, not
to mention the impact on anyone in the future that wants to understand
or maintain this code -- all for very unlikely situations (and the
"failure" in those very unlikely situations is just that we go into PW20
more often than intended).

-Scott
Wang Dongsheng-B40534 - Oct. 11, 2013, 7:43 a.m.
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Tuesday, October 08, 2013 10:50 PM
> To: Wang Dongsheng-B40534
> Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> dev@lists.ozlabs.org
> Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state and
> altivec idle
> 
> On Mon, 2013-10-07 at 22:58 -0500, Wang Dongsheng-B40534 wrote:
> >
> > > -----Original Message-----
> > > From: Wood Scott-B07421
> > > Sent: Tuesday, October 01, 2013 7:06 AM
> > > To: Wang Dongsheng-B40534
> > > Cc: Wood Scott-B07421; Bhushan Bharat-R65777; linuxppc-
> > > dev@lists.ozlabs.org
> > > Subject: Re: [PATCH v4 4/4] powerpc/85xx: add sysfs for pw20 state
> and
> > > altivec idle
> > >
> > > On Sun, 2013-09-29 at 01:57 -0500, Wang Dongsheng-B40534 wrote:
> > > > I think we need to do this:
> > > >
> > > > #define U64_LOW_MASK            0xffffffffULL
> > > > #define U64_MASK                0xffffffffffffffffULL
> > > >
> > > >         u32 tmp_rem;
> > > >         u64 ns_u_rem, ns_u, ns_l, ns_l_carry;
> > > >         u64 cycle;
> > > >
> > > >         ns_u = ns >> 32;
> > > >         ns_l = ns & U64_LOW_MASK;
> > > >
> > > >         ns_l *= tb_ticks_per_usec;
> > > >         ns_l_carry = ns_l >> 32;
> > > >         ns_u *= tb_ticks_per_usec;
> > > >         ns_u += ns_l_carry;
> > > >
> > > >         ns_u = div_u64_rem(ns_u, 1000, &tmp_rem);
> > > >         ns_u_rem = tmp_rem;
> > > >         ns_l = (ns_l & U64_LOW_MASK) | ((ns_u_rem) << 32);
> > > >         ns_l = div_u64(ns_l, 1000);
> > > >
> > > >         if (ns_u >> 32)
> > > >                 cycle = U64_MASK;
> > > >         else
> > > >                 cycle = (ns_u << 32) | (ns_l & U64_LOW_MASK);
> > > >
> > > > I has already tested this code, and works good. :)
> > >
> > > Ugh.  I don't think we need to get this complicated (and I'd rather
> not
> > > spend the time verifying the correctness of this).
> > >
> > > If for some reason we did need something like this in some other
> context
> > > (I don't want to see it just for pw20), I'd be more inclined to see
> > > general 128-bit mult/divide support.
> > >
> > I would like to use my version,:), because it can handle any situation
> and we do not need to restrict users.
> 
> It also would take more time to review than I have to spend on it, not
> to mention the impact on anyone in the future that wants to understand
> or maintain this code -- all for very unlikely situations (and the
> "failure" in those very unlikely situations is just that we go into PW20
> more often than intended).
> 
OK, I will use your verison, :)

        if (ns >= 10000)
                cycle = ((ns + 500) / 1000) * tb_ticks_per_usec;
        else
                cycle = div_u64((u64)ns * tb_ticks_per_usec, 1000);

-dongsheng

> -Scott
>

Patch

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 27a90b9..23fece6 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -85,6 +85,279 @@  __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_FSL_SOC
+#define MAX_BIT		63
+
+static u64 pw20_wt;
+static u64 altivec_idle_wt;
+
+static unsigned int get_idle_ticks_bit(u64 ns)
+{
+	u64 cycle;
+
+	cycle = div_u64(ns, 1000 / tb_ticks_per_usec);
+	if (!cycle)
+		return 0;
+
+	return ilog2(cycle);
+}
+
+static void do_show_pwrmgtcr0(void *val)
+{
+	u32 *value = val;
+
+	*value = mfspr(SPRN_PWRMGTCR0);
+}
+
+static ssize_t show_pw20_state(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+
+	value &= PWRMGTCR0_PW20_WAIT;
+
+	return sprintf(buf, "%u\n", value ? 1 : 0);
+}
+
+static void do_store_pw20_state(void *val)
+{
+	u32 *value = val;
+	u32 pw20_state;
+
+	pw20_state = mfspr(SPRN_PWRMGTCR0);
+
+	if (*value)
+		pw20_state |= PWRMGTCR0_PW20_WAIT;
+	else
+		pw20_state &= ~PWRMGTCR0_PW20_WAIT;
+
+	mtspr(SPRN_PWRMGTCR0, pw20_state);
+}
+
+static ssize_t store_pw20_state(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	if (kstrtou32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value > 1)
+		return -EINVAL;
+
+	smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
+
+	return count;
+}
+
+static ssize_t show_pw20_wait_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	u64 tb_cycle;
+	u64 time;
+
+	unsigned int cpu = dev->id;
+
+	if (!pw20_wt) {
+		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+		value = (value & PWRMGTCR0_PW20_ENT) >>
+					PWRMGTCR0_PW20_ENT_SHIFT;
+
+		tb_cycle = (1 << (MAX_BIT - value)) * 2;
+		time = tb_cycle * (1000 / tb_ticks_per_usec) - 1;
+	} else {
+		time = pw20_wt;
+	}
+
+	return sprintf(buf, "%llu\n", time);
+}
+
+static void set_pw20_wait_entry_bit(void *val)
+{
+	u32 *value = val;
+	u32 pw20_idle;
+
+	pw20_idle = mfspr(SPRN_PWRMGTCR0);
+
+	/* Set Automatic PW20 Core Idle Count */
+	/* clear count */
+	pw20_idle &= ~PWRMGTCR0_PW20_ENT;
+
+	/* set count */
+	pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
+
+	mtspr(SPRN_PWRMGTCR0, pw20_idle);
+}
+
+static ssize_t store_pw20_wait_time(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 entry_bit;
+	u64 value;
+
+	unsigned int cpu = dev->id;
+
+	if (kstrtou64(buf, 0, &value))
+		return -EINVAL;
+
+	if (!value)
+		return -EINVAL;
+
+	entry_bit = get_idle_ticks_bit(value);
+	if (entry_bit > MAX_BIT)
+		return -EINVAL;
+
+	pw20_wt = value;
+	smp_call_function_single(cpu, set_pw20_wait_entry_bit,
+				&entry_bit, 1);
+
+	return count;
+}
+
+static ssize_t show_altivec_idle(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+
+	value &= PWRMGTCR0_AV_IDLE_PD_EN;
+
+	return sprintf(buf, "%u\n", value ? 1 : 0);
+}
+
+static void do_store_altivec_idle(void *val)
+{
+	u32 *value = val;
+	u32 altivec_idle;
+
+	altivec_idle = mfspr(SPRN_PWRMGTCR0);
+
+	if (*value)
+		altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN;
+	else
+		altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN;
+
+	mtspr(SPRN_PWRMGTCR0, altivec_idle);
+}
+
+static ssize_t store_altivec_idle(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	if (kstrtou32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value > 1)
+		return -EINVAL;
+
+	smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
+
+	return count;
+}
+
+static ssize_t show_altivec_idle_wait_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	u64 tb_cycle;
+	u64 time;
+
+	unsigned int cpu = dev->id;
+
+	if (!altivec_idle_wt) {
+		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+		value = (value & PWRMGTCR0_AV_IDLE_CNT) >>
+					PWRMGTCR0_AV_IDLE_CNT_SHIFT;
+
+		tb_cycle = (1 << (MAX_BIT - value)) * 2;
+		time = tb_cycle * (1000 / tb_ticks_per_usec) - 1;
+	} else {
+		time = altivec_idle_wt;
+	}
+
+	return sprintf(buf, "%llu\n", time);
+}
+
+static void set_altivec_idle_wait_entry_bit(void *val)
+{
+	u32 *value = val;
+	u32 altivec_idle;
+
+	altivec_idle = mfspr(SPRN_PWRMGTCR0);
+
+	/* Set Automatic AltiVec Idle Count */
+	/* clear count */
+	altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT;
+
+	/* set count */
+	altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
+
+	mtspr(SPRN_PWRMGTCR0, altivec_idle);
+}
+
+static ssize_t store_altivec_idle_wait_time(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 entry_bit;
+	u64 value;
+
+	unsigned int cpu = dev->id;
+
+	if (kstrtou64(buf, 0, &value))
+		return -EINVAL;
+
+	if (!value)
+		return -EINVAL;
+
+	entry_bit = get_idle_ticks_bit(value);
+	if (entry_bit > MAX_BIT)
+		return -EINVAL;
+
+	altivec_idle_wt = value;
+	smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
+				&entry_bit, 1);
+
+	return count;
+}
+
+/*
+ * Enable/Disable interface:
+ * 0, disable. 1, enable.
+ */
+static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state);
+static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle);
+
+/*
+ * Set wait time interface:(Nanosecond)
+ * Example: Base on TBfreq is 41MHZ.
+ * 1~47(ns): TB[63]
+ * 48~95(ns): TB[62]
+ * 96~191(ns): TB[61]
+ * 192~383(ns): TB[62]
+ * 384~767(ns): TB[60]
+ * ...
+ */
+static DEVICE_ATTR(pw20_wait_time, 0600,
+			show_pw20_wait_time,
+			store_pw20_wait_time);
+static DEVICE_ATTR(altivec_idle_wait_time, 0600,
+			show_altivec_idle_wait_time,
+			store_altivec_idle_wait_time);
+#endif
+
 /*
  * Enabling PMCs will slow partition context switch times so we only do
  * it the first time we write to the PMCs.
@@ -407,6 +680,15 @@  static void register_cpu_online(unsigned int cpu)
 		device_create_file(s, &dev_attr_pir);
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_FSL_SOC
+	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+		device_create_file(s, &dev_attr_pw20_state);
+		device_create_file(s, &dev_attr_pw20_wait_time);
+
+		device_create_file(s, &dev_attr_altivec_idle);
+		device_create_file(s, &dev_attr_altivec_idle_wait_time);
+	}
+#endif
 	cacheinfo_cpu_online(cpu);
 }
 
@@ -479,6 +761,15 @@  static void unregister_cpu_online(unsigned int cpu)
 		device_remove_file(s, &dev_attr_pir);
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_FSL_SOC
+	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+		device_remove_file(s, &dev_attr_pw20_state);
+		device_remove_file(s, &dev_attr_pw20_wait_time);
+
+		device_remove_file(s, &dev_attr_altivec_idle);
+		device_remove_file(s, &dev_attr_altivec_idle_wait_time);
+	}
+#endif
 	cacheinfo_cpu_offline(cpu);
 }