diff mbox

[U-Boot,2/2] spi: mxc_spi: Update pre and post divider algorithm

Message ID 1368076793-24438-2-git-send-email-dirk.behme@gmail.com
State Superseded
Delegated to: Stefano Babic
Headers show

Commit Message

Dirk Behme May 9, 2013, 5:19 a.m. UTC
The spi clock divisor is of the form x * (2**y),  or  x  << y, where x is
1 to 16, and y is 0 to 15. Note the similarity with floating point numbers.
Convert the desired divisor to the smallest number which is >= desired divisor,
and can be represented in this form. The previous algorithm chose a divisor
which could be almost twice as large as needed.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
---
 drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

Comments

Troy Kisky May 9, 2013, 6 p.m. UTC | #1
On 5/8/2013 10:19 PM, Dirk Behme wrote:
> The spi clock divisor is of the form x * (2**y),  or  x  << y, where x is
> 1 to 16, and y is 0 to 15. Note the similarity with floating point numbers.
> Convert the desired divisor to the smallest number which is >= desired divisor,
> and can be represented in this form. The previous algorithm chose a divisor
> which could be almost twice as large as needed.
>
> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
> Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
> ---
>   drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
>   1 file changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
> index 3e903b3..66c2ad8 100644
> --- a/drivers/spi/mxc_spi.c
> +++ b/drivers/spi/mxc_spi.c
> @@ -128,7 +128,7 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave *mxcs, unsigned int cs,
>   		unsigned int max_hz, unsigned int mode)
>   {
>   	u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
> -	s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
> +	s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;

First, I'm totally fine with the patch as it is. I'm just going to point 
out things you may want to change, or
send a follow-up patch.

Here, no need to initialize pre_div, post_div, if you delete the  if 
(clk_src > max_hz) below which is not needed.

>   	u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
>   	struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
>   
> @@ -147,27 +147,24 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave *mxcs, unsigned int cs,
>   	reg_ctrl |=  MXC_CSPICTRL_EN;
>   	reg_write(&regs->ctrl, reg_ctrl);
>   
> -	/*
> -	 * The following computation is taken directly from Freescale's code.
> -	 */
>   	if (clk_src > max_hz) {
This "if" can be removed.

>   		pre_div = DIV_ROUND_UP(clk_src, max_hz);
If you subtract -1 here instead of when you set the divisor register, 
the logic becomes simpler


pre_div = DIV_ROUND_UP(clk_src, max_hz) - 1;

or just

pre_div = (clk_src - 1) / max_hz;

> -		if (pre_div > 16) {
> -			post_div = pre_div / 16;
> -			pre_div = 16;
> -		}
> -		if (post_div != 0) {
> -			for (i = 0; i < 16; i++) {
> -				if ((1 << i) >= post_div)
> -					break;
> -			}
> -			if (i == 16) {
> +		/* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
> +		post_div = fls(pre_div - 1);
> +		if (post_div > 4) {
> +			post_div -= 4;
> +
> +			if (post_div >= 16) {
>   				printf("Error: no divider for the freq: %d\n",
>   					max_hz);
>   				return -1;
>   			}
> -			post_div = i;
> +			pre_div = (pre_div + (1 << post_div) - 1) >> post_div;

This would become
pre_div >>= post_div;
> +
> +		} else {
> +			post_div = 0;
>   		}
> +
>   	}
>   
>   	debug("pre_div = %d, post_div=%d\n", pre_div, post_div);

And

MXC_CSPICTRL_PREDIV(pre_div - 1);

would return to

MXC_CSPICTRL_PREDIV(pre_div);


Troy
Troy Kisky May 9, 2013, 6:04 p.m. UTC | #2
On 5/9/2013 11:00 AM, Troy Kisky wrote:
> On 5/8/2013 10:19 PM, Dirk Behme wrote:
>> The spi clock divisor is of the form x * (2**y),  or  x  << y, where 
>> x is
>> 1 to 16, and y is 0 to 15. Note the similarity with floating point 
>> numbers.
>> Convert the desired divisor to the smallest number which is >= 
>> desired divisor,
>> and can be represented in this form. The previous algorithm chose a 
>> divisor
>> which could be almost twice as large as needed.
>>
>> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
>> Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
>> ---
>>   drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
>>   1 file changed, 12 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
>> index 3e903b3..66c2ad8 100644
>> --- a/drivers/spi/mxc_spi.c
>> +++ b/drivers/spi/mxc_spi.c
>> @@ -128,7 +128,7 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave 
>> *mxcs, unsigned int cs,
>>           unsigned int max_hz, unsigned int mode)
>>   {
>>       u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
>> -    s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
>> +    s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>
> First, I'm totally fine with the patch as it is. I'm just going to 
> point out things you may want to change, or
> send a follow-up patch.
>
> Here, no need to initialize pre_div, post_div, if you delete the if 
> (clk_src > max_hz) below which is not needed.
>
>>       u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
>>       struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
>>   @@ -147,27 +147,24 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave 
>> *mxcs, unsigned int cs,
>>       reg_ctrl |=  MXC_CSPICTRL_EN;
>>       reg_write(&regs->ctrl, reg_ctrl);
>>   -    /*
>> -     * The following computation is taken directly from Freescale's 
>> code.
>> -     */
>>       if (clk_src > max_hz) {
> This "if" can be removed.
>
>>           pre_div = DIV_ROUND_UP(clk_src, max_hz);
> If you subtract -1 here instead of when you set the divisor register, 
> the logic becomes simpler
>
>
> pre_div = DIV_ROUND_UP(clk_src, max_hz) - 1;
>
> or just
>
> pre_div = (clk_src - 1) / max_hz;
>
>> -        if (pre_div > 16) {
>> -            post_div = pre_div / 16;
>> -            pre_div = 16;
>> -        }
>> -        if (post_div != 0) {
>> -            for (i = 0; i < 16; i++) {
>> -                if ((1 << i) >= post_div)
>> -                    break;
>> -            }
>> -            if (i == 16) {
>> +        /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>> +        post_div = fls(pre_div - 1);

oops, this would also become
post_div = fls(pre_div);

>> +        if (post_div > 4) {
>> +            post_div -= 4;
>> +
>> +            if (post_div >= 16) {
>>                   printf("Error: no divider for the freq: %d\n",
>>                       max_hz);
>>                   return -1;
>>               }
>> -            post_div = i;
>> +            pre_div = (pre_div + (1 << post_div) - 1) >> post_div;
>
> This would become
> pre_div >>= post_div;
>> +
>> +        } else {
>> +            post_div = 0;
>>           }
>> +
>>       }
>>         debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
>
> And
>
> MXC_CSPICTRL_PREDIV(pre_div - 1);
>
> would return to
>
> MXC_CSPICTRL_PREDIV(pre_div);
>
>
> Troy
>
Dirk Behme May 10, 2013, 5:34 a.m. UTC | #3
Am 09.05.2013 20:00, schrieb Troy Kisky:
> On 5/8/2013 10:19 PM, Dirk Behme wrote:
>> The spi clock divisor is of the form x * (2**y),  or  x  << y, where
>> x is
>> 1 to 16, and y is 0 to 15. Note the similarity with floating point
>> numbers.
>> Convert the desired divisor to the smallest number which is >=
>> desired divisor,
>> and can be represented in this form. The previous algorithm chose a
>> divisor
>> which could be almost twice as large as needed.
>>
>> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
>> Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
>> ---
>>   drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
>>   1 file changed, 12 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
>> index 3e903b3..66c2ad8 100644
>> --- a/drivers/spi/mxc_spi.c
>> +++ b/drivers/spi/mxc_spi.c
>> @@ -128,7 +128,7 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>> *mxcs, unsigned int cs,
>>           unsigned int max_hz, unsigned int mode)
>>   {
>>       u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
>> -    s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
>> +    s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>
> First, I'm totally fine with the patch as it is. I'm just going to
> point out things you may want to change, or
> send a follow-up patch.
>
> Here, no need to initialize pre_div, post_div, if you delete the  if
> (clk_src > max_hz) below which is not needed.

Hmm, why is it not needed?

If you remove the if, you *always* do at least the computation

pre_div = DIV_ROUND_UP(clk_src, max_hz);
post_div = fls(pre_div - 1);
if (post_div > 4) {

I would think that doing the initialization and the if is much cheaper 
than always doing above computation, even if its not needed? I would 
keep the if.

>>       u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
>>       struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
>> @@ -147,27 +147,24 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>> *mxcs, unsigned int cs,
>>       reg_ctrl |=  MXC_CSPICTRL_EN;
>>       reg_write(&regs->ctrl, reg_ctrl);
>> -    /*
>> -     * The following computation is taken directly from Freescale's
>> code.
>> -     */
>>       if (clk_src > max_hz) {
> This "if" can be removed.
>
>>           pre_div = DIV_ROUND_UP(clk_src, max_hz);
> If you subtract -1 here instead of when you set the divisor register,
> the logic becomes simpler
>
>
> pre_div = DIV_ROUND_UP(clk_src, max_hz) - 1;
>
> or just
>
> pre_div = (clk_src - 1) / max_hz;
>
>> -        if (pre_div > 16) {
>> -            post_div = pre_div / 16;
>> -            pre_div = 16;
>> -        }
>> -        if (post_div != 0) {
>> -            for (i = 0; i < 16; i++) {
>> -                if ((1 << i) >= post_div)
>> -                    break;
>> -            }
>> -            if (i == 16) {
>> +        /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>> +        post_div = fls(pre_div - 1);
>> +        if (post_div > 4) {
>> +            post_div -= 4;
>> +
>> +            if (post_div >= 16) {
>>                   printf("Error: no divider for the freq: %d\n",
>>                       max_hz);
>>                   return -1;
>>               }
>> -            post_div = i;
>> +            pre_div = (pre_div + (1 << post_div) - 1) >> post_div;
>
> This would become
> pre_div >>= post_div;
>> +
>> +        } else {
>> +            post_div = 0;
>>           }
>> +
>>       }
>>       debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
>
> And
>
> MXC_CSPICTRL_PREDIV(pre_div - 1);
>
> would return to
>
> MXC_CSPICTRL_PREDIV(pre_div);

Well, where to do the -1 mainly depends on how we want to interpret 
the output of

debug("pre_div = %d, post_div=%d\n", pre_div, post_div);

There are two options how to understand this, at least the pre_div = 
%d part:

a) print the pre_div as the real divider used in a human readable 
format. I.e. if we divide by /15 then print 15

or

b) print the pre_div value we write to the register. I.e. if we divide 
by /15 then print 14

Up to now we are doing (a) and calculate the register value after the 
debug print. Your proposal would switch this to (b). Anyway, if it 
makes the algorithm simpler I'd agree that it's fine to switch to (b).

To summarize, this would become then:

s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;

...

if (clk_src > max_hz) {
	pre_div = (clk_src - 1) / max_hz;
	/* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
	post_div = fls(pre_div);
	if (post_div > 4) {
		post_div -= 4;
		if (post_div >= 16) {
			printf("Error: no divider for the freq: %d\n", max_hz);
			return -1;
		}
		pre_div >>= post_div;
	} else {
		post_div = 0;
	}
}

...

MXC_CSPICTRL_PREDIV(pre_div);

Is this correct?

Best regards

Dirk
Troy Kisky May 10, 2013, 6:44 p.m. UTC | #4
On 5/9/2013 10:34 PM, Dirk Behme wrote:
> Am 09.05.2013 20:00, schrieb Troy Kisky:
>> On 5/8/2013 10:19 PM, Dirk Behme wrote:
>>> The spi clock divisor is of the form x * (2**y),  or  x  << y, where
>>> x is
>>> 1 to 16, and y is 0 to 15. Note the similarity with floating point
>>> numbers.
>>> Convert the desired divisor to the smallest number which is >=
>>> desired divisor,
>>> and can be represented in this form. The previous algorithm chose a
>>> divisor
>>> which could be almost twice as large as needed.
>>>
>>> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
>>> Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
>>> ---
>>>   drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
>>>   1 file changed, 12 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
>>> index 3e903b3..66c2ad8 100644
>>> --- a/drivers/spi/mxc_spi.c
>>> +++ b/drivers/spi/mxc_spi.c
>>> @@ -128,7 +128,7 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>>> *mxcs, unsigned int cs,
>>>           unsigned int max_hz, unsigned int mode)
>>>   {
>>>       u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
>>> -    s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
>>> +    s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>>
>> First, I'm totally fine with the patch as it is. I'm just going to
>> point out things you may want to change, or
>> send a follow-up patch.
>>
>> Here, no need to initialize pre_div, post_div, if you delete the  if
>> (clk_src > max_hz) below which is not needed.
>
> Hmm, why is it not needed?
>
> If you remove the if, you *always* do at least the computation
>
> pre_div = DIV_ROUND_UP(clk_src, max_hz);
> post_div = fls(pre_div - 1);
> if (post_div > 4) {
>
> I would think that doing the initialization and the if is much cheaper 
> than always doing above computation, even if its not needed? I would 
> keep the if.
>
>>>       u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
>>>       struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
>>> @@ -147,27 +147,24 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>>> *mxcs, unsigned int cs,
>>>       reg_ctrl |=  MXC_CSPICTRL_EN;
>>>       reg_write(&regs->ctrl, reg_ctrl);
>>> -    /*
>>> -     * The following computation is taken directly from Freescale's
>>> code.
>>> -     */
>>>       if (clk_src > max_hz) {
>> This "if" can be removed.
>>
>>>           pre_div = DIV_ROUND_UP(clk_src, max_hz);
>> If you subtract -1 here instead of when you set the divisor register,
>> the logic becomes simpler
>>
>>
>> pre_div = DIV_ROUND_UP(clk_src, max_hz) - 1;
>>
>> or just
>>
>> pre_div = (clk_src - 1) / max_hz;
>>
>>> -        if (pre_div > 16) {
>>> -            post_div = pre_div / 16;
>>> -            pre_div = 16;
>>> -        }
>>> -        if (post_div != 0) {
>>> -            for (i = 0; i < 16; i++) {
>>> -                if ((1 << i) >= post_div)
>>> -                    break;
>>> -            }
>>> -            if (i == 16) {
>>> +        /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>>> +        post_div = fls(pre_div - 1);
>>> +        if (post_div > 4) {
>>> +            post_div -= 4;
>>> +
>>> +            if (post_div >= 16) {
>>>                   printf("Error: no divider for the freq: %d\n",
>>>                       max_hz);
>>>                   return -1;
>>>               }
>>> -            post_div = i;
>>> +            pre_div = (pre_div + (1 << post_div) - 1) >> post_div;
>>
>> This would become
>> pre_div >>= post_div;
>>> +
>>> +        } else {
>>> +            post_div = 0;
>>>           }
>>> +
>>>       }
>>>       debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
>>
>> And
>>
>> MXC_CSPICTRL_PREDIV(pre_div - 1);
>>
>> would return to
>>
>> MXC_CSPICTRL_PREDIV(pre_div);
>
> Well, where to do the -1 mainly depends on how we want to interpret 
> the output of
>
> debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
I'd change to

debug("actual div = %d, pre_div = %d, post_div=%d\n", (pre_div + 1) << 
post_div, pre_div, post_div);

to eliminate confusion.


>
> There are two options how to understand this, at least the pre_div = 
> %d part:
>
> a) print the pre_div as the real divider used in a human readable 
> format. I.e. if we divide by /15 then print 15
>
> or
>
> b) print the pre_div value we write to the register. I.e. if we divide 
> by /15 then print 14
>
> Up to now we are doing (a) and calculate the register value after the 
> debug print. Your proposal would switch this to (b). Anyway, if it 
> makes the algorithm simpler I'd agree that it's fine to switch to (b).
>
> To summarize, this would become then:
>
> s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>
> ...

If you keep the "if", pre_div = 0
Also, I'd change s32 to unsigned or u32.

But  usually the if will be true, so why keep it?
It is just code bloat and prone to errors like yours above.

The only logic difference between keeping/killing the "if" is when
clk_src == 0.  Keeping will give a divide by 1. Killing will give an error.
I'd argue that an error is more appropriate.

>
> if (clk_src > max_hz) {
>     pre_div = (clk_src - 1) / max_hz;
>     /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>     post_div = fls(pre_div);
>     if (post_div > 4) {
>         post_div -= 4;
>         if (post_div >= 16) {
>             printf("Error: no divider for the freq: %d\n", max_hz);
>             return -1;
>         }
>         pre_div >>= post_div;
>     } else {
>         post_div = 0;
>     }
> }
>
> ...
>
> MXC_CSPICTRL_PREDIV(pre_div);
>
> Is this correct?
>
> Best regards
>
> Dirk
Dirk Behme May 10, 2013, 7:08 p.m. UTC | #5
Am 10.05.2013 20:44, schrieb Troy Kisky:
> On 5/9/2013 10:34 PM, Dirk Behme wrote:
>> Am 09.05.2013 20:00, schrieb Troy Kisky:
>>> On 5/8/2013 10:19 PM, Dirk Behme wrote:
>>>> The spi clock divisor is of the form x * (2**y),  or  x  << y, where
>>>> x is
>>>> 1 to 16, and y is 0 to 15. Note the similarity with floating point
>>>> numbers.
>>>> Convert the desired divisor to the smallest number which is >=
>>>> desired divisor,
>>>> and can be represented in this form. The previous algorithm chose a
>>>> divisor
>>>> which could be almost twice as large as needed.
>>>>
>>>> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
>>>> Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
>>>> ---
>>>>   drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
>>>>   1 file changed, 12 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
>>>> index 3e903b3..66c2ad8 100644
>>>> --- a/drivers/spi/mxc_spi.c
>>>> +++ b/drivers/spi/mxc_spi.c
>>>> @@ -128,7 +128,7 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>>>> *mxcs, unsigned int cs,
>>>>           unsigned int max_hz, unsigned int mode)
>>>>   {
>>>>       u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
>>>> -    s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
>>>> +    s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>>>
>>> First, I'm totally fine with the patch as it is. I'm just going to
>>> point out things you may want to change, or
>>> send a follow-up patch.
>>>
>>> Here, no need to initialize pre_div, post_div, if you delete the  if
>>> (clk_src > max_hz) below which is not needed.
>>
>> Hmm, why is it not needed?
>>
>> If you remove the if, you *always* do at least the computation
>>
>> pre_div = DIV_ROUND_UP(clk_src, max_hz);
>> post_div = fls(pre_div - 1);
>> if (post_div > 4) {
>>
>> I would think that doing the initialization and the if is much
>> cheaper than always doing above computation, even if its not needed?
>> I would keep the if.
>>
>>>>       u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
>>>>       struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
>>>> @@ -147,27 +147,24 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>>>> *mxcs, unsigned int cs,
>>>>       reg_ctrl |=  MXC_CSPICTRL_EN;
>>>>       reg_write(&regs->ctrl, reg_ctrl);
>>>> -    /*
>>>> -     * The following computation is taken directly from Freescale's
>>>> code.
>>>> -     */
>>>>       if (clk_src > max_hz) {
>>> This "if" can be removed.
>>>
>>>>           pre_div = DIV_ROUND_UP(clk_src, max_hz);
>>> If you subtract -1 here instead of when you set the divisor register,
>>> the logic becomes simpler
>>>
>>>
>>> pre_div = DIV_ROUND_UP(clk_src, max_hz) - 1;
>>>
>>> or just
>>>
>>> pre_div = (clk_src - 1) / max_hz;
>>>
>>>> -        if (pre_div > 16) {
>>>> -            post_div = pre_div / 16;
>>>> -            pre_div = 16;
>>>> -        }
>>>> -        if (post_div != 0) {
>>>> -            for (i = 0; i < 16; i++) {
>>>> -                if ((1 << i) >= post_div)
>>>> -                    break;
>>>> -            }
>>>> -            if (i == 16) {
>>>> +        /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>>>> +        post_div = fls(pre_div - 1);
>>>> +        if (post_div > 4) {
>>>> +            post_div -= 4;
>>>> +
>>>> +            if (post_div >= 16) {
>>>>                   printf("Error: no divider for the freq: %d\n",
>>>>                       max_hz);
>>>>                   return -1;
>>>>               }
>>>> -            post_div = i;
>>>> +            pre_div = (pre_div + (1 << post_div) - 1) >> post_div;
>>>
>>> This would become
>>> pre_div >>= post_div;
>>>> +
>>>> +        } else {
>>>> +            post_div = 0;
>>>>           }
>>>> +
>>>>       }
>>>>       debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
>>>
>>> And
>>>
>>> MXC_CSPICTRL_PREDIV(pre_div - 1);
>>>
>>> would return to
>>>
>>> MXC_CSPICTRL_PREDIV(pre_div);
>>
>> Well, where to do the -1 mainly depends on how we want to interpret
>> the output of
>>
>> debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
> I'd change to
>
> debug("actual div = %d, pre_div = %d, post_div=%d\n", (pre_div + 1) <<
> post_div, pre_div, post_div);
>
> to eliminate confusion.
>
>
>>
>> There are two options how to understand this, at least the pre_div =
>> %d part:
>>
>> a) print the pre_div as the real divider used in a human readable
>> format. I.e. if we divide by /15 then print 15
>>
>> or
>>
>> b) print the pre_div value we write to the register. I.e. if we
>> divide by /15 then print 14
>>
>> Up to now we are doing (a) and calculate the register value after
>> the debug print. Your proposal would switch this to (b). Anyway, if
>> it makes the algorithm simpler I'd agree that it's fine to switch to
>> (b).
>>
>> To summarize, this would become then:
>>
>> s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>>
>> ...
>
> If you keep the "if", pre_div = 0

Ah, yes, thanks.

> Also, I'd change s32 to unsigned or u32.
>
> But  usually the if will be true, so why keep it?

For cases where clk_src <= max_hz. Then don't do the calculation 
because it's unnecessary.

> It is just code bloat and prone to errors like yours above.
>
> The only logic difference between keeping/killing the "if" is when
> clk_src == 0.  Keeping will give a divide by 1. Killing will give an
> error.
> I'd argue that an error is more appropriate.
>
>>
>> if (clk_src > max_hz) {
>>     pre_div = (clk_src - 1) / max_hz;
>>     /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>>     post_div = fls(pre_div);
>>     if (post_div > 4) {
>>         post_div -= 4;
>>         if (post_div >= 16) {
>>             printf("Error: no divider for the freq: %d\n", max_hz);
>>             return -1;
>>         }
>>         pre_div >>= post_div;
>>     } else {
>>         post_div = 0;
>>     }
>> }
>>
>> ...
>>
>> MXC_CSPICTRL_PREDIV(pre_div);
>>
>> Is this correct?
>>
>> Best regards
>>
>> Dirk
>
>
Troy Kisky May 10, 2013, 8:54 p.m. UTC | #6
On 5/10/2013 12:08 PM, Dirk Behme wrote:
> Am 10.05.2013 20:44, schrieb Troy Kisky:
>> On 5/9/2013 10:34 PM, Dirk Behme wrote:
>>> Am 09.05.2013 20:00, schrieb Troy Kisky:
>>>> On 5/8/2013 10:19 PM, Dirk Behme wrote:
>>>>> The spi clock divisor is of the form x * (2**y),  or  x  << y, where
>>>>> x is
>>>>> 1 to 16, and y is 0 to 15. Note the similarity with floating point
>>>>> numbers.
>>>>> Convert the desired divisor to the smallest number which is >=
>>>>> desired divisor,
>>>>> and can be represented in this form. The previous algorithm chose a
>>>>> divisor
>>>>> which could be almost twice as large as needed.
>>>>>
>>>>> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
>>>>> Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
>>>>> ---
>>>>>   drivers/spi/mxc_spi.c |   27 ++++++++++++---------------
>>>>>   1 file changed, 12 insertions(+), 15 deletions(-)
>>>>>
>>>>> diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
>>>>> index 3e903b3..66c2ad8 100644
>>>>> --- a/drivers/spi/mxc_spi.c
>>>>> +++ b/drivers/spi/mxc_spi.c
>>>>> @@ -128,7 +128,7 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>>>>> *mxcs, unsigned int cs,
>>>>>           unsigned int max_hz, unsigned int mode)
>>>>>   {
>>>>>       u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
>>>>> -    s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
>>>>> +    s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>>>>
>>>> First, I'm totally fine with the patch as it is. I'm just going to
>>>> point out things you may want to change, or
>>>> send a follow-up patch.
>>>>
>>>> Here, no need to initialize pre_div, post_div, if you delete the  if
>>>> (clk_src > max_hz) below which is not needed.
>>>
>>> Hmm, why is it not needed?
>>>
>>> If you remove the if, you *always* do at least the computation
>>>
>>> pre_div = DIV_ROUND_UP(clk_src, max_hz);
>>> post_div = fls(pre_div - 1);
>>> if (post_div > 4) {
>>>
>>> I would think that doing the initialization and the if is much
>>> cheaper than always doing above computation, even if its not needed?
>>> I would keep the if.
>>>
>>>>>       u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
>>>>>       struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
>>>>> @@ -147,27 +147,24 @@ static s32 spi_cfg_mxc(struct mxc_spi_slave
>>>>> *mxcs, unsigned int cs,
>>>>>       reg_ctrl |=  MXC_CSPICTRL_EN;
>>>>>       reg_write(&regs->ctrl, reg_ctrl);
>>>>> -    /*
>>>>> -     * The following computation is taken directly from Freescale's
>>>>> code.
>>>>> -     */
>>>>>       if (clk_src > max_hz) {
>>>> This "if" can be removed.
>>>>
>>>>>           pre_div = DIV_ROUND_UP(clk_src, max_hz);
>>>> If you subtract -1 here instead of when you set the divisor register,
>>>> the logic becomes simpler
>>>>
>>>>
>>>> pre_div = DIV_ROUND_UP(clk_src, max_hz) - 1;
>>>>
>>>> or just
>>>>
>>>> pre_div = (clk_src - 1) / max_hz;
>>>>
>>>>> -        if (pre_div > 16) {
>>>>> -            post_div = pre_div / 16;
>>>>> -            pre_div = 16;
>>>>> -        }
>>>>> -        if (post_div != 0) {
>>>>> -            for (i = 0; i < 16; i++) {
>>>>> -                if ((1 << i) >= post_div)
>>>>> -                    break;
>>>>> -            }
>>>>> -            if (i == 16) {
>>>>> +        /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>>>>> +        post_div = fls(pre_div - 1);
>>>>> +        if (post_div > 4) {
>>>>> +            post_div -= 4;
>>>>> +
>>>>> +            if (post_div >= 16) {
>>>>>                   printf("Error: no divider for the freq: %d\n",
>>>>>                       max_hz);
>>>>>                   return -1;
>>>>>               }
>>>>> -            post_div = i;
>>>>> +            pre_div = (pre_div + (1 << post_div) - 1) >> post_div;
>>>>
>>>> This would become
>>>> pre_div >>= post_div;
>>>>> +
>>>>> +        } else {
>>>>> +            post_div = 0;
>>>>>           }
>>>>> +
>>>>>       }
>>>>>       debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
>>>>
>>>> And
>>>>
>>>> MXC_CSPICTRL_PREDIV(pre_div - 1);
>>>>
>>>> would return to
>>>>
>>>> MXC_CSPICTRL_PREDIV(pre_div);
>>>
>>> Well, where to do the -1 mainly depends on how we want to interpret
>>> the output of
>>>
>>> debug("pre_div = %d, post_div=%d\n", pre_div, post_div);
>> I'd change to
>>
>> debug("actual div = %d, pre_div = %d, post_div=%d\n", (pre_div + 1) <<
>> post_div, pre_div, post_div);
>>
>> to eliminate confusion.
>>
>>
>>>
>>> There are two options how to understand this, at least the pre_div =
>>> %d part:
>>>
>>> a) print the pre_div as the real divider used in a human readable
>>> format. I.e. if we divide by /15 then print 15
>>>
>>> or
>>>
>>> b) print the pre_div value we write to the register. I.e. if we
>>> divide by /15 then print 14
>>>
>>> Up to now we are doing (a) and calculate the register value after
>>> the debug print. Your proposal would switch this to (b). Anyway, if
>>> it makes the algorithm simpler I'd agree that it's fine to switch to
>>> (b).
>>>
>>> To summarize, this would become then:
>>>
>>> s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
>>>
>>> ...
>>
>> If you keep the "if", pre_div = 0
>
> Ah, yes, thanks.
>
>> Also, I'd change s32 to unsigned or u32.
>>
>> But  usually the if will be true, so why keep it?
>
> For cases where clk_src <= max_hz. Then don't do the calculation 
> because it's unnecessary.

__udivsi3 already has this optimization.
fls can be 2 instructions
         clz    r0,r0
         rsb    r0,r0,#32

so your savings are a subroutine call and a return
in the unlikely case that (clk_src <= max_hz)

But I don't have a real objection to leaving it, so feel free to decide.

>
>> It is just code bloat and prone to errors like yours above.
>>
>> The only logic difference between keeping/killing the "if" is when
>> clk_src == 0.  Keeping will give a divide by 1. Killing will give an
>> error.
>> I'd argue that an error is more appropriate.
>>
>>>
>>> if (clk_src > max_hz) {
>>>     pre_div = (clk_src - 1) / max_hz;
>>>     /* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
>>>     post_div = fls(pre_div);
>>>     if (post_div > 4) {
>>>         post_div -= 4;
>>>         if (post_div >= 16) {
>>>             printf("Error: no divider for the freq: %d\n", max_hz);
>>>             return -1;
>>>         }
>>>         pre_div >>= post_div;
>>>     } else {
>>>         post_div = 0;
>>>     }
>>> }
>>>
>>> ...
>>>
>>> MXC_CSPICTRL_PREDIV(pre_div);
>>>
>>> Is this correct?
>>>
>>> Best regards
>>>
>>> Dirk
>>
>>
>
>
diff mbox

Patch

diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
index 3e903b3..66c2ad8 100644
--- a/drivers/spi/mxc_spi.c
+++ b/drivers/spi/mxc_spi.c
@@ -128,7 +128,7 @@  static s32 spi_cfg_mxc(struct mxc_spi_slave *mxcs, unsigned int cs,
 		unsigned int max_hz, unsigned int mode)
 {
 	u32 clk_src = mxc_get_clock(MXC_CSPI_CLK);
-	s32 pre_div = 1, post_div = 0, i, reg_ctrl, reg_config;
+	s32 pre_div = 1, post_div = 0, reg_ctrl, reg_config;
 	u32 ss_pol = 0, sclkpol = 0, sclkpha = 0;
 	struct cspi_regs *regs = (struct cspi_regs *)mxcs->base;
 
@@ -147,27 +147,24 @@  static s32 spi_cfg_mxc(struct mxc_spi_slave *mxcs, unsigned int cs,
 	reg_ctrl |=  MXC_CSPICTRL_EN;
 	reg_write(&regs->ctrl, reg_ctrl);
 
-	/*
-	 * The following computation is taken directly from Freescale's code.
-	 */
 	if (clk_src > max_hz) {
 		pre_div = DIV_ROUND_UP(clk_src, max_hz);
-		if (pre_div > 16) {
-			post_div = pre_div / 16;
-			pre_div = 16;
-		}
-		if (post_div != 0) {
-			for (i = 0; i < 16; i++) {
-				if ((1 << i) >= post_div)
-					break;
-			}
-			if (i == 16) {
+		/* fls(1) = 1, fls(0x80000000) = 32, fls(16) = 5 */
+		post_div = fls(pre_div - 1);
+		if (post_div > 4) {
+			post_div -= 4;
+
+			if (post_div >= 16) {
 				printf("Error: no divider for the freq: %d\n",
 					max_hz);
 				return -1;
 			}
-			post_div = i;
+			pre_div = (pre_div + (1 << post_div) - 1) >> post_div;
+
+		} else {
+			post_div = 0;
 		}
+
 	}
 
 	debug("pre_div = %d, post_div=%d\n", pre_div, post_div);