diff mbox series

[v2,5/6] pwm: renesas-tpu: Improve maths to compute register settings

Message ID 20220420121240.67781-6-u.kleine-koenig@pengutronix.de
State Accepted
Headers show
Series pwm: renesas-tpu: Various improvements | expand

Commit Message

Uwe Kleine-König April 20, 2022, 12:12 p.m. UTC
The newly computed register values are intended to exactly match the
previously computed values. The main improvement is that the prescaler
is computed without a loop that involves two divisions in each step.
This uses the fact, that prescalers[i] = 1 << (2 * i).

Assuming a moderately smart compiler, the needed number of divisions for
the case where the requested period is too big, is reduced from 5 to 2.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 drivers/pwm/pwm-renesas-tpu.c | 50 ++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 15 deletions(-)

Comments

Geert Uytterhoeven April 20, 2022, 6:08 p.m. UTC | #1
Hi Uwe,

On Wed, Apr 20, 2022 at 2:12 PM Uwe Kleine-König
<u.kleine-koenig@pengutronix.de> wrote:
> The newly computed register values are intended to exactly match the
> previously computed values. The main improvement is that the prescaler
> is computed without a loop that involves two divisions in each step.
> This uses the fact, that prescalers[i] = 1 << (2 * i).
>
> Assuming a moderately smart compiler, the needed number of divisions for
> the case where the requested period is too big, is reduced from 5 to 2.
>
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>

Thanks for your patch!

> --- a/drivers/pwm/pwm-renesas-tpu.c
> +++ b/drivers/pwm/pwm-renesas-tpu.c
> @@ -244,7 +244,6 @@ static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
>  static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
>                           int duty_ns, int period_ns, bool enabled)
>  {
> -       static const unsigned int prescalers[] = { 1, 4, 16, 64 };
>         struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
>         struct tpu_device *tpu = to_tpu_device(chip);
>         unsigned int prescaler;
> @@ -254,26 +253,47 @@ static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
>         u32 duty;
>         int ret;
>
> +       clk_rate = clk_get_rate(tpu->clk);
> +
> +       period = clk_rate / (NSEC_PER_SEC / period_ns);
> +
>         /*
> -        * Pick a prescaler to avoid overflowing the counter.
> -        * TODO: Pick the highest acceptable prescaler.
> +        * Find the minimal prescaler in [0..3] such that
> +        *
> +        *      period >> (2 * prescaler) < 0x10000

scripts/checkpatch.pl:
WARNING: please, no space before tabs

> +        *
> +        * This could be calculated using something like:
> +        *
> +        *      prescaler = max(ilog2(period) / 2, 7) - 7;

WARNING: please, no space before tabs

The rest LGTM, so
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Uwe Kleine-König June 19, 2022, 8:03 p.m. UTC | #2
On Wed, Apr 20, 2022 at 08:08:34PM +0200, Geert Uytterhoeven wrote:
> Hi Uwe,
> 
> On Wed, Apr 20, 2022 at 2:12 PM Uwe Kleine-König
> <u.kleine-koenig@pengutronix.de> wrote:
> > The newly computed register values are intended to exactly match the
> > previously computed values. The main improvement is that the prescaler
> > is computed without a loop that involves two divisions in each step.
> > This uses the fact, that prescalers[i] = 1 << (2 * i).
> >
> > Assuming a moderately smart compiler, the needed number of divisions for
> > the case where the requested period is too big, is reduced from 5 to 2.
> >
> > Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> 
> Thanks for your patch!
> 
> > --- a/drivers/pwm/pwm-renesas-tpu.c
> > +++ b/drivers/pwm/pwm-renesas-tpu.c
> > @@ -244,7 +244,6 @@ static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
> >  static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
> >                           int duty_ns, int period_ns, bool enabled)
> >  {
> > -       static const unsigned int prescalers[] = { 1, 4, 16, 64 };
> >         struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
> >         struct tpu_device *tpu = to_tpu_device(chip);
> >         unsigned int prescaler;
> > @@ -254,26 +253,47 @@ static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
> >         u32 duty;
> >         int ret;
> >
> > +       clk_rate = clk_get_rate(tpu->clk);
> > +
> > +       period = clk_rate / (NSEC_PER_SEC / period_ns);
> > +
> >         /*
> > -        * Pick a prescaler to avoid overflowing the counter.
> > -        * TODO: Pick the highest acceptable prescaler.
> > +        * Find the minimal prescaler in [0..3] such that
> > +        *
> > +        *      period >> (2 * prescaler) < 0x10000
> 
> scripts/checkpatch.pl:
> WARNING: please, no space before tabs
> 
> > +        *
> > +        * This could be calculated using something like:
> > +        *
> > +        *      prescaler = max(ilog2(period) / 2, 7) - 7;
> 
> WARNING: please, no space before tabs
> 
> The rest LGTM, so
> Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>

@Thierry: Assuming you agree to this patch, too: Should I resend for the
checkpack warning (I'd s/\t/   /), or do you want to fixup at apply
time?

Best regards
Uwe
diff mbox series

Patch

diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index 5bced2050ece..60ba7cf275c7 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -244,7 +244,6 @@  static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			  int duty_ns, int period_ns, bool enabled)
 {
-	static const unsigned int prescalers[] = { 1, 4, 16, 64 };
 	struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
 	struct tpu_device *tpu = to_tpu_device(chip);
 	unsigned int prescaler;
@@ -254,26 +253,47 @@  static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	u32 duty;
 	int ret;
 
+	clk_rate = clk_get_rate(tpu->clk);
+
+	period = clk_rate / (NSEC_PER_SEC / period_ns);
+
 	/*
-	 * Pick a prescaler to avoid overflowing the counter.
-	 * TODO: Pick the highest acceptable prescaler.
+	 * Find the minimal prescaler in [0..3] such that
+	 *
+	 * 	period >> (2 * prescaler) < 0x10000
+	 *
+	 * This could be calculated using something like:
+	 *
+	 * 	prescaler = max(ilog2(period) / 2, 7) - 7;
+	 *
+	 * but given there are only four allowed results and that ilog2 isn't
+	 * cheap on all platforms using a switch statement is more effective.
 	 */
-	clk_rate = clk_get_rate(tpu->clk);
+	switch (period) {
+	case 1 ... 0xffff:
+		prescaler = 0;
+		break;
 
-	for (prescaler = 0; prescaler < ARRAY_SIZE(prescalers); ++prescaler) {
-		period = clk_rate / prescalers[prescaler]
-		       / (NSEC_PER_SEC / period_ns);
-		if (period <= 0xffff)
-			break;
-	}
+	case 0x10000 ... 0x3ffff:
+		prescaler = 1;
+		break;
 
-	if (prescaler == ARRAY_SIZE(prescalers) || period == 0) {
-		dev_err(&tpu->pdev->dev, "clock rate mismatch\n");
-		return -ENOTSUPP;
+	case 0x40000 ... 0xfffff:
+		prescaler = 2;
+		break;
+
+	case 0x100000 ... 0x3fffff:
+		prescaler = 3;
+		break;
+
+	default:
+		return -EINVAL;
 	}
 
+	period >>= 2 * prescaler;
+
 	if (duty_ns) {
-		duty = clk_rate / prescalers[prescaler]
+		duty = (clk_rate >> 2 * prescaler)
 		     / (NSEC_PER_SEC / duty_ns);
 		if (duty > period)
 			return -EINVAL;
@@ -283,7 +303,7 @@  static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	dev_dbg(&tpu->pdev->dev,
 		"rate %u, prescaler %u, period %u, duty %u\n",
-		clk_rate, prescalers[prescaler], period, duty);
+		clk_rate, 1 << (2 * prescaler), period, duty);
 
 	if (tpd->prescaler == prescaler && tpd->period == period)
 		duty_only = true;