diff mbox series

[v5,3/4] pwm: add microchip soft ip corePWM driver

Message ID 20220708143923.1129928-4-conor.dooley@microchip.com
State Changes Requested
Headers show
Series Microchip soft ip corePWM driver | expand

Commit Message

Conor Dooley July 8, 2022, 2:39 p.m. UTC
Add a driver that supports the Microchip FPGA "soft" PWM IP core.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---
 drivers/pwm/Kconfig              |  10 +
 drivers/pwm/Makefile             |   1 +
 drivers/pwm/pwm-microchip-core.c | 355 +++++++++++++++++++++++++++++++
 3 files changed, 366 insertions(+)
 create mode 100644 drivers/pwm/pwm-microchip-core.c

Comments

Uwe Kleine-König July 9, 2022, 4:02 p.m. UTC | #1
Hello Conor,

On Fri, Jul 08, 2022 at 03:39:22PM +0100, Conor Dooley wrote:
> Add a driver that supports the Microchip FPGA "soft" PWM IP core.
> 
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---
>  drivers/pwm/Kconfig              |  10 +
>  drivers/pwm/Makefile             |   1 +
>  drivers/pwm/pwm-microchip-core.c | 355 +++++++++++++++++++++++++++++++
>  3 files changed, 366 insertions(+)
>  create mode 100644 drivers/pwm/pwm-microchip-core.c
> 
> diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
> index 904de8d61828..007ea5750e73 100644
> --- a/drivers/pwm/Kconfig
> +++ b/drivers/pwm/Kconfig
> @@ -383,6 +383,16 @@ config PWM_MEDIATEK
>  	  To compile this driver as a module, choose M here: the module
>  	  will be called pwm-mediatek.
>  
> +config PWM_MICROCHIP_CORE
> +	tristate "Microchip corePWM PWM support"
> +	depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
> +	depends on HAS_IOMEM && OF
> +	help
> +	  PWM driver for Microchip FPGA soft IP core.
> +
> +	  To compile this driver as a module, choose M here: the module
> +	  will be called pwm-microchip-core.
> +
>  config PWM_MXS
>  	tristate "Freescale MXS PWM support"
>  	depends on ARCH_MXS || COMPILE_TEST
> diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
> index 5c08bdb817b4..43feb7cfc66a 100644
> --- a/drivers/pwm/Makefile
> +++ b/drivers/pwm/Makefile
> @@ -33,6 +33,7 @@ obj-$(CONFIG_PWM_LPSS_PCI)	+= pwm-lpss-pci.o
>  obj-$(CONFIG_PWM_LPSS_PLATFORM)	+= pwm-lpss-platform.o
>  obj-$(CONFIG_PWM_MESON)		+= pwm-meson.o
>  obj-$(CONFIG_PWM_MEDIATEK)	+= pwm-mediatek.o
> +obj-$(CONFIG_PWM_MICROCHIP_CORE)	+= pwm-microchip-core.o
>  obj-$(CONFIG_PWM_MTK_DISP)	+= pwm-mtk-disp.o
>  obj-$(CONFIG_PWM_MXS)		+= pwm-mxs.o
>  obj-$(CONFIG_PWM_NTXEC)		+= pwm-ntxec.o
> diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c
> new file mode 100644
> index 000000000000..3471eb2c8645
> --- /dev/null
> +++ b/drivers/pwm/pwm-microchip-core.c
> @@ -0,0 +1,355 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * corePWM driver for Microchip "soft" FPGA IP cores.
> + *
> + * Copyright (c) 2021-2022 Microchip Corporation. All rights reserved.
> + * Author: Conor Dooley <conor.dooley@microchip.com>
> + * Documentation:
> + * https://www.microsemi.com/document-portal/doc_download/1245275-corepwm-hb
> + *
> + * Limitations:
> + * - If the IP block is configured without "shadow registers", all register
> + *   writes will take effect immediately, causing glitches on the output.
> + *   If shadow registers *are* enabled, a write to the "SYNC_UPDATE" register
> + *   notifies the core that it needs to update the registers defining the
> + *   waveform from the contents of the "shadow registers".
> + * - The IP block has no concept of a duty cycle, only rising/falling edges of
> + *   the waveform. Unfortunately, if the rising & falling edges registers have
> + *   the same value written to them the IP block will do whichever of a rising
> + *   or a falling edge is possible. I.E. a 50% waveform at twice the requested
> + *   period. Therefore to get a 0% waveform, the output is set the max high/low
> + *   time depending on polarity.
> + * - The PWM period is set for the whole IP block not per channel. The driver
> + *   will only change the period if no other PWM output is enabled.
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/delay.h>
> +#include <linux/err.h>
> +#include <linux/io.h>
> +#include <linux/math.h>
> +#include <linux/module.h>
> +#include <linux/of_device.h>
> +#include <linux/platform_device.h>
> +#include <linux/pwm.h>
> +#include <linux/spinlock.h>
> +
> +#define PREG_TO_VAL(PREG) ((PREG) + 1)
> +
> +#define MCHPCOREPWM_PRESCALE_MAX	0x100
> +#define MCHPCOREPWM_PERIOD_STEPS_MAX	0xff
> +#define MCHPCOREPWM_PERIOD_MAX		0xff00
> +
> +#define MCHPCOREPWM_PRESCALE	0x00
> +#define MCHPCOREPWM_PERIOD	0x04
> +#define MCHPCOREPWM_EN(i)	(0x08 + 0x04 * (i)) /* 0x08, 0x0c */
> +#define MCHPCOREPWM_POSEDGE(i)	(0x10 + 0x08 * (i)) /* 0x10, 0x18, ..., 0x88 */
> +#define MCHPCOREPWM_NEGEDGE(i)	(0x14 + 0x08 * (i)) /* 0x14, 0x1c, ..., 0x8c */
> +#define MCHPCOREPWM_SYNC_UPD	0xe4
> +
> +struct mchp_core_pwm_chip {
> +	struct pwm_chip chip;
> +	struct clk *clk;
> +	spinlock_t lock; /* protect the shared period */
> +	void __iomem *base;
> +	u32 sync_update_mask;
> +};
> +
> +static inline struct mchp_core_pwm_chip *to_mchp_core_pwm(struct pwm_chip *chip)
> +{
> +	return container_of(chip, struct mchp_core_pwm_chip, chip);
> +}
> +
> +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm, bool enable)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	u8 channel_enable, reg_offset, shift;
> +
> +	/*
> +	 * There are two adjacent 8 bit control regs, the lower reg controls
> +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> +	 * and if so, offset by the bus width.
> +	 */
> +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> +	shift = pwm->hwpwm > 7 ? pwm->hwpwm - 8 : pwm->hwpwm;

I would have used

	shift = pwm->hwpwm & 7;

here. Maybe it's subjective, but for me it's a more obvious match to
pwm->hwpwm >> 3 then.

> +	spin_lock(&mchp_core_pwm->lock);
> +
> +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> +	channel_enable &= ~(1 << shift);
> +	channel_enable |= (enable << shift);
> +
> +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> +
> +	/*
> +	 * Write to the sync update registers so that channels with shadow
> +	 * registers will also get their enable update. This operation is a NOP
> +	 * for channels without shadow registers.
> +	 */
> +	writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> +
> +	spin_unlock(&mchp_core_pwm->lock);
> +}
> +
> +static void mchp_core_pwm_apply_duty(struct pwm_chip *chip, struct pwm_device *pwm,
> +				     const struct pwm_state *state, u8 prescale, u8 period_steps)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	u64 duty_steps, period, tmp;
> +	u8 posedge, negedge;
> +	u16 prescale_val = PREG_TO_VAL(prescale);
> +	u8 period_steps_val = PREG_TO_VAL(period_steps);
> +
> +	period = period_steps_val * prescale_val * NSEC_PER_SEC;
> +	period = DIV64_U64_ROUND_UP(period, clk_get_rate(mchp_core_pwm->clk));
> +
> +	/*
> +	 * Calculate the duty cycle in multiples of the prescaled period:
> +	 * duty_steps = duty_in_ns / step_in_ns
> +	 * step_in_ns = (prescale * NSEC_PER_SEC) / clk_rate
> +	 * The code below is rearranged slightly to only divide once.
> +	 *
> +	 * Because the period is per channel, it is possible that the requested
> +	 * duty cycle is longer than the period, in which case cap it to the
> +	 * period.
> +	 */
> +	if (state->duty_cycle > period) {
> +		duty_steps = period_steps_val;
> +	} else {
> +		duty_steps = state->duty_cycle * clk_get_rate(mchp_core_pwm->clk);
> +		tmp = prescale_val * NSEC_PER_SEC;
> +		duty_steps = div64_u64(duty_steps, tmp);
> +	}
> +
> +	/*
> +	 * Turn the output on unless posedge == negedge, in which case the
> +	 * duty is intended to be 0, but limitations of the IP block don't
> +	 * allow a zero length duty cycle - so just set the max high/low time
> +	 * respectively.
> +	 */
> +	if (state->polarity == PWM_POLARITY_INVERSED) {
> +		negedge = !duty_steps ? period_steps_val : 0u;
> +		posedge = duty_steps;
> +	} else {
> +		posedge = !duty_steps ? period_steps_val : 0u;
> +		negedge = duty_steps;
> +	}
> +
> +	writel_relaxed(posedge, mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
> +	writel_relaxed(negedge, mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
> +}
> +
> +static int mchp_core_pwm_apply_period(struct pwm_chip *chip, const struct pwm_state *state,
> +				      u8 *prescale, u8 *period_steps)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	u64 tmp, clk_rate;
> +	u16 prescale_val, period_steps_val;
> +
> +	/*
> +	 * Calculate the period cycles and prescale values.
> +	 * The registers are each 8 bits wide & multiplied to compute the period
> +	 * using the formula:
> +	 * (clock_period) * (prescale + 1) * (period_steps + 1)
> +	 * so the maximum period that can be generated is 0x10000 times the
> +	 * period of the input clock.
> +	 * However, due to the design of the "hardware", it is not possible to
> +	 * attain a 100% duty cycle if the full range of period_steps is used.
> +	 * Therefore period_steps is restricted to 0xFE and the maximum multiple
> +	 * of the clock period attainable is 0xFF00.
> +	 */
> +	clk_rate = clk_get_rate(mchp_core_pwm->clk);

+	/* If clk_rate is too big, the following multiplication might overflow */

> +	if (clk_rate >= NSEC_PER_SEC)
> +		return -EINVAL;
> +
> +	tmp = mul_u64_u64_div_u64(state->period, clk_rate, NSEC_PER_SEC);
> +
> +	if (tmp >= MCHPCOREPWM_PERIOD_MAX) {
> +		*prescale = MCHPCOREPWM_PRESCALE_MAX - 1;
> +		*period_steps = MCHPCOREPWM_PERIOD_STEPS_MAX - 1;
> +		goto write_registers;
> +	}
> +
> +	for (prescale_val = 1; prescale_val <= MCHPCOREPWM_PRESCALE_MAX; prescale_val++) {
> +		period_steps_val = div_u64(tmp, prescale_val);
> +		if (period_steps_val > MCHPCOREPWM_PERIOD_STEPS_MAX)
> +			continue;
> +		*period_steps = period_steps_val - 1;
> +		*prescale = prescale_val - 1;
> +		break;
> +	}

OK, so you want to find the smallest prescale_val such that

	prescale_val * MCHPCOREPWM_PERIOD_STEPS_MAX >= tmp

. You can calculate that without a loop as:

	prescale_val = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);


> +
> +write_registers:
> +	writel_relaxed(*prescale, mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
> +	writel_relaxed(*period_steps, mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
> +
> +	return 0;
> +}
> +
> +static int mchp_core_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
> +			       const struct pwm_state *state)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	struct pwm_state current_state = pwm->state;
> +	bool period_locked;
> +	u64 period;
> +	u16 channel_enabled;
> +	u8 prescale, period_steps;
> +	int ret;
> +
> +	if (!state->enabled) {
> +		mchp_core_pwm_enable(chip, pwm, false);
> +		return 0;
> +	}
> +
> +	/*
> +	 * If the only thing that has changed is the duty cycle or the polarity,
> +	 * we can shortcut the calculations and just compute/apply the new duty
> +	 * cycle pos & neg edges
> +	 * As all the channels share the same period, do not allow it to be
> +	 * changed if any other channels are enabled.
> +	 */
> +	spin_lock(&mchp_core_pwm->lock);
> +
> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
> +	period_locked = channel_enabled & ~(1 << pwm->hwpwm);
> +
> +	if ((!current_state.enabled || current_state.period != state->period) && !period_locked) {
> +		ret = mchp_core_pwm_apply_period(chip, state, &prescale, &period_steps);
> +		if (ret) {
> +			spin_unlock(&mchp_core_pwm->lock);
> +			return ret;
> +		}
> +	} else {
> +		prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
> +		period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
> +	}
> +
> +	/*
> +	 * If the period is locked, it may not be possible to use a period less
> +	 * than that requested.
> +	 */
> +	period = PREG_TO_VAL(period_steps) *  PREG_TO_VAL(prescale) * NSEC_PER_SEC;

s/  / /

> +	do_div(period, clk_get_rate(mchp_core_pwm->clk));
> +	if (period > state->period) {
> +		spin_unlock(&mchp_core_pwm->lock);
> +		return -EINVAL;
> +	}

I would consider it easier to do the following (in pseudo syntax):

	prescale, period_steps = calculate_hwperiod(period);

	if (period_locked):
		hwprescale = readb_relaxed(PRESCALE)
		hwperiod_steps = readb_relaxed(PERIOD)

		if period_steps * prescale < hwperiod_steps * hwprescale:
			return -EINVAL
		else
			prescale, period_steps = hwprescale,
			hwperiod_steps
	
	duty_steps = calculate_hwduty(duty, prescale)
	if (duty_steps > period_steps)
		duty_steps = period_steps


> +	mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
> +
> +	/*
> +	 * Notify the block to update the waveform from the shadow registers.
> +	 * The updated values will not appear on the bus until they have been
> +	 * applied to the waveform at the beginning of the next period. We must
> +	 * write these registers and wait for them to be applied before calling
> +	 * enable().
> +	 */
> +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> +		usleep_range(state->period, state->period * 2);
> +	}
> +
> +	spin_unlock(&mchp_core_pwm->lock);
> +
> +	mchp_core_pwm_enable(chip, pwm, true);

I already asked in the last round: Do you really need to write the
SYNC_UPD register twice? I would expect that you don't?!

Also the locking looks fishy here. It would be simpler (and maybe even
more robust, didn't think deeply about it) to assume in
mchp_core_pwm_enable() that the caller holds the lock. Then you only
grab the lock once during .apply() and nothing strange can happen in the
gap.

> +	return 0;
> +}
> +
> +static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
> +				    struct pwm_state *state)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	u8 prescale, period_steps, duty_steps;
> +	u8 posedge, negedge;
> +	u16 channel_enabled;
> +

I'd take the lock here to be sure to get a consistent return value.

> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));

micro optimisation: You're reading two register values here, but only use
one. Shadowing the enabled registers in mchp_core_pwm might also be an
idea.

> +	if (channel_enabled & 1 << pwm->hwpwm)

I'm always unsure about the associativity of & and <<, so I would have
written that as

	if (channel_enabled & (1 << pwm->hwpwm))

I just tested that for the umpteens time and your code is fine, so this
is only for human readers like me.

> +		state->enabled = true;
> +	else
> +		state->enabled = false;
> +
> +	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
> +
> +	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
> +	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
> +
> +	duty_steps = abs((s16)posedge - (s16)negedge);

If duty_steps == 0 the returned result is wrong. I suggest to fix that,
at least mention the problem in a comment.

> +	state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;

Can this overflow?

> +	do_div(state->duty_cycle, clk_get_rate(mchp_core_pwm->clk));

What is the typical return value of clk_get_rate(mchp_core_pwm->clk)?
You need to round up here. Did you test your driver with PWM_DEBUG on?
During test please do for a few fixed periods:

	for duty_cycle in [0 .. period]:
		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})

	for duty_cycle in [period .. 0]:
		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})

and check there is no output claiming a miscalculation.

> +	state->polarity = negedge < posedge ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
> +
> +	period_steps = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD));
> +	state->period = period_steps * prescale * NSEC_PER_SEC;
> +	do_div(state->period, clk_get_rate(mchp_core_pwm->clk));

you need to round up here, too.

(The reasoning for the rounding direction is that applying the state
returned by .get_state() should not change the hw settings. If you round
down in both .apply() and .get_state() this doesn't work.)

> +}
> +
> +static const struct pwm_ops mchp_core_pwm_ops = {
> +	.apply = mchp_core_pwm_apply,
> +	.get_state = mchp_core_pwm_get_state,
> +	.owner = THIS_MODULE,
> +};
> +
> +static const struct of_device_id mchp_core_of_match[] = {
> +	{
> +		.compatible = "microchip,corepwm-rtl-v4",
> +	},
> +	{ /* sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(of, mchp_core_of_match);
> +
> +static int mchp_core_pwm_probe(struct platform_device *pdev)
> +{
> +	struct mchp_core_pwm_chip *mchp_pwm;
> +	struct resource *regs;
> +	int ret;
> +
> +	mchp_pwm = devm_kzalloc(&pdev->dev, sizeof(*mchp_pwm), GFP_KERNEL);
> +	if (!mchp_pwm)
> +		return -ENOMEM;
> +
> +	mchp_pwm->base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
> +	if (IS_ERR(mchp_pwm->base))
> +		return PTR_ERR(mchp_pwm->base);
> +
> +	mchp_pwm->clk = devm_clk_get_enabled(&pdev->dev, NULL);
> +	if (IS_ERR(mchp_pwm->clk))
> +		return PTR_ERR(mchp_pwm->clk);

		return dev_err_probe(....)

> +
> +	if (of_property_read_u32(pdev->dev.of_node, "microchip,sync-update-mask",
> +				 &mchp_pwm->sync_update_mask))
> +		mchp_pwm->sync_update_mask = 0u;
> +
> +	spin_lock_init(&mchp_pwm->lock);
> +
> +	mchp_pwm->chip.dev = &pdev->dev;
> +	mchp_pwm->chip.ops = &mchp_core_pwm_ops;
> +	mchp_pwm->chip.npwm = 16;
> +
> +	ret = devm_pwmchip_add(&pdev->dev, &mchp_pwm->chip);
> +	if (ret < 0)
> +		return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
> +
> +	platform_set_drvdata(pdev, mchp_pwm);

This is not necessary any more now after .remove() is gone.

> +
> +	return 0;
> +}
> +

Best regards
Uwe
Conor Dooley July 9, 2022, 4:21 p.m. UTC | #2
On 09/07/2022 17:02, Uwe Kleine-König wrote:
> Hello Conor,
> 
> On Fri, Jul 08, 2022 at 03:39:22PM +0100, Conor Dooley wrote:
>> Add a driver that supports the Microchip FPGA "soft" PWM IP core.
>>
>> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---8<---
>> +	mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
>> +
>> +	/*
>> +	 * Notify the block to update the waveform from the shadow registers.
>> +	 * The updated values will not appear on the bus until they have been
>> +	 * applied to the waveform at the beginning of the next period. We must
>> +	 * write these registers and wait for them to be applied before calling
>> +	 * enable().
>> +	 */
>> +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
>> +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
>> +		usleep_range(state->period, state->period * 2);
>> +	}
>> +
>> +	spin_unlock(&mchp_core_pwm->lock);
>> +
>> +	mchp_core_pwm_enable(chip, pwm, true);
> 
> I already asked in the last round: Do you really need to write the
> SYNC_UPD register twice? I would expect that you don't?!

Sorry, I thought that I had replied to this on Friday, didn't
meant to ignore you.

Yes, I do need to keep that - otherwise there are problems when
turning on the PWM channel for the first time.
Before writing to the enable registers, we need to make sure that
the values have been applied since both pos and neg edge registers
come out of reset set to 0x0.

> 
> Also the locking looks fishy here. It would be simpler (and maybe even
> more robust, didn't think deeply about it) to assume in
> mchp_core_pwm_enable() that the caller holds the lock. Then you only
> grab the lock once during .apply() and nothing strange can happen in the
> gap.
> 
>> +	return 0;
>> +}
>> +
>> +static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
>> +				    struct pwm_state *state)
>> +{
>> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
>> +	u8 prescale, period_steps, duty_steps;
>> +	u8 posedge, negedge;
>> +	u16 channel_enabled;
>> +
> 
> I'd take the lock here to be sure to get a consistent return value.
> 
>> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
>> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
> 
> micro optimisation: You're reading two register values here, but only use
> one. Shadowing the enabled registers in mchp_core_pwm might also be an
> idea.
> 
>> +	if (channel_enabled & 1 << pwm->hwpwm)
> 
> I'm always unsure about the associativity of & and <<, so I would have
> written that as
> 
> 	if (channel_enabled & (1 << pwm->hwpwm))
> 
> I just tested that for the umpteens time and your code is fine, so this
> is only for human readers like me.
> 
>> +		state->enabled = true;
>> +	else
>> +		state->enabled = false;
>> +
>> +	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
>> +
>> +	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
>> +	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
>> +
>> +	duty_steps = abs((s16)posedge - (s16)negedge);
> 
> If duty_steps == 0 the returned result is wrong. I suggest to fix that,
> at least mention the problem in a comment.

Ah right yeah, I didn't update this after changing the other logic. Sorry.

> 
>> +	state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
> 
> Can this overflow?
> 
>> +	do_div(state->duty_cycle, clk_get_rate(mchp_core_pwm->clk));
> 
> What is the typical return value of clk_get_rate(mchp_core_pwm->clk)?

It's gonna be less than 600M
> You need to round up here. Did you test your driver with PWM_DEBUG on?
> During test please do for a few fixed periods:
> 
> 	for duty_cycle in [0 .. period]:
> 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
> 
> 	for duty_cycle in [period .. 0]:
> 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
> 
> and check there is no output claiming a miscalculation.

I ran the stuff you gave me last time, doing something similar w/ a
shell loop. Got no reported miscalculations.
I'll add explicit rounding though.

> 
>> +	state->polarity = negedge < posedge ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
>> +
>> +	period_steps = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD));
>> +	state->period = period_steps * prescale * NSEC_PER_SEC;
>> +	do_div(state->period, clk_get_rate(mchp_core_pwm->clk));
> 
> you need to round up here, too.
> 
> (The reasoning for the rounding direction is that applying the state
> returned by .get_state() should not change the hw settings. If you round
> down in both .apply() and .get_state() this doesn't work.)
> 
>> +}
>> +

The rest of this all seems fair to me & I'll spin up something
in the coming week.
Thanks,
Conor
Uwe Kleine-König July 9, 2022, 4:37 p.m. UTC | #3
Hello Conor,

On Sat, Jul 09, 2022 at 04:21:46PM +0000, Conor.Dooley@microchip.com wrote:
> On 09/07/2022 17:02, Uwe Kleine-König wrote:
> > On Fri, Jul 08, 2022 at 03:39:22PM +0100, Conor Dooley wrote:
> >> Add a driver that supports the Microchip FPGA "soft" PWM IP core.
> >>
> >> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---8<---
> >> +	mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
> >> +
> >> +	/*
> >> +	 * Notify the block to update the waveform from the shadow registers.
> >> +	 * The updated values will not appear on the bus until they have been
> >> +	 * applied to the waveform at the beginning of the next period. We must
> >> +	 * write these registers and wait for them to be applied before calling
> >> +	 * enable().
> >> +	 */
> >> +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> >> +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> >> +		usleep_range(state->period, state->period * 2);
> >> +	}
> >> +
> >> +	spin_unlock(&mchp_core_pwm->lock);
> >> +
> >> +	mchp_core_pwm_enable(chip, pwm, true);
> > 
> > I already asked in the last round: Do you really need to write the
> > SYNC_UPD register twice? I would expect that you don't?!
> 
> Sorry, I thought that I had replied to this on Friday, didn't
> meant to ignore you.
> 
> Yes, I do need to keep that - otherwise there are problems when
> turning on the PWM channel for the first time.

How unintuitive and unfortunate. I wonder if there is an upside of this
approach that I'm missing.

> Before writing to the enable registers, we need to make sure that
> the values have been applied since both pos and neg edge registers
> come out of reset set to 0x0.

I always like to understand the hardware, can you explain the problems
in more details?

> > Also the locking looks fishy here. It would be simpler (and maybe even
> > more robust, didn't think deeply about it) to assume in
> > mchp_core_pwm_enable() that the caller holds the lock. Then you only
> > grab the lock once during .apply() and nothing strange can happen in the
> > gap.
> > 
> >> +	return 0;
> >> +}
> >> +
> >> +static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
> >> +				    struct pwm_state *state)
> >> +{
> >> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> >> +	u8 prescale, period_steps, duty_steps;
> >> +	u8 posedge, negedge;
> >> +	u16 channel_enabled;
> >> +
> > 
> > I'd take the lock here to be sure to get a consistent return value.
> > 
> >> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
> >> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
> > 
> > micro optimisation: You're reading two register values here, but only use
> > one. Shadowing the enabled registers in mchp_core_pwm might also be an
> > idea.
> > 
> >> +	if (channel_enabled & 1 << pwm->hwpwm)
> > 
> > I'm always unsure about the associativity of & and <<, so I would have
> > written that as
> > 
> > 	if (channel_enabled & (1 << pwm->hwpwm))
> > 
> > I just tested that for the umpteens time and your code is fine, so this
> > is only for human readers like me.
> > 
> >> +		state->enabled = true;
> >> +	else
> >> +		state->enabled = false;
> >> +
> >> +	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
> >> +
> >> +	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
> >> +	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
> >> +
> >> +	duty_steps = abs((s16)posedge - (s16)negedge);
> > 
> > If duty_steps == 0 the returned result is wrong. I suggest to fix that,
> > at least mention the problem in a comment.
> 
> Ah right yeah, I didn't update this after changing the other logic. Sorry.
> 
> > 
> >> +	state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
> > 
> > Can this overflow?
> > 
> >> +	do_div(state->duty_cycle, clk_get_rate(mchp_core_pwm->clk));
> > 
> > What is the typical return value of clk_get_rate(mchp_core_pwm->clk)?
> 
> It's gonna be less than 600M

An exact value would be interesting, then when I spot a rounding problem
I could give you a test case to double check.

> > You need to round up here. Did you test your driver with PWM_DEBUG on?
> > During test please do for a few fixed periods:
> > 
> > 	for duty_cycle in [0 .. period]:
> > 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
> > 
> > 	for duty_cycle in [period .. 0]:
> > 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
> > 
> > and check there is no output claiming a miscalculation.
> 
> I ran the stuff you gave me last time, doing something similar w/ a
> shell loop. Got no reported miscalculations.

I'm surprise, I would have expected that my test recipe would find such
an issue. Could you follow my arguing about the rounding direction?
There always the possibility that I'm wrong, too.

Best regards
Uwe
Conor Dooley July 9, 2022, 4:56 p.m. UTC | #4
On 09/07/2022 17:37, Uwe Kleine-König wrote:
> Hello Conor,
> 
> On Sat, Jul 09, 2022 at 04:21:46PM +0000, Conor.Dooley@microchip.com wrote:
>> On 09/07/2022 17:02, Uwe Kleine-König wrote:
>>> On Fri, Jul 08, 2022 at 03:39:22PM +0100, Conor Dooley wrote:
>>>> Add a driver that supports the Microchip FPGA "soft" PWM IP core.
>>>>
>>>> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>> ---8<---
>>>> +	mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
>>>> +
>>>> +	/*
>>>> +	 * Notify the block to update the waveform from the shadow registers.
>>>> +	 * The updated values will not appear on the bus until they have been
>>>> +	 * applied to the waveform at the beginning of the next period. We must
>>>> +	 * write these registers and wait for them to be applied before calling
>>>> +	 * enable().
>>>> +	 */
>>>> +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
>>>> +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
>>>> +		usleep_range(state->period, state->period * 2);
>>>> +	}
>>>> +
>>>> +	spin_unlock(&mchp_core_pwm->lock);
>>>> +
>>>> +	mchp_core_pwm_enable(chip, pwm, true);
>>>
>>> I already asked in the last round: Do you really need to write the
>>> SYNC_UPD register twice? I would expect that you don't?!
>>
>> Sorry, I thought that I had replied to this on Friday, didn't
>> meant to ignore you.
>>
>> Yes, I do need to keep that - otherwise there are problems when
>> turning on the PWM channel for the first time.
> 
> How unintuitive and unfortunate. I wonder if there is an upside of this
> approach that I'm missing.

Simplicity of the sythesised design maybe?
Given one of the things the manual talks about is the LUT savings by
turning off shadowing it would not surprise me.

> 
>> Before writing to the enable registers, we need to make sure that
>> the values have been applied since both pos and neg edge registers
>> come out of reset set to 0x0.
> 
> I always like to understand the hardware, can you explain the problems
> in more details?

The TLDR is if I don't, the channel gets enabled w/ the 50% duty cycle
problem. From glancing at the RTL the other day, it looks like it is
down to the how the if statement that decides the output level is
ordered.

Depending on how bored I get tonight/tomorrow, I'll give you a better
answer then or during the week.

> 
>>> Also the locking looks fishy here. It would be simpler (and maybe even
>>> more robust, didn't think deeply about it) to assume in
>>> mchp_core_pwm_enable() that the caller holds the lock. Then you only
>>> grab the lock once during .apply() and nothing strange can happen in the
>>> gap.
>>>
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
>>>> +				    struct pwm_state *state)
>>>> +{
>>>> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
>>>> +	u8 prescale, period_steps, duty_steps;
>>>> +	u8 posedge, negedge;
>>>> +	u16 channel_enabled;
>>>> +
>>>
>>> I'd take the lock here to be sure to get a consistent return value.
>>>
>>>> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
>>>> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
>>>
>>> micro optimisation: You're reading two register values here, but only use
>>> one. Shadowing the enabled registers in mchp_core_pwm might also be an
>>> idea.
>>>
>>>> +	if (channel_enabled & 1 << pwm->hwpwm)
>>>
>>> I'm always unsure about the associativity of & and <<, so I would have
>>> written that as
>>>
>>> 	if (channel_enabled & (1 << pwm->hwpwm))
>>>
>>> I just tested that for the umpteens time and your code is fine, so this
>>> is only for human readers like me.
>>>
>>>> +		state->enabled = true;
>>>> +	else
>>>> +		state->enabled = false;
>>>> +
>>>> +	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
>>>> +
>>>> +	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
>>>> +	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
>>>> +
>>>> +	duty_steps = abs((s16)posedge - (s16)negedge);
>>>
>>> If duty_steps == 0 the returned result is wrong. I suggest to fix that,
>>> at least mention the problem in a comment.
>>
>> Ah right yeah, I didn't update this after changing the other logic. Sorry.
>>
>>>
>>>> +	state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
>>>
>>> Can this overflow?
>>>
>>>> +	do_div(state->duty_cycle, clk_get_rate(mchp_core_pwm->clk));
>>>
>>> What is the typical return value of clk_get_rate(mchp_core_pwm->clk)?
>>
>> It's gonna be less than 600M
> 
> An exact value would be interesting, then when I spot a rounding problem
> I could give you a test case to double check.

Yeah, I am not sure what the maximum clock rates allowed in the FPGA fabric
are off the top of my head. 600 M was a sane limit b/c that's what the core
complex runs at. Said it more to say that it wouldn't be >NS_PER_SEC

>>> You need to round up here. Did you test your driver with PWM_DEBUG on?
>>> During test please do for a few fixed periods:
>>>
>>> 	for duty_cycle in [0 .. period]:
>>> 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
>>>
>>> 	for duty_cycle in [period .. 0]:
>>> 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
>>>
>>> and check there is no output claiming a miscalculation.
>>
>> I ran the stuff you gave me last time, doing something similar w/ a
>> shell loop. Got no reported miscalculations.
> 
> I'm surprise, I would have expected that my test recipe would find such
> an issue. Could you follow my arguing about the rounding direction?
> There always the possibility that I'm wrong, too.

I'll take another look & get back to you.
Thanks for the review.
Conor.
Conor Dooley July 11, 2022, 2:33 p.m. UTC | #5
Hey Uwe, took another look at this today.
I'll give you some more info on the sync_update hw when I send
the next version.

On 09/07/2022 17:02, Uwe Kleine-König wrote:
> Hello Conor,
> 
> On Fri, Jul 08, 2022 at 03:39:22PM +0100, Conor Dooley wrote:
>> Add a driver that supports the Microchip FPGA "soft" PWM IP core.
>>
>> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>> ---

>> +static int mchp_core_pwm_apply_period(struct pwm_chip *chip, const struct pwm_state *state,
>> +				      u8 *prescale, u8 *period_steps)
>> +{
>> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
>> +	u64 tmp, clk_rate;
>> +	u16 prescale_val, period_steps_val;
>> +
>> +	/*
>> +	 * Calculate the period cycles and prescale values.
>> +	 * The registers are each 8 bits wide & multiplied to compute the period
>> +	 * using the formula:
>> +	 * (clock_period) * (prescale + 1) * (period_steps + 1)
>> +	 * so the maximum period that can be generated is 0x10000 times the
>> +	 * period of the input clock.
>> +	 * However, due to the design of the "hardware", it is not possible to
>> +	 * attain a 100% duty cycle if the full range of period_steps is used.
>> +	 * Therefore period_steps is restricted to 0xFE and the maximum multiple
>> +	 * of the clock period attainable is 0xFF00.
>> +	 */
>> +	clk_rate = clk_get_rate(mchp_core_pwm->clk);
> 
> +	/* If clk_rate is too big, the following multiplication might overflow */


I expanded this comment slightly to:
/*
* If clk_rate is too big, the following multiplication might overflow.
* However this is implausible, as the fabric of current FPGAs cannot
* provide clocks at a rate high enough.
*/

>> +	if (clk_rate >= NSEC_PER_SEC)
>> +		return -EINVAL;
>> +
>> +	tmp = mul_u64_u64_div_u64(state->period, clk_rate, NSEC_PER_SEC);
>> +
>> +	if (tmp >= MCHPCOREPWM_PERIOD_MAX) {
>> +		*prescale = MCHPCOREPWM_PRESCALE_MAX - 1;
>> +		*period_steps = MCHPCOREPWM_PERIOD_STEPS_MAX - 1;
>> +		goto write_registers;
>> +	}
>> +
>> +	for (prescale_val = 1; prescale_val <= MCHPCOREPWM_PRESCALE_MAX; prescale_val++) {
>> +		period_steps_val = div_u64(tmp, prescale_val);
>> +		if (period_steps_val > MCHPCOREPWM_PERIOD_STEPS_MAX)
>> +			continue;
>> +		*period_steps = period_steps_val - 1;
>> +		*prescale = prescale_val - 1;
>> +		break;
>> +	}
> 
> OK, so you want to find the smallest prescale_val such that
> 
> 	prescale_val * MCHPCOREPWM_PERIOD_STEPS_MAX >= tmp
> 
> . You can calculate that without a loop as:
> 
> 	prescale_val = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);

Hmm, not quite right. For tmp < MCHPCOREPWM_PERIOD_STEPS_MAX this gives
zero. It would have to be:
*prescale = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);

In which case, the loop collapses to:

*prescale = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);
/* PREG_TO_VAL() can produce a value larger than UINT8_MAX */
*period_steps = div_u64(tmp, PREG_TO_VAL((u32) *prescale)) - 1;

and the interim _val variables can just be deleted.


>> +static int mchp_core_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
>> +			       const struct pwm_state *state)
>> +{
>> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
>> +	struct pwm_state current_state = pwm->state;
>> +	bool period_locked;
>> +	u64 period;
>> +	u16 channel_enabled;
>> +	u8 prescale, period_steps;
>> +	int ret;
>> +
>> +	if (!state->enabled) {
>> +		mchp_core_pwm_enable(chip, pwm, false);
>> +		return 0;
>> +	}
>> +
>> +	/*
>> +	 * If the only thing that has changed is the duty cycle or the polarity,
>> +	 * we can shortcut the calculations and just compute/apply the new duty
>> +	 * cycle pos & neg edges
>> +	 * As all the channels share the same period, do not allow it to be
>> +	 * changed if any other channels are enabled.
>> +	 */
>> +	spin_lock(&mchp_core_pwm->lock);
>> +
>> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
>> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
>> +	period_locked = channel_enabled & ~(1 << pwm->hwpwm);
>> +
>> +	if ((!current_state.enabled || current_state.period != state->period) && !period_locked) {
>> +		ret = mchp_core_pwm_apply_period(chip, state, &prescale, &period_steps);
>> +		if (ret) {
>> +			spin_unlock(&mchp_core_pwm->lock);
>> +			return ret;
>> +		}
>> +	} else {
>> +		prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
>> +		period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
>> +	}
>> +
>> +	/*
>> +	 * If the period is locked, it may not be possible to use a period less
>> +	 * than that requested.
>> +	 */
>> +	period = PREG_TO_VAL(period_steps) *  PREG_TO_VAL(prescale) * NSEC_PER_SEC;
> 
> s/  / /
> 
>> +	do_div(period, clk_get_rate(mchp_core_pwm->clk));
>> +	if (period > state->period) {
>> +		spin_unlock(&mchp_core_pwm->lock);
>> +		return -EINVAL;
>> +	}
> 
> I would consider it easier to do the following (in pseudo syntax):
> 
> 
> 	prescale, period_steps = calculate_hwperiod(period);
> 
> 	if (period_locked):
> 		hwprescale = readb_relaxed(PRESCALE)
> 		hwperiod_steps = readb_relaxed(PERIOD)
> 
> 		if period_steps * prescale < hwperiod_steps * hwprescale:
> 			return -EINVAL
> 		else
> 			prescale, period_steps = hwprescale,
> 			hwperiod_steps

I think I like this method more than messing around with the clks.
I'll change to something like this for the next version.

> 	duty_steps = calculate_hwduty(duty, prescale)
> 	if (duty_steps > period_steps)
> 		duty_steps = period_steps


> 
>> +	mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
>> +
>> +	/*
>> +	 * Notify the block to update the waveform from the shadow registers.
>> +	 * The updated values will not appear on the bus until they have been
>> +	 * applied to the waveform at the beginning of the next period. We must
>> +	 * write these registers and wait for them to be applied before calling
>> +	 * enable().
>> +	 */
>> +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
>> +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
>> +		usleep_range(state->period, state->period * 2);
>> +	}
>> +
>> +	spin_unlock(&mchp_core_pwm->lock);
>> +
>> +	mchp_core_pwm_enable(chip, pwm, true);
> 
> I already asked in the last round: Do you really need to write the
> SYNC_UPD register twice? I would expect that you don't?!
> 
> Also the locking looks fishy here. It would be simpler (and maybe even
> more robust, didn't think deeply about it) to assume in
> mchp_core_pwm_enable() that the caller holds the lock. Then you only
> grab the lock once during .apply() and nothing strange can happen in the
> gap.

I got it into my head that enable() could be called by the framework.
I'll simplify the locking here.

> I'd take the lock here to be sure to get a consistent return value.
> 
>> +	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
>> +		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
> 
> micro optimisation: You're reading two register values here, but only use
> one. Shadowing the enabled registers in mchp_core_pwm might also be an
> idea.

I'd agree, but more from the perspective of how awful I feel this code
looks.

> 
>> +	if (channel_enabled & 1 << pwm->hwpwm)
> 
> I'm always unsure about the associativity of & and <<, so I would have
> written that as
> 
> 	if (channel_enabled & (1 << pwm->hwpwm))
> 
> I just tested that for the umpteens time and your code is fine, so this
> is only for human readers like me.

I'll change it, I'll prob have forgotten the associativity by the time I
look at the driver next.

> 
>> +		state->enabled = true;
>> +	else
>> +		state->enabled = false;
>> +
>> +	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
>> +
>> +	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
>> +	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
>> +
>> +	duty_steps = abs((s16)posedge - (s16)negedge);
> 
> If duty_steps == 0 the returned result is wrong. I suggest to fix that,
> at least mention the problem in a comment.

I think handling it is the way to go.

> 
>> +	state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
> 
> Can this overflow?

No, 255 * 256 * 1E9 < 2^64 but ...

>> +	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));

... can.

>>> What is the typical return value of clk_get_rate(mchp_core_pwm->clk)?
>>
>> It's gonna be less than 600M
>
> An exact value would be interesting, then when I spot a rounding problem
> I could give you a test case to double check.

The maximum depends on speed grade, but no more than 200 MHz.


>>>> You need to round up here. Did you test your driver with PWM_DEBUG on?
>>>> During test please do for a few fixed periods:
>>>>
>>>> 	for duty_cycle in [0 .. period]:
>>>> 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
>>>>
>>>> 	for duty_cycle in [period .. 0]:
>>>> 		pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
>>>>
>>>> and check there is no output claiming a miscalculation.
>>>
>>> I ran the stuff you gave me last time, doing something similar w/ a
>>> shell loop. Got no reported miscalculations.
>>
>> I'm surprise, I would have expected that my test recipe would find such
>> an issue. Could you follow my arguing about the rounding direction?
>> There always the possibility that I'm wrong, too.
>
> I'll take another look & get back to you.

I *think* I might've spelt "PWM_DEBUG" as "PWM_DEBUF"...

I'll retest!

Thanks,
Conor.
Conor Dooley July 12, 2022, 10:57 a.m. UTC | #6
On 11/07/2022 15:33, Conor Dooley wrote:
> Hey Uwe, took another look at this today.
> I'll give you some more info on the sync_update hw when I send
> the next version.

Ehh, I don't think that'll be needed - I had misconfigured my
devicetree while testing that change & that resulted in only one
of the writes to sync_update actually happening.
That just happened to be the one that didn't sleep, so the fact
that I used a spinlock for my lock didn't cause any problems.

Will switch to a mutex and remove one of the writes to the sync
update register...

/facepalm,
Conor.

> 
> On 09/07/2022 17:02, Uwe Kleine-König wrote:
>> Hello Conor,
>>
>> On Fri, Jul 08, 2022 at 03:39:22PM +0100, Conor Dooley wrote:
>>> Add a driver that supports the Microchip FPGA "soft" PWM IP core.
>>>
>>> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>>> ---
> 
>>> +static int mchp_core_pwm_apply_period(struct pwm_chip *chip, const struct pwm_state *state,
>>> +                      u8 *prescale, u8 *period_steps)
>>> +{
>>> +    struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
>>> +    u64 tmp, clk_rate;
>>> +    u16 prescale_val, period_steps_val;
>>> +
>>> +    /*
>>> +     * Calculate the period cycles and prescale values.
>>> +     * The registers are each 8 bits wide & multiplied to compute the period
>>> +     * using the formula:
>>> +     * (clock_period) * (prescale + 1) * (period_steps + 1)
>>> +     * so the maximum period that can be generated is 0x10000 times the
>>> +     * period of the input clock.
>>> +     * However, due to the design of the "hardware", it is not possible to
>>> +     * attain a 100% duty cycle if the full range of period_steps is used.
>>> +     * Therefore period_steps is restricted to 0xFE and the maximum multiple
>>> +     * of the clock period attainable is 0xFF00.
>>> +     */
>>> +    clk_rate = clk_get_rate(mchp_core_pwm->clk);
>>
>> +    /* If clk_rate is too big, the following multiplication might overflow */
> 
> 
> I expanded this comment slightly to:
> /*
> * If clk_rate is too big, the following multiplication might overflow.
> * However this is implausible, as the fabric of current FPGAs cannot
> * provide clocks at a rate high enough.
> */
> 
>>> +    if (clk_rate >= NSEC_PER_SEC)
>>> +        return -EINVAL;
>>> +
>>> +    tmp = mul_u64_u64_div_u64(state->period, clk_rate, NSEC_PER_SEC);
>>> +
>>> +    if (tmp >= MCHPCOREPWM_PERIOD_MAX) {
>>> +        *prescale = MCHPCOREPWM_PRESCALE_MAX - 1;
>>> +        *period_steps = MCHPCOREPWM_PERIOD_STEPS_MAX - 1;
>>> +        goto write_registers;
>>> +    }
>>> +
>>> +    for (prescale_val = 1; prescale_val <= MCHPCOREPWM_PRESCALE_MAX; prescale_val++) {
>>> +        period_steps_val = div_u64(tmp, prescale_val);
>>> +        if (period_steps_val > MCHPCOREPWM_PERIOD_STEPS_MAX)
>>> +            continue;
>>> +        *period_steps = period_steps_val - 1;
>>> +        *prescale = prescale_val - 1;
>>> +        break;
>>> +    }
>>
>> OK, so you want to find the smallest prescale_val such that
>>
>>     prescale_val * MCHPCOREPWM_PERIOD_STEPS_MAX >= tmp
>>
>> . You can calculate that without a loop as:
>>
>>     prescale_val = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);
> 
> Hmm, not quite right. For tmp < MCHPCOREPWM_PERIOD_STEPS_MAX this gives
> zero. It would have to be:
> *prescale = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);
> 
> In which case, the loop collapses to:
> 
> *prescale = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);
> /* PREG_TO_VAL() can produce a value larger than UINT8_MAX */
> *period_steps = div_u64(tmp, PREG_TO_VAL((u32) *prescale)) - 1;
> 
> and the interim _val variables can just be deleted.
> 
> 
>>> +static int mchp_core_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
>>> +                   const struct pwm_state *state)
>>> +{
>>> +    struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
>>> +    struct pwm_state current_state = pwm->state;
>>> +    bool period_locked;
>>> +    u64 period;
>>> +    u16 channel_enabled;
>>> +    u8 prescale, period_steps;
>>> +    int ret;
>>> +
>>> +    if (!state->enabled) {
>>> +        mchp_core_pwm_enable(chip, pwm, false);
>>> +        return 0;
>>> +    }
>>> +
>>> +    /*
>>> +     * If the only thing that has changed is the duty cycle or the polarity,
>>> +     * we can shortcut the calculations and just compute/apply the new duty
>>> +     * cycle pos & neg edges
>>> +     * As all the channels share the same period, do not allow it to be
>>> +     * changed if any other channels are enabled.
>>> +     */
>>> +    spin_lock(&mchp_core_pwm->lock);
>>> +
>>> +    channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
>>> +        readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
>>> +    period_locked = channel_enabled & ~(1 << pwm->hwpwm);
>>> +
>>> +    if ((!current_state.enabled || current_state.period != state->period) && !period_locked) {
>>> +        ret = mchp_core_pwm_apply_period(chip, state, &prescale, &period_steps);
>>> +        if (ret) {
>>> +            spin_unlock(&mchp_core_pwm->lock);
>>> +            return ret;
>>> +        }
>>> +    } else {
>>> +        prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
>>> +        period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
>>> +    }
>>> +
>>> +    /*
>>> +     * If the period is locked, it may not be possible to use a period less
>>> +     * than that requested.
>>> +     */
>>> +    period = PREG_TO_VAL(period_steps) *  PREG_TO_VAL(prescale) * NSEC_PER_SEC;
>>
>> s/  / /
>>
>>> +    do_div(period, clk_get_rate(mchp_core_pwm->clk));
>>> +    if (period > state->period) {
>>> +        spin_unlock(&mchp_core_pwm->lock);
>>> +        return -EINVAL;
>>> +    }
>>
>> I would consider it easier to do the following (in pseudo syntax):
>>
>>
>>     prescale, period_steps = calculate_hwperiod(period);
>>
>>     if (period_locked):
>>         hwprescale = readb_relaxed(PRESCALE)
>>         hwperiod_steps = readb_relaxed(PERIOD)
>>
>>         if period_steps * prescale < hwperiod_steps * hwprescale:
>>             return -EINVAL
>>         else
>>             prescale, period_steps = hwprescale,
>>             hwperiod_steps
> 
> I think I like this method more than messing around with the clks.
> I'll change to something like this for the next version.
> 
>>     duty_steps = calculate_hwduty(duty, prescale)
>>     if (duty_steps > period_steps)
>>         duty_steps = period_steps
> 
> 
>>
>>> +    mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
>>> +
>>> +    /*
>>> +     * Notify the block to update the waveform from the shadow registers.
>>> +     * The updated values will not appear on the bus until they have been
>>> +     * applied to the waveform at the beginning of the next period. We must
>>> +     * write these registers and wait for them to be applied before calling
>>> +     * enable().
>>> +     */
>>> +    if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
>>> +        writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
>>> +        usleep_range(state->period, state->period * 2);
>>> +    }
>>> +
>>> +    spin_unlock(&mchp_core_pwm->lock);
>>> +
>>> +    mchp_core_pwm_enable(chip, pwm, true);
>>
>> I already asked in the last round: Do you really need to write the
>> SYNC_UPD register twice? I would expect that you don't?!
>>
>> Also the locking looks fishy here. It would be simpler (and maybe even
>> more robust, didn't think deeply about it) to assume in
>> mchp_core_pwm_enable() that the caller holds the lock. Then you only
>> grab the lock once during .apply() and nothing strange can happen in the
>> gap.
> 
> I got it into my head that enable() could be called by the framework.
> I'll simplify the locking here.
> 
>> I'd take the lock here to be sure to get a consistent return value.
>>
>>> +    channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
>>> +        readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
>>
>> micro optimisation: You're reading two register values here, but only use
>> one. Shadowing the enabled registers in mchp_core_pwm might also be an
>> idea.
> 
> I'd agree, but more from the perspective of how awful I feel this code
> looks.
> 
>>
>>> +    if (channel_enabled & 1 << pwm->hwpwm)
>>
>> I'm always unsure about the associativity of & and <<, so I would have
>> written that as
>>
>>     if (channel_enabled & (1 << pwm->hwpwm))
>>
>> I just tested that for the umpteens time and your code is fine, so this
>> is only for human readers like me.
> 
> I'll change it, I'll prob have forgotten the associativity by the time I
> look at the driver next.
> 
>>
>>> +        state->enabled = true;
>>> +    else
>>> +        state->enabled = false;
>>> +
>>> +    prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
>>> +
>>> +    posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
>>> +    negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
>>> +
>>> +    duty_steps = abs((s16)posedge - (s16)negedge);
>>
>> If duty_steps == 0 the returned result is wrong. I suggest to fix that,
>> at least mention the problem in a comment.
> 
> I think handling it is the way to go.
> 
>>
>>> +    state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
>>
>> Can this overflow?
> 
> No, 255 * 256 * 1E9 < 2^64 but ...
> 
>>> +    prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
> 
> ... can.
> 
>>>> What is the typical return value of clk_get_rate(mchp_core_pwm->clk)?
>>>
>>> It's gonna be less than 600M
>>
>> An exact value would be interesting, then when I spot a rounding problem
>> I could give you a test case to double check.
> 
> The maximum depends on speed grade, but no more than 200 MHz.
> 
> 
>>>>> You need to round up here. Did you test your driver with PWM_DEBUG on?
>>>>> During test please do for a few fixed periods:
>>>>>
>>>>>     for duty_cycle in [0 .. period]:
>>>>>         pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
>>>>>
>>>>>     for duty_cycle in [period .. 0]:
>>>>>         pwm_apply(mypwm, {.period = period, .duty_cycle = duty_cycle, .enabled = true, ...})
>>>>>
>>>>> and check there is no output claiming a miscalculation.
>>>>
>>>> I ran the stuff you gave me last time, doing something similar w/ a
>>>> shell loop. Got no reported miscalculations.
>>>
>>> I'm surprise, I would have expected that my test recipe would find such
>>> an issue. Could you follow my arguing about the rounding direction?
>>> There always the possibility that I'm wrong, too.
>>
>> I'll take another look & get back to you.
> 
> I *think* I might've spelt "PWM_DEBUG" as "PWM_DEBUF"...
> 
> I'll retest!
> 
> Thanks,
> Conor.
diff mbox series

Patch

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 904de8d61828..007ea5750e73 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -383,6 +383,16 @@  config PWM_MEDIATEK
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-mediatek.
 
+config PWM_MICROCHIP_CORE
+	tristate "Microchip corePWM PWM support"
+	depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
+	depends on HAS_IOMEM && OF
+	help
+	  PWM driver for Microchip FPGA soft IP core.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-microchip-core.
+
 config PWM_MXS
 	tristate "Freescale MXS PWM support"
 	depends on ARCH_MXS || COMPILE_TEST
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index 5c08bdb817b4..43feb7cfc66a 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -33,6 +33,7 @@  obj-$(CONFIG_PWM_LPSS_PCI)	+= pwm-lpss-pci.o
 obj-$(CONFIG_PWM_LPSS_PLATFORM)	+= pwm-lpss-platform.o
 obj-$(CONFIG_PWM_MESON)		+= pwm-meson.o
 obj-$(CONFIG_PWM_MEDIATEK)	+= pwm-mediatek.o
+obj-$(CONFIG_PWM_MICROCHIP_CORE)	+= pwm-microchip-core.o
 obj-$(CONFIG_PWM_MTK_DISP)	+= pwm-mtk-disp.o
 obj-$(CONFIG_PWM_MXS)		+= pwm-mxs.o
 obj-$(CONFIG_PWM_NTXEC)		+= pwm-ntxec.o
diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c
new file mode 100644
index 000000000000..3471eb2c8645
--- /dev/null
+++ b/drivers/pwm/pwm-microchip-core.c
@@ -0,0 +1,355 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * corePWM driver for Microchip "soft" FPGA IP cores.
+ *
+ * Copyright (c) 2021-2022 Microchip Corporation. All rights reserved.
+ * Author: Conor Dooley <conor.dooley@microchip.com>
+ * Documentation:
+ * https://www.microsemi.com/document-portal/doc_download/1245275-corepwm-hb
+ *
+ * Limitations:
+ * - If the IP block is configured without "shadow registers", all register
+ *   writes will take effect immediately, causing glitches on the output.
+ *   If shadow registers *are* enabled, a write to the "SYNC_UPDATE" register
+ *   notifies the core that it needs to update the registers defining the
+ *   waveform from the contents of the "shadow registers".
+ * - The IP block has no concept of a duty cycle, only rising/falling edges of
+ *   the waveform. Unfortunately, if the rising & falling edges registers have
+ *   the same value written to them the IP block will do whichever of a rising
+ *   or a falling edge is possible. I.E. a 50% waveform at twice the requested
+ *   period. Therefore to get a 0% waveform, the output is set the max high/low
+ *   time depending on polarity.
+ * - The PWM period is set for the whole IP block not per channel. The driver
+ *   will only change the period if no other PWM output is enabled.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/math.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/spinlock.h>
+
+#define PREG_TO_VAL(PREG) ((PREG) + 1)
+
+#define MCHPCOREPWM_PRESCALE_MAX	0x100
+#define MCHPCOREPWM_PERIOD_STEPS_MAX	0xff
+#define MCHPCOREPWM_PERIOD_MAX		0xff00
+
+#define MCHPCOREPWM_PRESCALE	0x00
+#define MCHPCOREPWM_PERIOD	0x04
+#define MCHPCOREPWM_EN(i)	(0x08 + 0x04 * (i)) /* 0x08, 0x0c */
+#define MCHPCOREPWM_POSEDGE(i)	(0x10 + 0x08 * (i)) /* 0x10, 0x18, ..., 0x88 */
+#define MCHPCOREPWM_NEGEDGE(i)	(0x14 + 0x08 * (i)) /* 0x14, 0x1c, ..., 0x8c */
+#define MCHPCOREPWM_SYNC_UPD	0xe4
+
+struct mchp_core_pwm_chip {
+	struct pwm_chip chip;
+	struct clk *clk;
+	spinlock_t lock; /* protect the shared period */
+	void __iomem *base;
+	u32 sync_update_mask;
+};
+
+static inline struct mchp_core_pwm_chip *to_mchp_core_pwm(struct pwm_chip *chip)
+{
+	return container_of(chip, struct mchp_core_pwm_chip, chip);
+}
+
+static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm, bool enable)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u8 channel_enable, reg_offset, shift;
+
+	/*
+	 * There are two adjacent 8 bit control regs, the lower reg controls
+	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
+	 * and if so, offset by the bus width.
+	 */
+	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
+	shift = pwm->hwpwm > 7 ? pwm->hwpwm - 8 : pwm->hwpwm;
+
+	spin_lock(&mchp_core_pwm->lock);
+
+	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
+	channel_enable &= ~(1 << shift);
+	channel_enable |= (enable << shift);
+
+	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
+
+	/*
+	 * Write to the sync update registers so that channels with shadow
+	 * registers will also get their enable update. This operation is a NOP
+	 * for channels without shadow registers.
+	 */
+	writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
+
+	spin_unlock(&mchp_core_pwm->lock);
+}
+
+static void mchp_core_pwm_apply_duty(struct pwm_chip *chip, struct pwm_device *pwm,
+				     const struct pwm_state *state, u8 prescale, u8 period_steps)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u64 duty_steps, period, tmp;
+	u8 posedge, negedge;
+	u16 prescale_val = PREG_TO_VAL(prescale);
+	u8 period_steps_val = PREG_TO_VAL(period_steps);
+
+	period = period_steps_val * prescale_val * NSEC_PER_SEC;
+	period = DIV64_U64_ROUND_UP(period, clk_get_rate(mchp_core_pwm->clk));
+
+	/*
+	 * Calculate the duty cycle in multiples of the prescaled period:
+	 * duty_steps = duty_in_ns / step_in_ns
+	 * step_in_ns = (prescale * NSEC_PER_SEC) / clk_rate
+	 * The code below is rearranged slightly to only divide once.
+	 *
+	 * Because the period is per channel, it is possible that the requested
+	 * duty cycle is longer than the period, in which case cap it to the
+	 * period.
+	 */
+	if (state->duty_cycle > period) {
+		duty_steps = period_steps_val;
+	} else {
+		duty_steps = state->duty_cycle * clk_get_rate(mchp_core_pwm->clk);
+		tmp = prescale_val * NSEC_PER_SEC;
+		duty_steps = div64_u64(duty_steps, tmp);
+	}
+
+	/*
+	 * Turn the output on unless posedge == negedge, in which case the
+	 * duty is intended to be 0, but limitations of the IP block don't
+	 * allow a zero length duty cycle - so just set the max high/low time
+	 * respectively.
+	 */
+	if (state->polarity == PWM_POLARITY_INVERSED) {
+		negedge = !duty_steps ? period_steps_val : 0u;
+		posedge = duty_steps;
+	} else {
+		posedge = !duty_steps ? period_steps_val : 0u;
+		negedge = duty_steps;
+	}
+
+	writel_relaxed(posedge, mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
+	writel_relaxed(negedge, mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
+}
+
+static int mchp_core_pwm_apply_period(struct pwm_chip *chip, const struct pwm_state *state,
+				      u8 *prescale, u8 *period_steps)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u64 tmp, clk_rate;
+	u16 prescale_val, period_steps_val;
+
+	/*
+	 * Calculate the period cycles and prescale values.
+	 * The registers are each 8 bits wide & multiplied to compute the period
+	 * using the formula:
+	 * (clock_period) * (prescale + 1) * (period_steps + 1)
+	 * so the maximum period that can be generated is 0x10000 times the
+	 * period of the input clock.
+	 * However, due to the design of the "hardware", it is not possible to
+	 * attain a 100% duty cycle if the full range of period_steps is used.
+	 * Therefore period_steps is restricted to 0xFE and the maximum multiple
+	 * of the clock period attainable is 0xFF00.
+	 */
+	clk_rate = clk_get_rate(mchp_core_pwm->clk);
+	if (clk_rate >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	tmp = mul_u64_u64_div_u64(state->period, clk_rate, NSEC_PER_SEC);
+
+	if (tmp >= MCHPCOREPWM_PERIOD_MAX) {
+		*prescale = MCHPCOREPWM_PRESCALE_MAX - 1;
+		*period_steps = MCHPCOREPWM_PERIOD_STEPS_MAX - 1;
+		goto write_registers;
+	}
+
+	for (prescale_val = 1; prescale_val <= MCHPCOREPWM_PRESCALE_MAX; prescale_val++) {
+		period_steps_val = div_u64(tmp, prescale_val);
+		if (period_steps_val > MCHPCOREPWM_PERIOD_STEPS_MAX)
+			continue;
+		*period_steps = period_steps_val - 1;
+		*prescale = prescale_val - 1;
+		break;
+	}
+
+write_registers:
+	writel_relaxed(*prescale, mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
+	writel_relaxed(*period_steps, mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
+
+	return 0;
+}
+
+static int mchp_core_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			       const struct pwm_state *state)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	struct pwm_state current_state = pwm->state;
+	bool period_locked;
+	u64 period;
+	u16 channel_enabled;
+	u8 prescale, period_steps;
+	int ret;
+
+	if (!state->enabled) {
+		mchp_core_pwm_enable(chip, pwm, false);
+		return 0;
+	}
+
+	/*
+	 * If the only thing that has changed is the duty cycle or the polarity,
+	 * we can shortcut the calculations and just compute/apply the new duty
+	 * cycle pos & neg edges
+	 * As all the channels share the same period, do not allow it to be
+	 * changed if any other channels are enabled.
+	 */
+	spin_lock(&mchp_core_pwm->lock);
+
+	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
+		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
+	period_locked = channel_enabled & ~(1 << pwm->hwpwm);
+
+	if ((!current_state.enabled || current_state.period != state->period) && !period_locked) {
+		ret = mchp_core_pwm_apply_period(chip, state, &prescale, &period_steps);
+		if (ret) {
+			spin_unlock(&mchp_core_pwm->lock);
+			return ret;
+		}
+	} else {
+		prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
+		period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
+	}
+
+	/*
+	 * If the period is locked, it may not be possible to use a period less
+	 * than that requested.
+	 */
+	period = PREG_TO_VAL(period_steps) *  PREG_TO_VAL(prescale) * NSEC_PER_SEC;
+	do_div(period, clk_get_rate(mchp_core_pwm->clk));
+	if (period > state->period) {
+		spin_unlock(&mchp_core_pwm->lock);
+		return -EINVAL;
+	}
+
+	mchp_core_pwm_apply_duty(chip, pwm, state, prescale, period_steps);
+
+	/*
+	 * Notify the block to update the waveform from the shadow registers.
+	 * The updated values will not appear on the bus until they have been
+	 * applied to the waveform at the beginning of the next period. We must
+	 * write these registers and wait for them to be applied before calling
+	 * enable().
+	 */
+	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
+		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
+		usleep_range(state->period, state->period * 2);
+	}
+
+	spin_unlock(&mchp_core_pwm->lock);
+
+	mchp_core_pwm_enable(chip, pwm, true);
+
+	return 0;
+}
+
+static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+				    struct pwm_state *state)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u8 prescale, period_steps, duty_steps;
+	u8 posedge, negedge;
+	u16 channel_enabled;
+
+	channel_enabled = (((u16)readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(1)) << 8) |
+		readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_EN(0)));
+
+	if (channel_enabled & 1 << pwm->hwpwm)
+		state->enabled = true;
+	else
+		state->enabled = false;
+
+	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
+
+	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
+	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
+
+	duty_steps = abs((s16)posedge - (s16)negedge);
+	state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
+	do_div(state->duty_cycle, clk_get_rate(mchp_core_pwm->clk));
+
+	state->polarity = negedge < posedge ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
+
+	period_steps = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD));
+	state->period = period_steps * prescale * NSEC_PER_SEC;
+	do_div(state->period, clk_get_rate(mchp_core_pwm->clk));
+}
+
+static const struct pwm_ops mchp_core_pwm_ops = {
+	.apply = mchp_core_pwm_apply,
+	.get_state = mchp_core_pwm_get_state,
+	.owner = THIS_MODULE,
+};
+
+static const struct of_device_id mchp_core_of_match[] = {
+	{
+		.compatible = "microchip,corepwm-rtl-v4",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mchp_core_of_match);
+
+static int mchp_core_pwm_probe(struct platform_device *pdev)
+{
+	struct mchp_core_pwm_chip *mchp_pwm;
+	struct resource *regs;
+	int ret;
+
+	mchp_pwm = devm_kzalloc(&pdev->dev, sizeof(*mchp_pwm), GFP_KERNEL);
+	if (!mchp_pwm)
+		return -ENOMEM;
+
+	mchp_pwm->base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
+	if (IS_ERR(mchp_pwm->base))
+		return PTR_ERR(mchp_pwm->base);
+
+	mchp_pwm->clk = devm_clk_get_enabled(&pdev->dev, NULL);
+	if (IS_ERR(mchp_pwm->clk))
+		return PTR_ERR(mchp_pwm->clk);
+
+	if (of_property_read_u32(pdev->dev.of_node, "microchip,sync-update-mask",
+				 &mchp_pwm->sync_update_mask))
+		mchp_pwm->sync_update_mask = 0u;
+
+	spin_lock_init(&mchp_pwm->lock);
+
+	mchp_pwm->chip.dev = &pdev->dev;
+	mchp_pwm->chip.ops = &mchp_core_pwm_ops;
+	mchp_pwm->chip.npwm = 16;
+
+	ret = devm_pwmchip_add(&pdev->dev, &mchp_pwm->chip);
+	if (ret < 0)
+		return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
+
+	platform_set_drvdata(pdev, mchp_pwm);
+
+	return 0;
+}
+
+static struct platform_driver mchp_core_pwm_driver = {
+	.driver = {
+		.name = "mchp-core-pwm",
+		.of_match_table = mchp_core_of_match,
+	},
+	.probe = mchp_core_pwm_probe,
+};
+module_platform_driver(mchp_core_pwm_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
+MODULE_DESCRIPTION("corePWM driver for Microchip FPGAs");