diff mbox

[v2] Added perf functionality to mmdc driver

Message ID 20160815223035.5429-1-zhengyu.shen@nxp.com
State New
Headers show

Commit Message

Zhengyu Shen Aug. 15, 2016, 10:30 p.m. UTC
MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64 and
LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high performance,
and optimized. MMDC is present on i.MX6 Quad and i.MX6 QuadPlus devices, but
this driver only supports i.MX6 Quad at the moment. MMDC provides registers
for performance counters which read via this driver to help debug memory
throughput and similar issues.

$ perf stat -a -e mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

         898021787      mmdc/busy-cycles/
          14819600      mmdc/read-accesses/
            471.30 MB   mmdc/read-bytes/
        2815419216      mmdc/total-cycles/
          13367354      mmdc/write-accesses/
            427.76 MB   mmdc/write-bytes/

       5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen <zhengyu.shen@nxp.com>
---
change from v1 to v2:
	Added cpumask and migration handling support to driver
	Validated event during event_init
	Added code to properly stop counters
	Used perf_invalid_context instead of perf_sw_context
	Added hrtimer to poll for overflow 
	Added better description
	Added support for multiple mmdcs

 arch/arm/mach-imx/mmdc.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 361 insertions(+), 1 deletion(-)

Comments

Mark Rutland Aug. 15, 2016, 4:50 p.m. UTC | #1
Hi,

On Mon, Aug 15, 2016 at 05:30:35PM -0500, Zhengyu Shen wrote:
> MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64 and
> LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high performance,
> and optimized. MMDC is present on i.MX6 Quad and i.MX6 QuadPlus devices, but
> this driver only supports i.MX6 Quad at the moment. MMDC provides registers
> for performance counters which read via this driver to help debug memory
> throughput and similar issues.
> 
> $ perf stat -a -e mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd if=/dev/zero of=/dev/null bs=1M count=5000
> Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':
> 
>          898021787      mmdc/busy-cycles/
>           14819600      mmdc/read-accesses/
>             471.30 MB   mmdc/read-bytes/
>         2815419216      mmdc/total-cycles/
>           13367354      mmdc/write-accesses/
>             427.76 MB   mmdc/write-bytes/
> 
>        5.334757334 seconds time elapsed
> 
> Signed-off-by: Zhengyu Shen <zhengyu.shen@nxp.com>
> ---
> change from v1 to v2:
> 	Added cpumask and migration handling support to driver
> 	Validated event during event_init
> 	Added code to properly stop counters
> 	Used perf_invalid_context instead of perf_sw_context
> 	Added hrtimer to poll for overflow 
> 	Added better description
> 	Added support for multiple mmdcs

As I commented on v1 w.r.t. the above, I would appreciate being Cc'd on future
versions of this patch.

> +static u32 mmdc_read_counter(struct mmdc_pmu *pmu_mmdc, int cfg, u64 prev_val)
> +{
> +	u32 val;
> +	void __iomem *mmdc_base, *reg;
> +	mmdc_base = pmu_mmdc->mmdc_base;
> +
> +	switch (cfg)
> +	{
> +		case TOTAL_CYCLES:
> +			reg = mmdc_base + MMDC_MADPSR0;
> +			break;
> +		case BUSY_CYCLES:
> +			reg = mmdc_base + MMDC_MADPSR1;
> +			break;
> +		case READ_ACCESSES:
> +			reg = mmdc_base + MMDC_MADPSR2;
> +			break;
> +		case WRITE_ACCESSES:
> +			reg = mmdc_base + MMDC_MADPSR3;
> +			break;
> +		case READ_BYTES:
> +			reg = mmdc_base + MMDC_MADPSR4;
> +			break;
> +		case WRITE_BYTES:
> +			reg = mmdc_base + MMDC_MADPSR5;
> +			break;
> +		default:
> +			return -1;

This is probably worth a WARN_ONCE, given this should never happen if things
are working correctly.

> +static int mmdc_cpu_notifier(struct notifier_block *nb,
> +        unsigned long action, void *hcpu)
> +{
> +	struct mmdc_pmu *pmu_mmdc = container_of(nb, struct mmdc_pmu, cpu_nb);
> +	unsigned int cpu = (long)hcpu; /* for (long) see kernel/cpu.c */
> +	unsigned int target;
> +
> +	switch (action & ~CPU_TASKS_FROZEN) {
> +		case CPU_DOWN_PREPARE:
> +			if (!cpumask_test_and_clear_cpu(cpu, &pmu_mmdc->cpu))
> +				break;
> +			target = cpumask_any_but(cpu_online_mask, cpu);
> +			if (target >= nr_cpu_ids)
> +				break;
> +			perf_pmu_migrate_context(&pmu_mmdc->pmu, cpu, target);
> +			cpumask_set_cpu(target, &pmu_mmdc->cpu);
> +		default:
> +			break;
> +    }
> +
> +	return NOTIFY_OK;
> +}

CPU notifiers are being ripped out of the kernel with the new hotplug state
machine stuff. See [1] for examples.

> +static int mmdc_event_init(struct perf_event *event)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	if (event->attr.type != event->pmu->type)
> +		return -ENOENT;
> +
> +	if (event->cpu < 0) {
> +		dev_warn(pmu_mmdc->dev, "Can't provide per-task data!\n");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (event->attr.exclude_user   ||
> +			event->attr.exclude_kernel ||
> +			event->attr.exclude_hv     ||
> +			event->attr.exclude_idle   ||
> +			event->attr.exclude_host)
> +		return -EINVAL;

Likewise for exclude_guest here.

You also need to reject sampling events.

You also need to sanity-check grouping.

For the Nth time, I'm going to say that really, we should have the core check
this (or expose helpers to do so). It's somewhat ridiculous that evry driver
has to blacklist everything it doesn't support, rather than whitelisting the
few things that it does.

> +
> +	mmdc_enable_profiling(event);

This doesn't look right. Until we call add() we shouldn't have to poke HW.
event_init should just verify the veent and set up datastructures as required.

It would be better to count the number of active events and enable/disable the
PMU as reuired in the add() and del() callbacks.

> +	event->cpu = cpumask_first(&pmu_mmdc->cpu);
> +	local64_set(&event->count, 0);
> +
> +	return 0;
> +}
> +
> +static void mmdc_event_update(struct perf_event * event)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	u32 val;
> +	u64 prev_val;
> +	prev_val = local64_read(&event->count);
> +	val = mmdc_read_counter(pmu_mmdc, (int) event->attr.config, prev_val);
> +	local64_add(val - (u32)(prev_val&0xFFFFFFFF) , &event->count);
> +}

Nit: please add spaces eithr side of the '&'.

> +static void mmdc_event_start(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	void __iomem *mmdc_base, *reg;
> +
> +	local64_set(&event->count, 0);
> +	mmdc_base = pmu_mmdc->mmdc_base;
> +	reg = mmdc_base + MMDC_MADPCR0;
> +	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> +			HRTIMER_MODE_REL_PINNED);

Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do you have
similar HW issues?

Is there no overflow interrupt?

> +
> +	writel_relaxed(DBG_RST, reg);
> +	writel_relaxed(DBG_EN, reg);
> +}
> +
> +static int mmdc_event_add(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	int cfg = (int)event->attr.config;
> +	if (cfg >= 1 &&	cfg <= MMDC_NUM_COUNTERS)
> +		pmu_mmdc->mmdc_events[cfg - 1] = event;

Surely this should never happen, and we don't want to start such a broken
event? SO this should be something like:

	if (WARN_ONCE(cfg < 1 || cfg > MMDC_NUM_COUNTERS))
		return;

Also, why not use zero-based indices like everyone else?

> +static void mmdc_event_stop(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	void __iomem *mmdc_base, *reg;
> +	int cfg = (int)event->attr.config;
> +
> +	mmdc_base = pmu_mmdc->mmdc_base;
> +	reg = mmdc_base + MMDC_MADPCR0;
> +	if (cfg >= 1 &&	cfg <= MMDC_NUM_COUNTERS)

Same comment as above here.

> +	{
> +		if (hrtimer_active(&pmu_mmdc->hrtimer))
> +			hrtimer_cancel(&pmu_mmdc->hrtimer);

What if I have multiple events? As soon as I stop one the hrtimer will be
stopped for all of them. Note I fixed a similar bug in the CCN driver in [2].

> +
> +		writel_relaxed(PRF_FRZ, reg);

Why relaxed?

I see no barriers as one would expect to go with relaxed IO accessors, so I
assume this is premature optimisation, and likely introducing bugs. Please use
the non-relaxed accessors.

> +static void mmdc_overflow_handler(struct mmdc_pmu *pmu_mmdc)
> +{
> +	int i;
> +	u32 val;
> +	u64 prev_val;
> +
> +	for (i = 0; i < MMDC_NUM_COUNTERS; i++)
> +	{
> +		struct perf_event *event = pmu_mmdc->mmdc_events[i];
> +		if (event)
> +		{
> +			prev_val = local64_read(&event->count);
> +			val = mmdc_read_counter(pmu_mmdc, i + 1, prev_val);
> +			local64_add(val - (u32)(prev_val&0xFFFFFFFF) , &event->count);
> +		}
> +	}
> +}

Can't you call your update function for each event? This seems to be a copy of
that logic.

> @@ -61,7 +392,35 @@ static int imx_mmdc_probe(struct platform_device *pdev)
>  			__func__);
>  		return -EBUSY;
>  	}
> +	pmu_mmdc = kzalloc(sizeof(*pmu_mmdc), GFP_KERNEL);
>  
> +	if (!pmu_mmdc) {
> +		pr_err("failed to allocate PMU device!\n");
> +		return -ENOMEM;
> +	}
> +	mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
> +	dev_info(pmu_mmdc->dev, "No access to interrupts, using timer.\n");

You haven't even tried...

> +	hrtimer_init(&pmu_mmdc->hrtimer, CLOCK_MONOTONIC,
> +			HRTIMER_MODE_REL);
> +	pmu_mmdc->hrtimer.function = mmdc_timer_handler;
> +	register_cpu_notifier(&pmu_mmdc->cpu_nb);
> +	if (mmdc_num == 0) {
> +		name = "mmdc";
> +	} else {
> +		int len = snprintf(NULL, 0, "mmdc_%d", mmdc_num);
> +		name = devm_kzalloc(&pdev->dev, len + 1, GFP_KERNEL);
> +		snprintf(name, len + 1, "mmdc_%d", mmdc_num);
> +	}

Use devm_kasptrintf.

Thanks,
Mark.

[1] https://lkml.org/lkml/2016/7/11/312
[2] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-August/448369.html
Mark Rutland Aug. 15, 2016, 7:49 p.m. UTC | #2
On Tue, Aug 16, 2016 at 02:40:43PM +0000, Zhengyu Shen wrote:
> > > 	Added cpumask and migration handling support to driver
> > > 	Validated event during event_init
> > > 	Added code to properly stop counters
> > > 	Used perf_invalid_context instead of perf_sw_context
> > > 	Added hrtimer to poll for overflow
> > > 	Added better description
> > > 	Added support for multiple mmdcs
> > 
> > As I commented on v1 w.r.t. the above, I would appreciate being Cc'd on
> > future versions of this patch.
> 
> Sorry about that, I'll be sure to CC you in the future. 
> 
> > > +static void mmdc_event_start(struct perf_event *event, int flags) {
> > > +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> > > +	void __iomem *mmdc_base, *reg;
> > > +
> > > +	local64_set(&event->count, 0);
> > > +	mmdc_base = pmu_mmdc->mmdc_base;
> > > +	reg = mmdc_base + MMDC_MADPCR0;
> > > +	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> > > +			HRTIMER_MODE_REL_PINNED);
> > 
> > Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do you
> > have similar HW issues?
> > 
> > Is there no overflow interrupt?
> 
> When overflow occurs, a register bit is set to one. There is no overflow
> interrupt which is why the timer is needed. 

I see. Please have add comment in the driver explaining this, so that this is
obvious.

Does the counter itself wrap and continue counting, or does it saturate?

How have you tuned your polling period so as to avoid missing events in the
case of an overflow?

Thanks,
Mark.
kernel test robot Aug. 16, 2016, 12:25 a.m. UTC | #3
Hi Zhengyu,

[auto build test ERROR on shawnguo/for-next]
[also build test ERROR on v4.8-rc2 next-20160815]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Zhengyu-Shen/Added-perf-functionality-to-mmdc-driver/20160816-063431
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git for-next
config: arm-allmodconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 5.4.0-6) 5.4.0 20160609
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   arch/arm/mach-imx/mmdc.c: In function 'mmdc_pmu_init':
>> arch/arm/mach-imx/mmdc.c:354:30: error: 'CPU_PRI_PERF' undeclared (first use in this function)
     pmu_mmdc->cpu_nb.priority = CPU_PRI_PERF + 1;
                                 ^
   arch/arm/mach-imx/mmdc.c:354:30: note: each undeclared identifier is reported only once for each function it appears in

vim +/CPU_PRI_PERF +354 arch/arm/mach-imx/mmdc.c

   348	
   349		mmdc_num = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
   350	
   351		cpumask_set_cpu(smp_processor_id(), &pmu_mmdc->cpu);
   352	
   353		pmu_mmdc->cpu_nb.notifier_call = mmdc_cpu_notifier;
 > 354		pmu_mmdc->cpu_nb.priority = CPU_PRI_PERF + 1;
   355	
   356		pmu_mmdc->dev = dev;
   357		return mmdc_num;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Peter Zijlstra Aug. 16, 2016, 12:04 p.m. UTC | #4
On Mon, Aug 15, 2016 at 05:50:50PM +0100, Mark Rutland wrote:
> For the Nth time, I'm going to say that really, we should have the core check
> this (or expose helpers to do so). It's somewhat ridiculous that evry driver
> has to blacklist everything it doesn't support, rather than whitelisting the
> few things that it does.

I'll look at patches ;-)
Zhengyu Shen Aug. 16, 2016, 2:40 p.m. UTC | #5
> > 	Added cpumask and migration handling support to driver
> > 	Validated event during event_init
> > 	Added code to properly stop counters
> > 	Used perf_invalid_context instead of perf_sw_context
> > 	Added hrtimer to poll for overflow
> > 	Added better description
> > 	Added support for multiple mmdcs
> 
> As I commented on v1 w.r.t. the above, I would appreciate being Cc'd on
> future versions of this patch.

Sorry about that, I'll be sure to CC you in the future. 

> > +static void mmdc_event_start(struct perf_event *event, int flags) {
> > +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> > +	void __iomem *mmdc_base, *reg;
> > +
> > +	local64_set(&event->count, 0);
> > +	mmdc_base = pmu_mmdc->mmdc_base;
> > +	reg = mmdc_base + MMDC_MADPCR0;
> > +	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> > +			HRTIMER_MODE_REL_PINNED);
> 
> Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do you
> have similar HW issues?
> 
> Is there no overflow interrupt?

When overflow occurs, a register bit is set to one. There is no overflow
interrupt which is why the timer is needed. 

Thanks a lot for the feedback!
Zhengyu Shen Aug. 16, 2016, 8:40 p.m. UTC | #6
> > > > +	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> > > > +			HRTIMER_MODE_REL_PINNED);
> > >
> > > Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do
> > > you have similar HW issues?
> > >
> > > Is there no overflow interrupt?
> >
> > When overflow occurs, a register bit is set to one. There is no
> > overflow interrupt which is why the timer is needed.
> 
> I see. Please have add comment in the driver explaining this, so that this is
> obvious.
> 
> Does the counter itself wrap and continue counting, or does it saturate?
> 
> How have you tuned your polling period so as to avoid missing events in the
> case of an overflow?
> 
> Thanks,
> Mark.
The counter wraps around once every ten seconds for total-cycles (which is the 
Fastest increasing counter). Polling is done every one second just to be safe.
Mark Rutland Aug. 17, 2016, 10:04 a.m. UTC | #7
On Tue, Aug 16, 2016 at 08:40:08PM +0000, Zhengyu Shen wrote:
> > > > > +	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> > > > > +			HRTIMER_MODE_REL_PINNED);
> > > >
> > > > Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do
> > > > you have similar HW issues?
> > > >
> > > > Is there no overflow interrupt?
> > >
> > > When overflow occurs, a register bit is set to one. There is no
> > > overflow interrupt which is why the timer is needed.
> > 
> > I see. Please have add comment in the driver explaining this, so that this is
> > obvious.
> > 
> > Does the counter itself wrap and continue counting, or does it saturate?
> > 
> > How have you tuned your polling period so as to avoid missing events in the
> > case of an overflow?
> > 
> > Thanks,
> > Mark.
> The counter wraps around once every ten seconds for total-cycles (which is the 
> Fastest increasing counter). Polling is done every one second just to be safe.

Ok. It would be worth noting this with a comment next to either the
hrtimer handler or registration thereof.

I assume that on overflow the counter wraps rather than saturating?

Thanks,
Mark.
Zhengyu Shen Aug. 17, 2016, 3:31 p.m. UTC | #8
> The following series of patch resolve the kernel compilation errors with the
> proposed perf functionality integration into MMDC driver by Zhengyu Shen.
> These provide the working code which can be enhanced upon as per the
> reviews from Mark and Peter but will make sure that code compiles without
> errors and still retains working as was proposed by Zhengyu. I suggest further
> improvements from review comments could be done on top of it from
> further on with this being used as a base.
> 
> Nitin Chaudhary (2):
>   Error: Fix mmdc compilation errors due to cpu notifier
>   [i.MX6Q] Code cleanup & verification after fixing compilation error
> 
>  arch/arm/mach-imx/mmdc.c   | 95 ++++++++++++++++++++++++------------
> ----------
>  include/linux/cpuhotplug.h |  1 +
>  2 files changed, 50 insertions(+), 46 deletions(-)
> 
> --
> 2.7.4

Hi, thanks for the help. Is it impossible for two pmus to be created? I wasn't sure
about that, which is why I didn't have pmu_mmdc set to static.
Nitin Chaudhary Aug. 17, 2016, 5:36 p.m. UTC | #9
Hi Zhengyu,

There are two main reasons why I made pmu_mmdc to be static:
1. Since i.MX6Q has only a single MMDC IP, at any time there should be
only 1 instance of the same.
2. Now as the code plugs itself into the perf core, it is very
unlikely we need to allow access to this
 data structure from outside the driver code. If we need to do for
some reasons in kernel in the future then
 we can expose some wrapper APIs to achieve that in a safeway.

Also, I have a few questions regarding MMDC initialization.
1. Is there any specific hardware initialization reason that we do a
busy loop while trying to enable Automatic
    power saving mode. Is it possible to move it to deferred workqueue?
2. I am having an issue with the MMDC driver running Linux Kernel
4.8-rc2 and its unable to successfully
    probe due to timeout waiting for MMDC automatic wait getting
enabled. The below is the log message
    along with the debug messages I added to dump the read values for
MDMIC and MAPSR reg in MMDC
   connected to singel channel DDR3.
[    0.133162] imx_mmdc_probe MMDC_MDMISC: 0x40001740
[    0.133178] imx_mmdc_probe MMDC_MAPSR: 0x00001046 << Before
[    0.133871] imx_mmdc_probe: failed to enable automatic power saving
[    0.133969] imx-mmdc 21b0000.mmdc: No access to interrupts, using timer.

The code snippet where the debug logs are printed is as below:
static int imx_mmdc_probe(struct platform_device *pdev)
<snip>
        val = readl_relaxed(reg);
        ddr_type = (val & BM_MMDC_MDMISC_DDR_TYPE) >>
                 BP_MMDC_MDMISC_DDR_TYPE;

        printk(KERN_CRIT "%s MMDC_MDMISC: 0x%08x\n", __func__, val);
        reg = mmdc_base + MMDC_MAPSR;
        /* Enable automatic power saving */
        val = readl_relaxed(reg);
        printk(KERN_CRIT "%s MMDC_MAPSR: 0x%08x\n", __func__, val);
        val &= ~(1 << BP_MMDC_MAPSR_PSD);
        writel_relaxed(val, reg);

        /* Ensure it's successfully enabled */
        while (!(readl_relaxed(reg) & 1 << BP_MMDC_MAPSR_PSS) && --timeout)
                cpu_relax();
<snip>

Any help on the before mentioned issue will be welcomed.

On Wed, Aug 17, 2016 at 8:31 AM, Zhengyu Shen <zhengyu.shen@nxp.com> wrote:
>> The following series of patch resolve the kernel compilation errors with the
>> proposed perf functionality integration into MMDC driver by Zhengyu Shen.
>> These provide the working code which can be enhanced upon as per the
>> reviews from Mark and Peter but will make sure that code compiles without
>> errors and still retains working as was proposed by Zhengyu. I suggest further
>> improvements from review comments could be done on top of it from
>> further on with this being used as a base.
>>
>> Nitin Chaudhary (2):
>>   Error: Fix mmdc compilation errors due to cpu notifier
>>   [i.MX6Q] Code cleanup & verification after fixing compilation error
>>
>>  arch/arm/mach-imx/mmdc.c   | 95 ++++++++++++++++++++++++------------
>> ----------
>>  include/linux/cpuhotplug.h |  1 +
>>  2 files changed, 50 insertions(+), 46 deletions(-)
>>
>> --
>> 2.7.4
>
> Hi, thanks for the help. Is it impossible for two pmus to be created? I wasn't sure
> about that, which is why I didn't have pmu_mmdc set to static.
Nitin Chaudhary Aug. 17, 2016, 5:40 p.m. UTC | #10
Hi All,

Typo correction.

2. I am having an issue with the MMDC driver running Linux Kernel
4.8-rc2 and its unable to successfully probe due to timeout waiting for
MMDC automatic power saving mode getting enabled.

On Wed, Aug 17, 2016 at 10:36 AM, Nitin Chaudhary
<nitinchaudhary1289@gmail.com> wrote:
> Hi Zhengyu,
>
> There are two main reasons why I made pmu_mmdc to be static:
> 1. Since i.MX6Q has only a single MMDC IP, at any time there should be
> only 1 instance of the same.
> 2. Now as the code plugs itself into the perf core, it is very
> unlikely we need to allow access to this
>  data structure from outside the driver code. If we need to do for
> some reasons in kernel in the future then
>  we can expose some wrapper APIs to achieve that in a safeway.
>
> Also, I have a few questions regarding MMDC initialization.
> 1. Is there any specific hardware initialization reason that we do a
> busy loop while trying to enable Automatic
>     power saving mode. Is it possible to move it to deferred workqueue?
> 2. I am having an issue with the MMDC driver running Linux Kernel
> 4.8-rc2 and its unable to successfully
>     probe due to timeout waiting for MMDC automatic wait getting
> enabled. The below is the log message
>     along with the debug messages I added to dump the read values for
> MDMIC and MAPSR reg in MMDC
>    connected to singel channel DDR3.
> [    0.133162] imx_mmdc_probe MMDC_MDMISC: 0x40001740
> [    0.133178] imx_mmdc_probe MMDC_MAPSR: 0x00001046 << Before
> [    0.133871] imx_mmdc_probe: failed to enable automatic power saving
> [    0.133969] imx-mmdc 21b0000.mmdc: No access to interrupts, using timer.
>
> The code snippet where the debug logs are printed is as below:
> static int imx_mmdc_probe(struct platform_device *pdev)
> <snip>
>         val = readl_relaxed(reg);
>         ddr_type = (val & BM_MMDC_MDMISC_DDR_TYPE) >>
>                  BP_MMDC_MDMISC_DDR_TYPE;
>
>         printk(KERN_CRIT "%s MMDC_MDMISC: 0x%08x\n", __func__, val);
>         reg = mmdc_base + MMDC_MAPSR;
>         /* Enable automatic power saving */
>         val = readl_relaxed(reg);
>         printk(KERN_CRIT "%s MMDC_MAPSR: 0x%08x\n", __func__, val);
>         val &= ~(1 << BP_MMDC_MAPSR_PSD);
>         writel_relaxed(val, reg);
>
>         /* Ensure it's successfully enabled */
>         while (!(readl_relaxed(reg) & 1 << BP_MMDC_MAPSR_PSS) && --timeout)
>                 cpu_relax();
> <snip>
>
> Any help on the before mentioned issue will be welcomed.
>
> On Wed, Aug 17, 2016 at 8:31 AM, Zhengyu Shen <zhengyu.shen@nxp.com> wrote:
>>> The following series of patch resolve the kernel compilation errors with the
>>> proposed perf functionality integration into MMDC driver by Zhengyu Shen.
>>> These provide the working code which can be enhanced upon as per the
>>> reviews from Mark and Peter but will make sure that code compiles without
>>> errors and still retains working as was proposed by Zhengyu. I suggest further
>>> improvements from review comments could be done on top of it from
>>> further on with this being used as a base.
>>>
>>> Nitin Chaudhary (2):
>>>   Error: Fix mmdc compilation errors due to cpu notifier
>>>   [i.MX6Q] Code cleanup & verification after fixing compilation error
>>>
>>>  arch/arm/mach-imx/mmdc.c   | 95 ++++++++++++++++++++++++------------
>>> ----------
>>>  include/linux/cpuhotplug.h |  1 +
>>>  2 files changed, 50 insertions(+), 46 deletions(-)
>>>
>>> --
>>> 2.7.4
>>
>> Hi, thanks for the help. Is it impossible for two pmus to be created? I wasn't sure
>> about that, which is why I didn't have pmu_mmdc set to static.
>
>
>
> --
> Thanks & Regards
>
> Nitin Chaudhary
> Nitin Chaudhary
> MS, Electrical Engineering
> University of Southern California
> Intern, Zodiac In-flight Innovations
Nitin Chaudhary Aug. 19, 2016, 12:34 a.m. UTC | #11
Hi Zhengyu,

Thanks for your valuable inputs. I reworked on the patches and moved
the busy loop to deferred workqueue and it works for my board now. I
am publishing a new set of patches with the changes. It would be nice
if you could provide your valuable review.

On Thu, Aug 18, 2016 at 2:34 PM, Zhengyu Shen <zhengyu.shen@nxp.com> wrote:
> I believe you are correct on the bit[4]. I'm not the original creator of the driver,
> but from what I can tell, the MMDC should report that it is in power saving mode
> very quickly, if not immediately after the bit is written. I tested the value of timeout
> after a couple of a boots and it never goes under 1015 (0x3F7) for me.
>
>> -----Original Message-----
>> From: Nitin Chaudhary [mailto:nitinchaudhary1289@gmail.com]
>> Sent: Thursday, August 18, 2016 3:17 PM
>> To: Zhengyu Shen <zhengyu.shen@nxp.com>
>> Subject: Re: Re:[PATCH v2] Added perf functionality to mmdc driver
>>
>> Hi Zhengyu,
>>
>> Thanks for reply. I was wondering if the Power saving status bit
>> [MMDC0_MAPSR bit[4]] shows whether the MMDC is in power saving mode
>> or not? Also in the driver, exiting if we don't find MMDC in the power saving
>> mode immediately after enabling the automatic power saving mode within
>> the timeout period, is ok? Is it possible it will go into power saving mode after
>> enabling the automatic power saving mode once the DRAM access requests
>> calm down after the device has finished booting and went to a stable/idle
>> state?
>>
>> On Wed, Aug 17, 2016 at 11:09 AM, Zhengyu Shen
>> <zhengyu.shen@nxp.com> wrote:
>> >> Hi Zhengyu,
>> >>
>> >> There are two main reasons why I made pmu_mmdc to be static:
>> >> 1. Since i.MX6Q has only a single MMDC IP, at any time there should
>> >> be only 1 instance of the same.
>> >> 2. Now as the code plugs itself into the perf core, it is very
>> >> unlikely we need to allow access to this  data structure from outside
>> >> the driver code. If we need to do for some reasons in kernel in the
>> >> future then  we can expose some wrapper APIs to achieve that in a
>> safeway.
>> >
>> > I was told that there is a possibility of having more than one MMDC on
>> > a Board and thus pmu_mmdc must not be static. I'll have to verify this
>> > in more detail though.
>> >
>> >> Also, I have a few questions regarding MMDC initialization.
>> >> 1. Is there any specific hardware initialization reason that we do a
>> >> busy loop while trying to enable Automatic
>> >>     power saving mode. Is it possible to move it to deferred workqueue?
>> >> 2. I am having an issue with the MMDC driver running Linux Kernel
>> >> 4.8-rc2 and its unable to successfully
>> >>     probe due to timeout waiting for MMDC automatic wait getting
>> enabled.
>> >> The below is the log message
>> >>     along with the debug messages I added to dump the read values for
>> >> MDMIC and MAPSR reg in MMDC
>> >>    connected to singel channel DDR3.
>> >> [    0.133162] imx_mmdc_probe MMDC_MDMISC: 0x40001740
>> >> [    0.133178] imx_mmdc_probe MMDC_MAPSR: 0x00001046 << Before
>> >> [    0.133871] imx_mmdc_probe: failed to enable automatic power saving
>> >> [    0.133969] imx-mmdc 21b0000.mmdc: No access to interrupts, using
>> timer.
>> >>
>> >> The code snippet where the debug logs are printed is as below:
>> >> static int imx_mmdc_probe(struct platform_device *pdev) <snip>
>> >>         val = readl_relaxed(reg);
>> >>         ddr_type = (val & BM_MMDC_MDMISC_DDR_TYPE) >>
>> >>                  BP_MMDC_MDMISC_DDR_TYPE;
>> >>
>> >>         printk(KERN_CRIT "%s MMDC_MDMISC: 0x%08x\n", __func__, val);
>> >>         reg = mmdc_base + MMDC_MAPSR;
>> >>         /* Enable automatic power saving */
>> >>         val = readl_relaxed(reg);
>> >>         printk(KERN_CRIT "%s MMDC_MAPSR: 0x%08x\n", __func__, val);
>> >>         val &= ~(1 << BP_MMDC_MAPSR_PSD);
>> >>         writel_relaxed(val, reg);
>> >>
>> >>         /* Ensure it's successfully enabled */
>> >>         while (!(readl_relaxed(reg) & 1 << BP_MMDC_MAPSR_PSS) && --
>> >> timeout)
>> >>                 cpu_relax();
>> >> <snip>
>> >>
>> >> Any help on the before mentioned issue will be welcomed.
>> > I'll look into it. I can't reproduce the bug with the (slightly older) kernel
>> version I'm using.
>>
>>
>>
>> --
>> Thanks & Regards
>>
>> Nitin Chaudhary
>> MS, Electrical Engineering
>> University of Southern California
>> Intern, Zodiac In-flight Innovations
diff mbox

Patch

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..372b59c 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -16,6 +16,10 @@ 
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 
 #include "common.h"
 
@@ -27,14 +31,341 @@ 
 #define BM_MMDC_MDMISC_DDR_TYPE	0x18
 #define BP_MMDC_MDMISC_DDR_TYPE	0x3
 
+#define TOTAL_CYCLES	0x1
+#define BUSY_CYCLES		0x2
+#define READ_ACCESSES	0x3
+#define WRITE_ACCESSES	0x4
+#define READ_BYTES		0x5
+#define WRITE_BYTES		0x6
+
+/* Enables, resets, freezes, overflow profiling*/
+#define DBG_DIS			0x0
+#define DBG_EN			0x1 
+#define DBG_RST			0x2
+#define PRF_FRZ			0x4
+#define CYC_OVF 		0x8
+
+#define MMDC_MADPCR0	0x410
+#define MMDC_MADPSR0	0x418
+#define MMDC_MADPSR1	0x41C
+#define MMDC_MADPSR2	0x420
+#define MMDC_MADPSR3	0x424
+#define MMDC_MADPSR4	0x428
+#define MMDC_MADPSR5	0x42C
+
+#define MMDC_NUM_COUNTERS	6
+
+#define to_mmdc_pmu(p) (container_of(p, struct mmdc_pmu, pmu))
+
+static DEFINE_IDA(mmdc_ida);
+
 static int ddr_type;
 
+PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x01")
+PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x02")
+PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x03")
+PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "config=0x04")
+PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x05")
+PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.000001");
+PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x06")
+PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.000001");
+
+struct mmdc_pmu
+{
+	struct pmu pmu;
+	void __iomem *mmdc_base;
+	cpumask_t cpu;
+	struct notifier_block cpu_nb;
+	struct hrtimer hrtimer;
+	unsigned int irq;
+	struct device *dev;
+	struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
+};
+
+static unsigned int mmdc_poll_period_us = 1000000;
+module_param_named(pmu_poll_period_us, mmdc_poll_period_us, uint,
+		        S_IRUGO | S_IWUSR);
+
+static ktime_t mmdc_timer_period(void)
+{
+	return ns_to_ktime((u64)mmdc_poll_period_us * 1000);
+}
+
+static ssize_t mmdc_cpumask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmdc_pmu *pmu_mmdc = dev_get_drvdata(dev);
+	return cpumap_print_to_pagebuf(true, buf, &pmu_mmdc->cpu);
+}
+
+static struct device_attribute mmdc_cpumask_attr =
+__ATTR(cpumask, S_IRUGO, mmdc_cpumask_show, NULL);
+
+static struct attribute *mmdc_cpumask_attrs[] = {
+	&mmdc_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group mmdc_cpumask_attr_group = {
+	.attrs = mmdc_cpumask_attrs,
+};
+
+static struct attribute *mmdc_events_attrs[] = {
+	&mmdc_total_cycles.attr.attr,
+	&mmdc_busy_cycles.attr.attr,
+	&mmdc_read_accesses.attr.attr,
+	&mmdc_write_accesses.attr.attr,
+	&mmdc_read_bytes.attr.attr,
+	&mmdc_read_bytes_unit.attr.attr,
+	&mmdc_read_bytes_scale.attr.attr,
+	&mmdc_write_bytes.attr.attr,
+	&mmdc_write_bytes_unit.attr.attr,
+	&mmdc_write_bytes_scale.attr.attr,
+	NULL,
+};
+
+static struct attribute_group mmdc_events_attr_group = {
+	.name = "events",
+	.attrs = mmdc_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *mmdc_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group mmdc_format_attr_group = {
+	.name = "format",
+	.attrs = mmdc_format_attrs,
+};
+
+static const struct attribute_group * attr_groups[] = {
+	&mmdc_events_attr_group,
+	&mmdc_format_attr_group,
+	&mmdc_cpumask_attr_group,
+	NULL,
+};
+
+static u32 mmdc_read_counter(struct mmdc_pmu *pmu_mmdc, int cfg, u64 prev_val)
+{
+	u32 val;
+	void __iomem *mmdc_base, *reg;
+	mmdc_base = pmu_mmdc->mmdc_base;
+
+	switch (cfg)
+	{
+		case TOTAL_CYCLES:
+			reg = mmdc_base + MMDC_MADPSR0;
+			break;
+		case BUSY_CYCLES:
+			reg = mmdc_base + MMDC_MADPSR1;
+			break;
+		case READ_ACCESSES:
+			reg = mmdc_base + MMDC_MADPSR2;
+			break;
+		case WRITE_ACCESSES:
+			reg = mmdc_base + MMDC_MADPSR3;
+			break;
+		case READ_BYTES:
+			reg = mmdc_base + MMDC_MADPSR4;
+			break;
+		case WRITE_BYTES:
+			reg = mmdc_base + MMDC_MADPSR5;
+			break;
+		default:
+			return -1;
+	}
+	val = readl_relaxed(reg);
+	return val;
+}
+
+static void mmdc_enable_profiling(struct perf_event *event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	void __iomem *mmdc_base, *reg;
+
+	mmdc_base = pmu_mmdc->mmdc_base;
+	reg = mmdc_base + MMDC_MADPCR0;
+	writel_relaxed(CYC_OVF | DBG_RST, reg);
+	writel_relaxed(DBG_EN, reg);
+}
+
+static int mmdc_cpu_notifier(struct notifier_block *nb,
+        unsigned long action, void *hcpu)
+{
+	struct mmdc_pmu *pmu_mmdc = container_of(nb, struct mmdc_pmu, cpu_nb);
+	unsigned int cpu = (long)hcpu; /* for (long) see kernel/cpu.c */
+	unsigned int target;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+		case CPU_DOWN_PREPARE:
+			if (!cpumask_test_and_clear_cpu(cpu, &pmu_mmdc->cpu))
+				break;
+			target = cpumask_any_but(cpu_online_mask, cpu);
+			if (target >= nr_cpu_ids)
+				break;
+			perf_pmu_migrate_context(&pmu_mmdc->pmu, cpu, target);
+			cpumask_set_cpu(target, &pmu_mmdc->cpu);
+		default:
+			break;
+    }
+
+	return NOTIFY_OK;
+}
+
+static int mmdc_event_init(struct perf_event *event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu_mmdc->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->attr.exclude_user   ||
+			event->attr.exclude_kernel ||
+			event->attr.exclude_hv     ||
+			event->attr.exclude_idle   ||
+			event->attr.exclude_host)
+		return -EINVAL;
+
+	mmdc_enable_profiling(event);
+	event->cpu = cpumask_first(&pmu_mmdc->cpu);
+	local64_set(&event->count, 0);
+
+	return 0;
+}
+
+static void mmdc_event_update(struct perf_event * event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	u32 val;
+	u64 prev_val;
+	prev_val = local64_read(&event->count);
+	val = mmdc_read_counter(pmu_mmdc, (int) event->attr.config, prev_val);
+	local64_add(val - (u32)(prev_val&0xFFFFFFFF) , &event->count);
+}
+
+static void mmdc_event_start(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	void __iomem *mmdc_base, *reg;
+
+	local64_set(&event->count, 0);
+	mmdc_base = pmu_mmdc->mmdc_base;
+	reg = mmdc_base + MMDC_MADPCR0;
+	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
+			HRTIMER_MODE_REL_PINNED);
+
+	writel_relaxed(DBG_RST, reg);
+	writel_relaxed(DBG_EN, reg);
+}
+
+static int mmdc_event_add(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = (int)event->attr.config;
+	if (cfg >= 1 &&	cfg <= MMDC_NUM_COUNTERS)
+		pmu_mmdc->mmdc_events[cfg - 1] = event;
+	if (flags & PERF_EF_START)
+		mmdc_event_start(event, flags);
+	return 0;
+}
+
+static void mmdc_event_stop(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	void __iomem *mmdc_base, *reg;
+	int cfg = (int)event->attr.config;
+
+	mmdc_base = pmu_mmdc->mmdc_base;
+	reg = mmdc_base + MMDC_MADPCR0;
+	if (cfg >= 1 &&	cfg <= MMDC_NUM_COUNTERS)
+	{
+		if (hrtimer_active(&pmu_mmdc->hrtimer))
+			hrtimer_cancel(&pmu_mmdc->hrtimer);
+
+		writel_relaxed(PRF_FRZ, reg);
+		mmdc_event_update(event);
+	}
+}
+
+static void mmdc_event_del(struct perf_event *event, int flags)
+{
+	mmdc_event_stop(event, PERF_EF_UPDATE);
+}
+
+static void mmdc_overflow_handler(struct mmdc_pmu *pmu_mmdc)
+{
+	int i;
+	u32 val;
+	u64 prev_val;
+
+	for (i = 0; i < MMDC_NUM_COUNTERS; i++)
+	{
+		struct perf_event *event = pmu_mmdc->mmdc_events[i];
+		if (event)
+		{
+			prev_val = local64_read(&event->count);
+			val = mmdc_read_counter(pmu_mmdc, i + 1, prev_val);
+			local64_add(val - (u32)(prev_val&0xFFFFFFFF) , &event->count);
+		}
+	}
+}
+
+static enum hrtimer_restart mmdc_timer_handler(struct hrtimer *hrtimer)
+{
+	struct mmdc_pmu *pmu_mmdc = container_of(hrtimer, struct mmdc_pmu,
+			hrtimer);
+
+	mmdc_overflow_handler(pmu_mmdc);
+
+	hrtimer_forward_now(hrtimer, mmdc_timer_period());
+	return HRTIMER_RESTART;
+}
+
+static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc, void __iomem *mmdc_base, struct device *dev)
+{
+	int mmdc_num;
+	*pmu_mmdc = (struct mmdc_pmu) {
+		.pmu = (struct pmu) {
+			.task_ctx_nr    = perf_invalid_context,
+			.attr_groups    = attr_groups,
+			.event_init     = mmdc_event_init,
+			.add            = mmdc_event_add,
+			.del            = mmdc_event_del,
+			.start          = mmdc_event_start,
+			.stop           = mmdc_event_stop,
+			.read           = mmdc_event_update,
+		},
+		.mmdc_base = mmdc_base,
+	};
+
+	mmdc_num = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
+
+	cpumask_set_cpu(smp_processor_id(), &pmu_mmdc->cpu);
+
+	pmu_mmdc->cpu_nb.notifier_call = mmdc_cpu_notifier;
+	pmu_mmdc->cpu_nb.priority = CPU_PRI_PERF + 1;
+
+	pmu_mmdc->dev = dev;
+	return mmdc_num;
+}
+
 static int imx_mmdc_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	void __iomem *mmdc_base, *reg;
+	struct mmdc_pmu *pmu_mmdc;
+	char * name;
 	u32 val;
 	int timeout = 0x400;
+	int mmdc_num;
 
 	mmdc_base = of_iomap(np, 0);
 	WARN_ON(!mmdc_base);
@@ -61,7 +392,35 @@  static int imx_mmdc_probe(struct platform_device *pdev)
 			__func__);
 		return -EBUSY;
 	}
+	pmu_mmdc = kzalloc(sizeof(*pmu_mmdc), GFP_KERNEL);
 
+	if (!pmu_mmdc) {
+		pr_err("failed to allocate PMU device!\n");
+		return -ENOMEM;
+	}
+	mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
+	dev_info(pmu_mmdc->dev, "No access to interrupts, using timer.\n");
+	hrtimer_init(&pmu_mmdc->hrtimer, CLOCK_MONOTONIC,
+			HRTIMER_MODE_REL);
+	pmu_mmdc->hrtimer.function = mmdc_timer_handler;
+	register_cpu_notifier(&pmu_mmdc->cpu_nb);
+	if (mmdc_num == 0) {
+		name = "mmdc";
+	} else {
+		int len = snprintf(NULL, 0, "mmdc_%d", mmdc_num);
+		name = devm_kzalloc(&pdev->dev, len + 1, GFP_KERNEL);
+		snprintf(name, len + 1, "mmdc_%d", mmdc_num);
+	}
+	platform_set_drvdata(pdev, pmu_mmdc);
+	perf_pmu_register(&(pmu_mmdc->pmu), name, -1);
+	return 0;
+}
+
+static int imx_mmdc_remove(struct platform_device *pdev)
+{
+	struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev);
+	perf_pmu_unregister(&pmu_mmdc->pmu);
+	kfree(pmu_mmdc);
 	return 0;
 }
 
@@ -81,6 +440,7 @@  static struct platform_driver imx_mmdc_driver = {
 		.of_match_table = imx_mmdc_dt_ids,
 	},
 	.probe		= imx_mmdc_probe,
+	.remove		= imx_mmdc_remove,
 };
 
 static int __init imx_mmdc_init(void)