diff mbox series

[v3,7/7] perf expr: Migrate expr ids table to a hashmap

Message ID 20200515221732.44078-8-irogers@google.com
State Not Applicable
Delegated to: BPF Maintainers
Headers show
Series Copy hashmap to tools/perf/util, use in perf expr | expand

Commit Message

Ian Rogers May 15, 2020, 10:17 p.m. UTC
Use a hashmap between a char* string and a double* value. While bpf's
hashmap entries are size_t in size, we can't guarantee sizeof(size_t) >=
sizeof(double). Avoid a memory allocation when gathering ids by making 0.0
a special value encoded as NULL.

Original map suggestion by Andi Kleen:
https://lore.kernel.org/lkml/20200224210308.GQ160988@tassilo.jf.intel.com/
and seconded by Jiri Olsa:
https://lore.kernel.org/lkml/20200423112915.GH1136647@krava/

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/tests/expr.c       |  44 ++++++------
 tools/perf/tests/pmu-events.c |  25 +++----
 tools/perf/util/expr.c        | 129 +++++++++++++++++++---------------
 tools/perf/util/expr.h        |  26 +++----
 tools/perf/util/expr.y        |  22 +-----
 tools/perf/util/metricgroup.c |  92 +++++++++++-------------
 tools/perf/util/stat-shadow.c |  49 ++++++++-----
 7 files changed, 197 insertions(+), 190 deletions(-)

Comments

Arnaldo Carvalho de Melo May 18, 2020, 3:45 p.m. UTC | #1
Em Fri, May 15, 2020 at 03:17:32PM -0700, Ian Rogers escreveu:
> Use a hashmap between a char* string and a double* value. While bpf's
> hashmap entries are size_t in size, we can't guarantee sizeof(size_t) >=
> sizeof(double). Avoid a memory allocation when gathering ids by making 0.0
> a special value encoded as NULL.
> 
> Original map suggestion by Andi Kleen:
> https://lore.kernel.org/lkml/20200224210308.GQ160988@tassilo.jf.intel.com/
> and seconded by Jiri Olsa:
> https://lore.kernel.org/lkml/20200423112915.GH1136647@krava/

I'm having trouble here when building it with:

make -C tools/perf O=/tmp/build/perf

    CC       /tmp/build/perf/tests/expr.o
    INSTALL  trace_plugins
    CC       /tmp/build/perf/util/metricgroup.o
  In file included from /home/acme/git/perf/tools/lib/bpf/hashmap.h:18,
                   from /home/acme/git/perf/tools/perf/util/expr.h:6,
                   from tests/expr.c:3:
  /home/acme/git/perf/tools/lib/bpf/libbpf_internal.h:63: error: "pr_info" redefined [-Werror]
     63 | #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
        |
  In file included from tests/expr.c:2:
  /home/acme/git/perf/tools/perf/util/debug.h:24: note: this is the location of the previous definition
 
It looks like libbpf's hashmap.h is being used instead of the one in
tools/perf/util/, yeah, as intended, but then since I don't have the
fixes you added to the BPF tree, the build fails, if I instead
unconditionally use

#include "util/hashmap.h"

It works. Please ack.

I.e. with the patch below, further tests:

[acme@five perf]$ perf -vv | grep -i bpf
                   bpf: [ on  ]  # HAVE_LIBBPF_SUPPORT
[acme@five perf]$ nm ~/bin/perf | grep -i libbpf_ | wc -l
39
[acme@five perf]$ nm ~/bin/perf | grep -i hashmap_ | wc -l
17
[acme@five perf]$

Explicitely building without LIBBPF:

[acme@five perf]$ perf -vv | grep -i bpf
                   bpf: [ OFF ]  # HAVE_LIBBPF_SUPPORT
[acme@five perf]$
[acme@five perf]$ nm ~/bin/perf | grep -i libbpf_ | wc -l
0
[acme@five perf]$ nm ~/bin/perf | grep -i hashmap_ | wc -l
9
[acme@five perf]$

Works,

- Arnaldo

diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index d60a8feaf50b..8a2c1074f90f 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,11 +2,14 @@
 #ifndef PARSE_CTX_H
 #define PARSE_CTX_H 1
 
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
-#include "hashmap.h"
-#endif
+// There are fixes that need to land upstream before we can use libbpf's headers,
+// for now use our copy unconditionally, since the data structures at this point
+// are exactly the same, no problem.
+//#ifdef HAVE_LIBBPF_SUPPORT
+//#include <bpf/hashmap.h>
+//#else
+#include "util/hashmap.h"
+//#endif
 
 struct expr_parse_ctx {
 	struct hashmap ids;
Ian Rogers May 18, 2020, 4:03 p.m. UTC | #2
On Mon, May 18, 2020 at 8:45 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Fri, May 15, 2020 at 03:17:32PM -0700, Ian Rogers escreveu:
> > Use a hashmap between a char* string and a double* value. While bpf's
> > hashmap entries are size_t in size, we can't guarantee sizeof(size_t) >=
> > sizeof(double). Avoid a memory allocation when gathering ids by making 0.0
> > a special value encoded as NULL.
> >
> > Original map suggestion by Andi Kleen:
> > https://lore.kernel.org/lkml/20200224210308.GQ160988@tassilo.jf.intel.com/
> > and seconded by Jiri Olsa:
> > https://lore.kernel.org/lkml/20200423112915.GH1136647@krava/
>
> I'm having trouble here when building it with:
>
> make -C tools/perf O=/tmp/build/perf
>
>     CC       /tmp/build/perf/tests/expr.o
>     INSTALL  trace_plugins
>     CC       /tmp/build/perf/util/metricgroup.o
>   In file included from /home/acme/git/perf/tools/lib/bpf/hashmap.h:18,
>                    from /home/acme/git/perf/tools/perf/util/expr.h:6,
>                    from tests/expr.c:3:
>   /home/acme/git/perf/tools/lib/bpf/libbpf_internal.h:63: error: "pr_info" redefined [-Werror]
>      63 | #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
>         |
>   In file included from tests/expr.c:2:
>   /home/acme/git/perf/tools/perf/util/debug.h:24: note: this is the location of the previous definition
>
> It looks like libbpf's hashmap.h is being used instead of the one in
> tools/perf/util/, yeah, as intended, but then since I don't have the
> fixes you added to the BPF tree, the build fails, if I instead
> unconditionally use
>
> #include "util/hashmap.h"
>
> It works. Please ack.
>
> I.e. with the patch below, further tests:
>
> [acme@five perf]$ perf -vv | grep -i bpf
>                    bpf: [ on  ]  # HAVE_LIBBPF_SUPPORT
> [acme@five perf]$ nm ~/bin/perf | grep -i libbpf_ | wc -l
> 39
> [acme@five perf]$ nm ~/bin/perf | grep -i hashmap_ | wc -l
> 17
> [acme@five perf]$
>
> Explicitely building without LIBBPF:
>
> [acme@five perf]$ perf -vv | grep -i bpf
>                    bpf: [ OFF ]  # HAVE_LIBBPF_SUPPORT
> [acme@five perf]$
> [acme@five perf]$ nm ~/bin/perf | grep -i libbpf_ | wc -l
> 0
> [acme@five perf]$ nm ~/bin/perf | grep -i hashmap_ | wc -l
> 9
> [acme@five perf]$
>
> Works,
>
> - Arnaldo

Hi Arnaldo,

this build issue sounds like this patch is missing:
https://lore.kernel.org/lkml/20200515221732.44078-3-irogers@google.com/
The commit message there could have explicitly said having this
#include causes the conflicting definitions between perf's debug.h and
libbpf_internal.h's definitions of pr_info, etc.

Let me know how else to help and sorry for the confusion. Thanks,
Ian


> diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
> index d60a8feaf50b..8a2c1074f90f 100644
> --- a/tools/perf/util/expr.h
> +++ b/tools/perf/util/expr.h
> @@ -2,11 +2,14 @@
>  #ifndef PARSE_CTX_H
>  #define PARSE_CTX_H 1
>
> -#ifdef HAVE_LIBBPF_SUPPORT
> -#include <bpf/hashmap.h>
> -#else
> -#include "hashmap.h"
> -#endif
> +// There are fixes that need to land upstream before we can use libbpf's headers,
> +// for now use our copy unconditionally, since the data structures at this point
> +// are exactly the same, no problem.
> +//#ifdef HAVE_LIBBPF_SUPPORT
> +//#include <bpf/hashmap.h>
> +//#else
> +#include "util/hashmap.h"
> +//#endif
>
>  struct expr_parse_ctx {
>         struct hashmap ids;
Arnaldo Carvalho de Melo May 18, 2020, 4:06 p.m. UTC | #3
Em Mon, May 18, 2020 at 09:03:45AM -0700, Ian Rogers escreveu:
> On Mon, May 18, 2020 at 8:45 AM Arnaldo Carvalho de Melo
> <arnaldo.melo@gmail.com> wrote:
> >
> > Em Fri, May 15, 2020 at 03:17:32PM -0700, Ian Rogers escreveu:
> > > Use a hashmap between a char* string and a double* value. While bpf's
> > > hashmap entries are size_t in size, we can't guarantee sizeof(size_t) >=
> > > sizeof(double). Avoid a memory allocation when gathering ids by making 0.0
> > > a special value encoded as NULL.
> > >
> > > Original map suggestion by Andi Kleen:
> > > https://lore.kernel.org/lkml/20200224210308.GQ160988@tassilo.jf.intel.com/
> > > and seconded by Jiri Olsa:
> > > https://lore.kernel.org/lkml/20200423112915.GH1136647@krava/
> >
> > I'm having trouble here when building it with:
> >
> > make -C tools/perf O=/tmp/build/perf
> >
> >     CC       /tmp/build/perf/tests/expr.o
> >     INSTALL  trace_plugins
> >     CC       /tmp/build/perf/util/metricgroup.o
> >   In file included from /home/acme/git/perf/tools/lib/bpf/hashmap.h:18,
> >                    from /home/acme/git/perf/tools/perf/util/expr.h:6,
> >                    from tests/expr.c:3:
> >   /home/acme/git/perf/tools/lib/bpf/libbpf_internal.h:63: error: "pr_info" redefined [-Werror]
> >      63 | #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
> >         |
> >   In file included from tests/expr.c:2:
> >   /home/acme/git/perf/tools/perf/util/debug.h:24: note: this is the location of the previous definition
> >
> > It looks like libbpf's hashmap.h is being used instead of the one in
> > tools/perf/util/, yeah, as intended, but then since I don't have the
> > fixes you added to the BPF tree, the build fails, if I instead
> > unconditionally use
> >
> > #include "util/hashmap.h"
> >
> > It works. Please ack.
> >
> > I.e. with the patch below, further tests:
> >
> > [acme@five perf]$ perf -vv | grep -i bpf
> >                    bpf: [ on  ]  # HAVE_LIBBPF_SUPPORT
> > [acme@five perf]$ nm ~/bin/perf | grep -i libbpf_ | wc -l
> > 39
> > [acme@five perf]$ nm ~/bin/perf | grep -i hashmap_ | wc -l
> > 17
> > [acme@five perf]$
> >
> > Explicitely building without LIBBPF:
> >
> > [acme@five perf]$ perf -vv | grep -i bpf
> >                    bpf: [ OFF ]  # HAVE_LIBBPF_SUPPORT
> > [acme@five perf]$
> > [acme@five perf]$ nm ~/bin/perf | grep -i libbpf_ | wc -l
> > 0
> > [acme@five perf]$ nm ~/bin/perf | grep -i hashmap_ | wc -l
> > 9
> > [acme@five perf]$
> >
> > Works,
> >
> > - Arnaldo
> 
> Hi Arnaldo,
> 
> this build issue sounds like this patch is missing:
> https://lore.kernel.org/lkml/20200515221732.44078-3-irogers@google.com/
> The commit message there could have explicitly said having this
> #include causes the conflicting definitions between perf's debug.h and
> libbpf_internal.h's definitions of pr_info, etc.

yeah, understood, but I'm not processing patches for tools/lib/bpf/,
Daniel is, I'll only get that one later, then we can go back to the way
you structured it. Just an extra bit of confusion in this process ;-)
 
> Let me know how else to help and sorry for the confusion. Thanks,
> Ian
> 
> 
> > diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
> > index d60a8feaf50b..8a2c1074f90f 100644
> > --- a/tools/perf/util/expr.h
> > +++ b/tools/perf/util/expr.h
> > @@ -2,11 +2,14 @@
> >  #ifndef PARSE_CTX_H
> >  #define PARSE_CTX_H 1
> >
> > -#ifdef HAVE_LIBBPF_SUPPORT
> > -#include <bpf/hashmap.h>
> > -#else
> > -#include "hashmap.h"
> > -#endif
> > +// There are fixes that need to land upstream before we can use libbpf's headers,
> > +// for now use our copy unconditionally, since the data structures at this point
> > +// are exactly the same, no problem.
> > +//#ifdef HAVE_LIBBPF_SUPPORT
> > +//#include <bpf/hashmap.h>
> > +//#else
> > +#include "util/hashmap.h"
> > +//#endif
> >
> >  struct expr_parse_ctx {
> >         struct hashmap ids;
Arnaldo Carvalho de Melo May 18, 2020, 4:11 p.m. UTC | #4
Em Mon, May 18, 2020 at 01:06:48PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, May 18, 2020 at 09:03:45AM -0700, Ian Rogers escreveu:
> > On Mon, May 18, 2020 at 8:45 AM Arnaldo Carvalho de Melo wrote:
> > this build issue sounds like this patch is missing:
> > https://lore.kernel.org/lkml/20200515221732.44078-3-irogers@google.com/
> > The commit message there could have explicitly said having this
> > #include causes the conflicting definitions between perf's debug.h and
> > libbpf_internal.h's definitions of pr_info, etc.
> 
> yeah, understood, but I'm not processing patches for tools/lib/bpf/,
> Daniel is, I'll only get that one later, then we can go back to the way
> you structured it. Just an extra bit of confusion in this process ;-)

So, thiis is failing on all alpine Linux containers:

  CC       /tmp/build/perf/util/metricgroup.o
  CC       /tmp/build/perf/util/header.o
In file included from util/metricgroup.c:25:0:
/git/linux/tools/lib/api/fs/fs.h:16:0: error: "FS" redefined [-Werror]
 #define FS(name)    \
 ^
In file included from /git/linux/tools/perf/util/hashmap.h:16:0,
                 from util/expr.h:11,
                 from util/metricgroup.c:14:
/usr/include/bits/reg.h:28:0: note: this is the location of the previous definition
 #define FS     25
 ^
  CC       /tmp/build/perf/util/callchain.o
  CC       /tmp/build/perf/util/values.o
  CC       /tmp/build/perf/util/debug.o
  CC       /tmp/build/perf/util/fncache.o
cc1: all warnings being treated as errors
mv: can't rename '/tmp/build/perf/util/.metricgroup.o.tmp': No such file or directory
/git/linux/tools/build/Makefile.build:96: recipe for target '/tmp/build/perf/util/metricgroup.o' failed


I'll check that soon,

- Arnaldo
Ian Rogers May 18, 2020, 4:29 p.m. UTC | #5
On Mon, May 18, 2020 at 9:11 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Mon, May 18, 2020 at 01:06:48PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, May 18, 2020 at 09:03:45AM -0700, Ian Rogers escreveu:
> > > On Mon, May 18, 2020 at 8:45 AM Arnaldo Carvalho de Melo wrote:
> > > this build issue sounds like this patch is missing:
> > > https://lore.kernel.org/lkml/20200515221732.44078-3-irogers@google.com/
> > > The commit message there could have explicitly said having this
> > > #include causes the conflicting definitions between perf's debug.h and
> > > libbpf_internal.h's definitions of pr_info, etc.
> >
> > yeah, understood, but I'm not processing patches for tools/lib/bpf/,
> > Daniel is, I'll only get that one later, then we can go back to the way
> > you structured it. Just an extra bit of confusion in this process ;-)
>
> So, thiis is failing on all alpine Linux containers:
>
>   CC       /tmp/build/perf/util/metricgroup.o
>   CC       /tmp/build/perf/util/header.o
> In file included from util/metricgroup.c:25:0:
> /git/linux/tools/lib/api/fs/fs.h:16:0: error: "FS" redefined [-Werror]
>  #define FS(name)    \
>  ^
> In file included from /git/linux/tools/perf/util/hashmap.h:16:0,
>                  from util/expr.h:11,
>                  from util/metricgroup.c:14:
> /usr/include/bits/reg.h:28:0: note: this is the location of the previous definition
>  #define FS     25
>  ^
>   CC       /tmp/build/perf/util/callchain.o
>   CC       /tmp/build/perf/util/values.o
>   CC       /tmp/build/perf/util/debug.o
>   CC       /tmp/build/perf/util/fncache.o
> cc1: all warnings being treated as errors
> mv: can't rename '/tmp/build/perf/util/.metricgroup.o.tmp': No such file or directory
> /git/linux/tools/build/Makefile.build:96: recipe for target '/tmp/build/perf/util/metricgroup.o' failed
>
>
> I'll check that soon,
>
> - Arnaldo

I had some issues here too:
https://lore.kernel.org/lkml/CAEf4BzYxTTND7T7X0dLr2CbkEvUuKtarOeoJYYROefij+qds0w@mail.gmail.com/
The only reason for the bits/reg.h inclusion is for __WORDSIZE for the
hash_bits operation. As shown below:

#ifdef __GLIBC__
#include <bits/wordsize.h>
#else
#include <bits/reg.h>
#endif
static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
}

It'd be possible to change the definition of hash_bits and remove the
#includes by:

static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
#ifdef __LP64__
  int shift = 64 - bits;
#else
  int shift = 32 - bits;
#endif
return (h * 11400714819323198485llu) >> shift;
}

Others may have a prefered more portable solution. A separate issue
with this same function is undefined behavior getting flagged
(unnecessarily) by sanitizers:
https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/

I was planning to come back to that once we got these changes landed.

Thanks!
Ian
Arnaldo Carvalho de Melo May 19, 2020, 7:28 p.m. UTC | #6
Em Mon, May 18, 2020 at 09:29:06AM -0700, Ian Rogers escreveu:
> I had some issues here too:
> https://lore.kernel.org/lkml/CAEf4BzYxTTND7T7X0dLr2CbkEvUuKtarOeoJYYROefij+qds0w@mail.gmail.com/
> The only reason for the bits/reg.h inclusion is for __WORDSIZE for the
> hash_bits operation. As shown below:

So, to have perf building in all the systems I have test build
containers for I have this in:

tools/include/linux/bitops.h

#include <asm/types.h>
#include <limits.h>
#ifndef __WORDSIZE
#define __WORDSIZE (__SIZEOF_LONG__ * 8)
#endif

With that it works everywhere, Android cross NDK for arm64, older
systems, cross compilers to many arches, Musl libc, uclibc, etc.

So I'm trying, just to check, that this change will make this build
everywhere:

commit ef4c968ccbd52d6a02553719ac7e97f70c65ba47
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
Date:   Tue May 19 16:26:14 2020 -0300

    WIP
    
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
index e823b35e7371..df59fd4fc95b 100644
--- a/tools/perf/util/hashmap.h
+++ b/tools/perf/util/hashmap.h
@@ -10,10 +10,9 @@
 
 #include <stdbool.h>
 #include <stddef.h>
-#ifdef __GLIBC__
-#include <bits/wordsize.h>
-#else
-#include <bits/reg.h>
+#include <limits.h>
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
 #endif
 
 static inline size_t hash_bits(size_t h, int bits)


> #ifdef __GLIBC__
> #include <bits/wordsize.h>
> #else
> #include <bits/reg.h>
> #endif
> static inline size_t hash_bits(size_t h, int bits)
> {
> /* shuffle bits and return requested number of upper bits */
> return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
> }
> 
> It'd be possible to change the definition of hash_bits and remove the
> #includes by:
> 
> static inline size_t hash_bits(size_t h, int bits)
> {
> /* shuffle bits and return requested number of upper bits */
> #ifdef __LP64__
>   int shift = 64 - bits;
> #else
>   int shift = 32 - bits;
> #endif
> return (h * 11400714819323198485llu) >> shift;
> }
> 
> Others may have a prefered more portable solution. A separate issue
> with this same function is undefined behavior getting flagged
> (unnecessarily) by sanitizers:
> https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/
> 
> I was planning to come back to that once we got these changes landed.
> 
> Thanks!
> Ian
diff mbox series

Patch

diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 3f742612776a..13350c500e34 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -19,15 +19,13 @@  static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
 int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 {
 	const char *p;
-	const char **other;
-	double val;
-	int i, ret;
+	double val, *val_ptr;
+	int ret;
 	struct expr_parse_ctx ctx;
-	int num_other;
 
 	expr__ctx_init(&ctx);
-	expr__add_id(&ctx, "FOO", 1);
-	expr__add_id(&ctx, "BAR", 2);
+	expr__add_id(&ctx, strdup("FOO"), 1);
+	expr__add_id(&ctx, strdup("BAR"), 2);
 
 	ret = test(&ctx, "1+1", 2);
 	ret |= test(&ctx, "FOO+BAR", 3);
@@ -52,25 +50,29 @@  int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 	ret = expr__parse(&val, &ctx, p, 1);
 	TEST_ASSERT_VAL("missing operand", ret == -1);
 
+	expr__ctx_clear(&ctx);
 	TEST_ASSERT_VAL("find other",
-			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other, 1) == 0);
-	TEST_ASSERT_VAL("find other", num_other == 3);
-	TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
-	TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
-	TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
-	TEST_ASSERT_VAL("find other", other[3] == NULL);
+			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO",
+					 &ctx, 1) == 0);
+	TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3);
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO",
+						    (void **)&val_ptr));
 
+	expr__ctx_clear(&ctx);
 	TEST_ASSERT_VAL("find other",
-			expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL,
-				   &other, &num_other, 3) == 0);
-	TEST_ASSERT_VAL("find other", num_other == 2);
-	TEST_ASSERT_VAL("find other", !strcmp(other[0], "EVENT1,param=3/"));
-	TEST_ASSERT_VAL("find other", !strcmp(other[1], "EVENT2,param=3/"));
-	TEST_ASSERT_VAL("find other", other[2] == NULL);
+			expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@",
+					 NULL, &ctx, 3) == 0);
+	TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2);
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/",
+						    (void **)&val_ptr));
 
-	for (i = 0; i < num_other; i++)
-		zfree(&other[i]);
-	free((void *)other);
+	expr__ctx_clear(&ctx);
 
 	return 0;
 }
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index e21f0addcfbb..3de59564deb0 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -433,8 +433,6 @@  static int test_parsing(void)
 	struct pmu_events_map *map;
 	struct pmu_event *pe;
 	int i, j, k;
-	const char **ids;
-	int idnum;
 	int ret = 0;
 	struct expr_parse_ctx ctx;
 	double result;
@@ -446,29 +444,34 @@  static int test_parsing(void)
 			break;
 		j = 0;
 		for (;;) {
+			struct hashmap_entry *cur;
+			size_t bkt;
+
 			pe = &map->table[j++];
 			if (!pe->name && !pe->metric_group && !pe->metric_name)
 				break;
 			if (!pe->metric_expr)
 				continue;
-			if (expr__find_other(pe->metric_expr, NULL,
-						&ids, &idnum, 0) < 0) {
+			expr__ctx_init(&ctx);
+			if (expr__find_other(pe->metric_expr, NULL, &ctx, 0)
+				  < 0) {
 				expr_failure("Parse other failed", map, pe);
 				ret++;
 				continue;
 			}
-			expr__ctx_init(&ctx);
 
 			/*
 			 * Add all ids with a made up value. The value may
 			 * trigger divide by zero when subtracted and so try to
 			 * make them unique.
 			 */
-			for (k = 0; k < idnum; k++)
-				expr__add_id(&ctx, ids[k], k + 1);
+			k = 1;
+			hashmap__for_each_entry((&ctx.ids), cur, bkt)
+				expr__add_id(&ctx, strdup(cur->key), k++);
 
-			for (k = 0; k < idnum; k++) {
-				if (check_parse_id(ids[k], map == cpus_map, pe))
+			hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+				if (check_parse_id(cur->key, map == cpus_map,
+						   pe))
 					ret++;
 			}
 
@@ -476,9 +479,7 @@  static int test_parsing(void)
 				expr_failure("Parse failed", map, pe);
 				ret++;
 			}
-			for (k = 0; k < idnum; k++)
-				zfree(&ids[k]);
-			free(ids);
+			expr__ctx_clear(&ctx);
 		}
 	}
 	/* TODO: fail when not ok */
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 8b4ce704a68d..f64ab91c432b 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -4,25 +4,76 @@ 
 #include "expr.h"
 #include "expr-bison.h"
 #include "expr-flex.h"
+#include <linux/kernel.h>
 
 #ifdef PARSER_DEBUG
 extern int expr_debug;
 #endif
 
+static size_t key_hash(const void *key, void *ctx __maybe_unused)
+{
+	const char *str = (const char *)key;
+	size_t hash = 0;
+
+	while (*str != '\0') {
+		hash *= 31;
+		hash += *str;
+		str++;
+	}
+	return hash;
+}
+
+static bool key_equal(const void *key1, const void *key2,
+		    void *ctx __maybe_unused)
+{
+	return !strcmp((const char *)key1, (const char *)key2);
+}
+
 /* Caller must make sure id is allocated */
-void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
+int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
 {
-	int idx;
+	double *val_ptr = NULL, *old_val = NULL;
+	char *old_key = NULL;
+	int ret;
+
+	if (val != 0.0) {
+		val_ptr = malloc(sizeof(double));
+		if (!val_ptr)
+			return -ENOMEM;
+		*val_ptr = val;
+	}
+	ret = hashmap__set(&ctx->ids, name, val_ptr,
+			   (const void **)&old_key, (void **)&old_val);
+	free(old_key);
+	free(old_val);
+	return ret;
+}
+
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr)
+{
+	double *data;
 
-	assert(ctx->num_ids < MAX_PARSE_ID);
-	idx = ctx->num_ids++;
-	ctx->ids[idx].name = name;
-	ctx->ids[idx].val = val;
+	if (!hashmap__find(&ctx->ids, id, (void **)&data))
+		return -1;
+	*val_ptr = (data == NULL) ?  0.0 : *data;
+	return 0;
 }
 
 void expr__ctx_init(struct expr_parse_ctx *ctx)
 {
-	ctx->num_ids = 0;
+	hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
+}
+
+void expr__ctx_clear(struct expr_parse_ctx *ctx)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+		free((char *)cur->key);
+		free(cur->value);
+	}
+	hashmap__clear(&ctx->ids);
 }
 
 static int
@@ -56,61 +107,25 @@  __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
 	return ret;
 }
 
-int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime)
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+		const char *expr, int runtime)
 {
 	return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
 }
 
-static bool
-already_seen(const char *val, const char *one, const char **other,
-	     int num_other)
-{
-	int i;
-
-	if (one && !strcasecmp(one, val))
-		return true;
-	for (i = 0; i < num_other; i++)
-		if (!strcasecmp(other[i], val))
-			return true;
-	return false;
-}
-
-int expr__find_other(const char *expr, const char *one, const char ***other,
-		     int *num_other, int runtime)
+int expr__find_other(const char *expr, const char *one,
+		     struct expr_parse_ctx *ctx, int runtime)
 {
-	int err, i = 0, j = 0;
-	struct expr_parse_ctx ctx;
-
-	expr__ctx_init(&ctx);
-	err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime);
-	if (err)
-		return -1;
-
-	*other = malloc((ctx.num_ids + 1) * sizeof(char *));
-	if (!*other)
-		return -ENOMEM;
-
-	for (i = 0, j = 0; i < ctx.num_ids; i++) {
-		const char *str = ctx.ids[i].name;
-
-		if (already_seen(str, one, *other, j))
-			continue;
-
-		str = strdup(str);
-		if (!str)
-			goto out;
-		(*other)[j++] = str;
-	}
-	(*other)[j] = NULL;
-
-out:
-	if (i != ctx.num_ids) {
-		while (--j)
-			free((char *) (*other)[i]);
-		free(*other);
-		err = -1;
+	double *old_val = NULL;
+	char *old_key = NULL;
+	int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
+
+	if (one) {
+		hashmap__delete(&ctx->ids, one,
+				(const void **)&old_key, (void **)&old_val);
+		free(old_key);
+		free(old_val);
 	}
 
-	*num_other = j;
-	return err;
+	return ret;
 }
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 40fc452b0f2b..d60a8feaf50b 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,17 +2,14 @@ 
 #ifndef PARSE_CTX_H
 #define PARSE_CTX_H 1
 
-#define EXPR_MAX_OTHER 64
-#define MAX_PARSE_ID EXPR_MAX_OTHER
-
-struct expr_parse_id {
-	const char *name;
-	double val;
-};
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "hashmap.h"
+#endif
 
 struct expr_parse_ctx {
-	int num_ids;
-	struct expr_parse_id ids[MAX_PARSE_ID];
+	struct hashmap ids;
 };
 
 struct expr_scanner_ctx {
@@ -21,9 +18,12 @@  struct expr_scanner_ctx {
 };
 
 void expr__ctx_init(struct expr_parse_ctx *ctx);
-void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
-int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime);
-int expr__find_other(const char *expr, const char *one, const char ***other,
-		int *num_other, int runtime);
+void expr__ctx_clear(struct expr_parse_ctx *ctx);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+		const char *expr, int runtime);
+int expr__find_other(const char *expr, const char *one,
+		struct expr_parse_ctx *ids, int runtime);
 
 #endif
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 21e82a1e11a2..ec5a48bf5f34 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -46,19 +46,6 @@  static void expr_error(double *final_val __maybe_unused,
 	pr_debug("%s\n", s);
 }
 
-static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val)
-{
-	int i;
-
-	for (i = 0; i < ctx->num_ids; i++) {
-		if (!strcasecmp(ctx->ids[i].name, id)) {
-			*val = ctx->ids[i].val;
-			return 0;
-		}
-	}
-	return -1;
-}
-
 %}
 %%
 
@@ -72,12 +59,7 @@  all_other: all_other other
 
 other: ID
 {
-	if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
-		pr_err("failed: way too many variables");
-		YYABORT;
-	}
-
-	ctx->ids[ctx->num_ids++].name = $1;
+	expr__add_id(ctx, $1, 0.0);
 }
 |
 MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
@@ -92,7 +74,7 @@  if_expr:
 	;
 
 expr:	  NUMBER
-	| ID			{ if (lookup_id(ctx, $1, &$$) < 0) {
+	| ID			{ if (expr__get_id(ctx, $1, &$$)) {
 					pr_debug("%s not found\n", $1);
 					YYABORT;
 				  }
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index b071df373f8b..6772d256dfdf 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -85,8 +85,7 @@  static void metricgroup__rblist_init(struct rblist *metric_events)
 
 struct egroup {
 	struct list_head nd;
-	int idnum;
-	const char **ids;
+	struct expr_parse_ctx pctx;
 	const char *metric_name;
 	const char *metric_expr;
 	const char *metric_unit;
@@ -94,19 +93,21 @@  struct egroup {
 };
 
 static struct evsel *find_evsel_group(struct evlist *perf_evlist,
-				      const char **ids,
-				      int idnum,
+				      struct expr_parse_ctx *pctx,
 				      struct evsel **metric_events,
 				      bool *evlist_used)
 {
 	struct evsel *ev;
-	int i = 0, j = 0;
 	bool leader_found;
+	const size_t idnum = hashmap__size(&pctx->ids);
+	size_t i = 0;
+	int j = 0;
+	double *val_ptr;
 
 	evlist__for_each_entry (perf_evlist, ev) {
 		if (evlist_used[j++])
 			continue;
-		if (!strcmp(ev->name, ids[i])) {
+		if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) {
 			if (!metric_events[i])
 				metric_events[i] = ev;
 			i++;
@@ -117,14 +118,6 @@  static struct evsel *find_evsel_group(struct evlist *perf_evlist,
 			i = 0;
 			memset(metric_events, 0,
 				sizeof(struct evsel *) * idnum);
-
-			if (!strcmp(ev->name, ids[i])) {
-				if (!metric_events[i])
-					metric_events[i] = ev;
-				i++;
-				if (i == idnum)
-					break;
-			}
 		}
 	}
 
@@ -175,19 +168,20 @@  static int metricgroup__setup_events(struct list_head *groups,
 	list_for_each_entry (eg, groups, nd) {
 		struct evsel **metric_events;
 
-		metric_events = calloc(sizeof(void *), eg->idnum + 1);
+		metric_events = calloc(sizeof(void *),
+				hashmap__size(&eg->pctx.ids) + 1);
 		if (!metric_events) {
 			ret = -ENOMEM;
 			break;
 		}
-		evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
-					 metric_events, evlist_used);
+		evsel = find_evsel_group(perf_evlist, &eg->pctx, metric_events,
+					evlist_used);
 		if (!evsel) {
 			pr_debug("Cannot resolve %s: %s\n",
 					eg->metric_name, eg->metric_expr);
 			continue;
 		}
-		for (i = 0; i < eg->idnum; i++)
+		for (i = 0; metric_events[i]; i++)
 			metric_events[i]->collect_stat = true;
 		me = metricgroup__lookup(metric_events_list, evsel, true);
 		if (!me) {
@@ -415,20 +409,20 @@  void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 }
 
 static void metricgroup__add_metric_weak_group(struct strbuf *events,
-					       const char **ids,
-					       int idnum)
+					       struct expr_parse_ctx *ctx)
 {
+	struct hashmap_entry *cur;
+	size_t bkt, i = 0;
 	bool no_group = false;
-	int i;
 
-	for (i = 0; i < idnum; i++) {
-		pr_debug("found event %s\n", ids[i]);
+	hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+		pr_debug("found event %s\n", (const char *)cur->key);
 		/*
 		 * Duration time maps to a software event and can make
 		 * groups not count. Always use it outside a
 		 * group.
 		 */
-		if (!strcmp(ids[i], "duration_time")) {
+		if (!strcmp(cur->key, "duration_time")) {
 			if (i > 0)
 				strbuf_addf(events, "}:W,");
 			strbuf_addf(events, "duration_time");
@@ -437,21 +431,22 @@  static void metricgroup__add_metric_weak_group(struct strbuf *events,
 		}
 		strbuf_addf(events, "%s%s",
 			i == 0 || no_group ? "{" : ",",
-			ids[i]);
+			(const char *)cur->key);
 		no_group = false;
+		i++;
 	}
 	if (!no_group)
 		strbuf_addf(events, "}:W");
 }
 
 static void metricgroup__add_metric_non_group(struct strbuf *events,
-					      const char **ids,
-					      int idnum)
+					      struct expr_parse_ctx *ctx)
 {
-	int i;
+	struct hashmap_entry *cur;
+	size_t bkt;
 
-	for (i = 0; i < idnum; i++)
-		strbuf_addf(events, ",%s", ids[i]);
+	hashmap__for_each_entry((&ctx->ids), cur, bkt)
+		strbuf_addf(events, ",%s", (const char *)cur->key);
 }
 
 static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
@@ -495,32 +490,32 @@  int __weak arch_get_runtimeparam(void)
 static int __metricgroup__add_metric(struct strbuf *events,
 		struct list_head *group_list, struct pmu_event *pe, int runtime)
 {
-
-	const char **ids;
-	int idnum;
 	struct egroup *eg;
 
-	if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0)
-		return -EINVAL;
-
-	if (events->len > 0)
-		strbuf_addf(events, ",");
-
-	if (metricgroup__has_constraint(pe))
-		metricgroup__add_metric_non_group(events, ids, idnum);
-	else
-		metricgroup__add_metric_weak_group(events, ids, idnum);
-
 	eg = malloc(sizeof(*eg));
 	if (!eg)
 		return -ENOMEM;
 
-	eg->ids = ids;
-	eg->idnum = idnum;
+	expr__ctx_init(&eg->pctx);
 	eg->metric_name = pe->metric_name;
 	eg->metric_expr = pe->metric_expr;
 	eg->metric_unit = pe->unit;
 	eg->runtime = runtime;
+
+	if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) {
+		expr__ctx_clear(&eg->pctx);
+		free(eg);
+		return -EINVAL;
+	}
+
+	if (events->len > 0)
+		strbuf_addf(events, ",");
+
+	if (metricgroup__has_constraint(pe))
+		metricgroup__add_metric_non_group(events, &eg->pctx);
+	else
+		metricgroup__add_metric_weak_group(events, &eg->pctx);
+
 	list_add_tail(&eg->nd, group_list);
 
 	return 0;
@@ -603,12 +598,9 @@  static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
 static void metricgroup__free_egroups(struct list_head *group_list)
 {
 	struct egroup *eg, *egtmp;
-	int i;
 
 	list_for_each_entry_safe (eg, egtmp, group_list, nd) {
-		for (i = 0; i < eg->idnum; i++)
-			zfree(&eg->ids[i]);
-		zfree(&eg->ids);
+		expr__ctx_clear(&eg->pctx);
 		list_del_init(&eg->nd);
 		free(eg);
 	}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 9bd7a8d2a858..c44dc814b377 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -323,35 +323,46 @@  void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 {
 	struct evsel *counter, *leader, **metric_events, *oc;
 	bool found;
-	const char **metric_names;
+	struct expr_parse_ctx ctx;
+	struct hashmap_entry *cur;
+	size_t bkt;
 	int i;
-	int num_metric_names;
 
+	expr__ctx_init(&ctx);
 	evlist__for_each_entry(evsel_list, counter) {
 		bool invalid = false;
 
 		leader = counter->leader;
 		if (!counter->metric_expr)
 			continue;
+
+		expr__ctx_clear(&ctx);
 		metric_events = counter->metric_events;
 		if (!metric_events) {
-			if (expr__find_other(counter->metric_expr, counter->name,
-						&metric_names, &num_metric_names, 1) < 0)
+			if (expr__find_other(counter->metric_expr,
+					     counter->name,
+					     &ctx, 1) < 0)
 				continue;
 
 			metric_events = calloc(sizeof(struct evsel *),
-					       num_metric_names + 1);
-			if (!metric_events)
+					       hashmap__size(&ctx.ids) + 1);
+			if (!metric_events) {
+				expr__ctx_clear(&ctx);
 				return;
+			}
 			counter->metric_events = metric_events;
 		}
 
-		for (i = 0; i < num_metric_names; i++) {
+		i = 0;
+		hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+			const char *metric_name = (const char *)cur->key;
+
 			found = false;
 			if (leader) {
 				/* Search in group */
 				for_each_group_member (oc, leader) {
-					if (!strcasecmp(oc->name, metric_names[i]) &&
+					if (!strcasecmp(oc->name,
+							metric_name) &&
 						!oc->collect_stat) {
 						found = true;
 						break;
@@ -360,7 +371,8 @@  void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 			}
 			if (!found) {
 				/* Search ignoring groups */
-				oc = perf_stat__find_event(evsel_list, metric_names[i]);
+				oc = perf_stat__find_event(evsel_list,
+							   metric_name);
 			}
 			if (!oc) {
 				/* Deduping one is good enough to handle duplicated PMUs. */
@@ -373,27 +385,28 @@  void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 				 * of events. So we ask the user instead to add the missing
 				 * events.
 				 */
-				if (!printed || strcasecmp(printed, metric_names[i])) {
+				if (!printed ||
+				    strcasecmp(printed, metric_name)) {
 					fprintf(stderr,
 						"Add %s event to groups to get metric expression for %s\n",
-						metric_names[i],
+						metric_name,
 						counter->name);
-					printed = strdup(metric_names[i]);
+					printed = strdup(metric_name);
 				}
 				invalid = true;
 				continue;
 			}
-			metric_events[i] = oc;
+			metric_events[i++] = oc;
 			oc->collect_stat = true;
 		}
 		metric_events[i] = NULL;
-		free(metric_names);
 		if (invalid) {
 			free(metric_events);
 			counter->metric_events = NULL;
 			counter->metric_expr = NULL;
 		}
 	}
+	expr__ctx_clear(&ctx);
 }
 
 static double runtime_stat_avg(struct runtime_stat *st,
@@ -738,7 +751,10 @@  static void generic_metric(struct perf_stat_config *config,
 
 	expr__ctx_init(&pctx);
 	/* Must be first id entry */
-	expr__add_id(&pctx, name, avg);
+	n = strdup(name);
+	if (!n)
+		return;
+	expr__add_id(&pctx, n, avg);
 	for (i = 0; metric_events[i]; i++) {
 		struct saved_value *v;
 		struct stats *stats;
@@ -814,8 +830,7 @@  static void generic_metric(struct perf_stat_config *config,
 			     (metric_name ? metric_name : name) : "", 0);
 	}
 
-	for (i = 1; i < pctx.num_ids; i++)
-		zfree(&pctx.ids[i].name);
+	expr__ctx_clear(&pctx);
 }
 
 void perf_stat__print_shadow_stats(struct perf_stat_config *config,