diff mbox series

[v2,2/2] common/memsize.c: Fix get_ram_size() when cache is enabled

Message ID 20230530133327.178278-3-francesco@dolcini.it
State Accepted
Commit 1c64b98c1ec40d2c9eb68af2d190e989dded8919
Delegated to: Tom Rini
Headers show
Series common/memsize.c: Fix get_ram_size() when cache is enabled | expand

Commit Message

Francesco Dolcini May 30, 2023, 1:33 p.m. UTC
From: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>

Ensure that every write is flushed to memory and afterward reads are
from memory.
Since the algorithm rely on the fact that accessing to not existent
memory lead to write at addr / 2 without this modification accesses
to aliased (not physically present) addresses are cached and
wrong size is returned.

This was discovered while working on a TI AM625 based board
where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: Enable dcache in SPL").

Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
---
v2:
 * check if CONFIG_SYS_CACHELINE_SIZE is defined
 * do flush only when cache is enabled
---
 common/memsize.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Michael Nazzareno Trimarchi May 30, 2023, 1:42 p.m. UTC | #1
Hi

Few questions

On Tue, May 30, 2023 at 3:34 PM Francesco Dolcini <francesco@dolcini.it> wrote:
>
> From: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
>
> Ensure that every write is flushed to memory and afterward reads are
> from memory.
> Since the algorithm rely on the fact that accessing to not existent
> memory lead to write at addr / 2 without this modification accesses
> to aliased (not physically present) addresses are cached and
> wrong size is returned.
>
> This was discovered while working on a TI AM625 based board
> where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: Enable dcache in SPL").
>
> Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
> Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
> ---
> v2:
>  * check if CONFIG_SYS_CACHELINE_SIZE is defined
>  * do flush only when cache is enabled
> ---
>  common/memsize.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
>
> diff --git a/common/memsize.c b/common/memsize.c
> index 66d5be6a1ff3..d646df8b04cb 100644
> --- a/common/memsize.c
> +++ b/common/memsize.c
> @@ -7,9 +7,18 @@
>  #include <common.h>
>  #include <init.h>
>  #include <asm/global_data.h>
> +#include <cpu_func.h>
> +#include <stdint.h>
>
>  DECLARE_GLOBAL_DATA_PTR;
>
> +#ifdef CONFIG_SYS_CACHELINE_SIZE
> +# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
> +#else
> +/* Just use the greatest cache flush alignment requirement I'm aware of */
> +# define MEMSIZE_CACHELINE_SIZE 128
> +#endif
> +
>  #ifdef __PPC__
>  /*
>   * At least on G2 PowerPC cores, sequential accesses to non-existent
> @@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
>  # define sync()                /* nothing */
>  #endif
>
> +static void dcache_flush_invalidate(volatile long *p)
> +{
> +       uintptr_t start, stop;
> +       start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
> +       stop = start + MEMSIZE_CACHELINE_SIZE;
> +       flush_dcache_range(start, stop);
> +       invalidate_dcache_range(start, stop);
> +}
> +
>  /*
>   * Check memory range for valid RAM. A simple memory test determines
>   * the actually available RAM size between addresses `base' and
> @@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
>         long           val;
>         long           size;
>         int            i = 0;
> +       int            dcache_en = dcache_status();
>
>         for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
>                 addr = base + cnt;      /* pointer arith! */
> @@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
>                 save[i++] = *addr;
>                 sync();
>                 *addr = ~cnt;
> +               if (dcache_en)
> +                       dcache_flush_invalidate(addr);

Why this should be done on every increment if the invalidate keep a
range of address?
>         }
>
>         addr = base;
> @@ -50,6 +71,9 @@ long get_ram_size(long *base, long maxsize)
>         *addr = 0;
>
>         sync();
> +       if (dcache_en)
> +               dcache_flush_invalidate(addr);
> +
>         if ((val = *addr) != 0) {
>                 /* Restore the original data before leaving the function. */
>                 sync();

Can be possible just to enable/disable cache around memory test?

Michael
> --
> 2.25.1
>
Francesco Dolcini May 30, 2023, 1:48 p.m. UTC | #2
On Tue, May 30, 2023 at 03:42:18PM +0200, Michael Nazzareno Trimarchi wrote:
> On Tue, May 30, 2023 at 3:34 PM Francesco Dolcini <francesco@dolcini.it> wrote:
> >
> > From: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
> >
> > Ensure that every write is flushed to memory and afterward reads are
> > from memory.
> > Since the algorithm rely on the fact that accessing to not existent
> > memory lead to write at addr / 2 without this modification accesses
> > to aliased (not physically present) addresses are cached and
> > wrong size is returned.
> >
> > This was discovered while working on a TI AM625 based board
> > where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: Enable dcache in SPL").
> >
> > Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
> > Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
> > ---
> > v2:
> >  * check if CONFIG_SYS_CACHELINE_SIZE is defined
> >  * do flush only when cache is enabled
> > ---
> >  common/memsize.c | 24 ++++++++++++++++++++++++
> >  1 file changed, 24 insertions(+)
> >
> > diff --git a/common/memsize.c b/common/memsize.c
> > index 66d5be6a1ff3..d646df8b04cb 100644
> > --- a/common/memsize.c
> > +++ b/common/memsize.c
> > @@ -7,9 +7,18 @@
> >  #include <common.h>
> >  #include <init.h>
> >  #include <asm/global_data.h>
> > +#include <cpu_func.h>
> > +#include <stdint.h>
> >
> >  DECLARE_GLOBAL_DATA_PTR;
> >
> > +#ifdef CONFIG_SYS_CACHELINE_SIZE
> > +# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
> > +#else
> > +/* Just use the greatest cache flush alignment requirement I'm aware of */
> > +# define MEMSIZE_CACHELINE_SIZE 128
> > +#endif
> > +
> >  #ifdef __PPC__
> >  /*
> >   * At least on G2 PowerPC cores, sequential accesses to non-existent
> > @@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
> >  # define sync()                /* nothing */
> >  #endif
> >
> > +static void dcache_flush_invalidate(volatile long *p)
> > +{
> > +       uintptr_t start, stop;
> > +       start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
> > +       stop = start + MEMSIZE_CACHELINE_SIZE;
> > +       flush_dcache_range(start, stop);
> > +       invalidate_dcache_range(start, stop);
> > +}
> > +
> >  /*
> >   * Check memory range for valid RAM. A simple memory test determines
> >   * the actually available RAM size between addresses `base' and
> > @@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
> >         long           val;
> >         long           size;
> >         int            i = 0;
> > +       int            dcache_en = dcache_status();
> >
> >         for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
> >                 addr = base + cnt;      /* pointer arith! */
> > @@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
> >                 save[i++] = *addr;
> >                 sync();
> >                 *addr = ~cnt;
> > +               if (dcache_en)
> > +                       dcache_flush_invalidate(addr);
> 
> Why this should be done on every increment if the invalidate keep a
> range of address?

We do invalidate/flush the current write, the granularity of the
flush/invalidate is the page size.

This is required since we need to ensure ordering of the writes. How
would you know where the aliasing is going to happen if you flush all at
once at the end?

> Can be possible just to enable/disable cache around memory test?
In theory yes. In practice this proved some architecture to just crash
badly because the stack was "corrupted" after re-enabling the cache.

We'll submit a separate bug fix for that, bug given that this pattern
(disable AND enable) is normally not done in U-Boot it seems like
looking for trouble doing it in such a commonly used routine.

Francesco
Michael Nazzareno Trimarchi May 30, 2023, 2:04 p.m. UTC | #3
Hi

On Tue, May 30, 2023 at 3:49 PM Francesco Dolcini <francesco@dolcini.it> wrote:
>
> On Tue, May 30, 2023 at 03:42:18PM +0200, Michael Nazzareno Trimarchi wrote:
> > On Tue, May 30, 2023 at 3:34 PM Francesco Dolcini <francesco@dolcini.it> wrote:
> > >
> > > From: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
> > >
> > > Ensure that every write is flushed to memory and afterward reads are
> > > from memory.
> > > Since the algorithm rely on the fact that accessing to not existent
> > > memory lead to write at addr / 2 without this modification accesses
> > > to aliased (not physically present) addresses are cached and
> > > wrong size is returned.
> > >
> > > This was discovered while working on a TI AM625 based board
> > > where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: Enable dcache in SPL").
> > >
> > > Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
> > > Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
> > > ---
> > > v2:
> > >  * check if CONFIG_SYS_CACHELINE_SIZE is defined
> > >  * do flush only when cache is enabled
> > > ---
> > >  common/memsize.c | 24 ++++++++++++++++++++++++
> > >  1 file changed, 24 insertions(+)
> > >
> > > diff --git a/common/memsize.c b/common/memsize.c
> > > index 66d5be6a1ff3..d646df8b04cb 100644
> > > --- a/common/memsize.c
> > > +++ b/common/memsize.c
> > > @@ -7,9 +7,18 @@
> > >  #include <common.h>
> > >  #include <init.h>
> > >  #include <asm/global_data.h>
> > > +#include <cpu_func.h>
> > > +#include <stdint.h>
> > >
> > >  DECLARE_GLOBAL_DATA_PTR;
> > >
> > > +#ifdef CONFIG_SYS_CACHELINE_SIZE
> > > +# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
> > > +#else
> > > +/* Just use the greatest cache flush alignment requirement I'm aware of */
> > > +# define MEMSIZE_CACHELINE_SIZE 128
> > > +#endif
> > > +
> > >  #ifdef __PPC__
> > >  /*
> > >   * At least on G2 PowerPC cores, sequential accesses to non-existent
> > > @@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
> > >  # define sync()                /* nothing */
> > >  #endif
> > >
> > > +static void dcache_flush_invalidate(volatile long *p)
> > > +{
> > > +       uintptr_t start, stop;
> > > +       start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
> > > +       stop = start + MEMSIZE_CACHELINE_SIZE;
> > > +       flush_dcache_range(start, stop);
> > > +       invalidate_dcache_range(start, stop);
> > > +}
> > > +
> > >  /*
> > >   * Check memory range for valid RAM. A simple memory test determines
> > >   * the actually available RAM size between addresses `base' and
> > > @@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
> > >         long           val;
> > >         long           size;
> > >         int            i = 0;
> > > +       int            dcache_en = dcache_status();
> > >
> > >         for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
> > >                 addr = base + cnt;      /* pointer arith! */
> > > @@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
> > >                 save[i++] = *addr;
> > >                 sync();
> > >                 *addr = ~cnt;
> > > +               if (dcache_en)
> > > +                       dcache_flush_invalidate(addr);
> >
> > Why this should be done on every increment if the invalidate keep a
> > range of address?
>
> We do invalidate/flush the current write, the granularity of the
> flush/invalidate is the page size.
>
> This is required since we need to ensure ordering of the writes. How
> would you know where the aliasing is going to happen if you flush all at
> once at the end?
>

I see, I read the code properly of get_mem_size now.

> > Can be possible just to enable/disable cache around memory test?
> In theory yes. In practice this proved some architecture to just crash
> badly because the stack was "corrupted" after re-enabling the cache.
>
> We'll submit a separate bug fix for that, bug given that this pattern
> (disable AND enable) is normally not done in U-Boot it seems like
> looking for trouble doing it in such a commonly used routine.
>

Thank you
diff mbox series

Patch

diff --git a/common/memsize.c b/common/memsize.c
index 66d5be6a1ff3..d646df8b04cb 100644
--- a/common/memsize.c
+++ b/common/memsize.c
@@ -7,9 +7,18 @@ 
 #include <common.h>
 #include <init.h>
 #include <asm/global_data.h>
+#include <cpu_func.h>
+#include <stdint.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
+#ifdef CONFIG_SYS_CACHELINE_SIZE
+# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
+#else
+/* Just use the greatest cache flush alignment requirement I'm aware of */
+# define MEMSIZE_CACHELINE_SIZE 128
+#endif
+
 #ifdef __PPC__
 /*
  * At least on G2 PowerPC cores, sequential accesses to non-existent
@@ -20,6 +29,15 @@  DECLARE_GLOBAL_DATA_PTR;
 # define sync()		/* nothing */
 #endif
 
+static void dcache_flush_invalidate(volatile long *p)
+{
+	uintptr_t start, stop;
+	start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
+	stop = start + MEMSIZE_CACHELINE_SIZE;
+	flush_dcache_range(start, stop);
+	invalidate_dcache_range(start, stop);
+}
+
 /*
  * Check memory range for valid RAM. A simple memory test determines
  * the actually available RAM size between addresses `base' and
@@ -34,6 +52,7 @@  long get_ram_size(long *base, long maxsize)
 	long           val;
 	long           size;
 	int            i = 0;
+	int            dcache_en = dcache_status();
 
 	for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
 		addr = base + cnt;	/* pointer arith! */
@@ -41,6 +60,8 @@  long get_ram_size(long *base, long maxsize)
 		save[i++] = *addr;
 		sync();
 		*addr = ~cnt;
+		if (dcache_en)
+			dcache_flush_invalidate(addr);
 	}
 
 	addr = base;
@@ -50,6 +71,9 @@  long get_ram_size(long *base, long maxsize)
 	*addr = 0;
 
 	sync();
+	if (dcache_en)
+		dcache_flush_invalidate(addr);
+
 	if ((val = *addr) != 0) {
 		/* Restore the original data before leaving the function. */
 		sync();