[v2,4/7] exec: Use const alias for TARGET_PAGE_BITS_VARY
diff mbox series

Message ID 20191023154505.30521-5-richard.henderson@linaro.org
State New
Headers show
Series
  • exec: Improve code for TARGET_PAGE_BITS_VARY
Related show

Commit Message

Richard Henderson Oct. 23, 2019, 3:45 p.m. UTC
Using a variable that is declared "const" for this tells the
compiler that it may read the value once and assume that it
does not change across function calls.

For target_page_size, this means we have only one assert per
function, and one read of the variable.

This reduces the size of qemu-system-aarch64 by 8k.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Notice CONFIG_ATTRIBUTE_ALIAS, and work around Xcode 9 lossage.
---
 include/exec/cpu-all.h | 14 +++++++---
 exec-vary.c            | 60 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 62 insertions(+), 12 deletions(-)

Comments

Alex Bennée Oct. 25, 2019, 2:28 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Using a variable that is declared "const" for this tells the
> compiler that it may read the value once and assume that it
> does not change across function calls.
>
> For target_page_size, this means we have only one assert per
> function, and one read of the variable.
>
> This reduces the size of qemu-system-aarch64 by 8k.
>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
> v2: Notice CONFIG_ATTRIBUTE_ALIAS, and work around Xcode 9 lossage.
> ---
>  include/exec/cpu-all.h | 14 +++++++---
>  exec-vary.c            | 60 ++++++++++++++++++++++++++++++++++++------
>  2 files changed, 62 insertions(+), 12 deletions(-)
>
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 255bb186ac..76515dc8d9 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -210,10 +210,16 @@ static inline void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val
>  /* page related stuff */
>
>  #ifdef TARGET_PAGE_BITS_VARY
> -extern bool target_page_bits_decided;
> -extern int target_page_bits;
> -#define TARGET_PAGE_BITS ({ assert(target_page_bits_decided); \
> -                            target_page_bits; })
> +typedef struct {
> +    bool decided;
> +    int bits;
> +} TargetPageBits;
> +# if defined(CONFIG_ATTRIBUTE_ALIAS) || !defined(IN_EXEC_VARY)
> +extern const TargetPageBits target_page;
> +#else
> +extern TargetPageBits target_page;
> +# endif
> +#define TARGET_PAGE_BITS (assert(target_page.decided), target_page.bits)
>  #else
>  #define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS
>  #endif
> diff --git a/exec-vary.c b/exec-vary.c
> index 48c0ab306c..e0befd502a 100644
> --- a/exec-vary.c
> +++ b/exec-vary.c
> @@ -19,11 +19,55 @@
>
>  #include "qemu/osdep.h"
>  #include "qemu-common.h"
> +
> +#define IN_EXEC_VARY 1
> +
>  #include "exec/exec-all.h"
>
>  #ifdef TARGET_PAGE_BITS_VARY
> -int target_page_bits;
> -bool target_page_bits_decided;
> +# ifdef CONFIG_ATTRIBUTE_ALIAS
> +/*
> + * We want to declare the "target_page" variable as const, which tells
> + * the compiler that it can cache any value that it reads across calls.
> + * This avoids multiple assertions and multiple reads within any one user.
> + *
> + * This works because we initialize the target_page data very early, in a
> + * location far removed from the functions that require the final results.
> + *
> + * This also requires that we have a non-constant symbol by which we can
> + * perform the actual initialization, and which forces the data to be
> + * allocated within writable memory.  Thus "init_target_page", and we use
> + * that symbol exclusively in the two functions that initialize this value.
> + *
> + * The "target_page" symbol is created as an alias of "init_target_page".
> + */
> +static TargetPageBits init_target_page;
> +
> +/*
> + * Note that this is *not* a redundant decl, this is the definition of
> + * the "target_page" symbol.  The syntax for this definition requires
> + * the use of the extern keyword.  This seems to be a GCC bug in
> + * either the syntax for the alias attribute or in -Wredundant-decls.
> + *
> + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91765
> + */
> +#  pragma GCC diagnostic push
> +#  pragma GCC diagnostic ignored "-Wredundant-decls"
> +
> +extern const TargetPageBits target_page
> +    __attribute__((alias("init_target_page")));
> +
> +#  pragma GCC diagnostic pop
> +# else
> +/*
> + * When aliases are not supported then we force two different declarations,
> + * by way of suppressing the header declaration with IN_EXEC_VARY.
> + * We assume that on such an old compiler, LTO cannot be used, and so the
> + * compiler cannot not detect the mismatched declarations, and all is well.
> + */
> +TargetPageBits target_page;
> +#  define init_target_page target_page
> +# endif
>  #endif
>
>  bool set_preferred_target_page_bits(int bits)
> @@ -36,11 +80,11 @@ bool set_preferred_target_page_bits(int bits)
>       */
>  #ifdef TARGET_PAGE_BITS_VARY
>      assert(bits >= TARGET_PAGE_BITS_MIN);
> -    if (target_page_bits == 0 || target_page_bits > bits) {
> -        if (target_page_bits_decided) {
> +    if (init_target_page.bits == 0 || init_target_page.bits > bits) {
> +        if (init_target_page.decided) {
>              return false;
>          }
> -        target_page_bits = bits;
> +        init_target_page.bits = bits;
>      }
>  #endif
>      return true;
> @@ -49,9 +93,9 @@ bool set_preferred_target_page_bits(int bits)
>  void finalize_target_page_bits(void)
>  {
>  #ifdef TARGET_PAGE_BITS_VARY
> -    if (target_page_bits == 0) {
> -        target_page_bits = TARGET_PAGE_BITS_MIN;
> +    if (init_target_page.bits == 0) {
> +        init_target_page.bits = TARGET_PAGE_BITS_MIN;
>      }
> -    target_page_bits_decided = true;
> +    init_target_page.decided = true;
>  #endif
>  }


--
Alex Bennée
Peter Maydell Oct. 25, 2019, 2:51 p.m. UTC | #2
On Wed, 23 Oct 2019 at 18:06, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Using a variable that is declared "const" for this tells the
> compiler that it may read the value once and assume that it
> does not change across function calls.
>
> For target_page_size, this means we have only one assert per
> function, and one read of the variable.
>
> This reduces the size of qemu-system-aarch64 by 8k.
>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>


> + * We want to declare the "target_page" variable as const, which tells
> + * the compiler that it can cache any value that it reads across calls.
> + * This avoids multiple assertions and multiple reads within any one user.
> + *
> + * This works because we initialize the target_page data very early, in a
> + * location far removed from the functions that require the final results.

I have to say that this feels like a worryingly large amount
of magic. Is this actually guaranteed to work by the compiler?

thanks
-- PMM
Richard Henderson Oct. 25, 2019, 8:43 p.m. UTC | #3
On 10/25/19 10:51 AM, Peter Maydell wrote:
>> + * We want to declare the "target_page" variable as const, which tells
>> + * the compiler that it can cache any value that it reads across calls.
>> + * This avoids multiple assertions and multiple reads within any one user.
>> + *
>> + * This works because we initialize the target_page data very early, in a
>> + * location far removed from the functions that require the final results.
> 
> I have to say that this feels like a worryingly large amount
> of magic. Is this actually guaranteed to work by the compiler?

Yes.


r~
Peter Maydell Oct. 25, 2019, 9:01 p.m. UTC | #4
On Fri, 25 Oct 2019 at 21:43, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 10/25/19 10:51 AM, Peter Maydell wrote:
> >> + * We want to declare the "target_page" variable as const, which tells
> >> + * the compiler that it can cache any value that it reads across calls.
> >> + * This avoids multiple assertions and multiple reads within any one user.
> >> + *
> >> + * This works because we initialize the target_page data very early, in a
> >> + * location far removed from the functions that require the final results.
> >
> > I have to say that this feels like a worryingly large amount
> > of magic. Is this actually guaranteed to work by the compiler?
>
> Yes.

I'm curious to know how the compiler engineers define
"very early" and "far removed" -- in my experience they
usually prefer to be more precise than that :-)

thanks
-- PMM
Richard Henderson Oct. 25, 2019, 9:16 p.m. UTC | #5
On 10/25/19 5:01 PM, Peter Maydell wrote:
> On Fri, 25 Oct 2019 at 21:43, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 10/25/19 10:51 AM, Peter Maydell wrote:
>>>> + * We want to declare the "target_page" variable as const, which tells
>>>> + * the compiler that it can cache any value that it reads across calls.
>>>> + * This avoids multiple assertions and multiple reads within any one user.
>>>> + *
>>>> + * This works because we initialize the target_page data very early, in a
>>>> + * location far removed from the functions that require the final results.
>>>
>>> I have to say that this feels like a worryingly large amount
>>> of magic. Is this actually guaranteed to work by the compiler?
>>
>> Yes.
> 
> I'm curious to know how the compiler engineers define
> "very early" and "far removed" -- in my experience they
> usually prefer to be more precise than that :-)

I remembered putting more precise language in there, but I don't see it now.
Perhaps I just dreamt it.

The last write to the non-const variable happens before the first time we
access the const variable.  At the first access to the const variable, we
assert that it has been initialized.

There's no specific barrier to avoid that first read of the const variable not
be hoisted by the compiler before the last store of the non-const variable,
except for being in a separate function, in a separate compilation unit, and
thus "far away".

We could, perhaps, put a barrier() at the end of finalize_target_page_bits(),
documenting this fact against some future date when compilation with -flto is
viable.  I will say, though, that I've tried that recently and quite some work
is required before one could enable -flto.  In the meantime, the barrier()
would compile away to nothing.


r~

Patch
diff mbox series

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 255bb186ac..76515dc8d9 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -210,10 +210,16 @@  static inline void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val
 /* page related stuff */
 
 #ifdef TARGET_PAGE_BITS_VARY
-extern bool target_page_bits_decided;
-extern int target_page_bits;
-#define TARGET_PAGE_BITS ({ assert(target_page_bits_decided); \
-                            target_page_bits; })
+typedef struct {
+    bool decided;
+    int bits;
+} TargetPageBits;
+# if defined(CONFIG_ATTRIBUTE_ALIAS) || !defined(IN_EXEC_VARY)
+extern const TargetPageBits target_page;
+#else
+extern TargetPageBits target_page;
+# endif
+#define TARGET_PAGE_BITS (assert(target_page.decided), target_page.bits)
 #else
 #define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS
 #endif
diff --git a/exec-vary.c b/exec-vary.c
index 48c0ab306c..e0befd502a 100644
--- a/exec-vary.c
+++ b/exec-vary.c
@@ -19,11 +19,55 @@ 
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+
+#define IN_EXEC_VARY 1
+
 #include "exec/exec-all.h"
 
 #ifdef TARGET_PAGE_BITS_VARY
-int target_page_bits;
-bool target_page_bits_decided;
+# ifdef CONFIG_ATTRIBUTE_ALIAS
+/*
+ * We want to declare the "target_page" variable as const, which tells
+ * the compiler that it can cache any value that it reads across calls.
+ * This avoids multiple assertions and multiple reads within any one user.
+ *
+ * This works because we initialize the target_page data very early, in a
+ * location far removed from the functions that require the final results.
+ *
+ * This also requires that we have a non-constant symbol by which we can
+ * perform the actual initialization, and which forces the data to be
+ * allocated within writable memory.  Thus "init_target_page", and we use
+ * that symbol exclusively in the two functions that initialize this value.
+ *
+ * The "target_page" symbol is created as an alias of "init_target_page".
+ */
+static TargetPageBits init_target_page;
+
+/*
+ * Note that this is *not* a redundant decl, this is the definition of
+ * the "target_page" symbol.  The syntax for this definition requires
+ * the use of the extern keyword.  This seems to be a GCC bug in
+ * either the syntax for the alias attribute or in -Wredundant-decls.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91765
+ */
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Wredundant-decls"
+
+extern const TargetPageBits target_page
+    __attribute__((alias("init_target_page")));
+
+#  pragma GCC diagnostic pop
+# else
+/*
+ * When aliases are not supported then we force two different declarations,
+ * by way of suppressing the header declaration with IN_EXEC_VARY.
+ * We assume that on such an old compiler, LTO cannot be used, and so the
+ * compiler cannot not detect the mismatched declarations, and all is well.
+ */
+TargetPageBits target_page;
+#  define init_target_page target_page
+# endif
 #endif
 
 bool set_preferred_target_page_bits(int bits)
@@ -36,11 +80,11 @@  bool set_preferred_target_page_bits(int bits)
      */
 #ifdef TARGET_PAGE_BITS_VARY
     assert(bits >= TARGET_PAGE_BITS_MIN);
-    if (target_page_bits == 0 || target_page_bits > bits) {
-        if (target_page_bits_decided) {
+    if (init_target_page.bits == 0 || init_target_page.bits > bits) {
+        if (init_target_page.decided) {
             return false;
         }
-        target_page_bits = bits;
+        init_target_page.bits = bits;
     }
 #endif
     return true;
@@ -49,9 +93,9 @@  bool set_preferred_target_page_bits(int bits)
 void finalize_target_page_bits(void)
 {
 #ifdef TARGET_PAGE_BITS_VARY
-    if (target_page_bits == 0) {
-        target_page_bits = TARGET_PAGE_BITS_MIN;
+    if (init_target_page.bits == 0) {
+        init_target_page.bits = TARGET_PAGE_BITS_MIN;
     }
-    target_page_bits_decided = true;
+    init_target_page.decided = true;
 #endif
 }