diff mbox series

[V2,3/7] aarch64: Implement system register validation tools

Message ID 20231018150310.253793-4-victor.donascimento@arm.com
State New
Headers show
Series aarch64: Add support for __arm_rsr and __arm_wsr ACLE function family | expand

Commit Message

Victor Do Nascimento Oct. 18, 2023, 3:02 p.m. UTC
Given the implementation of a mechanism of encoding system registers
into GCC, this patch provides the mechanism of validating their use by
the compiler.  In particular, this involves:

  1. Ensuring a supplied string corresponds to a known system
     register name.  System registers can be accessed either via their
     name (e.g. `SPSR_EL1') or their encoding (e.g. `S3_0_C4_C0_0').
     Register names are validated using a hash map, mapping known
     system register names to its corresponding `sysreg_t' struct,
     which is populated from the `aarch64_system_regs.def' file.
     Register name validation is done via `lookup_sysreg_map', while
     the encoding naming convention is validated via a parser
     implemented in this patch - `is_implem_def_reg'.
  2. Once a given register name is deemed to be valid, it is checked
     against a further 2 criteria:
       a. Is the referenced register implemented in the target
          architecture?  This is achieved by comparing the ARCH field
	  in the relevant SYSREG entry from `aarch64_system_regs.def'
	  against `aarch64_feature_flags' flags set at compile-time.
       b. Is the register being used correctly?  Check the requested
       	  operation against the FLAGS specified in SYSREG.
	  This prevents operations like writing to a read-only system
	  register.

gcc/ChangeLog:

	* gcc/config/aarch64/aarch64-protos.h (aarch64_valid_sysreg_name_p): New.
	(aarch64_retrieve_sysreg): Likewise.
	* gcc/config/aarch64/aarch64.cc (is_implem_def_reg): Likewise.
	(aarch64_valid_sysreg_name_p): Likewise.
	(aarch64_retrieve_sysreg): Likewise.
	(aarch64_register_sysreg): Likewise.
	(aarch64_init_sysregs): Likewise.
	(aarch64_lookup_sysreg_map): Likewise.
	* gcc/config/aarch64/predicates.md (aarch64_sysreg_string): New.
---
 gcc/config/aarch64/aarch64-protos.h |   2 +
 gcc/config/aarch64/aarch64.cc       | 146 ++++++++++++++++++++++++++++
 gcc/config/aarch64/predicates.md    |   4 +
 3 files changed, 152 insertions(+)

Comments

Richard Sandiford Oct. 18, 2023, 7:07 p.m. UTC | #1
Generally looks really good.  Some comments below.

Victor Do Nascimento <victor.donascimento@arm.com> writes:
> Given the implementation of a mechanism of encoding system registers
> into GCC, this patch provides the mechanism of validating their use by
> the compiler.  In particular, this involves:
>
>   1. Ensuring a supplied string corresponds to a known system
>      register name.  System registers can be accessed either via their
>      name (e.g. `SPSR_EL1') or their encoding (e.g. `S3_0_C4_C0_0').
>      Register names are validated using a hash map, mapping known
>      system register names to its corresponding `sysreg_t' struct,
>      which is populated from the `aarch64_system_regs.def' file.
>      Register name validation is done via `lookup_sysreg_map', while
>      the encoding naming convention is validated via a parser
>      implemented in this patch - `is_implem_def_reg'.
>   2. Once a given register name is deemed to be valid, it is checked
>      against a further 2 criteria:
>        a. Is the referenced register implemented in the target
>           architecture?  This is achieved by comparing the ARCH field
> 	  in the relevant SYSREG entry from `aarch64_system_regs.def'
> 	  against `aarch64_feature_flags' flags set at compile-time.
>        b. Is the register being used correctly?  Check the requested
>        	  operation against the FLAGS specified in SYSREG.
> 	  This prevents operations like writing to a read-only system
> 	  register.
>
> gcc/ChangeLog:
>
> 	* gcc/config/aarch64/aarch64-protos.h (aarch64_valid_sysreg_name_p): New.
> 	(aarch64_retrieve_sysreg): Likewise.
> 	* gcc/config/aarch64/aarch64.cc (is_implem_def_reg): Likewise.
> 	(aarch64_valid_sysreg_name_p): Likewise.
> 	(aarch64_retrieve_sysreg): Likewise.
> 	(aarch64_register_sysreg): Likewise.
> 	(aarch64_init_sysregs): Likewise.
> 	(aarch64_lookup_sysreg_map): Likewise.
> 	* gcc/config/aarch64/predicates.md (aarch64_sysreg_string): New.
> ---
>  gcc/config/aarch64/aarch64-protos.h |   2 +
>  gcc/config/aarch64/aarch64.cc       | 146 ++++++++++++++++++++++++++++
>  gcc/config/aarch64/predicates.md    |   4 +
>  3 files changed, 152 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 60a55f4bc19..a134e2fcf8e 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -830,6 +830,8 @@ bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>  bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
>  bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *,
>  			enum simd_immediate_check w = AARCH64_CHECK_MOV);
> +bool aarch64_valid_sysreg_name_p (const char *);
> +const char *aarch64_retrieve_sysreg (char *, bool);
>  rtx aarch64_check_zero_based_sve_index_immediate (rtx);
>  bool aarch64_sve_index_immediate_p (rtx);
>  bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 69de2366424..816c4b69fc8 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -85,6 +85,7 @@
>  #include "config/arm/aarch-common.h"
>  #include "config/arm/aarch-common-protos.h"
>  #include "ssa.h"
> +#include "hash-map.h"
>  
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -2845,6 +2846,52 @@ const sysreg_t sysreg_structs[] =
>  const unsigned nsysreg = TOTAL_ITEMS;
>  #undef TOTAL_ITEMS
>  
> +using sysreg_map_t = hash_map<nofree_string_hash, const sysreg_t *>;
> +static sysreg_map_t *sysreg_map = nullptr;

One concern with static, non-GTY, runtime-initialised data is "does it
work with PCH?".  I suspect it does, since all uses of the map go through
aarch64_lookup_sysreg_map, and since nothing seems to rely on persistent
pointer values.  But it would be good to have a PCH test just to make sure.

I'm thinking of something like the tests in gcc/testsuite/gcc.dg/pch.
The header file (.hs) would define a function that does sysreg reads
and writes.  When the .hs is included from the .c file, the reads and
writes would be imported through a PCH load, rather than through the
normal frontend route.

> +
> +/* Map system register names to their hardware metadata: Encoding,

s/Encoding/encoding/

> +   feature flags and architectural feature requirements, all of which
> +   are encoded in a sysreg_t struct.  */
> +void
> +aarch64_register_sysreg (const char *name, const sysreg_t *metadata)
> +{
> +  bool dup = sysreg_map->put (name, metadata);
> +  gcc_checking_assert (!dup);
> +}
> +
> +/* Lazily initialize hash table for system register validation,
> +   checking the validity of supplied register name and returning
> +   register's associated metadata.  */
> +static void
> +aarch64_init_sysregs (void)
> +{
> +  gcc_assert (!sysreg_map);
> +  sysreg_map = new sysreg_map_t;
> +  gcc_assert (sysreg_map);

This assert seems redundant.  new should abort with std::bad_alloc if
allocation fails.

> +
> +  for (unsigned i = 0; i < nsysreg; i++)
> +    {
> +      const sysreg_t *reg = sysreg_structs + i;
> +      aarch64_register_sysreg (reg->name , reg);

Nit: the coding convention adds no space before ",".

> +    }
> +}
> +
> +/* No direct access to the sysreg hash-map should be made.  Doing so
> +   risks trying to acess an unitialized hash-map and dereferencing the
> +   returned double pointer without due care risks dereferencing a
> +   null-pointer.  */
> +const sysreg_t *
> +aarch64_lookup_sysreg_map (const char *regname)
> +{
> +  if (!sysreg_map)
> +    aarch64_init_sysregs ();
> +
> +  const sysreg_t **sysreg_entry = sysreg_map->get (regname);
> +  if (sysreg_entry != NULL)
> +    return *sysreg_entry;
> +  return NULL;
> +}
> +
>  /* The current tuning set.  */
>  struct tune_params aarch64_tune_params = generic_tunings;
>  
> @@ -28053,6 +28100,105 @@ aarch64_pars_overlap_p (rtx par1, rtx par2)
>    return false;
>  }
>  
> +/* Parse an implementation-defined system register name of
> +   the form S[0-3]_[0-7]_C[0-15]_C[0-15]_[1-7].
> +   Return true if name matched against above pattern, false
> +   otherwise.  */
> +bool
> +is_implem_def_reg (const char *regname)
> +{
> +    /* Check for implementation-defined system registers.  */

Nit: too much indentation.  But I think this comment is redundant
with the one above the function.

> +  int name_len = strlen (regname);
> +  if (name_len < 12 || name_len > 14)
> +    return false;
> +
> +  int pos = 0, i = 0, j = 0;
> +  char n[3] = {0}, m[3] = {0};
> +  if (regname[pos] != 's' && regname[pos] != 'S')
> +    return false;
> +  pos++;
> +  if (regname[pos] < '0' || regname[pos] > '3')
> +    return false;
> +  pos++;
> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] < '0' || regname[pos] > '7')
> +    return false;
> +  pos++;
> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] != 'c' && regname[pos] != 'C')
> +    return false;
> +  pos++;
> +  while (regname[pos] != '_')
> +    {
> +      if (i > 2)
> +	return false;
> +      if (!ISDIGIT (regname[pos]))
> +	return false;
> +      n[i++] = regname[pos++];
> +    }
> +  if (atoi (n) > 15)
> +    return false;

It looks like this will accept C00 to C09 as a synonym of C0 to C9.
Unfortunately the tools seem a bit inconsistent on this.  GAS allows
any number of leading 0s (C0000000000000001: not a problem), whereas
Clang forbids any leading zeros.

I suppose we should avoid introducing a third variation in GCC.
Given that the code is already checking lengths, it probably makes
sense to follow Clang's behaviour.

> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] != 'c' && regname[pos] != 'C')
> +    return false;
> +  pos++;
> +  while (regname[pos] != '_')
> +    {
> +      if (j > 2)
> +	return false;
> +      if (!ISDIGIT (regname[pos]))
> +	return false;
> +      m[j++] = regname[pos++];
> +    }
> +  if (atoi (m) > 15)
> +    return false;

It'd be good to avoid the duplication here.  Maybe worth putting
the "C..." parsing into a lambda?

> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] < '0' || regname[pos] > '7')
> +    return false;
> +  return true;
> +}
> +
> +/* Validate REGNAME against the permitted system register names, or a
> +   generic sysreg specification.  For use in back-end predicate
> +   `aarch64_sysreg_string'.  */

"Return true if ..." might be a better construct here.  "Validate ..."
can imply error reporting.

> +bool
> +aarch64_valid_sysreg_name_p (const char *regname)
> +{
> +  const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname);
> +  if (sysreg == NULL)
> +    return is_implem_def_reg (regname);
> +  return (aarch64_isa_flags & sysreg->arch_reqs);
> +}
> +
> +/* Validate REGNAME against the permitted system register names, or a
> +   generic sysreg specification.  WRITE_P is true iff the register is
> +   being written to.  */

The comment should describe the return value, which AIUI is the
generic specification.

> +const char *
> +aarch64_retrieve_sysreg (char *regname, bool write_p)

What requires regname to be a non-const char *?

> +{
> +  const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname);
> +  if (sysreg == NULL)
> +    {
> +      if (is_implem_def_reg (regname))
> +	return (const char *) regname;
> +      else
> +	return NULL;
> +    }
> +  if ((write_p && (sysreg->properties & F_REG_READ))
> +      || (!write_p && (sysreg->properties & F_REG_WRITE)))
> +    return NULL;
> +  if (aarch64_isa_flags & sysreg->arch_reqs)
> +    {
> +      if (sysreg->encoding)
> +	return sysreg->encoding;

Could you explain the purpose of the "if (sysreg->encoding)" statement?
It looked odd that we could find a matching entry but still return null.
If possible, it would be good to assert that the encoding is nonnull
instead.

Thanks,
Richard

> +    }
> +  return NULL;
> +}
> +
>  /* Target-specific selftests.  */
>  
>  #if CHECKING_P
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index 01de4743974..5f0d242e4a8 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -20,6 +20,10 @@
>  
>  (include "../arm/common.md")
>  
> +(define_predicate "aarch64_sysreg_string"
> +  (and (match_code "const_string")
> +       (match_test "aarch64_valid_sysreg_name_p (XSTR (op, 0))")))
> +
>  (define_special_predicate "cc_register"
>    (and (match_code "reg")
>         (and (match_test "REGNO (op) == CC_REGNUM")
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 60a55f4bc19..a134e2fcf8e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -830,6 +830,8 @@  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
 bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *,
 			enum simd_immediate_check w = AARCH64_CHECK_MOV);
+bool aarch64_valid_sysreg_name_p (const char *);
+const char *aarch64_retrieve_sysreg (char *, bool);
 rtx aarch64_check_zero_based_sve_index_immediate (rtx);
 bool aarch64_sve_index_immediate_p (rtx);
 bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 69de2366424..816c4b69fc8 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -85,6 +85,7 @@ 
 #include "config/arm/aarch-common.h"
 #include "config/arm/aarch-common-protos.h"
 #include "ssa.h"
+#include "hash-map.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -2845,6 +2846,52 @@  const sysreg_t sysreg_structs[] =
 const unsigned nsysreg = TOTAL_ITEMS;
 #undef TOTAL_ITEMS
 
+using sysreg_map_t = hash_map<nofree_string_hash, const sysreg_t *>;
+static sysreg_map_t *sysreg_map = nullptr;
+
+/* Map system register names to their hardware metadata: Encoding,
+   feature flags and architectural feature requirements, all of which
+   are encoded in a sysreg_t struct.  */
+void
+aarch64_register_sysreg (const char *name, const sysreg_t *metadata)
+{
+  bool dup = sysreg_map->put (name, metadata);
+  gcc_checking_assert (!dup);
+}
+
+/* Lazily initialize hash table for system register validation,
+   checking the validity of supplied register name and returning
+   register's associated metadata.  */
+static void
+aarch64_init_sysregs (void)
+{
+  gcc_assert (!sysreg_map);
+  sysreg_map = new sysreg_map_t;
+  gcc_assert (sysreg_map);
+
+  for (unsigned i = 0; i < nsysreg; i++)
+    {
+      const sysreg_t *reg = sysreg_structs + i;
+      aarch64_register_sysreg (reg->name , reg);
+    }
+}
+
+/* No direct access to the sysreg hash-map should be made.  Doing so
+   risks trying to acess an unitialized hash-map and dereferencing the
+   returned double pointer without due care risks dereferencing a
+   null-pointer.  */
+const sysreg_t *
+aarch64_lookup_sysreg_map (const char *regname)
+{
+  if (!sysreg_map)
+    aarch64_init_sysregs ();
+
+  const sysreg_t **sysreg_entry = sysreg_map->get (regname);
+  if (sysreg_entry != NULL)
+    return *sysreg_entry;
+  return NULL;
+}
+
 /* The current tuning set.  */
 struct tune_params aarch64_tune_params = generic_tunings;
 
@@ -28053,6 +28100,105 @@  aarch64_pars_overlap_p (rtx par1, rtx par2)
   return false;
 }
 
+/* Parse an implementation-defined system register name of
+   the form S[0-3]_[0-7]_C[0-15]_C[0-15]_[1-7].
+   Return true if name matched against above pattern, false
+   otherwise.  */
+bool
+is_implem_def_reg (const char *regname)
+{
+    /* Check for implementation-defined system registers.  */
+  int name_len = strlen (regname);
+  if (name_len < 12 || name_len > 14)
+    return false;
+
+  int pos = 0, i = 0, j = 0;
+  char n[3] = {0}, m[3] = {0};
+  if (regname[pos] != 's' && regname[pos] != 'S')
+    return false;
+  pos++;
+  if (regname[pos] < '0' || regname[pos] > '3')
+    return false;
+  pos++;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] < '0' || regname[pos] > '7')
+    return false;
+  pos++;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] != 'c' && regname[pos] != 'C')
+    return false;
+  pos++;
+  while (regname[pos] != '_')
+    {
+      if (i > 2)
+	return false;
+      if (!ISDIGIT (regname[pos]))
+	return false;
+      n[i++] = regname[pos++];
+    }
+  if (atoi (n) > 15)
+    return false;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] != 'c' && regname[pos] != 'C')
+    return false;
+  pos++;
+  while (regname[pos] != '_')
+    {
+      if (j > 2)
+	return false;
+      if (!ISDIGIT (regname[pos]))
+	return false;
+      m[j++] = regname[pos++];
+    }
+  if (atoi (m) > 15)
+    return false;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] < '0' || regname[pos] > '7')
+    return false;
+  return true;
+}
+
+/* Validate REGNAME against the permitted system register names, or a
+   generic sysreg specification.  For use in back-end predicate
+   `aarch64_sysreg_string'.  */
+bool
+aarch64_valid_sysreg_name_p (const char *regname)
+{
+  const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname);
+  if (sysreg == NULL)
+    return is_implem_def_reg (regname);
+  return (aarch64_isa_flags & sysreg->arch_reqs);
+}
+
+/* Validate REGNAME against the permitted system register names, or a
+   generic sysreg specification.  WRITE_P is true iff the register is
+   being written to.  */
+const char *
+aarch64_retrieve_sysreg (char *regname, bool write_p)
+{
+  const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname);
+  if (sysreg == NULL)
+    {
+      if (is_implem_def_reg (regname))
+	return (const char *) regname;
+      else
+	return NULL;
+    }
+  if ((write_p && (sysreg->properties & F_REG_READ))
+      || (!write_p && (sysreg->properties & F_REG_WRITE)))
+    return NULL;
+  if (aarch64_isa_flags & sysreg->arch_reqs)
+    {
+      if (sysreg->encoding)
+	return sysreg->encoding;
+    }
+  return NULL;
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 01de4743974..5f0d242e4a8 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -20,6 +20,10 @@ 
 
 (include "../arm/common.md")
 
+(define_predicate "aarch64_sysreg_string"
+  (and (match_code "const_string")
+       (match_test "aarch64_valid_sysreg_name_p (XSTR (op, 0))")))
+
 (define_special_predicate "cc_register"
   (and (match_code "reg")
        (and (match_test "REGNO (op) == CC_REGNUM")