diff mbox series

[3/6] aarch64: Implement system register validation tools

Message ID 20231003151920.1853404-4-victor.donascimento@arm.com
State New
Headers show
Series aarch64: Add support for __arm_rsr and __arm_wsr ACLE function family | expand

Commit Message

Victor Do Nascimento Oct. 3, 2023, 3:18 p.m. UTC
Given the implementation of a mechanism of encoding system registers
into GCC, this patch provides the mechanism of validating their use by
the compiler.  In particular, this involves:

  1. Ensuring a supplied string corresponds to a known system
     register name.  System registers can be accessed either via their
     name (e.g. `SPSR_EL1') or their encoding (e.g. `S3_0_C4_C0_0').
     Register names are validated using a binary search of the
     `sysreg_names' structure populated from the
     `aarch64_system_regs.def' file via `match_reg'.
     The encoding naming convention is validated via a parser
     implemented in this patch - `is_implem_def_reg'.
  2. Once a given register name is deemed to be valid, it is checked
     against a further 2 criteria:
       a. Is the referenced register implemented in the target
          architecture?  This is achieved by comparing the ARCH field
	  in the relevant SYSREG entry from `aarch64_system_regs.def'
	  against `aarch64_feature_flags' flags set at compile-time.
       b. Is the register being used correctly?  Check the requested
       	  operation against the FLAGS specified in SYSREG.
	  This prevents operations like writing to a read-only system
	  register.
       NOTE: For registers specified via their encoding
       (e.g. `S3_0_C4_C0_0'), once the encoding value is deemed valid
       (as per step 1) no further checks such as read/write support or
       architectural feature requirements are done and this second step
       is skipped, as is done in gas.

gcc/ChangeLog:

	* gcc/config/aarch64/aarch64-protos.h (aarch64_valid_sysreg_name_p): New.
	(aarch64_retrieve_sysreg): Likewise.
	* gcc/config/aarch64/aarch64.cc (match_reg): Likewise.
	(is_implem_def_reg): Likewise.
	(aarch64_valid_sysreg_name_p): Likewise.
	(aarch64_retrieve_sysreg): Likewise.
	(aarch64_sysreg_valid_for_rw_p): Likewise.
	* gcc/config/aarch64/predicates.md (aarch64_sysreg_string): New.
---
 gcc/config/aarch64/aarch64-protos.h |   2 +
 gcc/config/aarch64/aarch64.cc       | 121 ++++++++++++++++++++++++++++
 gcc/config/aarch64/predicates.md    |   4 +
 3 files changed, 127 insertions(+)

Comments

Richard Earnshaw Oct. 5, 2023, 12:24 p.m. UTC | #1
On 03/10/2023 16:18, Victor Do Nascimento wrote:
> Given the implementation of a mechanism of encoding system registers
> into GCC, this patch provides the mechanism of validating their use by
> the compiler.  In particular, this involves:
> 
>    1. Ensuring a supplied string corresponds to a known system
>       register name.  System registers can be accessed either via their
>       name (e.g. `SPSR_EL1') or their encoding (e.g. `S3_0_C4_C0_0').
>       Register names are validated using a binary search of the
>       `sysreg_names' structure populated from the
>       `aarch64_system_regs.def' file via `match_reg'.
>       The encoding naming convention is validated via a parser
>       implemented in this patch - `is_implem_def_reg'.
>    2. Once a given register name is deemed to be valid, it is checked
>       against a further 2 criteria:
>         a. Is the referenced register implemented in the target
>            architecture?  This is achieved by comparing the ARCH field
> 	  in the relevant SYSREG entry from `aarch64_system_regs.def'
> 	  against `aarch64_feature_flags' flags set at compile-time.
>         b. Is the register being used correctly?  Check the requested
>         	  operation against the FLAGS specified in SYSREG.
> 	  This prevents operations like writing to a read-only system
> 	  register.
>         NOTE: For registers specified via their encoding
>         (e.g. `S3_0_C4_C0_0'), once the encoding value is deemed valid
>         (as per step 1) no further checks such as read/write support or
>         architectural feature requirements are done and this second step
>         is skipped, as is done in gas.
> 
> gcc/ChangeLog:
> 
> 	* gcc/config/aarch64/aarch64-protos.h (aarch64_valid_sysreg_name_p): New.
> 	(aarch64_retrieve_sysreg): Likewise.
> 	* gcc/config/aarch64/aarch64.cc (match_reg): Likewise.
> 	(is_implem_def_reg): Likewise.
> 	(aarch64_valid_sysreg_name_p): Likewise.
> 	(aarch64_retrieve_sysreg): Likewise.
> 	(aarch64_sysreg_valid_for_rw_p): Likewise.
> 	* gcc/config/aarch64/predicates.md (aarch64_sysreg_string): New.
> ---
>   gcc/config/aarch64/aarch64-protos.h |   2 +
>   gcc/config/aarch64/aarch64.cc       | 121 ++++++++++++++++++++++++++++
>   gcc/config/aarch64/predicates.md    |   4 +
>   3 files changed, 127 insertions(+)
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 60a55f4bc19..a134e2fcf8e 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -830,6 +830,8 @@ bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>   bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
>   bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *,
>   			enum simd_immediate_check w = AARCH64_CHECK_MOV);
> +bool aarch64_valid_sysreg_name_p (const char *);
> +const char *aarch64_retrieve_sysreg (char *, bool);
>   rtx aarch64_check_zero_based_sve_index_immediate (rtx);
>   bool aarch64_sve_index_immediate_p (rtx);
>   bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 030b39ded1a..dd5ac1cbc8d 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -28070,6 +28070,127 @@ aarch64_pars_overlap_p (rtx par1, rtx par2)
>     return false;
>   }
>   
> +/* Binary search of a user-supplied system register name against
> +   a database of known register names.  Upon match the index of
> +   hit in database is returned, else return -1.  */

Given that we expect the number of explicit sysregs in a single 
compilation unit to be small, this is probably OK.  An alternative would 
be to build a hashmap of the register names the first time this routine 
is called and then do a lookup in that.  That would also avoid the need 
for the list to be maintained alphabetically.

> +int
> +match_reg (const char *ref, const char *database[], int db_len)
> +{
> +  /* Check for named system registers.  */
> +  int imin = 0, imax = db_len - 1, mid, cmp_res;
> +  while (imin <= imax)
> +    {
> +      mid = (imin + imax) / 2;
> +
> +      cmp_res = strcmp (ref, database[mid]);
> +      if (cmp_res == 0)
> +	return mid;
> +      else if (cmp_res > 0)
> +	imin = mid+1;
> +      else
> +	imax = mid-1;
> +    }
> +  return -1;
> +}
> +
> +/* Parse an implementation-defined system register name of
> +   the form S[0-3]_[0-7]_C[0-15]_C[0-15]_[1-7].
> +   Return true if name matched against above pattern, false
> +   otherwise.  */

Another advantage of using a hash map above would be that we could then 
add registers matched by this routine to the map and therefore optimize 
rescanning for them (on the basis that if they are used once, there's a 
good chance of them being used again).

> +bool
> +is_implem_def_reg (const char *regname)
> +{
> +    /* Check for implementation-defined system registers.  */
> +  int name_len = strlen (regname);
> +  if (name_len < 12 || name_len > 14)
> +    return false;
> +
> +  int pos = 0, i = 0, j = 0;
> +  char n[3] = {0}, m[3] = {0};
> +  if (regname[pos] != 's' && regname[pos] != 'S')
> +    return false;
> +  pos++;
> +  if (regname[pos] < '0' || regname[pos] > '3')
> +    return false;
> +  pos++;
> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] < '0' || regname[pos] > '7')
> +    return false;
> +  pos++;
> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] != 'c' && regname[pos] != 'C')
> +    return false;
> +  pos++;
> +  while (regname[pos] != '_')
> +    {
> +      if (i > 2)
> +	return false;
> +      if (!ISDIGIT (regname[pos]))
> +	return false;
> +      n[i++] = regname[pos++];
> +    }
> +  if (atoi (n) > 15)
> +    return false;
> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] != 'c' && regname[pos] != 'C')
> +    return false;
> +  pos++;
> +  while (regname[pos] != '_')
> +    {
> +      if (j > 2)
> +	return false;
> +      if (!ISDIGIT (regname[pos]))
> +	return false;
> +      m[j++] = regname[pos++];
> +    }
> +  if (atoi (m) > 15)
> +    return false;
> +  if (regname[pos++] != '_')
> +    return false;
> +  if (regname[pos] < '0' || regname[pos] > '7')
> +    return false;
> +  return true;
> +}
> +
> +/* Ensure a supplied system register name is implemented in the target
> +   architecture.  For use in back-end predicate `aarch64_sysreg_string'.  */
> +bool
> +aarch64_valid_sysreg_name_p (const char *regname)
> +{
> +  int reg_id = match_reg (regname, sysreg_names, nsysreg);
> +  if (reg_id == -1)
> +    return is_implem_def_reg (regname);
> +  return (aarch64_isa_flags & sysreg_reqs[reg_id]);
> +}
> +
> +/* Ensure a supplied system register name is suitable for the desired use.
> +   This involves checking its suitability for the requested read/write
> +   operation and that it is implemented in the target architecture.
> +
> +   NOTE: The read/write flags refer to whether a given register is
> +   read/write-only.  */

The more usual comment style here would be something like:

Validate REGNAME against the permitted system register names, or a 
generic sysreg specification.  WRITE_P is true iff the register is being 
written.


> +const char *
> +aarch64_retrieve_sysreg (char *regname, bool write_p)
> +{
> +  int reg_id = match_reg (regname, sysreg_names, nsysreg);
> +  if (reg_id == -1)
> +    {
> +      if (is_implem_def_reg (regname))
> +	return (const char *) regname;
> +      else
> +	return NULL;
> +    }
> +  if ((write_p && (sysreg_properties[reg_id] & F_REG_READ))
> +      || (!write_p && (sysreg_properties[reg_id] & F_REG_WRITE)))
> +    return NULL;
> +  if (aarch64_isa_flags & sysreg_reqs[reg_id])
> +    return sysreg_names_generic[reg_id];
> +  return NULL;
> +}
> +
>   /* Target-specific selftests.  */
                          ^^^^^^
I would be good to add some of the static tests that I mentioned in the 
binutils review to the selftests pass that GCC has.  Eg, that the list 
is in alphabetical order (but see above about hash maps) and that each 
register has exactly one main definition (plus any number of aliases). 
This would then get run during the self-test run during the build process.

>   
>   #if CHECKING_P
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index 01de4743974..5f0d242e4a8 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -20,6 +20,10 @@
>   
>   (include "../arm/common.md")
>   
> +(define_predicate "aarch64_sysreg_string"
> +  (and (match_code "const_string")
> +       (match_test "aarch64_valid_sysreg_name_p (XSTR (op, 0))")))
> +
>   (define_special_predicate "cc_register"
>     (and (match_code "reg")
>          (and (match_test "REGNO (op) == CC_REGNUM")

R.
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 60a55f4bc19..a134e2fcf8e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -830,6 +830,8 @@  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
 bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *,
 			enum simd_immediate_check w = AARCH64_CHECK_MOV);
+bool aarch64_valid_sysreg_name_p (const char *);
+const char *aarch64_retrieve_sysreg (char *, bool);
 rtx aarch64_check_zero_based_sve_index_immediate (rtx);
 bool aarch64_sve_index_immediate_p (rtx);
 bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 030b39ded1a..dd5ac1cbc8d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -28070,6 +28070,127 @@  aarch64_pars_overlap_p (rtx par1, rtx par2)
   return false;
 }
 
+/* Binary search of a user-supplied system register name against
+   a database of known register names.  Upon match the index of
+   hit in database is returned, else return -1.  */
+int
+match_reg (const char *ref, const char *database[], int db_len)
+{
+  /* Check for named system registers.  */
+  int imin = 0, imax = db_len - 1, mid, cmp_res;
+  while (imin <= imax)
+    {
+      mid = (imin + imax) / 2;
+
+      cmp_res = strcmp (ref, database[mid]);
+      if (cmp_res == 0)
+	return mid;
+      else if (cmp_res > 0)
+	imin = mid+1;
+      else
+	imax = mid-1;
+    }
+  return -1;
+}
+
+/* Parse an implementation-defined system register name of
+   the form S[0-3]_[0-7]_C[0-15]_C[0-15]_[1-7].
+   Return true if name matched against above pattern, false
+   otherwise.  */
+bool
+is_implem_def_reg (const char *regname)
+{
+    /* Check for implementation-defined system registers.  */
+  int name_len = strlen (regname);
+  if (name_len < 12 || name_len > 14)
+    return false;
+
+  int pos = 0, i = 0, j = 0;
+  char n[3] = {0}, m[3] = {0};
+  if (regname[pos] != 's' && regname[pos] != 'S')
+    return false;
+  pos++;
+  if (regname[pos] < '0' || regname[pos] > '3')
+    return false;
+  pos++;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] < '0' || regname[pos] > '7')
+    return false;
+  pos++;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] != 'c' && regname[pos] != 'C')
+    return false;
+  pos++;
+  while (regname[pos] != '_')
+    {
+      if (i > 2)
+	return false;
+      if (!ISDIGIT (regname[pos]))
+	return false;
+      n[i++] = regname[pos++];
+    }
+  if (atoi (n) > 15)
+    return false;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] != 'c' && regname[pos] != 'C')
+    return false;
+  pos++;
+  while (regname[pos] != '_')
+    {
+      if (j > 2)
+	return false;
+      if (!ISDIGIT (regname[pos]))
+	return false;
+      m[j++] = regname[pos++];
+    }
+  if (atoi (m) > 15)
+    return false;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] < '0' || regname[pos] > '7')
+    return false;
+  return true;
+}
+
+/* Ensure a supplied system register name is implemented in the target
+   architecture.  For use in back-end predicate `aarch64_sysreg_string'.  */
+bool
+aarch64_valid_sysreg_name_p (const char *regname)
+{
+  int reg_id = match_reg (regname, sysreg_names, nsysreg);
+  if (reg_id == -1)
+    return is_implem_def_reg (regname);
+  return (aarch64_isa_flags & sysreg_reqs[reg_id]);
+}
+
+/* Ensure a supplied system register name is suitable for the desired use.
+   This involves checking its suitability for the requested read/write
+   operation and that it is implemented in the target architecture.
+
+   NOTE: The read/write flags refer to whether a given register is
+   read/write-only.  */
+const char *
+aarch64_retrieve_sysreg (char *regname, bool write_p)
+{
+  int reg_id = match_reg (regname, sysreg_names, nsysreg);
+  if (reg_id == -1)
+    {
+      if (is_implem_def_reg (regname))
+	return (const char *) regname;
+      else
+	return NULL;
+    }
+  if ((write_p && (sysreg_properties[reg_id] & F_REG_READ))
+      || (!write_p && (sysreg_properties[reg_id] & F_REG_WRITE)))
+    return NULL;
+  if (aarch64_isa_flags & sysreg_reqs[reg_id])
+    return sysreg_names_generic[reg_id];
+  return NULL;
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 01de4743974..5f0d242e4a8 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -20,6 +20,10 @@ 
 
 (include "../arm/common.md")
 
+(define_predicate "aarch64_sysreg_string"
+  (and (match_code "const_string")
+       (match_test "aarch64_valid_sysreg_name_p (XSTR (op, 0))")))
+
 (define_special_predicate "cc_register"
   (and (match_code "reg")
        (and (match_test "REGNO (op) == CC_REGNUM")