diff mbox

gcc/arc: Avoid add_s REG,REG,pcl on ARCv2 targets

Message ID 1467243758-12913-1-git-send-email-andrew.burgess@embecosm.com
State New
Headers show

Commit Message

Andrew Burgess June 29, 2016, 11:42 p.m. UTC
The ARCv2 targets don't support add_s REG,REG,pcl like earlier ARC
targets did.  This instruction format is used when generating case jump
tables.

This commit updates the code so that ARCv2 targets avoid the unsupported
instruction.  This does mean the code is slightly longer on ARCv2, so
the instruction length has changed accordingly.

gcc/ChangeLog:

	* config/arc/arc.md (casesi_compact_jump): Avoid generating add_s
	REG,REG,pcl on ARCv2 targets.

gcc/testsuite/ChangeLog:

	* gcc.target/arc/switch-1.c: New file.
---
 gcc/ChangeLog                           |  5 ++++
 gcc/config/arc/arc.md                   | 50 ++++++++++++++++++---------------
 gcc/testsuite/ChangeLog                 |  4 +++
 gcc/testsuite/gcc.target/arc/switch-1.c | 42 +++++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/switch-1.c

Comments

Claudiu Zissulescu June 30, 2016, 7:36 a.m. UTC | #1
I think the compactsi makes no sense for ARCv2 without the add_s reg, reg,PCL instruction.  Moreover, I have proposed a patch about this issue months ago, See: https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01462.html.  However, our maintainer is missing in action.

Cheers,
Claudiu

> -----Original Message-----
> From: Andrew Burgess [mailto:andrew.burgess@embecosm.com]
> Sent: Thursday, June 30, 2016 1:43 AM
> To: gcc-patches@gcc.gnu.org; gnu@amylaar.uk
> Cc: Claudiu.Zissulescu@synopsys.com; Andrew Burgess
> <andrew.burgess@embecosm.com>
> Subject: [PATCH] gcc/arc: Avoid add_s REG,REG,pcl on ARCv2 targets
> 
> The ARCv2 targets don't support add_s REG,REG,pcl like earlier ARC
> targets did.  This instruction format is used when generating case jump
> tables.
> 
> This commit updates the code so that ARCv2 targets avoid the unsupported
> instruction.  This does mean the code is slightly longer on ARCv2, so
> the instruction length has changed accordingly.
> 
> gcc/ChangeLog:
> 
> 	* config/arc/arc.md (casesi_compact_jump): Avoid generating add_s
> 	REG,REG,pcl on ARCv2 targets.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/arc/switch-1.c: New file.
> ---
>  gcc/ChangeLog                           |  5 ++++
>  gcc/config/arc/arc.md                   | 50 ++++++++++++++++++---------------
>  gcc/testsuite/ChangeLog                 |  4 +++
>  gcc/testsuite/gcc.target/arc/switch-1.c | 42
> +++++++++++++++++++++++++++
>  4 files changed, 79 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arc/switch-1.c
> 
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 1102c53..0f39d49 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -3950,6 +3950,7 @@
>    int unalign = arc_get_unalign ();
>    rtx xop[3];
>    const char *s;
> +  int vec_offset = TARGET_V2 ? 12 : 10;
> 
>    xop[0] = operands[0];
>    xop[2] = operands[2];
> @@ -3962,11 +3963,11 @@
>  	 2 of these are for alignment, and are anticipated in the length
>  	 of the ADDR_DIFF_VEC.  */
>        if (unalign && !satisfies_constraint_Rcq (xop[0]))
> -	s = \"add2 %2,pcl,%0\n\tld_s %2,[%2,12]\";
> +	s = \"add2 %2,pcl,%0\n\tld_s %2,[%2,12]\";   /* Length = 6 bytes.  */
>        else if (unalign)
> -	s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\";
> +	s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\";  /* Length = 6 bytes.  */
>        else
> -	s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\";
> +	s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\";    /* Length = 8 bytes.  */
>        arc_clear_unalign ();
>        break;
>      case HImode:
> @@ -3974,26 +3975,26 @@
>  	{
>  	  if (satisfies_constraint_Rcq (xop[0]))
>  	    {
> -	      s = \"add_s %2,%0,%1\n\tld%_.as %2,[pcl,%2]\";
> -	      xop[1] = GEN_INT ((10 - unalign) / 2U);
> +	      s = \"add_s %2,%0,%1\n\tld%_.as %2,[pcl,%2]\";  /* Length = 6
> bytes.  */
> +	      xop[1] = GEN_INT ((vec_offset - unalign) / 2U);
>  	    }
>  	  else
>  	    {
> -	      s = \"add1 %2,pcl,%0\n\tld%__s %2,[%2,%1]\";
> -	      xop[1] = GEN_INT (10 + unalign);
> +	      s = \"add1 %2,pcl,%0\n\tld%__s %2,[%2,%1]\";    /* Length = 6
> bytes.  */
> +	      xop[1] = GEN_INT (vec_offset + unalign);
>  	    }
>  	}
>        else
>  	{
>  	  if (satisfies_constraint_Rcq (xop[0]))
>  	    {
> -	      s = \"add_s %2,%0,%1\n\tld%_.x.as %2,[pcl,%2]\";
> -	      xop[1] = GEN_INT ((10 - unalign) / 2U);
> +	      s = \"add_s %2,%0,%1\n\tld%_.x.as %2,[pcl,%2]\";  /* Length = 6
> bytes.  */
> +	      xop[1] = GEN_INT ((vec_offset - unalign) / 2U);
>  	    }
>  	  else
>  	    {
> -	      s = \"add1 %2,pcl,%0\n\tld%__s.x %2,[%2,%1]\";
> -	      xop[1] = GEN_INT (10 + unalign);
> +	      s = \"add1 %2,pcl,%0\n\tld%__s.x %2,[%2,%1]\";    /* Length = 6
> bytes.  */
> +	      xop[1] = GEN_INT (vec_offset + unalign);
>  	    }
>  	}
>        arc_toggle_unalign ();
> @@ -4001,17 +4002,18 @@
>      case QImode:
>        if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
>  	{
> -	  if ((rtx_equal_p (xop[2], xop[0])
> -	       || find_reg_note (insn, REG_DEAD, xop[0]))
> +	  if (!TARGET_V2
> +              && (rtx_equal_p (xop[2], xop[0])
> +	          || find_reg_note (insn, REG_DEAD, xop[0]))
>  	      && satisfies_constraint_Rcq (xop[0]))
>  	    {
> -	      s = \"add_s %0,%0,pcl\n\tldb_s %2,[%0,%1]\";
> -	      xop[1] = GEN_INT (8 + unalign);
> +	      s = \"add %0,%0,pcl\n\tldb_s %2,[%0,%1]\";        /* Length = 6
> bytes.  */
> +	      xop[1] = GEN_INT ((vec_offset - 2) + unalign);
>  	    }
>  	  else
>  	    {
> -	      s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\";
> -	      xop[1] = GEN_INT (10 + unalign);
> +	      s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\";        /* Length = 6
> bytes.  */
> +	      xop[1] = GEN_INT (vec_offset + unalign);
>  	      arc_toggle_unalign ();
>  	    }
>  	}
> @@ -4019,14 +4021,14 @@
>  		|| find_reg_note (insn, REG_DEAD, xop[0]))
>  	       && satisfies_constraint_Rcq (xop[0]))
>  	{
> -	  s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\";
> -	  xop[1] = GEN_INT (10 - unalign);
> +	  s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\";   /* Length = 6 bytes.
> */
> +	  xop[1] = GEN_INT (vec_offset - unalign);
>  	  arc_toggle_unalign ();
>  	}
>        else
>  	{
>  	  /* ??? Length is 12.  */
> -	  s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\";
> +	  s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\";     /* Length = 8 bytes.
> */
>  	  xop[1] = GEN_INT (8 + unalign);
>  	}
>        break;
> @@ -4034,9 +4036,13 @@
>        gcc_unreachable ();
>      }
>    output_asm_insn (s, xop);
> -  return \"add_s %2,%2,pcl\n\tj_s%* [%2]\";
> +  if (TARGET_V2)
> +    return \"add %2,%2,pcl\n\tj_s%* [%2]\";      /* Length = 6 bytes.  */
> +  else
> +    return \"add_s %2,%2,pcl\n\tj_s%* [%2]\";    /* Length = 4 bytes.  */
>  }"
> -  [(set_attr "length" "10")
> +  [(set (attr "length")
> +        (if_then_else (match_test "TARGET_V2") (const_int 12) (const_int 10)))
>     (set_attr "type" "jump")
>     (set_attr "iscompact" "true")
>     (set_attr "cond" "nocond")])
> diff --git a/gcc/testsuite/gcc.target/arc/switch-1.c
> b/gcc/testsuite/gcc.target/arc/switch-1.c
> new file mode 100644
> index 0000000..f68b719
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arc/switch-1.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mcpu=arcem -Os -Wall -Werror" } */
> +
> +/* { dg-final { scan-assembler-not "add_s\[ \t\]+r\[0-9\]+,r\[0-9\]+,pcl" } }
> */
> +
> +/* Unfortunately the following pragma is required in order to allow
> +   compiling with -Wall -Werror, but still skip one of the possible switch
> +   cases in this test.  */
> +#pragma GCC diagnostic ignored "-Wswitch"
> +
> +typedef struct {
> +  struct {
> +    struct {
> +      enum {
> +        REG_SAVED_OFFSET,
> +        REG_SAVED_REG,
> +        REG_SAVED_EXP,
> +        REG_SAVED_VAL_OFFSET,
> +        REG_SAVED_VAL_EXP
> +      } how;
> +    } reg[1];
> +  } regs;
> +} _Unwind_FrameState;
> +
> +_Unwind_FrameState a;
> +
> +extern void fn2 (void);
> +
> +void fn1() {
> +  switch (a.regs.reg[0].how) {
> +  case REG_SAVED_OFFSET:
> +    /* Specifically don't handle REG_SAVED_REG in this switch statement,
> +       this causes GCC to generate the specific switch-jump-vector table
> +       that triggered a bug on ARCv2 targets.  */
> +    /* case REG_SAVED_REG: */
> +  case REG_SAVED_EXP:
> +  case REG_SAVED_VAL_OFFSET:
> +    fn2();
> +  case REG_SAVED_VAL_EXP:
> +    fn2();
> +  }
> +}
> --
> 2.6.4
Andrew Burgess June 30, 2016, 9:13 a.m. UTC | #2
* Claudiu Zissulescu <Claudiu.Zissulescu@synopsys.com> [2016-06-30 07:36:32 +0000]:

> I think the compactsi makes no sense for ARCv2 without the add_s
> reg, reg,PCL instruction.  Moreover, I have proposed a patch about
> this issue months ago, See:
> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01462.html.  However,
> our maintainer is missing in action.

OK, thanks for the link, I don't see a need for my patch then.

Lets hope we get some of your outstanding patches reviewed soon.

Thanks,
Andrew
diff mbox

Patch

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 1102c53..0f39d49 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -3950,6 +3950,7 @@ 
   int unalign = arc_get_unalign ();
   rtx xop[3];
   const char *s;
+  int vec_offset = TARGET_V2 ? 12 : 10;
 
   xop[0] = operands[0];
   xop[2] = operands[2];
@@ -3962,11 +3963,11 @@ 
 	 2 of these are for alignment, and are anticipated in the length
 	 of the ADDR_DIFF_VEC.  */
       if (unalign && !satisfies_constraint_Rcq (xop[0]))
-	s = \"add2 %2,pcl,%0\n\tld_s %2,[%2,12]\";
+	s = \"add2 %2,pcl,%0\n\tld_s %2,[%2,12]\";   /* Length = 6 bytes.  */
       else if (unalign)
-	s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\";
+	s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\";  /* Length = 6 bytes.  */
       else
-	s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\";
+	s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\";    /* Length = 8 bytes.  */
       arc_clear_unalign ();
       break;
     case HImode:
@@ -3974,26 +3975,26 @@ 
 	{
 	  if (satisfies_constraint_Rcq (xop[0]))
 	    {
-	      s = \"add_s %2,%0,%1\n\tld%_.as %2,[pcl,%2]\";
-	      xop[1] = GEN_INT ((10 - unalign) / 2U);
+	      s = \"add_s %2,%0,%1\n\tld%_.as %2,[pcl,%2]\";  /* Length = 6 bytes.  */
+	      xop[1] = GEN_INT ((vec_offset - unalign) / 2U);
 	    }
 	  else
 	    {
-	      s = \"add1 %2,pcl,%0\n\tld%__s %2,[%2,%1]\";
-	      xop[1] = GEN_INT (10 + unalign);
+	      s = \"add1 %2,pcl,%0\n\tld%__s %2,[%2,%1]\";    /* Length = 6 bytes.  */
+	      xop[1] = GEN_INT (vec_offset + unalign);
 	    }
 	}
       else
 	{
 	  if (satisfies_constraint_Rcq (xop[0]))
 	    {
-	      s = \"add_s %2,%0,%1\n\tld%_.x.as %2,[pcl,%2]\";
-	      xop[1] = GEN_INT ((10 - unalign) / 2U);
+	      s = \"add_s %2,%0,%1\n\tld%_.x.as %2,[pcl,%2]\";  /* Length = 6 bytes.  */
+	      xop[1] = GEN_INT ((vec_offset - unalign) / 2U);
 	    }
 	  else
 	    {
-	      s = \"add1 %2,pcl,%0\n\tld%__s.x %2,[%2,%1]\";
-	      xop[1] = GEN_INT (10 + unalign);
+	      s = \"add1 %2,pcl,%0\n\tld%__s.x %2,[%2,%1]\";    /* Length = 6 bytes.  */
+	      xop[1] = GEN_INT (vec_offset + unalign);
 	    }
 	}
       arc_toggle_unalign ();
@@ -4001,17 +4002,18 @@ 
     case QImode:
       if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
 	{
-	  if ((rtx_equal_p (xop[2], xop[0])
-	       || find_reg_note (insn, REG_DEAD, xop[0]))
+	  if (!TARGET_V2
+              && (rtx_equal_p (xop[2], xop[0])
+	          || find_reg_note (insn, REG_DEAD, xop[0]))
 	      && satisfies_constraint_Rcq (xop[0]))
 	    {
-	      s = \"add_s %0,%0,pcl\n\tldb_s %2,[%0,%1]\";
-	      xop[1] = GEN_INT (8 + unalign);
+	      s = \"add %0,%0,pcl\n\tldb_s %2,[%0,%1]\";        /* Length = 6 bytes.  */
+	      xop[1] = GEN_INT ((vec_offset - 2) + unalign);
 	    }
 	  else
 	    {
-	      s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\";
-	      xop[1] = GEN_INT (10 + unalign);
+	      s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\";        /* Length = 6 bytes.  */
+	      xop[1] = GEN_INT (vec_offset + unalign);
 	      arc_toggle_unalign ();
 	    }
 	}
@@ -4019,14 +4021,14 @@ 
 		|| find_reg_note (insn, REG_DEAD, xop[0]))
 	       && satisfies_constraint_Rcq (xop[0]))
 	{
-	  s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\";
-	  xop[1] = GEN_INT (10 - unalign);
+	  s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\";   /* Length = 6 bytes.  */
+	  xop[1] = GEN_INT (vec_offset - unalign);
 	  arc_toggle_unalign ();
 	}
       else
 	{
 	  /* ??? Length is 12.  */
-	  s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\";
+	  s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\";     /* Length = 8 bytes.  */
 	  xop[1] = GEN_INT (8 + unalign);
 	}
       break;
@@ -4034,9 +4036,13 @@ 
       gcc_unreachable ();
     }
   output_asm_insn (s, xop);
-  return \"add_s %2,%2,pcl\n\tj_s%* [%2]\";
+  if (TARGET_V2)
+    return \"add %2,%2,pcl\n\tj_s%* [%2]\";      /* Length = 6 bytes.  */
+  else
+    return \"add_s %2,%2,pcl\n\tj_s%* [%2]\";    /* Length = 4 bytes.  */
 }"
-  [(set_attr "length" "10")
+  [(set (attr "length")
+        (if_then_else (match_test "TARGET_V2") (const_int 12) (const_int 10)))
    (set_attr "type" "jump")
    (set_attr "iscompact" "true")
    (set_attr "cond" "nocond")])
diff --git a/gcc/testsuite/gcc.target/arc/switch-1.c b/gcc/testsuite/gcc.target/arc/switch-1.c
new file mode 100644
index 0000000..f68b719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/switch-1.c
@@ -0,0 +1,42 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mcpu=arcem -Os -Wall -Werror" } */
+
+/* { dg-final { scan-assembler-not "add_s\[ \t\]+r\[0-9\]+,r\[0-9\]+,pcl" } } */
+
+/* Unfortunately the following pragma is required in order to allow
+   compiling with -Wall -Werror, but still skip one of the possible switch
+   cases in this test.  */
+#pragma GCC diagnostic ignored "-Wswitch"
+
+typedef struct {
+  struct {
+    struct {
+      enum {
+        REG_SAVED_OFFSET,
+        REG_SAVED_REG,
+        REG_SAVED_EXP,
+        REG_SAVED_VAL_OFFSET,
+        REG_SAVED_VAL_EXP
+      } how;
+    } reg[1];
+  } regs;
+} _Unwind_FrameState;
+
+_Unwind_FrameState a;
+
+extern void fn2 (void);
+
+void fn1() {
+  switch (a.regs.reg[0].how) {
+  case REG_SAVED_OFFSET:
+    /* Specifically don't handle REG_SAVED_REG in this switch statement,
+       this causes GCC to generate the specific switch-jump-vector table
+       that triggered a bug on ARCv2 targets.  */
+    /* case REG_SAVED_REG: */
+  case REG_SAVED_EXP:
+  case REG_SAVED_VAL_OFFSET:
+    fn2();
+  case REG_SAVED_VAL_EXP:
+    fn2();
+  }
+}