diff mbox

[ARM,2/6] Fix Large struct mode splitters for cases where registers are not TImode.

Message ID CACUk7=UgMfjHA9XpDcndDShwog-JVK_isO4tv2Pizfn89ckKLg@mail.gmail.com
State New
Headers show

Commit Message

Ramana Radhakrishnan July 30, 2012, 11:45 a.m. UTC
> Patch 2 is a bug fix that fixes up the splitters so that they take
> into account the right register for the right mode. For instance, a
> register not fit for a TImode value shouldn't be put in one even if
> the larger mode allows a different register. This is possible for
> OImode values or indeed HFA style values being passed around as
> parameters and is potentially an issue for folks building hard-float
> systems with neon and using some of the large structures.
    ,

      The large struct mode splitters don't take into account whether
    a TImode value can be generated from a value that is in an appropriate
    neon register for that value. This is possible in cases where you have
    an EImode, OImode, CImode or TImode value in the appropriate registers
    as these could be passed in their corresponding neon D registers.

    This was exposed by the tests for v{ld/st/tbl/tbx}2/3/4{lane/}* and
    friends in the new set of tests that follow at the end of this patch
    series.

    This is a problem for folks using the new hard float ABI and passing
    such values in registers - so it might not show up that much in practice
    but it's certainly worth backporting after sitting in trunk for a few
    days. It certainly is not a regression since this bug has always been
    there but it is a fundamental correctness issue in the backend with respect
    to such splits, so I'd like some more consensus on whether this can be
    safely backported.

    regards,
    Ramana

    2012-07-27  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>

            PR target/
            * config/arm/arm-protos.h (arm_split_eimoves): Declare.
            (arm_split_tocx_imoves): Declare.
            * config/arm/iterators.md (TOCXI): New.
            * config/arm/neon.md (EI TI OI CI XI mode splitters): Unify
            and use iterator. Simplify EImode splitter. Move logic to ...
            * config/arm/arm.c (arm_split_eimoves): ... here.  Handle
            case for EImode values in registers not suitable for splits
            into TImode values.
            (arm_split_tocx_imoves): Likewise.
---
 gcc/config/arm/arm-protos.h |    3 +
 gcc/config/arm/arm.c        |   91 +++++++++++++++++++++++++++++++++++++++++++
 gcc/config/arm/iterators.md |    3 +
 gcc/config/arm/neon.md      |   84 +++++-----------------------------------
 4 files changed, 107 insertions(+), 74 deletions(-)
diff mbox

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c590ef4..dc93c5d 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -248,6 +248,9 @@  extern int vfp3_const_double_for_fract_bits (rtx);
 extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
 					   rtx);
 extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
+extern void arm_split_tocx_imoves (rtx *, enum machine_mode);
+extern void arm_split_eimoves (rtx *);
+
 #endif /* RTX_CODE */

 extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1f3f9b3..b281485 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -26410,4 +26410,95 @@  arm_validize_comparison (rtx *comparison, rtx
* op1, rtx * op2)

 }

+/* Split an EImode register-to-register move; the caller is the EImode
+   move splitter in neon.md.  OPERANDS[0] is the destination register and
+   OPERANDS[1] the source.  When both register numbers satisfy
+   NEON_REGNO_OK_FOR_QUAD the copy is one TImode move plus one DImode
+   move; otherwise it is three DImode moves.  neon_disambiguate_copy
+   stores the ordered (dest, src) pairs back into OPERANDS.  */
+void
+arm_split_eimoves (rtx *operands)
+{
+  int rdest = REGNO (operands[0]);
+  int rsrc = REGNO (operands[1]);
+  int count;
+  rtx dest[3], src[3];
+  int i;
+
+  if (NEON_REGNO_OK_FOR_QUAD (rdest) && NEON_REGNO_OK_FOR_QUAD (rsrc))
+    {
+      dest[0] = gen_rtx_REG (TImode, rdest);
+      src[0] = gen_rtx_REG (TImode, rsrc);
+      count = 2;
+    }
+  else
+    {
+      dest[0] = gen_rtx_REG (DImode, rdest);
+      src[0] = gen_rtx_REG (DImode, rsrc);
+      dest[1] = gen_rtx_REG (DImode, rdest + 2);
+      src[1] = gen_rtx_REG (DImode, rsrc + 2);
+      count = 3;
+    }
+
+  /* In both cases the last piece is the DImode register at offset 4.  */
+  dest[count - 1] = gen_rtx_REG (DImode, rdest + 4);
+  src[count - 1] = gen_rtx_REG (DImode, rsrc + 4);
+
+  neon_disambiguate_copy (operands, dest, src, count);
+
+  for (i = 0; i < count; i++)
+    emit_move_insn (operands[2 * i], operands[2 * i + 1]);
+}
+
+/* Split a TImode, OImode, CImode or XImode register move (MODE says
+   which) into TImode pieces when both registers allow it, and into
+   DImode pieces otherwise.  */
+void
+arm_split_tocx_imoves (rtx *operands, enum machine_mode mode)
+{
+  const int dst_regno = REGNO (operands[0]);
+  const int src_regno = REGNO (operands[1]);
+  /* An XImode value cut into DImode pieces needs 8 entries.  */
+  rtx dest[8], src[8];
+  enum machine_mode piece_mode = DImode;
+  int n_pieces;
+  int regs_per_piece;
+  int k;
+
+  if (NEON_REGNO_OK_FOR_QUAD (dst_regno)
+      && NEON_REGNO_OK_FOR_QUAD (src_regno))
+    piece_mode = TImode;
+
+  if (dump_file)
+    {
+      if (piece_mode == TImode)
+	fprintf (dump_file, "split_mode is TImode\n");
+      else
+	fprintf (dump_file, "split_mode is DImode\n");
+    }
+
+  /* Number of moves to emit, and how far the register number advances
+     from one piece to the next.  */
+  n_pieces = GET_MODE_SIZE (mode) / GET_MODE_SIZE (piece_mode);
+  regs_per_piece = GET_MODE_SIZE (piece_mode) / UNITS_PER_WORD;
+
+  if (dump_file)
+    fprintf (dump_file, "count %d factor %d\n", n_pieces, regs_per_piece);
+
+  for (k = 0; k < n_pieces; k++)
+    {
+      const int offset = k * regs_per_piece;
+
+      dest[k] = gen_rtx_REG (piece_mode, dst_regno + offset);
+      src[k] = gen_rtx_REG (piece_mode, src_regno + offset);
+    }
+
+  /* neon_disambiguate_copy leaves the (dest, src) pairs in OPERANDS in
+     the order they are to be emitted.  */
+  neon_disambiguate_copy (operands, dest, src, n_pieces);
+
+  for (k = 0; k < n_pieces; k++)
+    emit_move_insn (operands[2 * k], operands[2 * k + 1]);
+}
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index def8d9f..3474d16 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -89,6 +89,9 @@ 
 ;; Opaque structure types wider than TImode.
 (define_mode_iterator VSTRUCT [EI OI CI XI])

+;; Opaque structure types other than EImode.
+(define_mode_iterator TOCXI [TI OI CI XI])
+
 ;; Opaque structure types used in table lookups (except vtbl1/vtbx1).
 (define_mode_iterator VTAB [TI EI OI])

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 1ffbb7d..7434625 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -293,85 +293,21 @@ 
   [(set (match_operand:EI 0 "s_register_operand" "")
 	(match_operand:EI 1 "s_register_operand" ""))]
   "TARGET_NEON && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))]
+  [(const_int 0)]
 {
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[2], src[2];
-
-  dest[0] = gen_rtx_REG (TImode, rdest);
-  src[0] = gen_rtx_REG (TImode, rsrc);
-  dest[1] = gen_rtx_REG (DImode, rdest + 4);
-  src[1] = gen_rtx_REG (DImode, rsrc + 4);
-
-  neon_disambiguate_copy (operands, dest, src, 2);
+  arm_split_eimoves (operands);
+  DONE;
 })

-(define_split
-  [(set (match_operand:OI 0 "s_register_operand" "")
-	(match_operand:OI 1 "s_register_operand" ""))]
+;; Splitter for TI, OI, CI and XI modes.
+(define_split ;; TI, OI, CI and XImode move split.
+  [(set (match_operand:TOCXI 0 "s_register_operand" "")
+	(match_operand:TOCXI 1 "s_register_operand" ""))]
   "TARGET_NEON && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))]
+  [(const_int 0)]
 {
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[2], src[2];
-
-  dest[0] = gen_rtx_REG (TImode, rdest);
-  src[0] = gen_rtx_REG (TImode, rsrc);
-  dest[1] = gen_rtx_REG (TImode, rdest + 4);
-  src[1] = gen_rtx_REG (TImode, rsrc + 4);
-
-  neon_disambiguate_copy (operands, dest, src, 2);
-})
-
-(define_split
-  [(set (match_operand:CI 0 "s_register_operand" "")
-	(match_operand:CI 1 "s_register_operand" ""))]
-  "TARGET_NEON && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (match_dup 5))]
-{
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[3], src[3];
-
-  dest[0] = gen_rtx_REG (TImode, rdest);
-  src[0] = gen_rtx_REG (TImode, rsrc);
-  dest[1] = gen_rtx_REG (TImode, rdest + 4);
-  src[1] = gen_rtx_REG (TImode, rsrc + 4);
-  dest[2] = gen_rtx_REG (TImode, rdest + 8);
-  src[2] = gen_rtx_REG (TImode, rsrc + 8);
-
-  neon_disambiguate_copy (operands, dest, src, 3);
-})
-
-(define_split
-  [(set (match_operand:XI 0 "s_register_operand" "")
-	(match_operand:XI 1 "s_register_operand" ""))]
-  "TARGET_NEON && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (match_dup 5))
-   (set (match_dup 6) (match_dup 7))]
-{
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[4], src[4];
-
-  dest[0] = gen_rtx_REG (TImode, rdest);
-  src[0] = gen_rtx_REG (TImode, rsrc);
-  dest[1] = gen_rtx_REG (TImode, rdest + 4);
-  src[1] = gen_rtx_REG (TImode, rsrc + 4);
-  dest[2] = gen_rtx_REG (TImode, rdest + 8);
-  src[2] = gen_rtx_REG (TImode, rsrc + 8);
-  dest[3] = gen_rtx_REG (TImode, rdest + 12);
-  src[3] = gen_rtx_REG (TImode, rsrc + 12);
-
-  neon_disambiguate_copy (operands, dest, src, 4);
+  arm_split_tocx_imoves (operands, <MODE>mode);
+  DONE;
 })

 (define_expand "movmisalign<mode>"