===================================================================
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
extern void rs6000_fatal_bad_address (rtx);
extern rtx create_TOC_reference (rtx, rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
extern bool valid_sf_si_move (rtx, rtx, machine_mode);
extern void rs6000_emit_move (rtx, rtx, machine_mode);
===================================================================
@@ -10503,17 +10503,28 @@ rs6000_const_vec (machine_mode mode)
/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
for a VSX load or store operation. */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
- /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
- 128-bit integers if they are allowed in VSX registers. */
- if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
- return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+ /* Scalar permutations are easier to express in integer modes than in
+ floating-point modes, so cast them here. We use V1TImode instead
+ of TImode to ensure that the values don't go through GPRs. */
+ if (FLOAT128_VECTOR_P (mode))
+ {
+ dest = gen_lowpart (V1TImode, dest);
+ source = gen_lowpart (V1TImode, source);
+ mode = V1TImode;
+ }
+
+ /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+ scalar. */
+ if (mode == TImode || mode == V1TImode)
+ emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+ GEN_INT (64))));
else
{
rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
- return gen_rtx_VEC_SELECT (mode, source, par);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
}
}
@@ -10523,8 +10534,6 @@ rs6000_gen_le_vsx_permute (rtx source, m
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_mem, permute_reg;
-
/* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
@@ -10534,11 +10543,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
source = adjust_address (source, V2DImode, 0);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
- permute_mem = rs6000_gen_le_vsx_permute (source, mode);
- permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_mem));
- emit_insn (gen_rtx_SET (dest, permute_reg));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
@@ -10547,8 +10554,6 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_src, permute_tmp;
-
/* This should never be called during or after reload, because it does
not re-permute the source register. It is intended only for use
during expand. */
@@ -10563,11 +10568,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx
source = gen_lowpart (V2DImode, source);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
- permute_src = rs6000_gen_le_vsx_permute (source, mode);
- permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_src));
- emit_insn (gen_rtx_SET (dest, permute_tmp));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
===================================================================
@@ -37,6 +37,9 @@ (define_mode_iterator VSX_LE_128 [(KF
(TI "TARGET_VSX_TIMODE")
V1TI])
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
@@ -750,9 +753,9 @@ (define_split
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+ [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+ (rotate:VSX_TI
+ (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@
@@ -763,10 +766,10 @@ (define_insn "*vsx_le_permute_<mode>"
(set_attr "type" "vecperm,vecload,vecstore")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
- (rotate:VSX_LE_128
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+ [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+ (rotate:VSX_TI
+ (rotate:VSX_TI
+ (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
(const_int 64))
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -791,16 +794,15 @@ (define_insn_and_split "*vsx_le_perm_loa
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
"
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
}
"
[(set_attr "type" "vecload")
@@ -818,15 +820,14 @@ (define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
})
;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -850,16 +851,13 @@ (define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))]
- "")
+ [(const_int 0)]
+{
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ DONE;
+})
;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.