@@ -2811,6 +2811,7 @@ expand_block_move (rtx operands[], bool might_overlap)
gen_func.mov = gen_vsx_movv2di_64bit;
}
else if (TARGET_BLOCK_OPS_UNALIGNED_VSX
+ && TARGET_POWERPC64
&& TARGET_POWER10 && bytes < 16
&& orig_bytes > 16
&& !(bytes == 1 || bytes == 2
@@ -5582,20 +5582,32 @@ (define_expand "first_mismatch_or_eos_index_<mode>"
DONE;
})
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length. If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
(define_expand "lxvl"
- [(set (match_dup 3)
- (ashift:DI (match_operand:DI 2 "register_operand")
- (const_int 56)))
- (set (match_operand:V16QI 0 "vsx_register_operand")
- (unspec:V16QI
- [(match_operand:DI 1 "gpc_reg_operand")
- (mem:V16QI (match_dup 1))
- (match_dup 3)]
- UNSPEC_LXVL))]
+ [(use (match_operand:V16QI 0 "vsx_register_operand"))
+ (use (match_operand:DI 1 "gpc_reg_operand"))
+ (use (match_operand:DI 2 "gpc_reg_operand"))]
"TARGET_P9_VECTOR && TARGET_64BIT"
{
- operands[3] = gen_reg_rtx (DImode);
+ rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+ rtx len;
+
+ if (TARGET_FUTURE)
+ len = shift_len;
+ else
+ {
+ len = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (len, shift_len));
+ }
+
+ rtx dest = operands[0];
+ rtx addr = operands[1];
+ rtx mem = gen_rtx_MEM (V16QImode, addr);
+ rtvec rv = gen_rtvec (3, addr, mem, len);
+ rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+ emit_insn (gen_rtx_SET (dest, lxvl));
+ DONE;
})
(define_insn "*lxvl"
@@ -5619,6 +5631,34 @@ (define_insn "lxvll"
"lxvll %x0,%1,%2"
[(set_attr "type" "vecload")])
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift. The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn. The lxvll built-in function required the user to have already done
+;; the shift. Defining lxvrll this way will optimize cases where the user has
+;; done the shift immediately before the built-in.
+(define_insn "*lxvrl"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (unspec:V16QI
+ [(match_operand:DI 1 "gpc_reg_operand" "b")
+ (mem:V16QI (match_dup 1))
+ (ashift:DI (match_operand:DI 2 "register_operand" "r")
+ (const_int 56))]
+ UNSPEC_LXVL))]
+ "TARGET_FUTURE && TARGET_64BIT"
+ "lxvrl %x0,%1,%2"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*lxvrll"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
+ (mem:V16QI (match_dup 1))
+ (ashift:DI (match_operand:DI 2 "register_operand" "r")
+ (const_int 56))]
+ UNSPEC_LXVLL))]
+ "TARGET_FUTURE"
+ "lxvrll %x0,%1,%2"
+ [(set_attr "type" "vecload")])
+
;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
[(match_operand:V16QI 0 "vsx_register_operand")
@@ -5650,18 +5690,29 @@ (define_insn "stxvll"
;; Store VSX Vector with Length
(define_expand "stxvl"
- [(set (match_dup 3)
- (ashift:DI (match_operand:DI 2 "register_operand")
- (const_int 56)))
- (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
- (unspec:V16QI
- [(match_operand:V16QI 0 "vsx_register_operand")
- (mem:V16QI (match_dup 1))
- (match_dup 3)]
- UNSPEC_STXVL))]
+ [(use (match_operand:V16QI 0 "vsx_register_operand"))
+ (use (match_operand:DI 1 "gpc_reg_operand"))
+ (use (match_operand:DI 2 "gpc_reg_operand"))]
"TARGET_P9_VECTOR && TARGET_64BIT"
{
- operands[3] = gen_reg_rtx (DImode);
+ rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+ rtx len;
+
+ if (TARGET_FUTURE)
+ len = shift_len;
+ else
+ {
+ len = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (len, shift_len));
+ }
+
+ rtx src = operands[0];
+ rtx addr = operands[1];
+ rtx mem = gen_rtx_MEM (V16QImode, addr);
+ rtvec rv = gen_rtvec (3, src, mem, len);
+ rtx stxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_STXVL);
+ emit_insn (gen_rtx_SET (mem, stxvl));
+ DONE;
})
;; Define optab for vector access with length vectorization exploitation.
@@ -5705,6 +5756,35 @@ (define_insn "*stxvl"
"stxvl %x0,%1,%2"
[(set_attr "type" "vecstore")])
+;; For stxvrl and stxvrll, use the combiner to eliminate the shift. The
+;; define_expand for stxvl will already incorporate the shift in generating the
+;; insn. The stxvll built-in function required the user to have already done
+;; the shift. Defining stxvrll this way will optimize cases where the user
+;; has done the shift immediately before the built-in.
+
+(define_insn "*stxvrl"
+ [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
+ (unspec:V16QI
+ [(match_operand:V16QI 0 "vsx_register_operand" "wa")
+ (mem:V16QI (match_dup 1))
+ (ashift:DI (match_operand:DI 2 "register_operand" "r")
+ (const_int 56))]
+ UNSPEC_STXVL))]
+ "TARGET_FUTURE && TARGET_64BIT"
+ "stxvrl %x0,%1,%2"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*stxvrll"
+ [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
+ (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
+ (mem:V16QI (match_dup 1))
+ (ashift:DI (match_operand:DI 2 "register_operand" "r")
+ (const_int 56))]
+ UNSPEC_STXVLL))]
+ "TARGET_FUTURE"
+ "stxvrll %x0,%1,%2"
+ [(set_attr "type" "vecstore")])
+
;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
[(match_operand:V16QI 0 "vsx_register_operand" "=wa")
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the lxvrl and stxvrl instructions are generated for
+ -mcpu=future on memory copy operations. */
+
+#ifndef VSIZE
+#define VSIZE 2
+#endif
+
+#ifndef LSIZE
+#define LSIZE 5
+#endif
+
+struct foo {
+ vector unsigned char vc[VSIZE];
+ unsigned char leftover[LSIZE];
+};
+
+void memcpy_ptr (struct foo *p, struct foo *q)
+{
+ __builtin_memcpy ((void *) p, /* lxvrl and stxvrl. */
+ (void *) q,
+ (sizeof (vector unsigned char) * VSIZE) + LSIZE);
+}
+
+/* { dg-final { scan-assembler {\mlxvrl\M} } } */
+/* { dg-final { scan-assembler {\mstxvrl\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvl\M} } } */
+/* { dg-final { scan-assembler-not {\mstxvl\M} } } */
@@ -6581,8 +6581,8 @@ proc check_effective_target_power10_ok { } {
}
}
-# Return 1 if this is a PowerPC target supporting -mcpu=future or -mdense-math
-# which enables the dense math operations.
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
proc check_effective_target_powerpc_dense_math_ok { } {
return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
__vector_quad vq;
@@ -6600,6 +6600,18 @@ proc check_effective_target_powerpc_dense_math_ok { } {
} "-mcpu=future"]
}
+# Return 1 if this is a PowerPC target supporting -mcpu=future, which enables
+# the future architecture instructions (detected via the _ARCH_PWR_FUTURE
+# predefined macro).
+proc check_effective_target_powerpc_future_ok { } {
+ return [check_no_compiler_messages powerpc_future_ok object {
+ #ifndef _ARCH_PWR_FUTURE
+ #error "not -mcpu=future"
+ #else
+ int dummy;
+ #endif
+ } "-mcpu=future"]
+}
+
# Return 1 if this is a PowerPC target supporting -mfloat128 via either
# software emulation on power7/power8 systems or hardware support on power9.