@@ -75,6 +75,11 @@ (define_mode_iterator V_16_32_64
(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
(V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
+;; Integer vector modes of 16, 32 and 64 bits.  The 64-bit modes
+;; (V8QI, V4HI, V2SI) are enabled only for TARGET_64BIT.
+(define_mode_iterator VI_16_32_64
+ [V2QI V4QI V2HI
+ (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
+ (V2SI "TARGET_64BIT")])
+
;; V2S* modes
(define_mode_iterator V2FI [V2SF V2SI])
@@ -86,6 +91,14 @@ (define_mode_attr mmxvecsize
[(V8QI "b") (V4QI "b") (V2QI "b")
(V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
+;; Mapping to same size integral mode: each vector mode maps to the
+;; scalar integer mode (HI, SI or DI) that patterns name via
+;; <mmxinsnmode> when taking a lowpart subreg of a vector operand.
+(define_mode_attr mmxinsnmode
+ [(V8QI "DI") (V4QI "SI") (V2QI "HI")
+ (V4HI "DI") (V2HI "SI")
+ (V2SI "DI")
+ (V4HF "DI") (V2HF "SI")
+ (V2SF "DI")])
+
(define_mode_attr mmxdoublemode
[(V8QI "V8HI") (V4HI "V4SI")])
@@ -350,22 +363,7 @@ (define_insn_and_split "*mov<mode>_imm"
HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
<MODE>mode);
operands[1] = GEN_INT (val);
- machine_mode mode;
- switch (GET_MODE_SIZE (<MODE>mode))
- {
- case 2:
- mode = HImode;
- break;
- case 4:
- mode = SImode;
- break;
- case 8:
- mode = DImode;
- break;
- default:
- gcc_unreachable ();
- }
- operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode);
+ operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
})
;; For TARGET_64BIT we always round up to 8 bytes.
@@ -2878,6 +2876,31 @@ (define_insn "mmx_andnot<mode>3"
(set_attr "type" "mmxadd,sselog,sselog,sselog")
(set_attr "mode" "DI,TI,TI,TI")])
+;; And-not for MMXMODEI modes with an extra general-register
+;; alternative (the first one, restricted to the "bmi" isa).  Matched
+;; only for TARGET_64BIT.  The pattern carries a FLAGS_REG clobber and
+;; its output template is "#", so it is always split rather than
+;; output directly.
+(define_insn "mmx_andnot<mode>3_gpr"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=?r,y,x,x,v")
+ (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "r,0,0,x,v"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "r,ym,x,x,v")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_MMX || TARGET_SSE2)"
+ "#"
+ [(set_attr "isa" "bmi,*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "*,native,*,*,*")
+ (set_attr "type" "alu,mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,DI,TI,TI,TI")])
+
+;; After reload, when the destination of a flags-clobbering MMXMODEI
+;; and-not is not a general register, re-emit the same and-not without
+;; the FLAGS_REG clobber.
+(define_split
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_mmxmem_operand"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !GENERAL_REGNO_P (REGNO (operands[0]))"
+ [(set (match_dup 0)
+ (and:<MODE> (not:<MODE> (match_dup 1)) (match_dup 2)))])
+
(define_insn "*andnot<mode>3"
[(set (match_operand:VI_16_32 0 "register_operand" "=?&r,?r,x,x,v")
(and:VI_16_32
@@ -2892,20 +2915,20 @@ (define_insn "*andnot<mode>3"
(set_attr "mode" "SI,SI,TI,TI,TI")])
+;; After reload, with BMI available, split a vector and-not whose
+;; operands are all general registers into the same and-not on the
+;; scalar integer mode <mmxinsnmode>, using lowpart subregs of the
+;; vector operands.  The FLAGS_REG clobber is kept.
(define_split
- [(set (match_operand:VI_16_32 0 "general_reg_operand")
- (and:VI_16_32
- (not:VI_16_32 (match_operand:VI_16_32 1 "general_reg_operand"))
- (match_operand:VI_16_32 2 "general_reg_operand")))
+ [(set (match_operand:VI_16_32_64 0 "general_reg_operand")
+ (and:VI_16_32_64
+ (not:VI_16_32_64 (match_operand:VI_16_32_64 1 "general_reg_operand"))
+ (match_operand:VI_16_32_64 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI && reload_completed"
[(parallel
[(set (match_dup 0)
- (and:SI (not:SI (match_dup 1)) (match_dup 2)))
+ (and:<mmxinsnmode> (not:<mmxinsnmode> (match_dup 1)) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
- operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
- operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
- operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
+ operands[2] = lowpart_subreg (<mmxinsnmode>mode, operands[2], <MODE>mode);
+ operands[1] = lowpart_subreg (<mmxinsnmode>mode, operands[1], <MODE>mode);
+ operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
})
(define_split
@@ -2948,14 +2971,28 @@ (define_expand "mmx_<code><mode>3"
(match_operand:MMXMODEI 1 "register_mmxmem_operand")
(match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
- "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+{
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+ if (TARGET_64BIT)
+ {
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
+ DONE;
+ }
+})
+;; Logic-op (any_logic) expander for MMXMODEI register operands,
+;; available under TARGET_MMX_WITH_SSE.
(define_expand "<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
(match_operand:MMXMODEI 1 "register_operand")
(match_operand:MMXMODEI 2 "register_operand")))]
- "TARGET_MMX_WITH_SSE")
+ "TARGET_MMX_WITH_SSE"
+{
+ /* For TARGET_64BIT, hand the expansion to ix86_expand_binary_operator
+ and stop; otherwise fall through to the pattern given above.
+ NOTE(review): presumably this is what attaches the FLAGS_REG clobber
+ that the *_gpr insn expects -- confirm against
+ ix86_expand_binary_operator.  */
+ if (TARGET_64BIT)
+ {
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
+ DONE;
+ }
+})
(define_insn "*mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
@@ -2974,6 +3011,32 @@ (define_insn "*mmx_<code><mode>3"
(set_attr "type" "mmxadd,sselog,sselog,sselog")
(set_attr "mode" "DI,TI,TI,TI")])
+;; Logic ops (any_logic) for MMXMODEI modes with an extra
+;; general-register alternative (the first one).  Matched only for
+;; TARGET_64BIT when ix86_binary_operator_ok accepts the operands.
+;; The pattern carries a FLAGS_REG clobber and its output template is
+;; "#", so it is always split rather than output directly.
+(define_insn "*mmx_<code><mode>3_gpr"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=?r,y,x,x,v")
+ (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,0,x,v")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "r,ym,x,x,v")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_MMX || TARGET_SSE2)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "#"
+ [(set_attr "isa" "*,*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "*,native,*,*,*")
+ (set_attr "type" "alu,mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,DI,TI,TI,TI")])
+
+;; After reload, when the destination of a flags-clobbering MMXMODEI
+;; logic op is not a general register, re-emit the same operation
+;; without the FLAGS_REG clobber.
+(define_split
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && (TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !GENERAL_REGNO_P (REGNO (operands[0]))"
+ [(set (match_dup 0)
+ (any_logic:<MODE> (match_dup 1)
+ (match_dup 2)))])
+
(define_insn "<code><mode>3"
[(set (match_operand:VI_16_32 0 "register_operand" "=?r,x,x,v")
(any_logic:VI_16_32
@@ -2987,20 +3050,20 @@ (define_insn "<code><mode>3"
(set_attr "mode" "SI,TI,TI,TI")])
+;; After reload, split a vector logic op whose operands are all general
+;; registers into the same operation on the scalar integer mode
+;; <mmxinsnmode>, using lowpart subregs of the vector operands.  The
+;; FLAGS_REG clobber is kept.
(define_split
- [(set (match_operand:VI_16_32 0 "general_reg_operand")
- (any_logic:VI_16_32
- (match_operand:VI_16_32 1 "general_reg_operand")
- (match_operand:VI_16_32 2 "general_reg_operand")))
+ [(set (match_operand:VI_16_32_64 0 "general_reg_operand")
+ (any_logic:VI_16_32_64
+ (match_operand:VI_16_32_64 1 "general_reg_operand")
+ (match_operand:VI_16_32_64 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
[(parallel
[(set (match_dup 0)
- (any_logic:SI (match_dup 1) (match_dup 2)))
+ (any_logic:<mmxinsnmode> (match_dup 1) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
- operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
- operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
- operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
+ operands[2] = lowpart_subreg (<mmxinsnmode>mode, operands[2], <MODE>mode);
+ operands[1] = lowpart_subreg (<mmxinsnmode>mode, operands[1], <MODE>mode);
+ operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
})
(define_split
new file mode 100644
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+/* { dg-final { scan-assembler-not "xmm" { xfail *-*-* } } } */
+
+/* AND four consecutive chars of A with the matching chars of B.  */
+void
+foo (char* a, char* __restrict b)
+{
+ a[0] &= b[0];
+ a[1] &= b[1];
+ a[2] &= b[2];
+ a[3] &= b[3];
+}
+
+/* AND two consecutive chars of A with the matching chars of B.  */
+void
+foo1 (char* a, char* __restrict b)
+{
+ a[0] &= b[0];
+ a[1] &= b[1];
+}
+
+/* AND eight consecutive chars of A with the matching chars of B.  */
+void
+foo2 (char* a, char* __restrict b)
+{
+ a[0] &= b[0];
+ a[1] &= b[1];
+ a[2] &= b[2];
+ a[3] &= b[3];
+ a[4] &= b[4];
+ a[5] &= b[5];
+ a[6] &= b[6];
+ a[7] &= b[7];
+}
+
+/* AND four consecutive chars of A with the constants 1, 2, 3, 3.
+   B is unused.  */
+void
+foo3 (char* a, char* __restrict b)
+{
+ a[0] &= 1;
+ a[1] &= 2;
+ a[2] &= 3;
+ a[3] &= 3;
+}
+
+/* AND two consecutive chars of A with the constants 1, 2.
+   B is unused.  */
+void
+foo4 (char* a, char* __restrict b)
+{
+ a[0] &= 1;
+ a[1] &= 2;
+}
+
+/* AND eight consecutive chars of A with the constants
+   1, 2, 2, 3, 4, 5, 6, 7.  B is unused.  */
+void
+foo5 (char* a, char* __restrict b)
+{
+ a[0] &= 1;
+ a[1] &= 2;
+ a[2] &= 2;
+ a[3] &= 3;
+ a[4] &= 4;
+ a[5] &= 5;
+ a[6] &= 6;
+ a[7] &= 7;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+/* { dg-final { scan-assembler-not "xmm" { xfail *-*-* } } } */
+
+/* AND four consecutive shorts of A with the matching shorts of B.  */
+void
+foo (short* a, short* __restrict b)
+{
+ a[0] &= b[0];
+ a[1] &= b[1];
+ a[2] &= b[2];
+ a[3] &= b[3];
+}
+
+/* AND two consecutive shorts of A with the matching shorts of B.  */
+void
+foo1 (short* a, short* __restrict b)
+{
+ a[0] &= b[0];
+ a[1] &= b[1];
+}
+
+/* AND four consecutive shorts of A with the constants 1, 2, 3, 3.
+   B is unused.  */
+void
+foo3 (short* a, short* __restrict b)
+{
+ a[0] &= 1;
+ a[1] &= 2;
+ a[2] &= 3;
+ a[3] &= 3;
+}
+
+/* AND two consecutive shorts of A with the constants 1, 2.
+   B is unused.  */
+void
+foo4 (short* a, short* __restrict b)
+{
+ a[0] &= 1;
+ a[1] &= 2;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "xmm" { xfail { ! ia32 } } } } */
+
+/* AND two consecutive ints of A with the matching ints of B.  */
+void
+foo1 (int* a, int* __restrict b)
+{
+ a[0] &= b[0];
+ a[1] &= b[1];
+}
+
+/* AND two consecutive ints of A with the constants 1, 2.
+   B is unused.  */
+void
+foo4 (int* a, int* __restrict b)
+{
+ a[0] &= 1;
+ a[1] &= 2;
+}