diff mbox series

i386: Add pack/unpack patterns for 32bit vectors [PR100637]

Message ID CAFULd4b3-y6C4PhR63a5gkUBdskvrmAZO2fi1Z-POZfKOi+Dtg@mail.gmail.com
State New
Headers show
Series i386: Add pack/unpack patterns for 32bit vectors [PR100637] | expand

Commit Message

Uros Bizjak July 8, 2021, 10:22 a.m. UTC
V1SI mode shift is needed to shift 32bit operands and consequently we
need to implement V1SI moves and pushes.

2021-07-08  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/100637
    * config/i386/i386-expand.c (ix86_expand_sse_unpack):
    Handle V4QI mode.
    * config/i386/mmx.md (V_32): New mode iterator.
    (mov<V_32:mode>): Use V_32 mode iterator.
    (*mov<V_32:mode>_internal): Ditto.
    (*push<V_32:mode>2_rex64): Ditto.
    (*push<V_32:mode>2): Ditto.
    (movmisalign<V_32:mode>): Ditto.
    (mmx_<any_lshift:insn>v1si3): New insn pattern.
    (sse4_1_<any_extend:code>v2qiv2hi2): Ditto.
    (vec_pack_trunc_v2hi): New expander.
    (vec_unpacks_lo_v4qi): Ditto.
    (vec_unpacks_hi_v4qi): Ditto.
    (vec_unpacku_lo_v4qi): Ditto.
    (vec_unpacku_hi_v4qi): Ditto.
    * config/i386/i386.h (VALID_SSE2_REG_MODE): Add V1SImode.
    (VALID_INT_MODE_P): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 58c208e166b..65764ad88c5 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -5355,6 +5355,12 @@  ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
 	  else
 	    unpack = gen_sse4_1_sign_extendv2hiv2si2;
 	  break;
+	case E_V4QImode:
+	  if (unsigned_p)
+	    unpack = gen_sse4_1_zero_extendv2qiv2hi2;
+	  else
+	    unpack = gen_sse4_1_sign_extendv2qiv2hi2;
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -5380,6 +5386,12 @@  ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
 	      emit_insn (gen_mmx_lshrv1di3 (tmp, gen_lowpart (V1DImode, src),
 					    GEN_INT (32)));
 	      break;
+	    case 4:
+	      /* Shift higher 2 bytes to lower 2 bytes.  */
+	      tmp = gen_reg_rtx (V1SImode);
+	      emit_insn (gen_mmx_lshrv1si3 (tmp, gen_lowpart (V1SImode, src),
+					    GEN_INT (16)));
+	      break;
 	    default:
 	      gcc_unreachable ();
 	    }
@@ -5427,6 +5439,12 @@  ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
 	  else
 	    unpack = gen_mmx_punpcklwd;
 	  break;
+	case E_V4QImode:
+	  if (high_p)
+	    unpack = gen_mmx_punpckhbw_low;
+	  else
+	    unpack = gen_mmx_punpcklbw_low;
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 03d176143fe..8c3eace56da 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1016,7 +1016,7 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
-   || (MODE) == V4QImode || (MODE) == V2HImode				\
+   || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode	\
    || (MODE) == V2DImode || (MODE) == DFmode)
 
 #define VALID_SSE_REG_MODE(MODE)					\
@@ -1048,7 +1048,7 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == SImode || (MODE) == DImode				\
    || (MODE) == CQImode || (MODE) == CHImode				\
    || (MODE) == CSImode || (MODE) == CDImode				\
-   || (MODE) == V4QImode || (MODE) == V2HImode				\
+   || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode	\
    || (TARGET_64BIT							\
        && ((MODE) == TImode || (MODE) == CTImode			\
 	   || (MODE) == TFmode || (MODE) == TCmode			\
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7e83b64ab59..986b758396a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -57,10 +57,13 @@  (define_mode_iterator MMXMODE14 [V8QI V2SI])
 (define_mode_iterator MMXMODE24 [V4HI V2SI])
 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 
-;; All 32bit integer vector modes
+;; All 4-byte integer vector modes
+(define_mode_iterator V_32 [V4QI V2HI V1SI])
+
+;; 4-byte integer vector modes with more than one element
 (define_mode_iterator VI_32 [V4QI V2HI])
 
-;; All V2S* modes
+;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
 
 ;; Mapping from integer vector mode to mnemonic suffix
@@ -238,8 +241,8 @@  (define_expand "movmisalign<mode>"
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VI_32 0 "nonimmediate_operand")
-	(match_operand:VI_32 1 "nonimmediate_operand"))]
+  [(set (match_operand:V_32 0 "nonimmediate_operand")
+	(match_operand:V_32 1 "nonimmediate_operand"))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -247,9 +250,9 @@  (define_expand "mov<mode>"
 })
 
 (define_insn "*mov<mode>_internal"
-  [(set (match_operand:VI_32 0 "nonimmediate_operand"
+  [(set (match_operand:V_32 0 "nonimmediate_operand"
     "=r ,m ,v,v,v,m,r,v")
-	(match_operand:VI_32 1 "general_operand"
+	(match_operand:V_32 1 "general_operand"
     "rmC,rC,C,v,m,v,v,r"))]
   "TARGET_SSE2 &&
    !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -304,8 +307,8 @@  (define_insn "*mov<mode>_internal"
 
 ;; For TARGET_64BIT we always round up to 8 bytes.
 (define_insn "*push<mode>2_rex64"
-  [(set (match_operand:VI_32 0 "push_operand" "=X,X")
-	(match_operand:VI_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
+  [(set (match_operand:V_32 0 "push_operand" "=X,X")
+	(match_operand:V_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
   "TARGET_SSE2 && TARGET_64BIT"
   "@
    push{q}\t%q1
@@ -314,8 +317,8 @@  (define_insn "*push<mode>2_rex64"
    (set_attr "mode" "DI")])
 
 (define_insn "*push<mode>2"
-  [(set (match_operand:VI_32 0 "push_operand" "=<,<")
-	(match_operand:VI_32 1 "general_no_elim_operand" "rC*m,*v"))]
+  [(set (match_operand:V_32 0 "push_operand" "=<,<")
+	(match_operand:V_32 1 "general_no_elim_operand" "rC*m,*v"))]
   "TARGET_SSE2 && !TARGET_64BIT"
   "@
    push{l}\t%1
@@ -324,20 +327,20 @@  (define_insn "*push<mode>2"
    (set_attr "mode" "SI")])
 
 (define_split
-  [(set (match_operand:VI_32 0 "push_operand")
-	(match_operand:VI_32 1 "sse_reg_operand"))]
+  [(set (match_operand:V_32 0 "push_operand")
+	(match_operand:V_32 1 "sse_reg_operand"))]
   "TARGET_SSE2 && reload_completed"
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
     (set (match_dup 0) (match_dup 1))]
 {
-  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<VI_32:MODE>mode)));
+  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<V_32:MODE>mode)));
   /* Preserve memory attributes. */
   operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
 })
 
 (define_expand "movmisalign<mode>"
-  [(set (match_operand:VI_32 0 "nonimmediate_operand")
-	(match_operand:VI_32 1 "nonimmediate_operand"))]
+  [(set (match_operand:V_32 0 "nonimmediate_operand")
+	(match_operand:V_32 1 "nonimmediate_operand"))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -2006,6 +2009,23 @@  (define_expand "<insn><mode>3"
 	  (match_operand:DI 2 "nonmemory_operand")))]
   "TARGET_MMX_WITH_SSE")
 
+(define_insn "mmx_<insn>v1si3"
+  [(set (match_operand:V1SI 0 "register_operand" "=x,Yw")
+        (any_lshift:V1SI
+	  (match_operand:V1SI 1 "register_operand" "0,Yw")
+	  (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
+  "TARGET_SSE2"
+  "@
+   p<vshift>d\t{%2, %0|%0, %2}
+   vp<vshift>d\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft")
+   (set (attr "length_immediate")
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "mode" "TI")])
+
 (define_insn "<insn>v2hi3"
   [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
         (any_shift:V2HI
@@ -2732,6 +2752,20 @@  (define_insn "sse4_1_<code>v2hiv2si2"
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "sse4_1_<code>v2qiv2hi2"
+  [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
+	(any_extend:V2HI
+	  (vec_select:V2QI
+	    (match_operand:V4QI 1 "register_operand" "Yr,*x,Yw")
+	    (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE4_1"
+  "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 ;; Pack/unpack vector modes
 (define_mode_attr mmxpackmode
   [(V4HI "V8QI") (V2SI "V4HI")])
@@ -2748,6 +2782,18 @@  (define_expand "vec_pack_trunc_<mode>"
   DONE;
 })
 
+(define_expand "vec_pack_trunc_v2hi"
+  [(match_operand:V4QI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "TARGET_SSE2"
+{
+  rtx op1 = gen_lowpart (V4QImode, operands[1]);
+  rtx op2 = gen_lowpart (V4QImode, operands[2]);
+  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
+  DONE;
+})
+
 (define_mode_attr mmxunpackmode
   [(V8QI "V4HI") (V4HI "V2SI")])
 
@@ -2775,6 +2821,30 @@  (define_expand "vec_unpacku_hi_<mode>"
   "TARGET_MMX_WITH_SSE"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
 
+(define_expand "vec_unpacks_lo_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
+
+(define_expand "vec_unpacks_hi_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
+
+(define_expand "vec_unpacku_lo_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
+
+(define_expand "vec_unpacku_hi_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
+
 (define_insn "*mmx_pinsrd"
   [(set (match_operand:V2SI 0 "register_operand" "=x,Yv")
         (vec_merge:V2SI