2010-06-22  Dmitry Plotnikov  <dplotnikov@ispras.ru>
            Dmitry Melnik  <dm@ispras.ru>


   * config/arm/arm.c (neon_immediate_valid_for_shift): New function.
   (neon_output_shift_immediate): New function.


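   * config/arm/neon.md (vashl<mode>3): Accept an immediate shift count
   as operand 2 and add an alternative that emits the immediate form.
   (vashr<mode>3_imm): New insn pattern.
   (vlshr<mode>3_imm): New insn pattern.
   (vashr<mode>3): Accept an immediate shift count and expand it with
   vashr<mode>3_imm.
   (vlshr<mode>3): Likewise, with vlshr<mode>3_imm.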


   * config/arm/predicates.md (imm_for_neon_shift_operand): New predicate.
   (imm_shift_or_reg_neon): New predicate.


   * optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr.

   * testsuite/gcc.target/arm/neon-vshr-imm-1.c: New testcase.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 84dd8fa..3fa19e8 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8026,6 +8026,51 @@ neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
   return 1;
 }
 
+/* Return nonzero if every element of CONST_VECTOR OP is the same CONST_INT
+   and that value, read as unsigned, is at most the element width of MODE
+   (innersize * 8).  On success store element 0 of OP in *MODCONST and the
+   width in *ELEMENTWIDTH; either pointer may be null.  */
+
+int
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+            rtx *modconst, int *elementwidth)
+{
+  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
+  unsigned HOST_WIDE_INT last_elt = 0;
+
+  /* Require all elements to be one and the same CONST_INT.  */
+  for (i = 0; i < n_elts; i++)
+    {
+      rtx el = CONST_VECTOR_ELT (op, i);
+      unsigned HOST_WIDE_INT elpart;
+  
+      if (GET_CODE (el) == CONST_INT)
+        elpart = INTVAL (el);
+      else if (GET_CODE (el) == CONST_DOUBLE)
+        return 0;
+      else
+        gcc_unreachable ();
+
+      if (i != 0 && elpart != last_elt)
+        return 0;
+
+      last_elt = elpart;
+    }
+
+  /* Accepts 0..width inclusive.  NOTE(review): check VSHL/VSHR limits.  */
+  if (last_elt > innersize * 8)
+    return 0;
+
+  if (elementwidth)
+    *elementwidth = innersize * 8;
+
+  if (modconst)
+    *modconst = CONST_VECTOR_ELT (op, 0);
+  
+  return 1;
+}
+
 /* Return a string suitable for output of Neon immediate logic operation
    MNEM.  */
 
@@ -8048,6 +8093,28 @@ neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
   return templ;
 }
 
+/* Build the assembler template for a Neon shift by immediate.  MNEM is the
+   mnemonic (VSHR or VSHL); SIGN is the character printed before the width.  */
+
+char *
+neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, enum machine_mode mode,
+           int quad)
+{
+  static char templ[40];
+  int width;
+  /* A successful check also replaces *OP2 with element 0 of the vector.  */
+  int is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width);
+
+  gcc_assert (is_valid != 0);
+  /* QUAD selects the %q operand modifier; otherwise %P is used.  */
+  if (!quad)
+    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
+  else
+    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
+
+  return templ;
+}
+
 /* Output a sequence of pairwise operations to implement a reduction.
    NOTE: We do "too much work" here, because pairwise operations work on two
    registers-worth of operands in one go. Unfortunately we can't exploit those
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 43b3805..43c282d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1135,11 +1135,53 @@
 ; SImode elements.
 
 (define_insn "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
+        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
+                      (match_operand:VDQIW 2 "imm_shift_or_reg_neon" "w,Dn")))]
+  "TARGET_NEON"
+  {
+    /* Alternative 0: register count; alternative 1: constant vector count.  */
+    if (which_alternative == 0)
+      return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+    if (which_alternative == 1)
+      return neon_output_shift_immediate ("vshl", 's', &operands[2],
+                       <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode));
+    gcc_unreachable ();
+  }
+
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
+
+;; Vector arithmetic shift right by an immediate; emitted as "vshr.s<width>".
+(define_insn "vashr<mode>3_imm"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
-	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
-		      (match_operand:VDQIW 2 "s_register_operand" "w")))]
+  (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+           (match_operand:VDQIW 2 "imm_for_neon_shift_operand" "Dn")))]
   "TARGET_NEON"
-  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  {
+    return neon_output_shift_immediate ("vshr", 's', &operands[2],
+             <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode));
+  }
+
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
+
+(define_insn "vlshr<mode>3_imm"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+  (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+           (match_operand:VDQIW 2 "imm_for_neon_shift_operand" "Dn")))]
+  "TARGET_NEON"
+  {
+    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
+             <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode));
+  }              
+
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_vshl_ddd")
@@ -1182,29 +1224,35 @@
 
 (define_expand "vashr<mode>3"
   [(set (match_operand:VDQIW 0 "s_register_operand" "")
-	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
-			(match_operand:VDQIW 2 "s_register_operand" "")))]
+  (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+           (match_operand:VDQIW 2 "imm_shift_or_reg_neon" "")))]
   "TARGET_NEON"
 {
-  rtx neg = gen_reg_rtx (<MODE>mode);
-
-  emit_insn (gen_neg<mode>2 (neg, operands[2]));
-  emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
-
+  if (s_register_operand (operands[2], <MODE>mode))
+    { /* Register count: negate it and emit ashl<mode>3_signed.  */
+      rtx neg = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+    }
+  else /* Otherwise operand 2 matched the constant-vector arm.  */
+    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
   DONE;
 })
 
 (define_expand "vlshr<mode>3"
   [(set (match_operand:VDQIW 0 "s_register_operand" "")
-	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
-			(match_operand:VDQIW 2 "s_register_operand" "")))]
+  (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+           (match_operand:VDQIW 2 "imm_shift_or_reg_neon" "")))]
   "TARGET_NEON"
 {
-  rtx neg = gen_reg_rtx (<MODE>mode);
-
-  emit_insn (gen_neg<mode>2 (neg, operands[2]));
-  emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
-
+  if (s_register_operand (operands[2], <MODE>mode))
+    { /* Register count: negate it and emit ashl<mode>3_unsigned.  */
+      rtx neg = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+    }
+  else /* Otherwise operand 2 matched the constant-vector arm.  */
+    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
   DONE;
 })
 
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index d351f44..02fced4 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -496,6 +496,17 @@
   return neon_immediate_valid_for_move (op, mode, NULL, NULL);
 })
 
+;; True for a const_vector that neon_immediate_valid_for_shift accepts.
+(define_predicate "imm_for_neon_shift_operand"
+  (match_code "const_vector")
+{
+  return neon_immediate_valid_for_shift (op, mode, NULL, NULL);
+})
+;; True for an s_register_operand or an imm_for_neon_shift_operand.
+(define_predicate "imm_shift_or_reg_neon"
+    (ior (match_operand 0 "s_register_operand")
+         (match_operand 0 "imm_for_neon_shift_operand")))
+
 (define_predicate "imm_for_neon_logic_operand"
   (match_code "const_vector")
 {
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 555e256..dea1d9c 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6260,6 +6260,9 @@ init_optabs (void)
   init_optab (usashl_optab, US_ASHIFT);
   init_optab (ashr_optab, ASHIFTRT);
   init_optab (lshr_optab, LSHIFTRT);
+  init_optabv (vashl_optab, ASHIFT);
+  init_optabv (vashr_optab, ASHIFTRT);
+  init_optabv (vlshr_optab, LSHIFTRT);
   init_optab (rotl_optab, ROTATE);
   init_optab (rotr_optab, ROTATERT);
   init_optab (smin_optab, SMIN);
