diff mbox series

RISC-V: Use widening shift for scatter/gather if applicable.

Message ID e871b68e-3ee4-4d94-bf44-ef98efc70911@gmail.com
State New
Headers show
Series RISC-V: Use widening shift for scatter/gather if applicable. | expand

Commit Message

Robin Dapp May 17, 2024, 3:25 p.m. UTC
Hi,

with the zvbb extension we can emit a widening shift for scatter/gather
index preparation in case we need to multiply by 2 and zero extend.

The patch also adds vwsll to the mode_idx attribute and removes the
mode from the shift-count operand of the insn pattern.

Regtested on rv64gcv_zvfh_zvbb.

Regards
 Robin

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_gather_scatter): Use vwsll if
	applicable.
	* config/riscv/vector-crypto.md: Remove mode from vwsll shift
	count operand.
	* config/riscv/vector.md: Add vwsll to mode_idx attribute.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp: Add zvbb.
	* gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c: New test.
---
 gcc/config/riscv/riscv-v.cc                   |  42 +++++--
 gcc/config/riscv/vector-crypto.md             |   4 +-
 gcc/config/riscv/vector.md                    |   4 +-
 .../gather-scatter/gather_load_64-12-zvbb.c   | 113 ++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp         |  48 +++++++-
 5 files changed, 193 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c

Comments

juzhe.zhong@rivai.ai May 17, 2024, 10:32 p.m. UTC | #1
LGTM



juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2024-05-17 23:25
To: gcc-patches
CC: rdapp.gcc; palmer; Kito Cheng; juzhe.zhong@rivai.ai; jeffreyalaw
Subject: [PATCH] RISC-V: Use widening shift for scatter/gather if applicable.
Hi,
 
with the zvbb extension we can emit a widening shift for scatter/gather
index preparation in case we need to multiply by 2 and zero extend.
 
The patch also adds vwsll to the mode_idx attribute and removes the
mode from shift-count operand of the insn pattern.
 
Regtested on rv64gcv_zvfh_zvbb.
 
Regards
Robin
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (expand_gather_scatter): Use vwsll if
applicable.
* config/riscv/vector-crypto.md: Remove mode from vwsll shift
count operator.
* config/riscv/vector.md: Add vwsll to mode iterator.
 
gcc/testsuite/ChangeLog:
 
* lib/target-supports.exp: Add zvbb.
* gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c: New test.
---
gcc/config/riscv/riscv-v.cc                   |  42 +++++--
gcc/config/riscv/vector-crypto.md             |   4 +-
gcc/config/riscv/vector.md                    |   4 +-
.../gather-scatter/gather_load_64-12-zvbb.c   | 113 ++++++++++++++++++
gcc/testsuite/lib/target-supports.exp         |  48 +++++++-
5 files changed, 193 insertions(+), 18 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 814c5febabe..8b41b9c7774 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4016,7 +4016,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
{
   rtx ptr, vec_offset, vec_reg;
   bool zero_extend_p;
-  int scale_log2;
+  int shift;
   rtx mask = ops[5];
   rtx len = ops[6];
   if (is_load)
@@ -4025,7 +4025,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
       ptr = ops[1];
       vec_offset = ops[2];
       zero_extend_p = INTVAL (ops[3]);
-      scale_log2 = exact_log2 (INTVAL (ops[4]));
+      shift = exact_log2 (INTVAL (ops[4]));
     }
   else
     {
@@ -4033,7 +4033,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
       ptr = ops[0];
       vec_offset = ops[1];
       zero_extend_p = INTVAL (ops[2]);
-      scale_log2 = exact_log2 (INTVAL (ops[3]));
+      shift = exact_log2 (INTVAL (ops[3]));
     }
   machine_mode vec_mode = GET_MODE (vec_reg);
@@ -4043,9 +4043,12 @@ expand_gather_scatter (rtx *ops, bool is_load)
   poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
   bool is_vlmax = is_vlmax_len_p (vec_mode, len);
+  bool use_widening_shift = false;
+
   /* Extend the offset element to address width.  */
   if (inner_offsize < BITS_PER_WORD)
     {
+      use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1;
       /* 7.2. Vector Load/Store Addressing Modes.
If the vector offset elements are narrower than XLEN, they are
zero-extended to XLEN before adding to the ptr effective address. If
@@ -4054,8 +4057,8 @@ expand_gather_scatter (rtx *ops, bool is_load)
raise an illegal instruction exception if the EEW is not supported for
offset elements.
- RVV spec only refers to the scale_log == 0 case.  */
-      if (!zero_extend_p || scale_log2 != 0)
+ RVV spec only refers to the shift == 0 case.  */
+      if (!zero_extend_p || shift)
{
  if (zero_extend_p)
    inner_idx_mode
@@ -4064,19 +4067,32 @@ expand_gather_scatter (rtx *ops, bool is_load)
    inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require ();
  machine_mode new_idx_mode
    = get_vector_mode (inner_idx_mode, nunits).require ();
-   rtx tmp = gen_reg_rtx (new_idx_mode);
-   emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
-       zero_extend_p ? true : false));
-   vec_offset = tmp;
+   if (!use_widening_shift)
+     {
+       rtx tmp = gen_reg_rtx (new_idx_mode);
+       emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
+   zero_extend_p ? true : false));
+       vec_offset = tmp;
+     }
  idx_mode = new_idx_mode;
}
     }
-  if (scale_log2 != 0)
+  if (shift)
     {
-      rtx tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
-       gen_int_mode (scale_log2, Pmode), NULL_RTX, 0,
-       OPTAB_DIRECT);
+      rtx tmp;
+      if (!use_widening_shift)
+ tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
+     gen_int_mode (shift, Pmode), NULL_RTX, 0,
+     OPTAB_DIRECT);
+      else
+ {
+   tmp = gen_reg_rtx (idx_mode);
+   insn_code icode = code_for_pred_vwsll_scalar (idx_mode);
+   rtx ops[] = {tmp, vec_offset, const1_rtx};
+   emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
       vec_offset = tmp;
     }
diff --git a/gcc/config/riscv/vector-crypto.md b/gcc/config/riscv/vector-crypto.md
index 24822e2712c..0ddc2f3f3c6 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -295,7 +295,7 @@ (define_insn "@pred_vwsll<mode>"
        (ashift:VWEXTI
          (zero_extend:VWEXTI
            (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "vr"))
-         (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"  "vr"))
+         (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  "vrvk"))
        (match_operand:VWEXTI 2 "vector_merge_operand" "0vu")))]
   "TARGET_ZVBB"
   "vwsll.v%o4\t%0,%3,%4%p1"
@@ -316,7 +316,7 @@ (define_insn "@pred_vwsll<mode>_scalar"
        (ashift:VWEXTI
          (zero_extend:VWEXTI
            (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "   vr,    vr"))
-         (match_operand:<VSUBEL> 4 "pmode_reg_or_uimm5_operand" "   rK,    rK"))
+         (match_operand 4 "pmode_reg_or_uimm5_operand" "   rK,    rK"))
        (match_operand:VWEXTI 2 "vector_merge_operand"           "   vu,    0")))]
   "TARGET_ZVBB"
   "vwsll.v%o4\t%0,%3,%4%p1"
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 248461302dd..c6a3845dc13 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -750,10 +750,10 @@ (define_attr "mode_idx" ""
       (const_int 1)
       (eq_attr "type" "vssegte,vmpop,vmffs")
-        (const_int 2)       
+        (const_int 2)
       (eq_attr "type" "vstux,vstox,vssegts,vssegtux,vssegtox,vfcvtftoi,vfwcvtitof,vfwcvtftoi,
- vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo")
+ vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vwsll")
       (const_int 3)
       (eq_attr "type" "viwalu,viwmul,viwmuladd,vfwalu,vfwmul,vfwmuladd")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
new file mode 100644
index 00000000000..11a4031f47b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
@@ -0,0 +1,113 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-fno-vect-cost-model -fdump-tree-vect-details -mrvv-max-lmul=m4" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_LOOP(DATA_TYPE, INDEX_TYPE)                                       \
+  void __attribute__ ((noinline, noclone))                                     \
+  f_##DATA_TYPE##_##INDEX_TYPE (DATA_TYPE *restrict y, DATA_TYPE *restrict x,  \
+ INDEX_TYPE *restrict index)                    \
+  {                                                                            \
+    for (int i = 0; i < 100; ++i)                                              \
+      {                                                                        \
+ y[i * 2] = x[index[i * 2]] + 1;                                        \
+ y[i * 2 + 1] = x[index[i * 2 + 1]] + 2;                                \
+      }                                                                        \
+  }
+
+TEST_LOOP (int8_t, int8_t)
+TEST_LOOP (uint8_t, int8_t)
+TEST_LOOP (int16_t, int8_t)
+TEST_LOOP (uint16_t, int8_t)
+TEST_LOOP (int32_t, int8_t)
+TEST_LOOP (uint32_t, int8_t)
+TEST_LOOP (int64_t, int8_t)
+TEST_LOOP (uint64_t, int8_t)
+TEST_LOOP (_Float16, int8_t)
+TEST_LOOP (float, int8_t)
+TEST_LOOP (double, int8_t)
+TEST_LOOP (int8_t, int16_t)
+TEST_LOOP (uint8_t, int16_t)
+TEST_LOOP (int16_t, int16_t)
+TEST_LOOP (uint16_t, int16_t)
+TEST_LOOP (int32_t, int16_t)
+TEST_LOOP (uint32_t, int16_t)
+TEST_LOOP (int64_t, int16_t)
+TEST_LOOP (uint64_t, int16_t)
+TEST_LOOP (_Float16, int16_t)
+TEST_LOOP (float, int16_t)
+TEST_LOOP (double, int16_t)
+TEST_LOOP (int8_t, int32_t)
+TEST_LOOP (uint8_t, int32_t)
+TEST_LOOP (int16_t, int32_t)
+TEST_LOOP (uint16_t, int32_t)
+TEST_LOOP (int32_t, int32_t)
+TEST_LOOP (uint32_t, int32_t)
+TEST_LOOP (int64_t, int32_t)
+TEST_LOOP (uint64_t, int32_t)
+TEST_LOOP (_Float16, int32_t)
+TEST_LOOP (float, int32_t)
+TEST_LOOP (double, int32_t)
+TEST_LOOP (int8_t, int64_t)
+TEST_LOOP (uint8_t, int64_t)
+TEST_LOOP (int16_t, int64_t)
+TEST_LOOP (uint16_t, int64_t)
+TEST_LOOP (int32_t, int64_t)
+TEST_LOOP (uint32_t, int64_t)
+TEST_LOOP (int64_t, int64_t)
+TEST_LOOP (uint64_t, int64_t)
+TEST_LOOP (_Float16, int64_t)
+TEST_LOOP (float, int64_t)
+TEST_LOOP (double, int64_t)
+TEST_LOOP (int8_t, uint8_t)
+TEST_LOOP (uint8_t, uint8_t)
+TEST_LOOP (int16_t, uint8_t)
+TEST_LOOP (uint16_t, uint8_t)
+TEST_LOOP (int32_t, uint8_t)
+TEST_LOOP (uint32_t, uint8_t)
+TEST_LOOP (int64_t, uint8_t)
+TEST_LOOP (uint64_t, uint8_t)
+TEST_LOOP (_Float16, uint8_t)
+TEST_LOOP (float, uint8_t)
+TEST_LOOP (double, uint8_t)
+TEST_LOOP (int8_t, uint16_t)
+TEST_LOOP (uint8_t, uint16_t)
+TEST_LOOP (int16_t, uint16_t)
+TEST_LOOP (uint16_t, uint16_t)
+TEST_LOOP (int32_t, uint16_t)
+TEST_LOOP (uint32_t, uint16_t)
+TEST_LOOP (int64_t, uint16_t)
+TEST_LOOP (uint64_t, uint16_t)
+TEST_LOOP (_Float16, uint16_t)
+TEST_LOOP (float, uint16_t)
+TEST_LOOP (double, uint16_t)
+TEST_LOOP (int8_t, uint32_t)
+TEST_LOOP (uint8_t, uint32_t)
+TEST_LOOP (int16_t, uint32_t)
+TEST_LOOP (uint16_t, uint32_t)
+TEST_LOOP (int32_t, uint32_t)
+TEST_LOOP (uint32_t, uint32_t)
+TEST_LOOP (int64_t, uint32_t)
+TEST_LOOP (uint64_t, uint32_t)
+TEST_LOOP (_Float16, uint32_t)
+TEST_LOOP (float, uint32_t)
+TEST_LOOP (double, uint32_t)
+TEST_LOOP (int8_t, uint64_t)
+TEST_LOOP (uint8_t, uint64_t)
+TEST_LOOP (int16_t, uint64_t)
+TEST_LOOP (uint16_t, uint64_t)
+TEST_LOOP (int32_t, uint64_t)
+TEST_LOOP (uint32_t, uint64_t)
+TEST_LOOP (int64_t, uint64_t)
+TEST_LOOP (uint64_t, uint64_t)
+TEST_LOOP (_Float16, uint64_t)
+TEST_LOOP (float, uint64_t)
+TEST_LOOP (double, uint64_t)
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 88 "vect" } } */
+/* { dg-final { scan-tree-dump " \.MASK_LEN_GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-assembler "vwsll.vi" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3a55b2a4159..999e2e974ef 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1965,6 +1965,17 @@ proc check_effective_target_riscv_zbb { } {
     }]
}
+# Return 1 if the target arch supports the Zvbb extension, 0 otherwise.
+# Cache the result.
+
+proc check_effective_target_riscv_zvbb { } {
+    return [check_no_compiler_messages riscv_ext_zvbb assembly {
+       #ifndef __riscv_zvbb
+       #error "Not __riscv_zvbb"
+       #endif
+    }]
+}
+
# Return 1 if the target arch supports the XTheadVector extension, 0 otherwise.
# Cache the result.
@@ -2053,10 +2064,33 @@ proc check_effective_target_riscv_zvfh_ok { } {
     return 0
}
+proc check_effective_target_riscv_zvbb_ok { } {
+    # If the target already supports zvbb without any added options,
+    # we may assume we can execute just fine.
+    if { [check_effective_target_riscv_zvbb] } {
+ return 1
+    }
+
+    # check if we can execute vector insns with the given hardware or
+    # simulator
+    set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &zvbb]
+    if { [check_runtime ${gcc_march}_exec {
+ int main()
+ {
+     asm ("vsetivli zero,8,e16,m1,ta,ma");
+     asm ("vwsll.vi v8,v16,2" : : : "v8");
+     return 0;
+ } } "-march=${gcc_march}"] } {
+     return 1
+ }
+
+    return 0
+}
+
proc riscv_get_arch { } {
     set gcc_march ""
     # ??? do we neeed to add more extensions to the list below?
-    foreach ext { i m a f d q c v zicsr zifencei zfh zba zbb zbc zbs zvfh ztso } {
+    foreach ext { i m a f d q c v zicsr zifencei zfh zba zbb zbc zbs zvbb zvfh ztso } {
if { [check_no_compiler_messages  riscv_ext_$ext assembly [string map [list DEF __riscv_$ext] {
#ifndef DEF
#error "Not DEF"
@@ -2151,6 +2185,18 @@ proc add_options_for_riscv_zvfh { flags } {
     return "$flags -march=[riscv_get_arch]_zvfh"
}
+proc add_options_for_riscv_zvbb { flags } {
+    if { [lsearch $flags -march=*] >= 0 } {
+ # If there are multiple -march flags, we have to adjust all of them.
+ set flags [regsub -all -- {(?:^|[[:space:]])-march=[[:alnum:]_.]*} $flags &_zvbb ]
+ return [regsub -all -- {((?:^|[[:space:]])-march=[[:alnum:]_.]*_zvbb[[:alnum:]_.]*)_zvbb} $flags \\1 ]
+    }
+    if { [check_effective_target_riscv_zvbb] } {
+ return "$flags"
+    }
+    return "$flags -march=[riscv_get_arch]_zvbb"
+}
+
# Return 1 if the target OS supports running SSE executables, 0
# otherwise.  Cache the result.
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 814c5febabe..8b41b9c7774 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4016,7 +4016,7 @@  expand_gather_scatter (rtx *ops, bool is_load)
 {
   rtx ptr, vec_offset, vec_reg;
   bool zero_extend_p;
-  int scale_log2;
+  int shift;
   rtx mask = ops[5];
   rtx len = ops[6];
   if (is_load)
@@ -4025,7 +4025,7 @@  expand_gather_scatter (rtx *ops, bool is_load)
       ptr = ops[1];
       vec_offset = ops[2];
       zero_extend_p = INTVAL (ops[3]);
-      scale_log2 = exact_log2 (INTVAL (ops[4]));
+      shift = exact_log2 (INTVAL (ops[4]));
     }
   else
     {
@@ -4033,7 +4033,7 @@  expand_gather_scatter (rtx *ops, bool is_load)
       ptr = ops[0];
       vec_offset = ops[1];
       zero_extend_p = INTVAL (ops[2]);
-      scale_log2 = exact_log2 (INTVAL (ops[3]));
+      shift = exact_log2 (INTVAL (ops[3]));
     }
 
   machine_mode vec_mode = GET_MODE (vec_reg);
@@ -4043,9 +4043,12 @@  expand_gather_scatter (rtx *ops, bool is_load)
   poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
   bool is_vlmax = is_vlmax_len_p (vec_mode, len);
 
+  bool use_widening_shift = false;
+
   /* Extend the offset element to address width.  */
   if (inner_offsize < BITS_PER_WORD)
     {
+      use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1;
       /* 7.2. Vector Load/Store Addressing Modes.
 	 If the vector offset elements are narrower than XLEN, they are
 	 zero-extended to XLEN before adding to the ptr effective address. If
@@ -4054,8 +4057,8 @@  expand_gather_scatter (rtx *ops, bool is_load)
 	 raise an illegal instruction exception if the EEW is not supported for
 	 offset elements.
 
-	 RVV spec only refers to the scale_log == 0 case.  */
-      if (!zero_extend_p || scale_log2 != 0)
+	 RVV spec only refers to the shift == 0 case.  */
+      if (!zero_extend_p || shift)
 	{
 	  if (zero_extend_p)
 	    inner_idx_mode
@@ -4064,19 +4067,32 @@  expand_gather_scatter (rtx *ops, bool is_load)
 	    inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require ();
 	  machine_mode new_idx_mode
 	    = get_vector_mode (inner_idx_mode, nunits).require ();
-	  rtx tmp = gen_reg_rtx (new_idx_mode);
-	  emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
-				      zero_extend_p ? true : false));
-	  vec_offset = tmp;
+	  if (!use_widening_shift)
+	    {
+	      rtx tmp = gen_reg_rtx (new_idx_mode);
+	      emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
+					  zero_extend_p ? true : false));
+	      vec_offset = tmp;
+	    }
 	  idx_mode = new_idx_mode;
 	}
     }
 
-  if (scale_log2 != 0)
+  if (shift)
     {
-      rtx tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
-			      gen_int_mode (scale_log2, Pmode), NULL_RTX, 0,
-			      OPTAB_DIRECT);
+      rtx tmp;
+      if (!use_widening_shift)
+	tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
+			    gen_int_mode (shift, Pmode), NULL_RTX, 0,
+			    OPTAB_DIRECT);
+      else
+	{
+	  tmp = gen_reg_rtx (idx_mode);
+	  insn_code icode = code_for_pred_vwsll_scalar (idx_mode);
+	  rtx ops[] = {tmp, vec_offset, const1_rtx};
+	  emit_vlmax_insn (icode, BINARY_OP, ops);
+	}
+
       vec_offset = tmp;
     }
 
diff --git a/gcc/config/riscv/vector-crypto.md b/gcc/config/riscv/vector-crypto.md
index 24822e2712c..0ddc2f3f3c6 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -295,7 +295,7 @@  (define_insn "@pred_vwsll<mode>"
        (ashift:VWEXTI
          (zero_extend:VWEXTI
            (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "vr"))
-         (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"  "vr"))
+         (match_operand:<V_DOUBLE_TRUNC> 4 "vector_shift_operand"  "vrvk"))
        (match_operand:VWEXTI 2 "vector_merge_operand" "0vu")))]
   "TARGET_ZVBB"
   "vwsll.v%o4\t%0,%3,%4%p1"
@@ -316,7 +316,7 @@  (define_insn "@pred_vwsll<mode>_scalar"
        (ashift:VWEXTI
          (zero_extend:VWEXTI
            (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "   vr,    vr"))
-         (match_operand:<VSUBEL> 4 "pmode_reg_or_uimm5_operand" "   rK,    rK"))
+         (match_operand 4 "pmode_reg_or_uimm5_operand"		"   rK,    rK"))
        (match_operand:VWEXTI 2 "vector_merge_operand"           "   vu,    0")))]
   "TARGET_ZVBB"
   "vwsll.v%o4\t%0,%3,%4%p1"
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 248461302dd..c6a3845dc13 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -750,10 +750,10 @@  (define_attr "mode_idx" ""
 	       (const_int 1)
 
 	       (eq_attr "type" "vssegte,vmpop,vmffs")
-	       (const_int 2)       
+	       (const_int 2)
 
 	       (eq_attr "type" "vstux,vstox,vssegts,vssegtux,vssegtox,vfcvtftoi,vfwcvtitof,vfwcvtftoi,
-				vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo")
+				vfwcvtftof,vmsfs,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vwsll")
 	       (const_int 3)
 
 	       (eq_attr "type" "viwalu,viwmul,viwmuladd,vfwalu,vfwmul,vfwmuladd")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
new file mode 100644
index 00000000000..11a4031f47b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
@@ -0,0 +1,113 @@ 
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-fno-vect-cost-model -fdump-tree-vect-details -mrvv-max-lmul=m4" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_LOOP(DATA_TYPE, INDEX_TYPE)                                       \
+  void __attribute__ ((noinline, noclone))                                     \
+  f_##DATA_TYPE##_##INDEX_TYPE (DATA_TYPE *restrict y, DATA_TYPE *restrict x,  \
+				INDEX_TYPE *restrict index)                    \
+  {                                                                            \
+    for (int i = 0; i < 100; ++i)                                              \
+      {                                                                        \
+	y[i * 2] = x[index[i * 2]] + 1;                                        \
+	y[i * 2 + 1] = x[index[i * 2 + 1]] + 2;                                \
+      }                                                                        \
+  }
+
+TEST_LOOP (int8_t, int8_t)
+TEST_LOOP (uint8_t, int8_t)
+TEST_LOOP (int16_t, int8_t)
+TEST_LOOP (uint16_t, int8_t)
+TEST_LOOP (int32_t, int8_t)
+TEST_LOOP (uint32_t, int8_t)
+TEST_LOOP (int64_t, int8_t)
+TEST_LOOP (uint64_t, int8_t)
+TEST_LOOP (_Float16, int8_t)
+TEST_LOOP (float, int8_t)
+TEST_LOOP (double, int8_t)
+TEST_LOOP (int8_t, int16_t)
+TEST_LOOP (uint8_t, int16_t)
+TEST_LOOP (int16_t, int16_t)
+TEST_LOOP (uint16_t, int16_t)
+TEST_LOOP (int32_t, int16_t)
+TEST_LOOP (uint32_t, int16_t)
+TEST_LOOP (int64_t, int16_t)
+TEST_LOOP (uint64_t, int16_t)
+TEST_LOOP (_Float16, int16_t)
+TEST_LOOP (float, int16_t)
+TEST_LOOP (double, int16_t)
+TEST_LOOP (int8_t, int32_t)
+TEST_LOOP (uint8_t, int32_t)
+TEST_LOOP (int16_t, int32_t)
+TEST_LOOP (uint16_t, int32_t)
+TEST_LOOP (int32_t, int32_t)
+TEST_LOOP (uint32_t, int32_t)
+TEST_LOOP (int64_t, int32_t)
+TEST_LOOP (uint64_t, int32_t)
+TEST_LOOP (_Float16, int32_t)
+TEST_LOOP (float, int32_t)
+TEST_LOOP (double, int32_t)
+TEST_LOOP (int8_t, int64_t)
+TEST_LOOP (uint8_t, int64_t)
+TEST_LOOP (int16_t, int64_t)
+TEST_LOOP (uint16_t, int64_t)
+TEST_LOOP (int32_t, int64_t)
+TEST_LOOP (uint32_t, int64_t)
+TEST_LOOP (int64_t, int64_t)
+TEST_LOOP (uint64_t, int64_t)
+TEST_LOOP (_Float16, int64_t)
+TEST_LOOP (float, int64_t)
+TEST_LOOP (double, int64_t)
+TEST_LOOP (int8_t, uint8_t)
+TEST_LOOP (uint8_t, uint8_t)
+TEST_LOOP (int16_t, uint8_t)
+TEST_LOOP (uint16_t, uint8_t)
+TEST_LOOP (int32_t, uint8_t)
+TEST_LOOP (uint32_t, uint8_t)
+TEST_LOOP (int64_t, uint8_t)
+TEST_LOOP (uint64_t, uint8_t)
+TEST_LOOP (_Float16, uint8_t)
+TEST_LOOP (float, uint8_t)
+TEST_LOOP (double, uint8_t)
+TEST_LOOP (int8_t, uint16_t)
+TEST_LOOP (uint8_t, uint16_t)
+TEST_LOOP (int16_t, uint16_t)
+TEST_LOOP (uint16_t, uint16_t)
+TEST_LOOP (int32_t, uint16_t)
+TEST_LOOP (uint32_t, uint16_t)
+TEST_LOOP (int64_t, uint16_t)
+TEST_LOOP (uint64_t, uint16_t)
+TEST_LOOP (_Float16, uint16_t)
+TEST_LOOP (float, uint16_t)
+TEST_LOOP (double, uint16_t)
+TEST_LOOP (int8_t, uint32_t)
+TEST_LOOP (uint8_t, uint32_t)
+TEST_LOOP (int16_t, uint32_t)
+TEST_LOOP (uint16_t, uint32_t)
+TEST_LOOP (int32_t, uint32_t)
+TEST_LOOP (uint32_t, uint32_t)
+TEST_LOOP (int64_t, uint32_t)
+TEST_LOOP (uint64_t, uint32_t)
+TEST_LOOP (_Float16, uint32_t)
+TEST_LOOP (float, uint32_t)
+TEST_LOOP (double, uint32_t)
+TEST_LOOP (int8_t, uint64_t)
+TEST_LOOP (uint8_t, uint64_t)
+TEST_LOOP (int16_t, uint64_t)
+TEST_LOOP (uint16_t, uint64_t)
+TEST_LOOP (int32_t, uint64_t)
+TEST_LOOP (uint32_t, uint64_t)
+TEST_LOOP (int64_t, uint64_t)
+TEST_LOOP (uint64_t, uint64_t)
+TEST_LOOP (_Float16, uint64_t)
+TEST_LOOP (float, uint64_t)
+TEST_LOOP (double, uint64_t)
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 88 "vect" } } */
+/* { dg-final { scan-tree-dump " \.MASK_LEN_GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "vect" } } */
+/* { dg-final { scan-assembler "vwsll.vi" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3a55b2a4159..999e2e974ef 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1965,6 +1965,17 @@  proc check_effective_target_riscv_zbb { } {
     }]
 }
 
+# Return 1 if the target arch supports the Zvbb extension, 0 otherwise.
+# Cache the result.
+
+proc check_effective_target_riscv_zvbb { } {
+    return [check_no_compiler_messages riscv_ext_zvbb assembly {
+       #ifndef __riscv_zvbb
+       #error "Not __riscv_zvbb"
+       #endif
+    }]
+}
+
 # Return 1 if the target arch supports the XTheadVector extension, 0 otherwise.
 # Cache the result.
 
@@ -2053,10 +2064,33 @@  proc check_effective_target_riscv_zvfh_ok { } {
     return 0
 }
 
+proc check_effective_target_riscv_zvbb_ok { } {
+    # If the target already supports zvbb without any added options,
+    # we may assume we can execute just fine.
+    if { [check_effective_target_riscv_zvbb] } {
+	return 1
+    }
+
+    # check if we can execute vector insns with the given hardware or
+    # simulator
+    set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &zvbb]
+    if { [check_runtime ${gcc_march}_exec {
+	int main()
+	{
+	    asm ("vsetivli zero,8,e16,m1,ta,ma");
+	    asm ("vwsll.vi v8,v16,2" : : : "v8");
+	    return 0;
+	} } "-march=${gcc_march}"] } {
+	    return 1
+	}
+
+    return 0
+}
+
 proc riscv_get_arch { } {
     set gcc_march ""
     # ??? do we neeed to add more extensions to the list below?
-    foreach ext { i m a f d q c v zicsr zifencei zfh zba zbb zbc zbs zvfh ztso } {
+    foreach ext { i m a f d q c v zicsr zifencei zfh zba zbb zbc zbs zvbb zvfh ztso } {
 	if { [check_no_compiler_messages  riscv_ext_$ext assembly [string map [list DEF __riscv_$ext] {
 		#ifndef DEF
 		#error "Not DEF"
@@ -2151,6 +2185,18 @@  proc add_options_for_riscv_zvfh { flags } {
     return "$flags -march=[riscv_get_arch]_zvfh"
 }
 
+proc add_options_for_riscv_zvbb { flags } {
+    if { [lsearch $flags -march=*] >= 0 } {
+	# If there are multiple -march flags, we have to adjust all of them.
+	set flags [regsub -all -- {(?:^|[[:space:]])-march=[[:alnum:]_.]*} $flags &_zvbb ]
+	return [regsub -all -- {((?:^|[[:space:]])-march=[[:alnum:]_.]*_zvbb[[:alnum:]_.]*)_zvbb} $flags \\1 ]
+    }
+    if { [check_effective_target_riscv_zvbb] } {
+	return "$flags"
+    }
+    return "$flags -march=[riscv_get_arch]_zvbb"
+}
+
 # Return 1 if the target OS supports running SSE executables, 0
 # otherwise.  Cache the result.