diff mbox series

[V6] Machine Description: Add LEN_MASK_{GATHER_LOAD, SCATTER_STORE} pattern

Message ID 20230703021500.165265-1-juzhe.zhong@rivai.ai
State New
Headers show
Series [V6] Machine Description: Add LEN_MASK_{GATHER_LOAD, SCATTER_STORE} pattern | expand

Commit Message

juzhe.zhong@rivai.ai July 3, 2023, 2:15 a.m. UTC
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

Hi, Richi and Richard.

Base one the review comments from Richard:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623405.html

I add the helper function:
/* Add len, bias and mask arguments according to the STMT.  */

static unsigned int
add_len_bias_and_mask_args (expand_operand *ops, unsigned int opno, gcall *stmt)
{
  internal_fn ifn = gimple_call_internal_fn (stmt);
  int len_index = internal_fn_len_index (ifn);
  int bias_index = internal_fn_bias_index (ifn);
  int mask_index = internal_fn_mask_index (ifn);
  /* The order of arguments are always {len,bias,mask}.  */
  if (len_index >= 0)
    {
      tree len = gimple_call_arg (stmt, len_index);
      rtx len_rtx = expand_normal (len);
      create_convert_operand_from (&ops[opno++], len_rtx,
				   TYPE_MODE (TREE_TYPE (len)),
				   TYPE_UNSIGNED (TREE_TYPE (len)));
    }
  if (bias_index >= 0)
    {
      tree biast = gimple_call_arg (stmt, bias_index);
      rtx bias = expand_normal (biast);
      create_input_operand (&ops[opno++], bias, QImode);
    }
  if (mask_index >= 0)
    {
      tree mask = gimple_call_arg (stmt, mask_index);
      rtx mask_rtx = expand_normal (mask);
      create_input_operand (&ops[opno++], mask_rtx,
			    TYPE_MODE (TREE_TYPE (mask)));
    }
  return opno;
}

I think current codes look more reasonable and easier maitain now.

This patch is adding LEN_MASK_{GATHER_LOAD,SCATTER_STORE} to allow targets
handle flow control by mask and loop control by length on gather/scatter memory
operations. Consider this following case:

#include <stdint.h>
void
f (uint8_t *restrict a,
   uint8_t *restrict b, int n,
   int base, int step,
   int *restrict cond)
{
  for (int i = 0; i < n; ++i)
    {
      if (cond[i])
        a[i * step + base] = b[i * step + base];
    }
}

We hope RVV can vectorize such case into following IR:

loop_len = SELECT_VL
control_mask = comparison
v = LEN_MASK_GATHER_LOAD (.., loop_len, control_mask, bias)
LEN_SCATTER_STORE (... v, ..., loop_len, control_mask, bias)

This patch doesn't apply such patterns into vectorizer, just add patterns
and update the documents.

Will send patch which apply such patterns into vectorizer soon after this
patch is approved.

Ok for trunk?

gcc/ChangeLog:

        * doc/md.texi: Add len_mask_gather_load/len_mask_scatter_store.
        * internal-fn.cc (add_len_bias_and_mask_args): New function.
        (expand_scatter_store_optab_fn): Add len_mask_gather_load/len_mask_scatter_store.
        (expand_gather_load_optab_fn): Ditto.
        (internal_load_fn_p): Ditto.
        (internal_store_fn_p): Ditto.
        (internal_gather_scatter_fn_p): Ditto.
        (internal_fn_mask_index): Ditto.
        (internal_fn_len_index): New function.
        (internal_fn_bias_index): New function.
        (internal_fn_stored_value_index): Add len_mask_gather_load/len_mask_scatter_store.
        * internal-fn.def (LEN_MASK_GATHER_LOAD): Ditto.
        (LEN_MASK_SCATTER_STORE): Ditto.
        * internal-fn.h (internal_fn_len_index): New function.
        (internal_fn_bias_index): Ditto.
        * optabs.def (OPTAB_CD): Add len_mask_gather_load/len_mask_scatter_store.

---
 gcc/doc/md.texi     |  17 +++++++
 gcc/internal-fn.cc  | 108 ++++++++++++++++++++++++++++++++++++++------
 gcc/internal-fn.def |   8 +++-
 gcc/internal-fn.h   |   2 +
 gcc/optabs.def      |   2 +
 5 files changed, 120 insertions(+), 17 deletions(-)

Comments

juzhe.zhong@rivai.ai July 3, 2023, 2:21 a.m. UTC | #1
Oh. Sorry. I forget update the changeLog.
We hope RVV can vectorize such case into following IR:
loop_len = SELECT_VL
control_mask = comparison
v = LEN_MASK_GATHER_LOAD (.., loop_len, control_mask, bias)
LEN_SCATTER_STORE (... v, ..., loop_len, control_mask, bias)

This change log is incorrect and misleading.

As Richard's comments, I have change the order of the arguments into:

loop_len = SELECT_VL
control_mask = comparison
v = LEN_MASK_GATHER_LOAD (.., loop_len, bias, control_mask)
LEN_SCATTER_STORE (... v, ..., loop_len, bias, control_mask)
Sorry about that.


juzhe.zhong@rivai.ai
 
From: juzhe.zhong
Date: 2023-07-03 10:15
To: gcc-patches
CC: richard.sandiford; rguenther; pan2.li; Ju-Zhe Zhong
Subject: [PATCH V6] Machine Description: Add LEN_MASK_{GATHER_LOAD, SCATTER_STORE} pattern
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
 
Hi, Richi and Richard.
 
Base one the review comments from Richard:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623405.html
 
I add the helper function:
/* Add len, bias and mask arguments according to the STMT.  */
 
static unsigned int
add_len_bias_and_mask_args (expand_operand *ops, unsigned int opno, gcall *stmt)
{
  internal_fn ifn = gimple_call_internal_fn (stmt);
  int len_index = internal_fn_len_index (ifn);
  int bias_index = internal_fn_bias_index (ifn);
  int mask_index = internal_fn_mask_index (ifn);
  /* The order of arguments are always {len,bias,mask}.  */
  if (len_index >= 0)
    {
      tree len = gimple_call_arg (stmt, len_index);
      rtx len_rtx = expand_normal (len);
      create_convert_operand_from (&ops[opno++], len_rtx,
   TYPE_MODE (TREE_TYPE (len)),
   TYPE_UNSIGNED (TREE_TYPE (len)));
    }
  if (bias_index >= 0)
    {
      tree biast = gimple_call_arg (stmt, bias_index);
      rtx bias = expand_normal (biast);
      create_input_operand (&ops[opno++], bias, QImode);
    }
  if (mask_index >= 0)
    {
      tree mask = gimple_call_arg (stmt, mask_index);
      rtx mask_rtx = expand_normal (mask);
      create_input_operand (&ops[opno++], mask_rtx,
    TYPE_MODE (TREE_TYPE (mask)));
    }
  return opno;
}
 
I think current codes look more reasonable and easier maitain now.
 
This patch is adding LEN_MASK_{GATHER_LOAD,SCATTER_STORE} to allow targets
handle flow control by mask and loop control by length on gather/scatter memory
operations. Consider this following case:
 
#include <stdint.h>
void
f (uint8_t *restrict a,
   uint8_t *restrict b, int n,
   int base, int step,
   int *restrict cond)
{
  for (int i = 0; i < n; ++i)
    {
      if (cond[i])
        a[i * step + base] = b[i * step + base];
    }
}
 
We hope RVV can vectorize such case into following IR:
 
loop_len = SELECT_VL
control_mask = comparison
v = LEN_MASK_GATHER_LOAD (.., loop_len, control_mask, bias)
LEN_SCATTER_STORE (... v, ..., loop_len, control_mask, bias)
 
This patch doesn't apply such patterns into vectorizer, just add patterns
and update the documents.
 
Will send patch which apply such patterns into vectorizer soon after this
patch is approved.
 
Ok for trunk?
 
gcc/ChangeLog:
 
        * doc/md.texi: Add len_mask_gather_load/len_mask_scatter_store.
        * internal-fn.cc (add_len_bias_and_mask_args): New function.
        (expand_scatter_store_optab_fn): Add len_mask_gather_load/len_mask_scatter_store.
        (expand_gather_load_optab_fn): Ditto.
        (internal_load_fn_p): Ditto.
        (internal_store_fn_p): Ditto.
        (internal_gather_scatter_fn_p): Ditto.
        (internal_fn_mask_index): Ditto.
        (internal_fn_len_index): New function.
        (internal_fn_bias_index): New function.
        (internal_fn_stored_value_index): Add len_mask_gather_load/len_mask_scatter_store.
        * internal-fn.def (LEN_MASK_GATHER_LOAD): Ditto.
        (LEN_MASK_SCATTER_STORE): Ditto.
        * internal-fn.h (internal_fn_len_index): New function.
        (internal_fn_bias_index): Ditto.
        * optabs.def (OPTAB_CD): Add len_mask_gather_load/len_mask_scatter_store.
 
---
gcc/doc/md.texi     |  17 +++++++
gcc/internal-fn.cc  | 108 ++++++++++++++++++++++++++++++++++++++------
gcc/internal-fn.def |   8 +++-
gcc/internal-fn.h   |   2 +
gcc/optabs.def      |   2 +
5 files changed, 120 insertions(+), 17 deletions(-)
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index cefdee84821..b44d1ba3af9 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5040,6 +5040,15 @@ operand 5.  Bit @var{i} of the mask is set if element @var{i}
of the result should be loaded from memory and clear if element @var{i}
of the result should be set to zero.
+@cindex @code{len_mask_gather_load@var{m}@var{n}} instruction pattern
+@item @samp{len_mask_gather_load@var{m}@var{n}}
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra length operand (operand 5),
+a bias operand (operand 6) as well as a mask operand (operand 7).  Similar to len_maskload,
+the instruction loads at most (operand 5 + operand 6) elements from memory.
+Bit @var{i} of the mask is set if element @var{i} of the result should
+be loaded from memory and clear if element @var{i} of the result should be undefined.
+Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
+
@cindex @code{scatter_store@var{m}@var{n}} instruction pattern
@item @samp{scatter_store@var{m}@var{n}}
Store a vector of mode @var{m} into several distinct memory locations.
@@ -5069,6 +5078,14 @@ Like @samp{scatter_store@var{m}@var{n}}, but takes an extra mask operand as
operand 5.  Bit @var{i} of the mask is set if element @var{i}
of the result should be stored to memory.
+@cindex @code{len_mask_scatter_store@var{m}@var{n}} instruction pattern
+@item @samp{len_mask_scatter_store@var{m}@var{n}}
+Like @samp{scatter_store@var{m}@var{n}}, but takes an extra length operand (operand 5),
+a bias operand (operand 6) as well as a mask operand (operand 7).  The instruction stores
+at most (operand 5 + operand 6) elements of (operand 4) to memory.
+Bit @var{i} of the mask is set if element @var{i} of (operand 4) should be stored.
+Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
+
@cindex @code{vec_set@var{m}} instruction pattern
@item @samp{vec_set@var{m}}
Set given field in the vector value.  Operand 0 is the vector to modify,
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 9017176dc7a..43ac5b37cc9 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3519,6 +3519,40 @@ expand_LAUNDER (internal_fn, gcall *call)
   expand_assignment (lhs, gimple_call_arg (call, 0), false);
}
+/* Add len, bias and mask arguments according to the STMT.  */
+
+static unsigned int
+add_len_bias_and_mask_args (expand_operand *ops, unsigned int opno, gcall *stmt)
+{
+  internal_fn ifn = gimple_call_internal_fn (stmt);
+  int len_index = internal_fn_len_index (ifn);
+  int bias_index = internal_fn_bias_index (ifn);
+  int mask_index = internal_fn_mask_index (ifn);
+  /* The order of arguments are always {len,bias,mask}.  */
+  if (len_index >= 0)
+    {
+      tree len = gimple_call_arg (stmt, len_index);
+      rtx len_rtx = expand_normal (len);
+      create_convert_operand_from (&ops[opno++], len_rtx,
+    TYPE_MODE (TREE_TYPE (len)),
+    TYPE_UNSIGNED (TREE_TYPE (len)));
+    }
+  if (bias_index >= 0)
+    {
+      tree biast = gimple_call_arg (stmt, bias_index);
+      rtx bias = expand_normal (biast);
+      create_input_operand (&ops[opno++], bias, QImode);
+    }
+  if (mask_index >= 0)
+    {
+      tree mask = gimple_call_arg (stmt, mask_index);
+      rtx mask_rtx = expand_normal (mask);
+      create_input_operand (&ops[opno++], mask_rtx,
+     TYPE_MODE (TREE_TYPE (mask)));
+    }
+  return opno;
+}
+
/* Expand {MASK_,}SCATTER_STORE{S,U} call CALL using optab OPTAB.  */
static void
@@ -3526,7 +3560,6 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
{
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int rhs_index = internal_fn_stored_value_index (ifn);
-  int mask_index = internal_fn_mask_index (ifn);
   tree base = gimple_call_arg (stmt, 0);
   tree offset = gimple_call_arg (stmt, 1);
   tree scale = gimple_call_arg (stmt, 2);
@@ -3537,19 +3570,14 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   HOST_WIDE_INT scale_int = tree_to_shwi (scale);
   rtx rhs_rtx = expand_normal (rhs);
-  class expand_operand ops[6];
+  class expand_operand ops[8];
   int i = 0;
   create_address_operand (&ops[i++], base_rtx);
   create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
   create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
-  if (mask_index >= 0)
-    {
-      tree mask = gimple_call_arg (stmt, mask_index);
-      rtx mask_rtx = expand_normal (mask);
-      create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
-    }
+  i = add_len_bias_and_mask_args (ops, i, stmt);
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
   TYPE_MODE (TREE_TYPE (offset)));
@@ -3572,18 +3600,13 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   HOST_WIDE_INT scale_int = tree_to_shwi (scale);
   int i = 0;
-  class expand_operand ops[6];
+  class expand_operand ops[8];
   create_output_operand (&ops[i++], lhs_rtx, TYPE_MODE (TREE_TYPE (lhs)));
   create_address_operand (&ops[i++], base_rtx);
   create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
-  if (optab == mask_gather_load_optab)
-    {
-      tree mask = gimple_call_arg (stmt, 4);
-      rtx mask_rtx = expand_normal (mask);
-      create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
-    }
+  i = add_len_bias_and_mask_args (ops, i, stmt);
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
   TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
@@ -4434,6 +4457,7 @@ internal_load_fn_p (internal_fn fn)
     case IFN_MASK_LOAD_LANES:
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_GATHER_LOAD:
     case IFN_LEN_LOAD:
     case IFN_LEN_MASK_LOAD:
       return true;
@@ -4455,6 +4479,7 @@ internal_store_fn_p (internal_fn fn)
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
     case IFN_LEN_MASK_STORE:
       return true;
@@ -4473,8 +4498,10 @@ internal_gather_scatter_fn_p (internal_fn fn)
     {
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_GATHER_LOAD:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_MASK_SCATTER_STORE:
       return true;
     default:
@@ -4500,6 +4527,10 @@ internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_SCATTER_STORE:
       return 4;
+    case IFN_LEN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_SCATTER_STORE:
+      return 6;
+
     case IFN_LEN_MASK_LOAD:
     case IFN_LEN_MASK_STORE:
       return 3;
@@ -4510,6 +4541,52 @@ internal_fn_mask_index (internal_fn fn)
     }
}
+/* If FN takes a vector len argument, return the index of that argument,
+   otherwise return -1.  */
+
+int
+internal_fn_len_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_LEN_LOAD:
+    case IFN_LEN_STORE:
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
+      return 3;
+
+    case IFN_LEN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_SCATTER_STORE:
+      return 4;
+
+    default:
+      return -1;
+    }
+}
+
+/* If FN takes a vector bias argument, return the index of that argument,
+   otherwise return -1.  */
+
+int
+internal_fn_bias_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_LEN_LOAD:
+    case IFN_LEN_STORE:
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
+      return 4;
+
+    case IFN_LEN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_SCATTER_STORE:
+      return 5;
+
+    default:
+      return -1;
+    }
+}
+
/* If FN takes a value that should be stored to memory, return the index
    of that argument, otherwise return -1.  */
@@ -4522,6 +4599,7 @@ internal_fn_stored_value_index (internal_fn fn)
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
       return 3;
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d9fcca8430f..a1fdbcbf515 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -48,14 +48,14 @@ along with GCC; see the file COPYING3.  If not see
    - mask_load: currently just maskload
    - load_lanes: currently just vec_load_lanes
    - mask_load_lanes: currently just vec_mask_load_lanes
-   - gather_load: used for {mask_,}gather_load
+   - gather_load: used for {mask_,len_mask_,}gather_load
    - len_load: currently just len_load
    - len_maskload: currently just len_maskload
    - mask_store: currently just maskstore
    - store_lanes: currently just vec_store_lanes
    - mask_store_lanes: currently just vec_mask_store_lanes
-   - scatter_store: used for {mask_,}scatter_store
+   - scatter_store: used for {mask_,len_mask_,}scatter_store
    - len_store: currently just len_store
    - len_maskstore: currently just len_maskstore
@@ -157,6 +157,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE,
DEF_INTERNAL_OPTAB_FN (GATHER_LOAD, ECF_PURE, gather_load, gather_load)
DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
       mask_gather_load, gather_load)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_GATHER_LOAD, ECF_PURE,
+        len_mask_gather_load, gather_load)
DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
DEF_INTERNAL_OPTAB_FN (LEN_MASK_LOAD, ECF_PURE, len_maskload, len_maskload)
@@ -164,6 +166,8 @@ DEF_INTERNAL_OPTAB_FN (LEN_MASK_LOAD, ECF_PURE, len_maskload, len_maskload)
DEF_INTERNAL_OPTAB_FN (SCATTER_STORE, 0, scatter_store, scatter_store)
DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
       mask_scatter_store, scatter_store)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_SCATTER_STORE, 0,
+        len_mask_scatter_store, scatter_store)
DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 8f21068e300..396ad2a48bc 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -234,6 +234,8 @@ extern bool internal_load_fn_p (internal_fn);
extern bool internal_store_fn_p (internal_fn);
extern bool internal_gather_scatter_fn_p (internal_fn);
extern int internal_fn_mask_index (internal_fn);
+extern int internal_fn_len_index (internal_fn);
+extern int internal_fn_bias_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
    tree, tree, int);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a901b68c538..73c9a0c760f 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -95,8 +95,10 @@ OPTAB_CD(len_maskload_optab, "len_maskload$a$b")
OPTAB_CD(len_maskstore_optab, "len_maskstore$a$b")
OPTAB_CD(gather_load_optab, "gather_load$a$b")
OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
+OPTAB_CD(len_mask_gather_load_optab, "len_mask_gather_load$a$b")
OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store$a$b")
+OPTAB_CD(len_mask_scatter_store_optab, "len_mask_scatter_store$a$b")
OPTAB_CD(vec_extract_optab, "vec_extract$a$b")
OPTAB_CD(vec_init_optab, "vec_init$a$b")
diff mbox series

Patch

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index cefdee84821..b44d1ba3af9 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5040,6 +5040,15 @@  operand 5.  Bit @var{i} of the mask is set if element @var{i}
 of the result should be loaded from memory and clear if element @var{i}
 of the result should be set to zero.
 
+@cindex @code{len_mask_gather_load@var{m}@var{n}} instruction pattern
+@item @samp{len_mask_gather_load@var{m}@var{n}}
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra length operand (operand 5),
+a bias operand (operand 6) as well as a mask operand (operand 7).  Similar to len_maskload,
+the instruction loads at most (operand 5 + operand 6) elements from memory.
+Bit @var{i} of the mask is set if element @var{i} of the result should
+be loaded from memory and clear if element @var{i} of the result should be undefined.
+Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
+
 @cindex @code{scatter_store@var{m}@var{n}} instruction pattern
 @item @samp{scatter_store@var{m}@var{n}}
 Store a vector of mode @var{m} into several distinct memory locations.
@@ -5069,6 +5078,14 @@  Like @samp{scatter_store@var{m}@var{n}}, but takes an extra mask operand as
 operand 5.  Bit @var{i} of the mask is set if element @var{i}
 of the result should be stored to memory.
 
+@cindex @code{len_mask_scatter_store@var{m}@var{n}} instruction pattern
+@item @samp{len_mask_scatter_store@var{m}@var{n}}
+Like @samp{scatter_store@var{m}@var{n}}, but takes an extra length operand (operand 5),
+a bias operand (operand 6) as well as a mask operand (operand 7).  The instruction stores
+at most (operand 5 + operand 6) elements of (operand 4) to memory.
+Bit @var{i} of the mask is set if element @var{i} of (operand 4) should be stored.
+Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 9017176dc7a..43ac5b37cc9 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3519,6 +3519,40 @@  expand_LAUNDER (internal_fn, gcall *call)
   expand_assignment (lhs, gimple_call_arg (call, 0), false);
 }
 
+/* Add len, bias and mask arguments according to the STMT.  */
+
+static unsigned int
+add_len_bias_and_mask_args (expand_operand *ops, unsigned int opno, gcall *stmt)
+{
+  internal_fn ifn = gimple_call_internal_fn (stmt);
+  int len_index = internal_fn_len_index (ifn);
+  int bias_index = internal_fn_bias_index (ifn);
+  int mask_index = internal_fn_mask_index (ifn);
+  /* The order of arguments are always {len,bias,mask}.  */
+  if (len_index >= 0)
+    {
+      tree len = gimple_call_arg (stmt, len_index);
+      rtx len_rtx = expand_normal (len);
+      create_convert_operand_from (&ops[opno++], len_rtx,
+				   TYPE_MODE (TREE_TYPE (len)),
+				   TYPE_UNSIGNED (TREE_TYPE (len)));
+    }
+  if (bias_index >= 0)
+    {
+      tree biast = gimple_call_arg (stmt, bias_index);
+      rtx bias = expand_normal (biast);
+      create_input_operand (&ops[opno++], bias, QImode);
+    }
+  if (mask_index >= 0)
+    {
+      tree mask = gimple_call_arg (stmt, mask_index);
+      rtx mask_rtx = expand_normal (mask);
+      create_input_operand (&ops[opno++], mask_rtx,
+			    TYPE_MODE (TREE_TYPE (mask)));
+    }
+  return opno;
+}
+
 /* Expand {MASK_,}SCATTER_STORE{S,U} call CALL using optab OPTAB.  */
 
 static void
@@ -3526,7 +3560,6 @@  expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
 {
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int rhs_index = internal_fn_stored_value_index (ifn);
-  int mask_index = internal_fn_mask_index (ifn);
   tree base = gimple_call_arg (stmt, 0);
   tree offset = gimple_call_arg (stmt, 1);
   tree scale = gimple_call_arg (stmt, 2);
@@ -3537,19 +3570,14 @@  expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   HOST_WIDE_INT scale_int = tree_to_shwi (scale);
   rtx rhs_rtx = expand_normal (rhs);
 
-  class expand_operand ops[6];
+  class expand_operand ops[8];
   int i = 0;
   create_address_operand (&ops[i++], base_rtx);
   create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
   create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
-  if (mask_index >= 0)
-    {
-      tree mask = gimple_call_arg (stmt, mask_index);
-      rtx mask_rtx = expand_normal (mask);
-      create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
-    }
+  i = add_len_bias_and_mask_args (ops, i, stmt);
 
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
 					   TYPE_MODE (TREE_TYPE (offset)));
@@ -3572,18 +3600,13 @@  expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   HOST_WIDE_INT scale_int = tree_to_shwi (scale);
 
   int i = 0;
-  class expand_operand ops[6];
+  class expand_operand ops[8];
   create_output_operand (&ops[i++], lhs_rtx, TYPE_MODE (TREE_TYPE (lhs)));
   create_address_operand (&ops[i++], base_rtx);
   create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
-  if (optab == mask_gather_load_optab)
-    {
-      tree mask = gimple_call_arg (stmt, 4);
-      rtx mask_rtx = expand_normal (mask);
-      create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
-    }
+  i = add_len_bias_and_mask_args (ops, i, stmt);
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
 					   TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
@@ -4434,6 +4457,7 @@  internal_load_fn_p (internal_fn fn)
     case IFN_MASK_LOAD_LANES:
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_GATHER_LOAD:
     case IFN_LEN_LOAD:
     case IFN_LEN_MASK_LOAD:
       return true;
@@ -4455,6 +4479,7 @@  internal_store_fn_p (internal_fn fn)
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
     case IFN_LEN_MASK_STORE:
       return true;
@@ -4473,8 +4498,10 @@  internal_gather_scatter_fn_p (internal_fn fn)
     {
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_GATHER_LOAD:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_MASK_SCATTER_STORE:
       return true;
 
     default:
@@ -4500,6 +4527,10 @@  internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_SCATTER_STORE:
       return 4;
 
+    case IFN_LEN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_SCATTER_STORE:
+      return 6;
+
     case IFN_LEN_MASK_LOAD:
     case IFN_LEN_MASK_STORE:
       return 3;
@@ -4510,6 +4541,52 @@  internal_fn_mask_index (internal_fn fn)
     }
 }
 
+/* If FN takes a vector len argument, return the index of that argument,
+   otherwise return -1.  */
+
+int
+internal_fn_len_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_LEN_LOAD:
+    case IFN_LEN_STORE:
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
+      return 3;
+
+    case IFN_LEN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_SCATTER_STORE:
+      return 4;
+
+    default:
+      return -1;
+    }
+}
+
+/* If FN takes a vector bias argument, return the index of that argument,
+   otherwise return -1.  */
+
+int
+internal_fn_bias_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_LEN_LOAD:
+    case IFN_LEN_STORE:
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
+      return 4;
+
+    case IFN_LEN_MASK_GATHER_LOAD:
+    case IFN_LEN_MASK_SCATTER_STORE:
+      return 5;
+
+    default:
+      return -1;
+    }
+}
+
 /* If FN takes a value that should be stored to memory, return the index
    of that argument, otherwise return -1.  */
 
@@ -4522,6 +4599,7 @@  internal_fn_stored_value_index (internal_fn fn)
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
+    case IFN_LEN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
       return 3;
 
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d9fcca8430f..a1fdbcbf515 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -48,14 +48,14 @@  along with GCC; see the file COPYING3.  If not see
    - mask_load: currently just maskload
    - load_lanes: currently just vec_load_lanes
    - mask_load_lanes: currently just vec_mask_load_lanes
-   - gather_load: used for {mask_,}gather_load
+   - gather_load: used for {mask_,len_mask_,}gather_load
    - len_load: currently just len_load
    - len_maskload: currently just len_maskload
 
    - mask_store: currently just maskstore
    - store_lanes: currently just vec_store_lanes
    - mask_store_lanes: currently just vec_mask_store_lanes
-   - scatter_store: used for {mask_,}scatter_store
+   - scatter_store: used for {mask_,len_mask_,}scatter_store
    - len_store: currently just len_store
    - len_maskstore: currently just len_maskstore
 
@@ -157,6 +157,8 @@  DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE,
 DEF_INTERNAL_OPTAB_FN (GATHER_LOAD, ECF_PURE, gather_load, gather_load)
 DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
 		       mask_gather_load, gather_load)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_GATHER_LOAD, ECF_PURE,
+		       len_mask_gather_load, gather_load)
 
 DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
 DEF_INTERNAL_OPTAB_FN (LEN_MASK_LOAD, ECF_PURE, len_maskload, len_maskload)
@@ -164,6 +166,8 @@  DEF_INTERNAL_OPTAB_FN (LEN_MASK_LOAD, ECF_PURE, len_maskload, len_maskload)
 DEF_INTERNAL_OPTAB_FN (SCATTER_STORE, 0, scatter_store, scatter_store)
 DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
 		       mask_scatter_store, scatter_store)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_SCATTER_STORE, 0,
+		       len_mask_scatter_store, scatter_store)
 
 DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
 DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 8f21068e300..396ad2a48bc 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -234,6 +234,8 @@  extern bool internal_load_fn_p (internal_fn);
 extern bool internal_store_fn_p (internal_fn);
 extern bool internal_gather_scatter_fn_p (internal_fn);
 extern int internal_fn_mask_index (internal_fn);
+extern int internal_fn_len_index (internal_fn);
+extern int internal_fn_bias_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
 						    tree, tree, int);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a901b68c538..73c9a0c760f 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -95,8 +95,10 @@  OPTAB_CD(len_maskload_optab, "len_maskload$a$b")
 OPTAB_CD(len_maskstore_optab, "len_maskstore$a$b")
 OPTAB_CD(gather_load_optab, "gather_load$a$b")
 OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
+OPTAB_CD(len_mask_gather_load_optab, "len_mask_gather_load$a$b")
 OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
 OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store$a$b")
+OPTAB_CD(len_mask_scatter_store_optab, "len_mask_scatter_store$a$b")
 OPTAB_CD(vec_extract_optab, "vec_extract$a$b")
 OPTAB_CD(vec_init_optab, "vec_init$a$b")