[AArch64] Add BIC-imm and ORR-imm SIMD patterns

Submitted by Sudi Das on April 18, 2017, 4:39 p.m.

Details

Message ID VI1PR08MB2688AF6541412B89D11F691C98190@VI1PR08MB2688.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Sudi Das April 18, 2017, 4:39 p.m.
Hello all

This patch adds support for the BIC (vector, immediate) and ORR (vector, immediate) SIMD patterns to the AArch64 backend.
One example (compiled with -O2 -ftree-vectorize) is:

void
bic_s (short *a)
{
  for (int i = 0; i < 1024; i++)
    a[i] &= ~(0xff);
}

which now produces:
bic_s:
	add	x1, x0, 2048
	.p2align 2
.L2:
	ldr	q0, [x0]
	bic	v0.8h, #255
	str	q0, [x0], 16
	cmp	x1, x0
	bne	.L2
	ret

instead of
bic_s:
	movi	v1.8h, 0xff, lsl 8
	add	x1, x0, 2048
	.p2align 2
.L2:
	ldr	q0, [x0]
	and	v0.16b, v0.16b, v1.16b
	str	q0, [x0], 16
	cmp	x1, x0
	bne	.L2
	ret

Added new tests and checked for regressions with a bootstrap on aarch64-none-linux-gnu.
OK for stage 1?

Thanks 
Sudi

2017-04-04 Sudakshina Das  <sudi.das@arm.com>

	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
	for aarch64_simd_valid_immediate.
	(aarch64_output_simd_general_immediate): New declaration.
	(aarch64_simd_valid_immediate): Update prototype.

	* config/aarch64/aarch64-simd.md (*bic_imm_<mode>3): New pattern.
	(*ior_imm_<mode>3): Likewise.

	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
	for valid immediate for BIC and ORR based on new enum argument.
	(aarch64_output_simd_general_immediate): New function to output new BIC/ORR.
 
	* config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p): New.
	(aarch64_simd_valid_orr_imm_p): Likewise.

2017-04-04 Sudakshina Das  <sudi.das@arm.com>

	* gcc.target/aarch64/bic_imm_1.c: New test.
	* gcc.target/aarch64/orr_imm_1.c: Likewise.

Comments

Richard Earnshaw (lists) May 5, 2017, 1:30 p.m.
On 18/04/17 17:39, Sudi Das wrote:
> 
> Hello all
> 
> This patch adds the support for BIC (vector, immediate) and ORR (vector, immediate) SIMD patterns to the AArch64 backend.
> One of the examples of this is : (with -O2 -ftree-vectorize)
> 
> void
> bic_s (short *a)
> {
>   for (int i = 0; i < 1024; i++)
>     a[i] &= ~(0xff);
> }
> 
> which now produces :
> bic_s:
> 	add	x1, x0, 2048
> 	.p2align 2
> .L2:
> 	ldr	q0, [x0]
> 	bic	v0.8h, #255
> 	str	q0, [x0], 16
> 	cmp	x1, x0
> 	bne	.L2
> 	ret
> 
> instead of
> bic_s:
> 	movi	v1.8h, 0xff, lsl 8
> 	add	x1, x0, 2048
> 	.p2align 2
> .L2:
> 	ldr	q0, [x0]
> 	and	v0.16b, v0.16b, v1.16b
> 	str	q0, [x0], 16
> 	cmp	x1, x0
> 	bne	.L2
> 	ret
> 
> Added new tests and checked for regressions on bootstrapped aarch64-none-linux-gnu
> Ok for stage 1?
> 
> Thanks 
> Sudi
> 
> 2017-04-04 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
> 	for aarch64_simd_valid_immediate.
> 	(aarch64_output_simd_general_immediate): New declaration.
> 	(aarch64_simd_valid_immediate): Update prototype.
> 
> 	* config/aarch64/aarch64-simd.md (*bic_imm_<mode>3): New pattern.
> 	(*ior_imm_<mode>3): Likewise.
> 
> 	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
> 	for valid immediate for BIC and ORR based on new enum argument.
> 	(aarch64_output_simd_general_immediate): New function to output new BIC/ORR.
>  
> 	* config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p) : New.
> 	(aarch64_simd_valid_orr_imm_p) : Likewise.
> 
> 2017-04-04 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* gcc.target/aarch64/bic_imm_1.c: New test.
> 	* gcc.target/aarch64/orr_imm_1.c: Likewise.
> 
> 
> patch-7260-2.diff
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 9543f8c..89cc455 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -297,6 +297,15 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
> +   has both bits set.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  CHECK_I   = 1,	/* Perform only non-inverted immediate checks (ORR).  */
> +  CHECK_NI  = 2,	/* Perform only inverted immediate checks (BIC).  */
> +  CHECK_ALL = 3		/* Perform all checks (MOVI/MNVI).  */
> +};
> +
>  extern struct tune_params aarch64_tune_params;
>  
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> @@ -334,6 +343,8 @@ rtx aarch64_reverse_mask (enum machine_mode);
>  bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
>  char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
>  char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
> +char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
> +					     const char*);
>  bool aarch64_pad_arg_upward (machine_mode, const_tree);
>  bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
>  bool aarch64_regno_ok_for_base_p (int, bool);
> @@ -345,7 +356,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
>  bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
>  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>  bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
> -				   struct simd_immediate_info *);
> +				   struct simd_immediate_info *,
> +				   enum simd_immediate_check w = CHECK_ALL);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
>  bool aarch64_uimm12_shift (HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index c462164..92275dc 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -280,6 +280,26 @@
>    [(set_attr "type" "neon_logic<q>")]
>  )
>  
> +(define_insn "*bic_imm_<mode>3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> +		(match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> +			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); }
> +  [(set_attr "type" "neon_logic<q>")]
> +)
> +
> +(define_insn "*ior_imm_<mode>3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> +		(match_operand:VDQ_I 2 "aarch64_simd_valid_orr_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> +			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); }
> +  [(set_attr "type" "neon_logic<q>")]
> +)

Both of these generate the same RTL constructs as the simd ior<mode>3
and and<mode>3 patterns, so should be merged as subcases of those
patterns (and handled with suitable constraint alternatives).

R.

> +
>  (define_insn "add<mode>3"
>    [(set (match_operand:VDQ_I 0 "register_operand" "=w")
>          (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 4f769a4..450c42d 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11066,7 +11066,8 @@ aarch64_vect_float_const_representable_p (rtx x)
>  /* Return true for valid and false for invalid.  */
>  bool
>  aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
> -			      struct simd_immediate_info *info)
> +			      struct simd_immediate_info *info,
> +			      enum simd_immediate_check which)
>  {
>  #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
>    matches = 1;						\
> @@ -11130,54 +11131,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
>  
>    do
>      {
> -      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
> +      if (which & CHECK_I)
> +	{
> +	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
>  
> -      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
> +	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
>  
> -      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
> +	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
>  
> -      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +	}
>  
> -      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
> +      if (which & CHECK_NI)
> +	{
> +	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
>  
> -      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
> +	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
>  
> -      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
> +	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
>  
> -      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +	}
>  
> -      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +      /* Shifting ones / 8-bit / 64-bit variants only checked
> +	 for 'ALL' (MOVI/MVNI).  */
> +      if (which == CHECK_ALL)
> +	{
> +	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
> +	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
>  
> -      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> -	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> +		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +	}
>      }
>    while (0);
>  
> @@ -12598,6 +12610,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>    return templ;
>  }
>  
> +/* This function is similar to aarch64_output_simd_mov_immediate, used for
> +   immediate versions of 'bic' or 'orr'.  */
> +char*
> +aarch64_output_simd_general_immediate (rtx const_vector,
> +				       machine_mode mode,
> +				       unsigned width,
> +				       const char *mnemonic)
> +{
> +  bool is_valid;
> +  static char templ[40];
> +  unsigned int lane_count = 0;
> +  char element_char;
> +
> +  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
> +
> +  if (strcmp (mnemonic, "orr") == 0)
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					     &info, CHECK_I);
> +  else
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					     &info, CHECK_NI);
> +
> +  gcc_assert (is_valid);
> +  gcc_assert (CONST_INT_P (info.value));
> +
> +  element_char = sizetochar (info.element_width);
> +  lane_count = width / info.element_width;
> +
> +  if (lane_count == 1)
> +    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
> +	      mnemonic, UINTVAL (info.value));
> +  else if (info.shift)
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
> +	      ", %s #%d", mnemonic, lane_count, element_char,
> +	      UINTVAL (info.value), "lsl", info.shift);
> +  else
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
> +	      mnemonic, lane_count, element_char, UINTVAL (info.value));
> +  return templ;
> +}
> +
>  char*
>  aarch64_output_scalar_simd_mov_immediate (rtx immediate,
>  					  machine_mode mode)
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index e83d45b..fe65f2b 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -307,6 +307,18 @@
>    return aarch64_simd_shift_imm_p (op, mode, false);
>  })
>  
> +(define_special_predicate "aarch64_simd_valid_bic_imm_p"
> +  (match_code "const_vector")
> +{
> +  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_NI);
> +})
> +
> +(define_special_predicate "aarch64_simd_valid_orr_imm_p"
> +  (match_code "const_vector")
> +{
> +  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_I);
> +})
> +
>  (define_predicate "aarch64_simd_reg_or_zero"
>    (and (match_code "reg,subreg,const_int,const_double,const_vector")
>         (ior (match_operand 0 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> new file mode 100644
> index 0000000..d94dd90
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> @@ -0,0 +1,26 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +bic_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +void
> +bic_ss (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff00);
> +}
> +
> +void
> +bic_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> new file mode 100644
> index 0000000..919a6ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> @@ -0,0 +1,18 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +orr_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +void
> +orr_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
>

Patch hide | download patch | download mbox

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 9543f8c..89cc455 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -297,6 +297,15 @@  enum aarch64_parse_opt_result
   AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
 };
 
+/* Enum to distinguish which type of check is to be done in
+   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
+   has both bits set.  Adding new types would require changes accordingly.  */
+enum simd_immediate_check {
+  CHECK_I   = 1,	/* Perform only non-inverted immediate checks (ORR).  */
+  CHECK_NI  = 2,	/* Perform only inverted immediate checks (BIC).  */
+  CHECK_ALL = 3		/* Perform all checks (MOVI/MVNI).  */
+};
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -334,6 +343,8 @@  rtx aarch64_reverse_mask (enum machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
 char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
+char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
+					     const char*);
 bool aarch64_pad_arg_upward (machine_mode, const_tree);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
@@ -345,7 +356,8 @@  bool aarch64_simd_imm_zero_p (rtx, machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
-				   struct simd_immediate_info *);
+				   struct simd_immediate_info *,
+				   enum simd_immediate_check w = CHECK_ALL);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c462164..92275dc 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -280,6 +280,26 @@ 
   [(set_attr "type" "neon_logic<q>")]
 )
 
+(define_insn "*bic_imm_<mode>3"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
+		(match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))]
+ "TARGET_SIMD"
+ { return aarch64_output_simd_general_immediate (operands[2],
+			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); }
+  [(set_attr "type" "neon_logic<q>")]
+)
+
+(define_insn "*ior_imm_<mode>3"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
+		(match_operand:VDQ_I 2 "aarch64_simd_valid_orr_imm_p" "")))]
+ "TARGET_SIMD"
+ { return aarch64_output_simd_general_immediate (operands[2],
+			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); }
+  [(set_attr "type" "neon_logic<q>")]
+)
+
 (define_insn "add<mode>3"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4f769a4..450c42d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11066,7 +11066,8 @@  aarch64_vect_float_const_representable_p (rtx x)
 /* Return true for valid and false for invalid.  */
 bool
 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
-			      struct simd_immediate_info *info)
+			      struct simd_immediate_info *info,
+			      enum simd_immediate_check which)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -11130,54 +11131,65 @@  aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
 
   do
     {
-      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
+      if (which & CHECK_I)
+	{
+	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
 
-      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
+	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
 
-      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
+	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
 
-      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	}
 
-      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
+      if (which & CHECK_NI)
+	{
+	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
 
-      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
+	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
 
-      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
+	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
 
-      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	}
 
-      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+      /* Shifting ones / 8-bit / 64-bit variants only checked
+	 for 'ALL' (MOVI/MVNI).  */
+      if (which == CHECK_ALL)
+	{
+	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
+	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
 
-      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
-	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	}
     }
   while (0);
 
@@ -12598,6 +12610,47 @@  aarch64_output_simd_mov_immediate (rtx const_vector,
   return templ;
 }
 
+/* This function is similar to aarch64_output_simd_mov_immediate, used for
+   immediate versions of 'bic' or 'orr'.  */
+char*
+aarch64_output_simd_general_immediate (rtx const_vector,
+				       machine_mode mode,
+				       unsigned width,
+				       const char *mnemonic)
+{
+  bool is_valid;
+  static char templ[40];
+  unsigned int lane_count = 0;
+  char element_char;
+
+  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
+
+  if (strcmp (mnemonic, "orr") == 0)
+    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					     &info, CHECK_I);
+  else
+    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					     &info, CHECK_NI);
+
+  gcc_assert (is_valid);
+  gcc_assert (CONST_INT_P (info.value));
+
+  element_char = sizetochar (info.element_width);
+  lane_count = width / info.element_width;
+
+  if (lane_count == 1)
+    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
+	      mnemonic, UINTVAL (info.value));
+  else if (info.shift)
+    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
+	      ", %s #%d", mnemonic, lane_count, element_char,
+	      UINTVAL (info.value), "lsl", info.shift);
+  else
+    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
+	      mnemonic, lane_count, element_char, UINTVAL (info.value));
+  return templ;
+}
+
 char*
 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
 					  machine_mode mode)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e83d45b..fe65f2b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -307,6 +307,18 @@ 
   return aarch64_simd_shift_imm_p (op, mode, false);
 })
 
+(define_special_predicate "aarch64_simd_valid_bic_imm_p"
+  (match_code "const_vector")
+{
+  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_NI);
+})
+
+(define_special_predicate "aarch64_simd_valid_orr_imm_p"
+  (match_code "const_vector")
+{
+  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_I);
+})
+
 (define_predicate "aarch64_simd_reg_or_zero"
   (and (match_code "reg,subreg,const_int,const_double,const_vector")
        (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
new file mode 100644
index 0000000..d94dd90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
@@ -0,0 +1,26 @@ 
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+bic_s (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff);
+}
+
+void
+bic_ss (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff00);
+}
+
+void
+bic_int (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff);
+}
+
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
new file mode 100644
index 0000000..919a6ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
@@ -0,0 +1,18 @@ 
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+orr_s (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+void
+orr_int (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */