[avr,applied] Improve output of insn "*insv.any_shift.<mode>".

Message ID e0d50cbb-38d3-4740-a236-d0d4f77b1f4c@gjlay.de
State New

Commit Message

Georg-Johann Lay March 5, 2024, 11:15 a.m. UTC
Applied Roger's proposed improvements with some changes:

Lengthy code is more convenient to maintain in avr.cc than in an
insn output function, and it makes it easy to work out the exact
instruction length.  Moreover, the code can also handle shifts
with an offset of zero (the cases of the *and<mode>3 insns).
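
For illustration, a minimal sketch of such a zero-offset case
(hypothetical source, not taken from the test suite):

    typedef __UINT16_TYPE__ uint16_t;

    /* A plain AND with a single-bit mask, i.e. a bit extraction with
       shift offset 0.  The constant matches the new Cb2 constraint of
       *andhi3, so the output is produced by avr_out_insv.  */
    uint16_t keep_bit10 (uint16_t a)
    {
      return a & (1u << 10);
    }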

Passed with no new regressions on ATmega128.

Applied as https://gcc.gnu.org/r14-9317

Johann

--

AVR: Improve output of insn "*insv.any_shift.<mode>_split".

The instructions printed by insn "*insv.any_shift.<mode>_split" were
sub-optimal.  The code to print the improved output is lengthy and
performed by new function avr_out_insv.  As it turns out, the function
can also handle shift-offsets of zero, which is the case in "*andhi3",
"*andpsi3" and "*andsi3".  Thus, these three insns get a new 3-operand
alternative where the 3rd operand is an exact power of 2.

gcc/
         * config/avr/avr-protos.h (avr_out_insv): New proto.
         * config/avr/avr.cc (avr_out_insv): New function.
         (avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case.
         (avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs.
         * config/avr/avr.md (define_attr "adjust_len"): Add insv.
         (andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3):
         Add constraint alternative where the 3rd operand is a power
         of 2, and the source register may differ from the destination.
         (*insv.any_shift.<mode>_split): Call avr_out_insv to output
         instructions.  Set attr "length" to "insv".
         * config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints.

gcc/testsuite/
         * gcc.target/avr/torture/insv-anyshift-hi.c: New test.
         * gcc.target/avr/torture/insv-anyshift-si.c: New test.
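
As an illustration of the improved output, here is a sketch of the
HImode case OBIT = 10, LSR = 4 covered by insv-anyshift-hi.c.  The
expected assembly in the comment is hand-derived, not verified
compiler output:

    typedef __UINT16_TYPE__ uint16_t;

    /* Output bit 10 and input bit 14 both live in the high byte and
       are 4 bit positions apart.  Provided the register allocator
       assigns the same upper (LD_REGS) register pair to input and
       output, the |obit - ibit| == 4 case of avr_out_insv applies:

           swap r25        ; move bit 6 of the high byte to bit 2
           andi r25,1<<2   ; keep only bit 2
           clr  r24        ; clear the low result byte

       which is one word shorter than the generic
       BST + CLR + CLR + BLD sequence.  */
    uint16_t fun_lsr_10_4 (uint16_t a)
    {
      return (a >> 4) & (1u << 10);
    }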

Patch

commit 49a1a340ea0eef681f23b6861f3cdb6840aadd99
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Tue Mar 5 11:06:17 2024 +0100

    AVR: Improve output of insn "*insv.any_shift.<mode>_split".
    
    The instructions printed by insn "*insv.any_shift.<mode>_split" were
    sub-optimal.  The code to print the improved output is lengthy and
    performed by new function avr_out_insv.  As it turns out, the function
    can also handle shift-offsets of zero, which is the case in "*andhi3",
    "*andpsi3" and "*andsi3".  Thus, these three insns get a new 3-operand
    alternative where the 3rd operand is an exact power of 2.
    
    gcc/
            * config/avr/avr-protos.h (avr_out_insv): New proto.
            * config/avr/avr.cc (avr_out_insv): New function.
            (avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case.
            (avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs.
            * config/avr/avr.md (define_attr "adjust_len"): Add insv.
            (andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3):
            Add constraint alternative where the 3rd operand is a power
            of 2, and the source register may differ from the destination.
            (*insv.any_shift.<mode>_split): Call avr_out_insv to output
            instructions.  Set attr "length" to "insv".
            * config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints.
    
    gcc/testsuite/
            * gcc.target/avr/torture/insv-anyshift-hi.c: New test.
            * gcc.target/avr/torture/insv-anyshift-si.c: New test.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 3e19409d636..bb680312117 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -58,6 +58,7 @@  extern const char *ret_cond_branch (rtx x, int len, int reverse);
 extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
 extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
 extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
+extern const char *avr_out_insv (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
 extern const char *avr_out_plus_set_ZN (rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c8b2b504e3f..36995e05cbe 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9795,6 +9795,178 @@  avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen)
 }
 
 
+/* Output instructions for  XOP[0] = (XOP[1] <Shift> XOP[2]) & XOP[3]  where
+   -  XOP[0] and XOP[1] have the same mode which is one of: QI, HI, PSI, SI.
+   -  XOP[3] is an exact const_int power of 2.
+   -  XOP[2] and XOP[3] are const_int.
+   -  <Shift> is any of: ASHIFT, LSHIFTRT, ASHIFTRT.
+   -  The result depends on XOP[1].
+   or  XOP[0] = XOP[1] & XOP[2]  where
+   -  XOP[0] and XOP[1] have the same mode which is one of: HI, PSI, SI.
+   -  XOP[2] is an exact const_int power of 2.
+   Returns "".
+   PLEN != 0: Set *PLEN to the code length in words.  Don't output anything.
+   PLEN == 0: Output instructions.  */
+
+const char*
+avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
+{
+  machine_mode mode = GET_MODE (xop[0]);
+  int n_bytes = GET_MODE_SIZE (mode);
+  rtx xsrc = SET_SRC (single_set (insn));
+
+  gcc_assert (AND == GET_CODE (xsrc));
+
+  rtx xop2 = xop[2];
+  rtx xop3 = xop[3];
+
+  if (REG_P (XEXP (xsrc, 0)))
+    {
+      // This function can also handle AND with an exact power of 2,
+      // which can be regarded as a XOP[1] shift with offset 0.
+      rtx xshift = gen_rtx_ASHIFT (mode, xop[1], const0_rtx);
+      xsrc = gen_rtx_AND (mode, xshift, xop[2]);
+      xop3 = xop[2];
+      xop2 = const0_rtx;
+    }
+
+  // Any of ASHIFT, LSHIFTRT, ASHIFTRT.
+  enum rtx_code code = GET_CODE (XEXP (xsrc, 0));
+  int shift = code == ASHIFT ? INTVAL (xop2) : -INTVAL (xop2);
+
+  // Determines the position of the output bit.
+  unsigned mask = GET_MODE_MASK (mode) & INTVAL (xop3);
+
+  // Position of the output / input bit, respectively.
+  int obit = exact_log2 (mask);
+  int ibit = obit - shift;
+
+  gcc_assert (IN_RANGE (obit, 0, GET_MODE_BITSIZE (mode) - 1));
+  gcc_assert (IN_RANGE (ibit, 0, GET_MODE_BITSIZE (mode) - 1));
+
+  // In the remainder, use the sub-bytes that hold the bits.
+  rtx op[4] =
+    {
+      // Output
+      simplify_gen_subreg (QImode, xop[0], mode, obit / 8),
+      GEN_INT (obit & 7),
+      // Input
+      simplify_gen_subreg (QImode, xop[1], mode, ibit / 8),
+      GEN_INT (ibit & 7)
+    };
+  obit &= 7;
+  ibit &= 7;
+
+  // The length of the default sequence at the end of this function.
+  // We only emit anything other than the default when we find a sequence
+  // that is strictly shorter than the default sequence; which is:
+  // BST + <CLR-result-bytes> + BLD.
+  const int len0 = 2 + n_bytes - (n_bytes == 4 && AVR_HAVE_MOVW);
+
+  // Finding something shorter than the default sequence implies that there
+  // must be at most 2 instructions that deal with the bytes containing the
+  // relevant bits.  In addition, we need  N_BYTES - 1  instructions to clear
+  // the remaining result bytes.
+
+  const int n_clr = n_bytes - 1;
+  bool clr_p = false;
+  bool andi_p = false;
+
+  if (plen)
+    *plen = 0;
+
+  if (REGNO (op[0]) == REGNO (op[2])
+      // Output reg allows ANDI.
+      && test_hard_reg_class (LD_REGS, op[0]))
+    {
+      if (1 + n_clr < len0
+	  // Same byte and bit: A single ANDI will do.
+	  && obit == ibit)
+	{
+	  clr_p = andi_p = true;
+	}
+      else if (2 + n_clr < len0
+	       // |obit - ibit| = 4:  SWAP + ANDI will do.
+	       && (obit == ibit + 4 || obit == ibit - 4))
+	{
+	  avr_asm_len ("swap %0", op, plen, 1);
+	  clr_p = andi_p = true;
+	}
+      else if (2 + n_clr < len0
+	       // LSL + ANDI will do.
+	       && obit == ibit + 1)
+	{
+	  avr_asm_len ("lsl %0", op, plen, 1);
+	  clr_p = andi_p = true;
+	}
+      else if (2 + n_clr < len0
+	       // LSR + ANDI will do.
+	       && obit == ibit - 1)
+	{
+	  avr_asm_len ("lsr %0", op, plen, 1);
+	  clr_p = andi_p = true;
+	}
+    }
+
+  if (REGNO (op[0]) != REGNO (op[2])
+      && obit == ibit)
+    {
+      if (2 + n_clr < len0
+	  // Same bit but different byte: MOV + ANDI will do.
+	  && test_hard_reg_class (LD_REGS, op[0]))
+	{
+	  avr_asm_len ("mov %0,%2", op, plen, 1);
+	  clr_p = andi_p = true;
+	}
+      else if (2 + n_clr < len0
+	       // Same bit but different byte:  We can use ANDI + MOV,
+	       // but only if the input byte is LD_REGS and unused after.
+	       && test_hard_reg_class (LD_REGS, op[2])
+	       && reg_unused_after (insn, op[2]))
+	{
+	  avr_asm_len ("andi %2,1<<%3"  CR_TAB
+		       "mov %0,%2", op, plen, 2);
+	  clr_p = true;
+	}
+    }
+
+  // Output remaining instructions of the shorter sequence.
+
+  if (andi_p)
+    avr_asm_len ("andi %0,1<<%1", op, plen, 1);
+
+  if (clr_p)
+    {
+      for (int b = 0; b < n_bytes; ++b)
+	{
+	  rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+	  if (REGNO (byte) != REGNO (op[0]))
+	    avr_asm_len ("clr %0", &byte, plen, 1);
+	}
+
+      // CLR_P means we found a shorter sequence, so we are done now.
+      return "";
+    }
+
+  // No shorter sequence found, just emit  BST, CLR*, BLD  sequence.
+
+  avr_asm_len ("bst %2,%3", op, plen, -1);
+
+  if (n_bytes == 4 && AVR_HAVE_MOVW)
+    avr_asm_len ("clr %A0"   CR_TAB
+		 "clr %B0"   CR_TAB
+		 "movw %C0,%A0", xop, plen, 3);
+  else
+    for (int b = 0; b < n_bytes; ++b)
+      {
+	rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+	avr_asm_len ("clr %0", &byte, plen, 1);
+      }
+
+  return avr_asm_len ("bld %0,%1", op, plen, 1);
+}
+
+
 /* Output instructions to extract a bit to 8-bit register XOP[0].
    The input XOP[1] is a register or an 8-bit MEM in the lower I/O range.
    XOP[2] is the const_int bit position.  Return "".
@@ -10721,6 +10893,7 @@  avr_adjust_insn_length (rtx_insn *insn, int len)
     case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break;
     case ADJUST_LEN_EXTR_NOT: avr_out_extr_not (insn, op, &len); break;
     case ADJUST_LEN_EXTR: avr_out_extr (insn, op, &len); break;
+    case ADJUST_LEN_INSV: avr_out_insv (insn, op, &len); break;
 
     case ADJUST_LEN_PLUS: avr_out_plus (insn, op, &len); break;
     case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break;
@@ -12206,6 +12379,14 @@  avr_cbranch_cost (rtx x)
       return COSTS_N_INSNS (size + 1 + 1);
     }
 
+  if (GET_CODE (xreg) == ZERO_EXTRACT
+      && XEXP (xreg, 1) == const1_rtx)
+    {
+      // Branch on a single bit, with an additional edge due to less
+      // register pressure.
+      return (int) COSTS_N_INSNS (1.5);
+    }
+
   bool reg_p = register_operand (xreg, mode);
   bool reg_or_0_p = reg_or_0_operand (xval, mode);
 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 6606837b5f7..6bdf4682fab 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -170,7 +170,7 @@  (define_attr "adjust_len"
    ashlhi, ashrhi, lshrhi,
    ashlsi, ashrsi, lshrsi,
    ashlpsi, ashrpsi, lshrpsi,
-   insert_bits, insv_notbit,
+   insert_bits, insv_notbit, insv,
    add_set_ZN, cmp_uext, cmp_sext,
    no"
   (const_string "no"))
@@ -4380,10 +4380,10 @@  (define_insn "*andqi3"
   [(set_attr "length" "1,1,2")])
 
 (define_insn_and_split "andhi3"
-  [(set (match_operand:HI 0 "register_operand"       "=??r,d,d,r  ,r")
-        (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0  ,0")
-                (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,n")))
-   (clobber (match_scratch:QI 3                        "=X,X,X,X  ,&d"))]
+  [(set (match_operand:HI 0 "register_operand"       "=??r,d,d,r  ,r  ,r")
+        (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0  ,r  ,0")
+                (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,Cb2,n")))
+   (clobber (match_scratch:QI 3                        "=X,X,X,X  ,X  ,&d"))]
   ""
   "#"
   "&& reload_completed"
@@ -4394,10 +4394,10 @@  (define_insn_and_split "andhi3"
               (clobber (reg:CC REG_CC))])])
 
 (define_insn "*andhi3"
-  [(set (match_operand:HI 0 "register_operand"       "=??r,d,d,r  ,r")
-        (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0  ,0")
-                (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,n")))
-   (clobber (match_scratch:QI 3                        "=X,X,X,X  ,&d"))
+  [(set (match_operand:HI 0 "register_operand"       "=??r,d,d,r  ,r  ,r")
+        (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0  ,r  ,0")
+                (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,Cb2,n")))
+   (clobber (match_scratch:QI 3                        "=X,X,X,X  ,X  ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -4405,17 +4405,19 @@  (define_insn "*andhi3"
       return "and %A0,%A2\;and %B0,%B2";
     else if (which_alternative == 1)
       return "andi %A0,lo8(%2)\;andi %B0,hi8(%2)";
+    else if (which_alternative == 4)
+      return avr_out_insv (insn, operands, NULL);
 
     return avr_out_bitop (insn, operands, NULL);
   }
-  [(set_attr "length" "2,2,2,4,4")
-   (set_attr "adjust_len" "*,*,out_bitop,out_bitop,out_bitop")])
+  [(set_attr "length" "2,2,2,4,4,4")
+   (set_attr "adjust_len" "*,*,out_bitop,out_bitop,insv,out_bitop")])
 
 (define_insn_and_split "andpsi3"
-  [(set (match_operand:PSI 0 "register_operand"        "=??r,d,r  ,r")
-        (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0  ,0")
-                 (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,n")))
-   (clobber (match_scratch:QI 3                          "=X,X,X  ,&d"))]
+  [(set (match_operand:PSI 0 "register_operand"        "=??r,d,r  ,r  ,r")
+        (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0  ,r  ,0")
+                 (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,Cb3,n")))
+   (clobber (match_scratch:QI 3                          "=X,X,X  ,X  ,&d"))]
   ""
   "#"
   "&& reload_completed"
@@ -4426,10 +4428,10 @@  (define_insn_and_split "andpsi3"
               (clobber (reg:CC REG_CC))])])
 
 (define_insn "*andpsi3"
-  [(set (match_operand:PSI 0 "register_operand"        "=??r,d,r  ,r")
-        (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0  ,0")
-                 (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,n")))
-   (clobber (match_scratch:QI 3                          "=X,X,X  ,&d"))
+  [(set (match_operand:PSI 0 "register_operand"        "=??r,d,r  ,r  ,r")
+        (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0  ,r  ,0")
+                 (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,Cb3,n")))
+   (clobber (match_scratch:QI 3                          "=X,X,X  ,X  ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -4438,16 +4440,19 @@  (define_insn "*andpsi3"
              "and %B0,%B2" CR_TAB
              "and %C0,%C2";
 
+    if (which_alternative == 3)
+      return avr_out_insv (insn, operands, NULL);
+
     return avr_out_bitop (insn, operands, NULL);
   }
-  [(set_attr "length" "3,3,6,6")
-   (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop")])
+  [(set_attr "length" "3,3,6,5,6")
+   (set_attr "adjust_len" "*,out_bitop,out_bitop,insv,out_bitop")])
 
 (define_insn_and_split "andsi3"
-  [(set (match_operand:SI 0 "register_operand"       "=??r,d,r  ,r")
-        (and:SI (match_operand:SI 1 "register_operand" "%0,0,0  ,0")
-                (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,n")))
-   (clobber (match_scratch:QI 3                        "=X,X,X  ,&d"))]
+  [(set (match_operand:SI 0 "register_operand"       "=??r,d,r  ,r  ,r")
+        (and:SI (match_operand:SI 1 "register_operand" "%0,0,0  ,r  ,0")
+                (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,Cb4,n")))
+   (clobber (match_scratch:QI 3                        "=X,X,X  ,X  ,&d"))]
   ""
   "#"
   "&& reload_completed"
@@ -4458,10 +4463,10 @@  (define_insn_and_split "andsi3"
               (clobber (reg:CC REG_CC))])])
 
 (define_insn "*andsi3"
-  [(set (match_operand:SI 0 "register_operand"       "=??r,d,r  ,r")
-        (and:SI (match_operand:SI 1 "register_operand" "%0,0,0  ,0")
-                (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,n")))
-   (clobber (match_scratch:QI 3                        "=X,X,X  ,&d"))
+  [(set (match_operand:SI 0 "register_operand"       "=??r,d,r  ,r  ,r")
+        (and:SI (match_operand:SI 1 "register_operand" "%0,0,0  ,r  ,0")
+                (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,Cb4,n")))
+   (clobber (match_scratch:QI 3                        "=X,X,X  ,X  ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -4471,10 +4476,13 @@  (define_insn "*andsi3"
              "and %C0,%C2" CR_TAB
              "and %D0,%D2";
 
+    if (which_alternative == 3)
+      return avr_out_insv (insn, operands, NULL);
+
     return avr_out_bitop (insn, operands, NULL);
   }
-  [(set_attr "length" "4,4,8,8")
-   (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop")])
+  [(set_attr "length" "4,4,8,6,8")
+   (set_attr "adjust_len" "*,out_bitop,out_bitop,insv,out_bitop")])
 
 (define_peephole2 ; andi
   [(parallel [(set (match_operand:QI 0 "d_register_operand" "")
@@ -9852,6 +9860,12 @@  (define_insn_and_split "*extzv.io.lsr7"
                          (const_int 1)
                          (const_int 7)))])
 
+;; This insn serves as a combine bridge because insn combine will only
+;; combine so many (at most 3) insns.  It's not actually an open coded
+;; bit-insertion but just a part of it.  It may occur in other contexts
+;; than INSV though, and in such a case the code may be worse than without
+;; this pattern.  We still have to emit code for it in that case because
+;; we cannot roll back.
 (define_insn_and_split "*insv.any_shift.<mode>_split"
   [(set (match_operand:QISI 0 "register_operand" "=r")
         (and:QISI (any_shift:QISI (match_operand:QISI 1 "register_operand" "r")
@@ -9874,27 +9888,9 @@  (define_insn "*insv.any_shift.<mode>"
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
-    int shift = <CODE> == ASHIFT ? INTVAL (operands[2]) : -INTVAL (operands[2]);
-    int mask = GET_MODE_MASK (<MODE>mode) & INTVAL (operands[3]);
-    // Position of the output / input bit, respectively.
-    int obit = exact_log2 (mask);
-    int ibit = obit - shift;
-    gcc_assert (IN_RANGE (obit, 0, <MSB>));
-    gcc_assert (IN_RANGE (ibit, 0, <MSB>));
-    operands[3] = GEN_INT (obit);
-    operands[2] = GEN_INT (ibit);
-
-    if (<SIZE> == 1) return "bst %T1%T2\;clr %0\;"                 "bld %T0%T3";
-    if (<SIZE> == 2) return "bst %T1%T2\;clr %A0\;clr %B0\;"       "bld %T0%T3";
-    if (<SIZE> == 3) return "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;bld %T0%T3";
-    return AVR_HAVE_MOVW
-      ? "bst %T1%T2\;clr %A0\;clr %B0\;movw %C0,%A0\;"  "bld %T0%T3"
-      : "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;clr %D0\;bld %T0%T3";
+    return avr_out_insv (insn, operands, nullptr);
   }
-  [(set (attr "length")
-        (minus (symbol_ref "2 + <SIZE>")
-               ; One less if we can use a MOVW to clear.
-               (symbol_ref "<SIZE> == 4 && AVR_HAVE_MOVW")))])
+  [(set_attr "adjust_len" "insv")])
 
 
 (define_insn_and_split "*extzv.<mode>hi2"
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
index 81ed63db2cc..fac54da17db 100644
--- a/gcc/config/avr/constraints.md
+++ b/gcc/config/avr/constraints.md
@@ -188,6 +188,21 @@  (define_constraint "Co4"
   (and (match_code "const_int")
        (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<1) | (1<<8))")))
 
+(define_constraint "Cb2"
+  "Constant 2-byte integer that has exactly 1 bit set."
+  (and (match_code "const_int")
+       (match_test "single_one_operand (op, HImode)")))
+
+(define_constraint "Cb3"
+  "Constant 3-byte integer that has exactly 1 bit set."
+  (and (match_code "const_int")
+       (match_test "single_one_operand (op, PSImode)")))
+
+(define_constraint "Cb4"
+  "Constant 4-byte integer that has exactly 1 bit set."
+  (and (match_code "const_int")
+       (match_test "single_one_operand (op, SImode)")))
+
 (define_constraint "Cx2"
   "Constant 2-byte integer that allows XOR without clobber register."
   (and (match_code "const_int")
diff --git a/gcc/testsuite/gcc.target/avr/torture/insv-anyshift-hi.c b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift-hi.c
new file mode 100644
index 00000000000..7ee5c04813a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift-hi.c
@@ -0,0 +1,141 @@ 
+/* { dg-do run } */
+/* { dg-additional-options { -fno-split-wide-types } } */
+
+typedef __UINT16_TYPE__ uint16_t;
+
+/* Testing inlined and completely folded versions of functions
+   against their non-inlined, non-folded counterparts.  */
+
+#define MK_FUN1(OBIT, LSR)						\
+  static __inline__ __attribute__((__always_inline__))			\
+  uint16_t fun1_lsr_##OBIT##_##LSR##_ai (int x, uint16_t a)		\
+  {									\
+    (void) x;								\
+    return (a >> LSR) & (1u << OBIT);					\
+  }									\
+									\
+  __attribute__((__noinline__,__noclone__))				\
+  uint16_t fun1_lsr_##OBIT##_##LSR##_ni (int x, uint16_t a)		\
+  {									\
+    return fun1_lsr_##OBIT##_##LSR##_ai (x, a);				\
+  }									\
+									\
+  void test_fun1_lsr_##OBIT##_##LSR (void)				\
+  {									\
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1u << (OBIT + LSR))		\
+	!= fun1_lsr_##OBIT##_##LSR##_ai (0, 1u << (OBIT + LSR)))	\
+      __builtin_abort();						\
+									\
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1u << (OBIT + LSR))		\
+	!= fun1_lsr_##OBIT##_##LSR##_ai (0, -1u))			\
+      __builtin_abort();						\
+  }
+
+#define MK_FUN3(OBIT, LSR)						\
+  static __inline__ __attribute__((__always_inline__))			\
+  uint16_t fun3_lsr_##OBIT##_##LSR##_ai (uint16_t a)			\
+  {									\
+    return (a >> LSR) & (1u << OBIT);					\
+  }									\
+									\
+  __attribute__((__noinline__,__noclone__))				\
+  uint16_t fun3_lsr_##OBIT##_##LSR##_ni (uint16_t a)			\
+  {									\
+    return fun3_lsr_##OBIT##_##LSR##_ai (a);				\
+  }									\
+									\
+  void test_fun3_lsr_##OBIT##_##LSR (void)				\
+  {									\
+    if (fun3_lsr_##OBIT##_##LSR##_ni (1u << (OBIT + LSR))		\
+	!= fun3_lsr_##OBIT##_##LSR##_ai (1u << (OBIT + LSR)))		\
+      __builtin_abort();						\
+									\
+    if (fun3_lsr_##OBIT##_##LSR##_ni (1u << (OBIT + LSR))		\
+	!= fun3_lsr_##OBIT##_##LSR##_ai (-1u))				\
+      __builtin_abort();						\
+  }
+
+
+#define MK_FUN2(OBIT, LSL)						\
+  static __inline__ __attribute__((__always_inline__))			\
+  uint16_t fun2_lsl_##OBIT##_##LSL##_ai (uint16_t a)			\
+  {									\
+    return (a << LSL) & (1u << OBIT);					\
+  }									\
+									\
+  __attribute__((__noinline__,__noclone__))				\
+  uint16_t fun2_lsl_##OBIT##_##LSL##_ni (uint16_t a)			\
+  {									\
+    return fun2_lsl_##OBIT##_##LSL##_ai (a);				\
+  }									\
+									\
+  void test_fun2_lsl_##OBIT##_##LSL (void)				\
+  {									\
+    if (fun2_lsl_##OBIT##_##LSL##_ni (1u << (OBIT - LSL))		\
+	!= fun2_lsl_##OBIT##_##LSL##_ai (1u << (OBIT - LSL)))		\
+      __builtin_abort();						\
+									\
+    if (fun2_lsl_##OBIT##_##LSL##_ni (1u << (OBIT - LSL))		\
+	!= fun2_lsl_##OBIT##_##LSL##_ai (-1u))				\
+      __builtin_abort();						\
+  }
+
+
+MK_FUN1 (10, 4)
+MK_FUN1 (6, 1)
+MK_FUN1 (1, 5)
+MK_FUN1 (0, 8)
+MK_FUN1 (0, 4)
+MK_FUN1 (0, 1)
+MK_FUN1 (0, 0)
+
+MK_FUN3 (10, 4)
+MK_FUN3 (6, 1)
+MK_FUN3 (1, 5)
+MK_FUN3 (0, 8)
+MK_FUN3 (0, 4)
+MK_FUN3 (0, 1)
+MK_FUN3 (0, 0)
+
+MK_FUN2 (12, 8)
+MK_FUN2 (15, 15)
+MK_FUN2 (14, 12)
+MK_FUN2 (8, 8)
+MK_FUN2 (7, 4)
+MK_FUN2 (5, 4)
+MK_FUN2 (5, 1)
+MK_FUN2 (4, 0)
+MK_FUN2 (1, 0)
+MK_FUN2 (0, 0)
+
+int main (void)
+{
+  test_fun1_lsr_10_4 ();
+  test_fun1_lsr_6_1 ();
+  test_fun1_lsr_1_5 ();
+  test_fun1_lsr_0_8 ();
+  test_fun1_lsr_0_4 ();
+  test_fun1_lsr_0_1 ();
+  test_fun1_lsr_0_0 ();
+
+  test_fun3_lsr_10_4 ();
+  test_fun3_lsr_6_1 ();
+  test_fun3_lsr_1_5 ();
+  test_fun3_lsr_0_8 ();
+  test_fun3_lsr_0_4 ();
+  test_fun3_lsr_0_1 ();
+  test_fun3_lsr_0_0 ();
+
+  test_fun2_lsl_12_8 ();
+  test_fun2_lsl_15_15 ();
+  test_fun2_lsl_14_12 ();
+  test_fun2_lsl_8_8 ();
+  test_fun2_lsl_7_4 ();
+  test_fun2_lsl_5_4 ();
+  test_fun2_lsl_5_1 ();
+  test_fun2_lsl_4_0 ();
+  test_fun2_lsl_1_0 ();
+  test_fun2_lsl_0_0 ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/insv-anyshift-si.c b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift-si.c
new file mode 100644
index 00000000000..f52593cf0a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift-si.c
@@ -0,0 +1,89 @@ 
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+/* Testing inlined and completely folded versions of functions
+   against their non-inlined, non-folded counterparts.  */
+
+#define MK_FUN1(OBIT, LSR)						\
+  static __inline__ __attribute__((__always_inline__))			\
+  uint32_t fun1_lsr_##OBIT##_##LSR##_ai (int x, uint32_t a)		\
+  {									\
+    (void) x;								\
+    return (a >> LSR) & (1ul << OBIT);					\
+  }									\
+									\
+  __attribute__((__noinline__,__noclone__))				\
+  uint32_t fun1_lsr_##OBIT##_##LSR##_ni (int x, uint32_t a)		\
+  {									\
+    return fun1_lsr_##OBIT##_##LSR##_ai (x, a);				\
+  }									\
+									\
+  void test_fun1_lsr_##OBIT##_##LSR (void)				\
+  {									\
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR))		\
+	!= fun1_lsr_##OBIT##_##LSR##_ai (0, 1ul << (OBIT + LSR)))	\
+      __builtin_abort();						\
+									\
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR))		\
+	!= fun1_lsr_##OBIT##_##LSR##_ai (0, -1ul))			\
+      __builtin_abort();						\
+  }
+  
+
+#define MK_FUN2(OBIT, LSL)						\
+  static __inline__ __attribute__((__always_inline__))			\
+  uint32_t fun2_lsl_##OBIT##_##LSL##_ai (int x, uint32_t a)		\
+  {									\
+    (void) x;								\
+    return (a << LSL) & (1ul << OBIT);					\
+  }									\
+									\
+  __attribute__((__noinline__,__noclone__))				\
+  uint32_t fun2_lsl_##OBIT##_##LSL##_ni (int x, uint32_t a)		\
+  {									\
+    return fun2_lsl_##OBIT##_##LSL##_ai (x, a);				\
+  }									\
+									\
+  void test_fun2_lsl_##OBIT##_##LSL (void)				\
+  {									\
+    if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL))		\
+	!= fun2_lsl_##OBIT##_##LSL##_ai (0, 1ul << (OBIT - LSL)))	\
+      __builtin_abort();						\
+									\
+    if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL))		\
+	!= fun2_lsl_##OBIT##_##LSL##_ai (0, -1ul))			\
+      __builtin_abort();						\
+  }
+
+
+MK_FUN1 (13, 15)
+MK_FUN1 (13, 16)
+MK_FUN1 (13, 17)
+MK_FUN1 (13, 12)
+MK_FUN1 (0, 31)
+MK_FUN1 (0, 8)
+MK_FUN1 (0, 0)
+
+MK_FUN2 (12, 8)
+MK_FUN2 (13, 8)
+MK_FUN2 (16, 8)
+MK_FUN2 (16, 0)
+
+int main (void)
+{
+  test_fun1_lsr_13_15 ();
+  test_fun1_lsr_13_16 ();
+  test_fun1_lsr_13_17 ();
+  test_fun1_lsr_13_12 ();
+  test_fun1_lsr_0_31 ();
+  test_fun1_lsr_0_8 ();
+  test_fun1_lsr_0_0 ();
+
+  test_fun2_lsl_12_8 ();
+  test_fun2_lsl_13_8 ();
+  test_fun2_lsl_16_8 ();
+  test_fun2_lsl_16_0 ();
+
+  return 0;
+}