diff mbox

[AVR] : PR49687 (better widening 32-bit mul)

Message ID 4E31796C.4090104@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay July 28, 2011, 2:59 p.m. UTC
Richard Henderson wrote:
> On 07/27/2011 06:21 AM, Georg-Johann Lay wrote:
>> +(define_insn_and_split "*mulsi3"
>> +  [(set (match_operand:SI 0 "pseudo_register_operand"                      "=r")
>> +        (mult:SI (match_operand:SI 1 "pseudo_register_operand"              "r")
>> +                 (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
>> +   (clobber (reg:DI 18))]
>> +  "AVR_HAVE_MUL && !reload_completed"
>> +  { gcc_unreachable(); }
>> +  "&& 1"
>> +  [(set (reg:SI 18)
>> +        (match_dup 1))
> 
> That seems like it's guaranteed to force an unnecessary move.
> Have you tried defining special-purpose register classes to
> force reload to move the data into the right hard regs?
> 
> E.g.  "Y" prefix
>       "QHS" size
>       two digit starting register number, as needed.
> 
> You'll probably end up with quite a few register classes 
> out of this, but hopefully reload can do a better job than
> you can manually...
> 
> 
> r~

Waaaaaahh, I introduced register classes and constraints to tell the register
allocator what the intention of the insns is; the ... parts just deal with
CONST_INTs and are not needed in the remainder:

(define_expand "mulsi3"
  [(parallel [(set (match_operand:SI 0 "register_operand" "")
                   (mult:SI (match_operand:SI 1 "register_operand" "")
                            (match_operand:SI 2 "nonmemory_operand" "")))
              (clobber (reg:HI 26))])]
  "AVR_HAVE_MUL"
  {
     ...
  })

(define_insn_and_split "*mulsi3"
  [(set (match_operand:SI 0 "register_operand"           "=RS22")
        (mult:SI (match_operand:SI 1 "register_operand"  "%RS22")
                 (match_operand:SI 2 "nonmemory_operand"  "RS18")))
   (clobber (reg:HI 26))]
  "AVR_HAVE_MUL"
  "%~call __mulsi3"
  "&& !reload_completed"
  [(clobber (const_int 0))]
  {
    ...
    FAIL;
  }
  [(set_attr "type" "xcall")
   (set_attr "cc" "clobber")])


Again, I used the simple test case from above:

/* Test case: multiply two long values, with the operands used in the
   same order as the parameters (a * b).  */
long mul (long a, long b)
{
    return a*b;
}

/* Test case: the same multiplication as mul, but with the operands
   written in the opposite order (b * a).  */
long mul2 (long a, long b)
{
    return b*a;
}

Compiled with -Os -mmcu=atmega8 -fno-split-wide-types:

mul:
/* prologue: function */
	rcall __mulsi3	 ;  7	*mulsi3	[length = 1]
/* epilogue start */
	ret	 ;  21	return	[length = 1]

mul2:
	push r8	 ;  22	*pushqi/1	[length = 1]
	push r9	 ;  23	*pushqi/1	[length = 1]
	push r10	 ;  24	*pushqi/1	[length = 1]
	push r11	 ;  25	*pushqi/1	[length = 1]
	push r12	 ;  26	*pushqi/1	[length = 1]
	push r13	 ;  27	*pushqi/1	[length = 1]
	push r14	 ;  28	*pushqi/1	[length = 1]
	push r15	 ;  29	*pushqi/1	[length = 1]
	push r28	 ;  30	*pushqi/1	[length = 1]
	push r29	 ;  31	*pushqi/1	[length = 1]
	rcall .	 ;  35	*addhi3_sp_R_pc2	[length = 2]
	rcall .
	in r28,__SP_L__	 ;  36	*movhi_sp/2	[length = 2]
	in r29,__SP_H__
/* prologue: function */
/* frame size = 4 */
/* stack size = 14 */
.L__stack_usage = 14
	movw r12,r22	 ;  2	*movsi/1	[length = 2]
	movw r14,r24
	movw r24,r20	 ;  19	*movsi/1	[length = 2]
	movw r22,r18
	movw r20,r14	 ;  21	*movsi/1	[length = 2]
	movw r18,r12
	rcall __mulsi3	 ;  7	*mulsi3	[length = 1]
/* epilogue start */
	pop __tmp_reg__	 ;  41	*addhi3_sp_R_pc2	[length = 4]
	pop __tmp_reg__
	pop __tmp_reg__
	pop __tmp_reg__
	pop r29	 ;  42	popqi	[length = 1]
	pop r28	 ;  43	popqi	[length = 1]
	pop r15	 ;  44	popqi	[length = 1]
	pop r14	 ;  45	popqi	[length = 1]
	pop r13	 ;  46	popqi	[length = 1]
	pop r12	 ;  47	popqi	[length = 1]
	pop r11	 ;  48	popqi	[length = 1]
	pop r10	 ;  49	popqi	[length = 1]
	pop r9	 ;  50	popqi	[length = 1]
	pop r8	 ;  51	popqi	[length = 1]
	ret	 ;  52	return_from_epilogue	[length = 1]


With -fsplit-wide-types (which is on by default) the code is even
worse, and the first function inflates to unacceptable code, too.

Using constraints "=RS22,%0,RS18" instead of "=RS22,%RS22,RS18"
the code of the second function is a bit better:

mul2:
	push r28	 ;  20	*pushqi/1	[length = 1]
	push r29	 ;  21	*pushqi/1	[length = 1]
	rcall .	 ;  25	*addhi3_sp_R_pc2	[length = 2]
	rcall .
	in r28,__SP_L__	 ;  26	*movhi_sp/2	[length = 2]
	in r29,__SP_H__
/* prologue: function */
/* frame size = 4 */
/* stack size = 6 */
.L__stack_usage = 6
	std Y+1,r22	 ;  2	*movsi/4	[length = 4]
	std Y+2,r23
	std Y+3,r24
	std Y+4,r25
	movw r24,r20	 ;  3	*movsi/1	[length = 2]
	movw r22,r18
	ldd r18,Y+1	 ;  19	*movsi/3	[length = 4]
	ldd r19,Y+2
	ldd r20,Y+3
	ldd r21,Y+4
	rcall __mulsi3	 ;  7	*mulsi3	[length = 1]
/* epilogue start */
	pop __tmp_reg__	 ;  31	*addhi3_sp_R_pc2	[length = 4]
	pop __tmp_reg__
	pop __tmp_reg__
	pop __tmp_reg__
	pop r29	 ;  32	popqi	[length = 1]
	pop r28	 ;  33	popqi	[length = 1]
	ret	 ;  34	return_from_epilogue	[length = 1]


Remember that without register constraints and explicit hard-reg
move expansion the result was (both with -fsplit-wide-types and
-fno-split-wide-types):

mul:
	push r12	 ;  35	*pushqi/1	[length = 1]
	push r13	 ;  36	*pushqi/1	[length = 1]
	push r14	 ;  37	*pushqi/1	[length = 1]
	push r15	 ;  38	*pushqi/1	[length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 4 */
.L__stack_usage = 4
	movw r12,r22	 ;  2	*movsi/1	[length = 2]
	movw r14,r24
	movw r24,r20	 ;  3	*movsi/1	[length = 2]
	movw r22,r18
	movw r20,r14	 ;  26	*movsi/1	[length = 2]
	movw r18,r12
	rcall __mulsi3	 ;  28	*mulsi3_call	[length = 1]
/* epilogue start */
	pop r15	 ;  41	popqi	[length = 1]
	pop r14	 ;  42	popqi	[length = 1]
	pop r13	 ;  43	popqi	[length = 1]
	pop r12	 ;  44	popqi	[length = 1]
	ret	 ;  45	return_from_epilogue	[length = 1]

So it appears that IRA is not as smart as we thought and not
prepared for this...

Or did I do something fundamentally wrong?

Attached are the changes that I made.  Note that the md file has only some
changes for the simple example, as I had a look at what the compiler produces
and I was not amused.

Johann

Comments

Denis Chertykov July 31, 2011, 2:41 p.m. UTC | #1
2011/7/28 Georg-Johann Lay <avr@gjlay.de>:
> Richard Henderson wrote:
>> On 07/27/2011 06:21 AM, Georg-Johann Lay wrote:
>>> +(define_insn_and_split "*mulsi3"
>>> +  [(set (match_operand:SI 0 "pseudo_register_operand"                      "=r")
>>> +        (mult:SI (match_operand:SI 1 "pseudo_register_operand"              "r")
>>> +                 (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
>>> +   (clobber (reg:DI 18))]
>>> +  "AVR_HAVE_MUL && !reload_completed"
>>> +  { gcc_unreachable(); }
>>> +  "&& 1"
>>> +  [(set (reg:SI 18)
>>> +        (match_dup 1))
>>
>> That seems like it's guaranteed to force an unnecessary move.
>> Have you tried defining special-purpose register classes to
>> force reload to move the data into the right hard regs?
>>
>> E.g.  "Y" prefix
>>       "QHS" size
>>       two digit starting register number, as needed.
>>
>> You'll probably end up with quite a few register classes
>> out of this, but hopefully reload can do a better job than
>> you can manually...
>>
>>
>> r~
>
> Waaaaaahh, I introduced register classes and constraints to tell register allocator
> what's the intention if the insns, the ... parts just dealing with CONST_INTs
> and not needed in the remainder:
>
> (define_expand "mulsi3"
>  [(parallel [(set (match_operand:SI 0 "register_operand" "")
>                   (mult:SI (match_operand:SI 1 "register_operand" "")
>                            (match_operand:SI 2 "nonmemory_operand" "")))
>              (clobber (reg:HI 26))])]
>  "AVR_HAVE_MUL"
>  {
>     ...
>  })
>
> (define_insn_and_split "*mulsi3"
>  [(set (match_operand:SI 0 "register_operand"           "=RS22")
>        (mult:SI (match_operand:SI 1 "register_operand"  "%RS22")
>                 (match_operand:SI 2 "nonmemory_operand"  "RS18")))
>   (clobber (reg:HI 26))]
>  "AVR_HAVE_MUL"
>  "%~call __mulsi3"
>  "&& !reload_completed"
>  [(clobber (const_int 0))]
>  {
>    ...
>    FAIL;
>  }
>  [(set_attr "type" "xcall")
>   (set_attr "cc" "clobber")])
>
>
> Again, I used the simple test case from above:
>
> long mul (long a, long b)
> {
>    return a*b;
> }
>
> long mul2 (long a, long b)
> {
>    return b*a;
> }
>
> Compiled with -Os -mmcu=atmega8 -fno-split-wide-types:
>
> mul:
> /* prologue: function */
>        rcall __mulsi3   ;  7   *mulsi3 [length = 1]
> /* epilogue start */
>        ret      ;  21  return  [length = 1]
>
> mul2:
>        push r8  ;  22  *pushqi/1       [length = 1]
>        push r9  ;  23  *pushqi/1       [length = 1]
>        push r10         ;  24  *pushqi/1       [length = 1]
>        push r11         ;  25  *pushqi/1       [length = 1]
>        push r12         ;  26  *pushqi/1       [length = 1]
>        push r13         ;  27  *pushqi/1       [length = 1]
>        push r14         ;  28  *pushqi/1       [length = 1]
>        push r15         ;  29  *pushqi/1       [length = 1]
>        push r28         ;  30  *pushqi/1       [length = 1]
>        push r29         ;  31  *pushqi/1       [length = 1]
>        rcall .  ;  35  *addhi3_sp_R_pc2        [length = 2]
>        rcall .
>        in r28,__SP_L__  ;  36  *movhi_sp/2     [length = 2]
>        in r29,__SP_H__
> /* prologue: function */
> /* frame size = 4 */
> /* stack size = 14 */
> .L__stack_usage = 14
>        movw r12,r22     ;  2   *movsi/1        [length = 2]
>        movw r14,r24
>        movw r24,r20     ;  19  *movsi/1        [length = 2]
>        movw r22,r18
>        movw r20,r14     ;  21  *movsi/1        [length = 2]
>        movw r18,r12
>        rcall __mulsi3   ;  7   *mulsi3 [length = 1]
> /* epilogue start */
>        pop __tmp_reg__  ;  41  *addhi3_sp_R_pc2        [length = 4]
>        pop __tmp_reg__
>        pop __tmp_reg__
>        pop __tmp_reg__
>        pop r29  ;  42  popqi   [length = 1]
>        pop r28  ;  43  popqi   [length = 1]
>        pop r15  ;  44  popqi   [length = 1]
>        pop r14  ;  45  popqi   [length = 1]
>        pop r13  ;  46  popqi   [length = 1]
>        pop r12  ;  47  popqi   [length = 1]
>        pop r11  ;  48  popqi   [length = 1]
>        pop r10  ;  49  popqi   [length = 1]
>        pop r9   ;  50  popqi   [length = 1]
>        pop r8   ;  51  popqi   [length = 1]
>        ret      ;  52  return_from_epilogue    [length = 1]
>
>
> With -fsplit-wide-types (which is on per default) the code is even
> worse and the first function inflates to unacceptable code, too.
>
> Using constraints "=RS22,%0,RS18" instead of "=RS22,%RS22,RS18"
> the code of the second function is a bit better:
>
> mul2:
>        push r28         ;  20  *pushqi/1       [length = 1]
>        push r29         ;  21  *pushqi/1       [length = 1]
>        rcall .  ;  25  *addhi3_sp_R_pc2        [length = 2]
>        rcall .
>        in r28,__SP_L__  ;  26  *movhi_sp/2     [length = 2]
>        in r29,__SP_H__
> /* prologue: function */
> /* frame size = 4 */
> /* stack size = 6 */
> .L__stack_usage = 6
>        std Y+1,r22      ;  2   *movsi/4        [length = 4]
>        std Y+2,r23
>        std Y+3,r24
>        std Y+4,r25
>        movw r24,r20     ;  3   *movsi/1        [length = 2]
>        movw r22,r18
>        ldd r18,Y+1      ;  19  *movsi/3        [length = 4]
>        ldd r19,Y+2
>        ldd r20,Y+3
>        ldd r21,Y+4
>        rcall __mulsi3   ;  7   *mulsi3 [length = 1]
> /* epilogue start */
>        pop __tmp_reg__  ;  31  *addhi3_sp_R_pc2        [length = 4]
>        pop __tmp_reg__
>        pop __tmp_reg__
>        pop __tmp_reg__
>        pop r29  ;  32  popqi   [length = 1]
>        pop r28  ;  33  popqi   [length = 1]
>        ret      ;  34  return_from_epilogue    [length = 1]
>
>
> Remember that without register constraints and explicit hard-reg
> move expansion the result was (both with -fsplit-wide-types and
> -fno-split-wide-types):
>
> mul:
>        push r12         ;  35  *pushqi/1       [length = 1]
>        push r13         ;  36  *pushqi/1       [length = 1]
>        push r14         ;  37  *pushqi/1       [length = 1]
>        push r15         ;  38  *pushqi/1       [length = 1]
> /* prologue: function */
> /* frame size = 0 */
> /* stack size = 4 */
> .L__stack_usage = 4
>        movw r12,r22     ;  2   *movsi/1        [length = 2]
>        movw r14,r24
>        movw r24,r20     ;  3   *movsi/1        [length = 2]
>        movw r22,r18
>        movw r20,r14     ;  26  *movsi/1        [length = 2]
>        movw r18,r12
>        rcall __mulsi3   ;  28  *mulsi3_call    [length = 1]
> /* epilogue start */
>        pop r15  ;  41  popqi   [length = 1]
>        pop r14  ;  42  popqi   [length = 1]
>        pop r13  ;  43  popqi   [length = 1]
>        pop r12  ;  44  popqi   [length = 1]
>        ret      ;  45  return_from_epilogue    [length = 1]
>
> So it appears that IRA is not as smart as we thought and not
> prepared for this...

I tried this method before I implemented the current one.
I dropped the "right constraints" idea because of "unable to find a
register to spill" errors.
It seems that something has changed (the register allocator has changed),
but forcing the move again seems to be the best solution.

Denis.
diff mbox

Patch

Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 176865)
+++ config/avr/avr.md	(working copy)
@@ -726,7 +726,7 @@  (define_insn "*addhi3_zero_extend1"
 (define_insn "*addhi3_sp_R_pc2"
   [(set (match_operand:HI 1 "stack_register_operand" "=q")
         (plus:HI (match_operand:HI 2 "stack_register_operand" "q")
-                 (match_operand:HI 0 "avr_sp_immediate_operand" "R")))]
+                 (match_operand:HI 0 "avr_sp_immediate_operand" "C65")))]
   "AVR_2_BYTE_PC"
   "*{
       if (CONST_INT_P (operands[0]))
@@ -795,7 +795,7 @@  (define_insn "*addhi3_sp_R_pc2"
 (define_insn "*addhi3_sp_R_pc3"
   [(set (match_operand:HI 1 "stack_register_operand" "=q")
         (plus:HI (match_operand:HI 2 "stack_register_operand" "q")
-                 (match_operand:QI 0 "avr_sp_immediate_operand" "R")))]
+                 (match_operand:QI 0 "avr_sp_immediate_operand" "C65")))]
   "AVR_3_BYTE_PC"
   "*{
       if (CONST_INT_P (operands[0]))
@@ -1372,7 +1372,7 @@  (define_expand "mulsi3"
   [(parallel [(set (match_operand:SI 0 "register_operand" "")
                    (mult:SI (match_operand:SI 1 "register_operand" "")
                             (match_operand:SI 2 "nonmemory_operand" "")))
-              (clobber (reg:DI 18))])]
+              (clobber (reg:HI 26))])]
   "AVR_HAVE_MUL"
   {
     if (u16_operand (operands[2], SImode))
@@ -1391,23 +1391,14 @@  (define_expand "mulsi3"
   })
 
 (define_insn_and_split "*mulsi3"
-  [(set (match_operand:SI 0 "pseudo_register_operand"                      "=r")
-        (mult:SI (match_operand:SI 1 "pseudo_register_operand"              "r")
-                 (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
-   (clobber (reg:DI 18))]
-  "AVR_HAVE_MUL && !reload_completed"
-  { gcc_unreachable(); }
-  "&& 1"
-  [(set (reg:SI 18)
-        (match_dup 1))
-   (set (reg:SI 22) 
-        (match_dup 2))
-   (parallel [(set (reg:SI 22)
-                   (mult:SI (reg:SI 22)
-                            (reg:SI 18)))
-              (clobber (reg:HI 26))])
-   (set (match_dup 0)
-        (reg:SI 22))]
+  [(set (match_operand:SI 0 "register_operand"           "=RS22")
+        (mult:SI (match_operand:SI 1 "register_operand"  "%0")
+                 (match_operand:SI 2 "nonmemory_operand"  "RS18")))
+   (clobber (reg:HI 26))]
+  "AVR_HAVE_MUL"
+  "%~call __mulsi3"
+  "&& !reload_completed"
+  [(clobber (const_int 0))]
   {
     if (u16_operand (operands[2], SImode))
       {
@@ -1422,37 +1413,43 @@  (define_insn_and_split "*mulsi3"
         emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
         DONE;
       }
-  })
 
-;; "muluqisi3"
-;; "muluhisi3"
-(define_insn_and_split "mulu<mode>si3"
-  [(set (match_operand:SI 0 "pseudo_register_operand"                           "=r")
-        (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
-                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"      "rn")))
-   (clobber (reg:DI 18))]
-  "AVR_HAVE_MUL && !reload_completed"
+    FAIL;
+  }
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "muluhisi3"
+  [(set (match_operand:SI 0 "register_operand"                           "=RS22")
+        (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand"   "x"))
+                 (match_operand:SI 2 "register_operand"                   "RS18")))]
+  "AVR_HAVE_MUL"
+  "%~call __muluhisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; "*muluqisi3.split"
+;; "*muluhisi3.split"
+(define_insn_and_split "*mulu<mode>si3.split"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+        (mult:SI (zero_extend:SI (match_operand:QIHI 1 "register_operand" "r"))
+                 (match_operand:SI 2 "nonmemory_operand"                  "rn")))
+   (clobber (reg:HI 26))]
+  "AVR_HAVE_MUL"
   { gcc_unreachable(); }
-  "&& 1"
-  [(set (reg:HI 26)
-        (match_dup 1))
-   (set (reg:SI 18)
-        (match_dup 2))
-   (set (reg:SI 22)
-        (mult:SI (zero_extend:SI (reg:HI 26))
-                 (reg:SI 18)))
-   (set (match_dup 0)
-        (reg:SI 22))]
+  "&& !reload_completed"
+  [(set (match_dup 0)
+        (mult:SI (zero_extend:SI (match_dup 1))
+                 (match_dup 2)))]
   {
     /* Do the QI -> HI extension explicitely before the multiplication.  */
     /* Do the HI -> SI extension implicitely and after the multiplication.  */
        
     if (QImode == <MODE>mode)
-      operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]);
+      operands[1] = force_reg (HImode, gen_rtx_ZERO_EXTEND (HImode, operands[1]));
 
     if (u16_operand (operands[2], SImode))
       {
-        operands[1] = force_reg (HImode, operands[1]);
         operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
         emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2]));
         DONE;
@@ -1602,16 +1599,6 @@  (define_insn_and_split
       }
   })
 
-(define_insn "*mulsi3_call"
-  [(set (reg:SI 22)
-        (mult:SI (reg:SI 22)
-                 (reg:SI 18)))
-   (clobber (reg:HI 26))]
-  "AVR_HAVE_MUL"
-  "%~call __mulsi3"
-  [(set_attr "type" "xcall")
-   (set_attr "cc" "clobber")])
-
 (define_insn "*mulhisi3_call"
   [(set (reg:SI 22)
         (mult:SI (sign_extend:SI (reg:HI 18))
@@ -1639,15 +1626,6 @@  (define_insn "*usmulhisi3_call"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
-(define_insn "*muluhisi3_call"
-  [(set (reg:SI 22)
-        (mult:SI (zero_extend:SI (reg:HI 26))
-                 (reg:SI 18)))]
-  "AVR_HAVE_MUL"
-  "%~call __muluhisi3"
-  [(set_attr "type" "xcall")
-   (set_attr "cc" "clobber")])
-
 (define_insn "*mulshisi3_call"
   [(set (reg:SI 22)
         (mult:SI (sign_extend:SI (reg:HI 26))
Index: config/avr/constraints.md
===================================================================
--- config/avr/constraints.md	(revision 176864)
+++ config/avr/constraints.md	(working copy)
@@ -17,14 +17,74 @@ 
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; Register constraints
+;; 1-Register Register Constraints
 
 (define_register_constraint "t" "R0_REG"
   "Temporary register r0")
 
+(define_register_constraint "RQ18" "R18_REG"
+  "Register r18")
+
+(define_register_constraint "RQ19" "R19_REG"
+  "Register r19")
+
+(define_register_constraint "RQ20" "R20_REG"
+  "Register r20")
+
+(define_register_constraint "RQ21" "R21_REG"
+  "Register r21")
+
+(define_register_constraint "RQ22" "R22_REG"
+  "Register r22")
+
+(define_register_constraint "RQ23" "R23_REG"
+  "Register r23")
+
+(define_register_constraint "RQ24" "R24_REG"
+  "Register r24")
+
+(define_register_constraint "RQ25" "R25_REG"
+  "Register r25")
+
+;; 2-Register Register Constraints
+
+(define_register_constraint "RH18" "R18_R19_REGS"
+  "Register pair r18--r19")
+
+(define_register_constraint "RH20" "R20_R21_REGS"
+  "Register pair r20--r21")
+
+(define_register_constraint "RH22" "R22_R23_REGS"
+  "Register pair r22--r23")
+
+(define_register_constraint "RH24" "R24_R25_REGS"
+  "Register pair r24--r25")
+
+(define_register_constraint "x" "POINTER_X_REGS"
+  "Register pair X (r27:r26).")
+
+(define_register_constraint "y" "POINTER_Y_REGS"
+  "Register pair Y (r29:r28).")
+
+(define_register_constraint "z" "POINTER_Z_REGS"
+  "Register pair Z (r31:r30).")
+
+(define_register_constraint "q" "STACK_REG"
+  "Stack pointer register (SPH:SPL).")
+
+;; 4-Register Register Constraints
+
+(define_register_constraint "RS18" "R18_R21_REGS"
+  "Register r18--r21")
+
+(define_register_constraint "RS22" "R22_R25_REGS"
+  "Register r22--r25")
+
 (define_register_constraint "b" "BASE_POINTER_REGS"
   "Base pointer registers (r28--r31)")
 
+;; Even more Register Constraints
+
 (define_register_constraint "e" "POINTER_REGS"
   "Pointer registers (r26--r31)")
 
@@ -41,18 +101,6 @@  (define_register_constraint "l" "NO_LD_R
 (define_register_constraint "a" "SIMPLE_LD_REGS"
   "Registers from r16 to r23.")
 
-(define_register_constraint "x" "POINTER_X_REGS"
-  "Register pair X (r27:r26).")
-
-(define_register_constraint "y" "POINTER_Y_REGS"
-  "Register pair Y (r29:r28).")
-
-(define_register_constraint "z" "POINTER_Z_REGS"
-  "Register pair Z (r31:r30).")
-
-(define_register_constraint "q" "STACK_REG"
-  "Stack pointer register (SPH:SPL).")
-
 (define_constraint "I"
   "Integer constant in the range 0 @dots{} 63."
   (and (match_code "const_int")
@@ -98,7 +146,7 @@  (define_constraint "G"
   (and (match_code "const_double")
        (match_test "op == CONST0_RTX (SFmode)")))
 
-(define_constraint "R"
+(define_constraint "C65"
   "Integer constant in the range -6 @dots{} 5."
   (and (match_code "const_int")
        (match_test "ival >= -6 && ival <= 5")))
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 176864)
+++ config/avr/avr.c	(working copy)
@@ -277,22 +277,6 @@  avr_option_override (void)
   init_machine_status = avr_init_machine_status;
 }
 
-/*  return register class from register number.  */
-
-static const enum reg_class reg_class_tab[]={
-  GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,
-  GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,
-  GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,
-  GENERAL_REGS, /* r0 - r15 */
-  LD_REGS,LD_REGS,LD_REGS,LD_REGS,LD_REGS,LD_REGS,LD_REGS,
-  LD_REGS,                      /* r16 - 23 */
-  ADDW_REGS,ADDW_REGS,          /* r24,r25 */
-  POINTER_X_REGS,POINTER_X_REGS, /* r26,27 */
-  POINTER_Y_REGS,POINTER_Y_REGS, /* r28,r29 */
-  POINTER_Z_REGS,POINTER_Z_REGS, /* r30,r31 */
-  STACK_REG,STACK_REG           /* SPL,SPH */
-};
-
 /* Function to set up the backend function structure.  */
 
 static struct machine_function *
@@ -306,7 +290,30 @@  avr_init_machine_status (void)
 enum reg_class
 avr_regno_reg_class (int r)
 {
-  if (r <= 33)
+  static const enum reg_class
+    reg_class_tab[] =
+    {
+      /* r0 - r15 */
+      R0_REG,     NO_LD_REGS, NO_LD_REGS, NO_LD_REGS,
+      NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS,
+      NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS,
+      NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS,
+      /* r16 - r17 */
+      SIMPLE_LD_REGS, SIMPLE_LD_REGS,
+      /* r18 - r25 */
+      R18_REG, R19_REG, R20_REG, R21_REG,
+      R22_REG, R23_REG, R24_REG, R25_REG,
+      /* r26 - r27 */
+      POINTER_X_REGS, POINTER_X_REGS,
+      /* r28 - r29 */
+      POINTER_Y_REGS, POINTER_Y_REGS,
+      /* r30 - r31 */
+      POINTER_Z_REGS, POINTER_Z_REGS,
+      /* SPL, SPH */
+      STACK_REG, STACK_REG
+    };
+
+  if (r <= 33) 
     return reg_class_tab[r];
   return ALL_REGS;
 }
Index: config/avr/avr.h
===================================================================
--- config/avr/avr.h	(revision 176864)
+++ config/avr/avr.h	(working copy)
@@ -235,59 +235,102 @@  extern GTY(()) section *progmem_section;
 
 enum reg_class {
   NO_REGS,
-  R0_REG,			/* r0 */
-  POINTER_X_REGS,		/* r26 - r27 */
-  POINTER_Y_REGS,		/* r28 - r29 */
-  POINTER_Z_REGS,		/* r30 - r31 */
-  STACK_REG,			/* STACK */
-  BASE_POINTER_REGS,		/* r28 - r31 */
-  POINTER_REGS,			/* r26 - r31 */
-  ADDW_REGS,			/* r24 - r31 */
-  SIMPLE_LD_REGS,		/* r16 - r23 */
-  LD_REGS,			/* r16 - r31 */
-  NO_LD_REGS,			/* r0 - r15 */
-  GENERAL_REGS,			/* r0 - r31 */
-  ALL_REGS, LIM_REG_CLASSES
+  R0_REG,               /* r0 */
+  R18_REG,              /* r18 */
+  R19_REG,              /* r19 */
+  R20_REG,              /* r20 */
+  R21_REG,              /* r21 */
+  R22_REG,              /* r22 */
+  R23_REG,              /* r23 */
+  R24_REG,              /* r24 */
+  R25_REG,              /* r25 */
+  R18_R19_REGS,         /* r18-r19 */
+  R20_R21_REGS,         /* r20-r21 */
+  R22_R23_REGS,         /* r22-r23 */
+  R24_R25_REGS,         /* r24-r25 */
+  R18_R21_REGS,         /* r18-r21 */
+  R22_R25_REGS,         /* r22-r25 */
+  POINTER_X_REGS,       /* r26 - r27 */
+  POINTER_Y_REGS,       /* r28 - r29 */
+  POINTER_Z_REGS,       /* r30 - r31 */
+  STACK_REG,            /* STACK */
+  BASE_POINTER_REGS,    /* r28 - r31 */
+  POINTER_REGS,         /* r26 - r31 */
+  ADDW_REGS,            /* r24 - r31 */
+  SIMPLE_LD_REGS,       /* r16 - r23 */
+  LD_REGS,              /* r16 - r31 */
+  NO_LD_REGS,           /* r0 - r15 */
+  GENERAL_REGS,         /* r0 - r31 */
+  ALL_REGS,
+  LIM_REG_CLASSES
 };
 
 
 #define N_REG_CLASSES (int)LIM_REG_CLASSES
 
-#define REG_CLASS_NAMES {					\
-		 "NO_REGS",					\
-		   "R0_REG",	/* r0 */                        \
-		   "POINTER_X_REGS", /* r26 - r27 */		\
-		   "POINTER_Y_REGS", /* r28 - r29 */		\
-		   "POINTER_Z_REGS", /* r30 - r31 */		\
-		   "STACK_REG",	/* STACK */			\
-		   "BASE_POINTER_REGS",	/* r28 - r31 */		\
-		   "POINTER_REGS", /* r26 - r31 */		\
-		   "ADDW_REGS",	/* r24 - r31 */			\
-                   "SIMPLE_LD_REGS", /* r16 - r23 */            \
-		   "LD_REGS",	/* r16 - r31 */			\
-                   "NO_LD_REGS", /* r0 - r15 */                 \
-		   "GENERAL_REGS", /* r0 - r31 */		\
-		   "ALL_REGS" }
-
-#define REG_CLASS_CONTENTS {						\
-  {0x00000000,0x00000000},	/* NO_REGS */				\
-  {0x00000001,0x00000000},	/* R0_REG */                            \
-  {3 << REG_X,0x00000000},      /* POINTER_X_REGS, r26 - r27 */		\
-  {3 << REG_Y,0x00000000},      /* POINTER_Y_REGS, r28 - r29 */		\
-  {3 << REG_Z,0x00000000},      /* POINTER_Z_REGS, r30 - r31 */		\
-  {0x00000000,0x00000003},	/* STACK_REG, STACK */			\
-  {(3 << REG_Y) | (3 << REG_Z),						\
-     0x00000000},		/* BASE_POINTER_REGS, r28 - r31 */	\
-  {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z),				\
-     0x00000000},		/* POINTER_REGS, r26 - r31 */		\
-  {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W),		\
-     0x00000000},		/* ADDW_REGS, r24 - r31 */		\
-  {0x00ff0000,0x00000000},	/* SIMPLE_LD_REGS r16 - r23 */          \
-  {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16),	\
-     0x00000000},	/* LD_REGS, r16 - r31 */			\
-  {0x0000ffff,0x00000000},	/* NO_LD_REGS  r0 - r15 */              \
-  {0xffffffff,0x00000000},	/* GENERAL_REGS, r0 - r31 */		\
-  {0xffffffff,0x00000003}	/* ALL_REGS */				\
+#define REG_CLASS_NAMES {                       \
+  "NO_REGS",                                    \
+  "R0_REG",           /* r0 */                  \
+  "R18_REG",          /* r18 */                 \
+  "R19_REG",          /* r19 */                 \
+  "R20_REG",          /* r20 */                 \
+  "R21_REG",          /* r21 */                 \
+  "R22_REG",          /* r22 */                 \
+  "R23_REG",          /* r23 */                 \
+  "R24_REG",          /* r24 */                 \
+  "R25_REG",          /* r25 */                 \
+  "R18_R19_REGS",     /* r18 - r19 */           \
+  "R20_R21_REGS",     /* r20 - r21 */           \
+  "R22_R23_REGS",     /* r22 - r23 */           \
+  "R24_R25_REGS",     /* r24 - r25 */           \
+  "R18_R21_REGS",     /* r18 - r21 */           \
+  "R22_R25_REGS",     /* r22 - r25 */           \
+  "POINTER_X_REGS",   /* r26 - r27 */           \
+  "POINTER_Y_REGS",   /* r28 - r29 */           \
+  "POINTER_Z_REGS",   /* r30 - r31 */           \
+  "STACK_REG",        /* STACK */               \
+  "BASE_POINTER_REGS",/* r28 - r31 */           \
+  "POINTER_REGS",     /* r26 - r31 */           \
+  "ADDW_REGS",        /* r24 - r31 */           \
+  "SIMPLE_LD_REGS",   /* r16 - r23 */           \
+  "LD_REGS",          /* r16 - r31 */           \
+  "NO_LD_REGS",       /* r0  - r15 */           \
+  "GENERAL_REGS",     /* r0  - r31 */           \
+  "ALL_REGS"                                    \
+}
+
+#define REG_CLASS_CONTENTS {                                            \
+  { 0, 0 },             /* NO_REGS */                                   \
+  {  1 << 0,  0 },      /* R0_REG */                                    \
+  {  1 << 18, 0 },      /* r18 */                                       \
+  {  1 << 19, 0 },      /* r19 */                                       \
+  {  1 << 20, 0 },      /* r20 */                                       \
+  {  1 << 21, 0 },      /* r21 */                                       \
+  {  1 << 22, 0 },      /* r22 */                                       \
+  {  1 << 23, 0 },      /* r23 */                                       \
+  {  1 << 24, 0 },      /* r24 */                                       \
+  {  1 << 25, 0 },      /* r25 */                                       \
+  {  3 << 18, 0 },      /* r18 - r19 */                                 \
+  {  3 << 20, 0 },      /* r20 - r21 */                                 \
+  {  3 << 22, 0 },      /* r22 - r23 */                                 \
+  {  3 << 24, 0 },      /* r24 - r25 */                                 \
+  { 15 << 18, 0 },      /* r18 - r21 */                                 \
+  { 15 << 22, 0 },      /* r22 - r25 */                                 \
+  {  3 << REG_X, 0 },   /* POINTER_X_REGS, r26 - r27 */                 \
+  {  3 << REG_Y, 0 },   /* POINTER_Y_REGS, r28 - r29 */                 \
+  {  3 << REG_Z, 0 },   /* POINTER_Z_REGS, r30 - r31 */                 \
+  { 0, 0x3 },           /* STACK_REG, Stack Pointer  */                 \
+  /* BASE_POINTER_REGS, r28-r31 */                                      \
+  { (3 << REG_Y) | (3 << REG_Z), 0 },                                   \
+  /* POINTER_REGS, r26 - r31 */                                         \
+  { (3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), 0 },                    \
+  /* ADDW_REGS, r24 - r31 */                                            \
+  { (3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), 0 },     \
+  { 0x00ff0000, 0 },    /* SIMPLE_LD_REGS, r16 - r23 */                 \
+  { 0xffff0000, 0 },    /* LD_REGS,        r16 - r31 */                 \
+  { 0x0000ffff, 0 },    /* NO_LD_REGS,     r0  - r15  */                \
+  { 0xffffffff, 0 },    /* GENERAL_REGS,   r0  - r31  */                \
+  { 0xffffffff, 0x3 }   /* ALL_REGS */                                  \
 }
 
 #define REGNO_REG_CLASS(R) avr_regno_reg_class(R)