diff mbox

[AVR] : Implement PR50931 (24-bit integers) (divmod) [2/n]

Message ID 4EB13507.80601@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay Nov. 2, 2011, 12:18 p.m. UTC
Georg-Johann Lay wrote:
> To support the upcoming named address space support in avr, a 24-bit pointer
> type is needed. This patch adds respective support of a 24-bit integer mode
> called PSI.
> 
> The patch supports more than is actually needed for a pointer-only
> implementation: it supplies almost all needed insns to render the new mode
> efficient for use in arithmetic.
> 
> The impact on already existing code for non-PSI part of the backend is very
> small and just a handful of lines:
> 
> - avr_out_plus_1, output_reload_in_const and avr_simplify_comparison_p
>   can handle 3-byte types now.
> 
> - avr_libcall_value: 3-byte values will be passed in even registers.
> 
> - TARGET_SCALAR_MODE_SUPPORTED_P reports PSI as supported scalar
> 
> - avr_init_builtins exposes the new mode to user land as new
>   built-in types __int24_t and __uint24_t.
> 
> - avr_cpu_cpp_builtins adds built-in macros
>   __INT24_MAX__, __INT24_MIN__ and __UINT24_MAX__ so that user can test
>   if the new mode is available for arithmetic.
> 
> The rest of the patch is PSI-specific:
> 
> Routines for comparison, addition, rotation, and, or, xor were already generic
> enough to support the new type without effort.
> 
> The routines for shifts and load/store/move are a bit lengthy, as is the
> case with SI, too.
> 
> Some parts are still missing; I plan to supply them in separate patches:
> 
> - Documentation
> - Test cases
> - libgcc support of __[u]divmodpsi4
> - Perhaps more efficient MUL. At the moment, multiplication is extended to
>   32 bits. This leads to suboptimal code because of 32-bit arithmetic and
>   more SUBREGs than with a native mulpsi3 support.
> 
> Patch is lightly tested and passes the test suites.
> 
> Ok for trunk?
> 
> Johann
> 	PR target/50931
> 	* config/avr/avr-modes.def: New file defining PSImode.
> 	* config/avr/avr-c.c (__INT24_MAX__, __INT24_MIN__,
> 	__UINT24_MAX__): New built-in defines.
> 	* config/avr/avr.md (adjust_len): Add tstpsi, mov24,  reload_in24,
> 	ashlpsi, ashrpsi, lshrpsi.
> 	(HISI, HIDI, MPUSH, rotx, rotsmode): Add PSI.
> 	(MOVMODE): New mode iterator.
> 	(movpsi): New expander.
> 	(movqi, movhi, movsi, movsf, movpsi): Write as one using MOVMODE.
> 	(*reload_inpsi, *movpsi): New insns.
> 	(*reload_inpsi): New RTL peephole.
> 	(addpsi3, *addpsi3_zero_extend.qi, *addpsi3_zero_extend.hi,
> 	*addpsi3_sign_extend.hi): New insns.
> 	(subpsi3, *subpsi3_zero_extend.qi, *subpsi3_zero_extend.hi,
> 	*subpsi3_sign_extend.hi): New insns.
> 	(divmodpsi4, udivmodpsi4): New define insn-and-split.
> 	(*divmodpsi4_call, *udivmodpsi4_call): New insns.
> 	(andpsi3, iorpsi3, xorpsi3): New insns.
> 	(*rotlpsi2.1, *rotlpsi2.23): New insns.
> 	(*rotw<mode>): Insn condition only allow even-sized modes.
> 	(*rotb<mode>): Insn condition allows odd-sized modes.
> 	(ashlpsi3, ashrpsi3, lshrpsi3): New insns.
> 	(negpsi2, one_cmplpsi2): New insns.
> 	(extendqipsi2, extendhipsi2, extendpsisi2): New insns.
> 	(zero_extendqipsi2, zero_extendhipsi2, zero_extendpsisi2): New
> 	insn-and-splits.
> 	(*cmppsi, *negated_tstpsi, *reversed_tstpsi): New insns.
> 	(cbranchpsi4): New expander.
> 	* config/avr/constraints.md (Ca3, Co3, Cx3): New constraints.
> 	* config/avr/avr-protos.h (avr_out_tstpsi, avr_out_movpsi,
> 	avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3,
> 	avr_out_reload_inpsi): New prototypes.
> 	* config/avr/avr.c (TARGET_SCALAR_MODE_SUPPORTED_P): Define to...
> 	(avr_scalar_mode_supported_p): ...this new static function.
> 	(avr_asm_len): Always return "".
> 	(avr_out_load_psi, avr_out_store_psi): New static functions.
> 	(avr_out_movpsi, avr_out_reload_inpsi): New functions.
> 	(avr_out_tstpsi): New function.
> 	(avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3): New functions.
> 	(avr_out_plus_1, output_reload_in_const): Handle 3-byte types.
> 	(avr_simplify_comparison_p): Ditto.
> 	(adjust_insn_length): Handle ADJUST_LEN_RELOAD_IN24,
> 	ADJUST_LEN_MOV24, ADJUST_LEN_TSTPSI, ADJUST_LEN_ASHLPSI,
> 	ADJUST_LEN_ASHRPSI, ADJUST_LEN_LSHRPSI.
> 	(avr_rtx_costs_1): Report PSI costs.
> 	(avr_libcall_value): Handle odd-sized parameters.
> 	(avr_init_int24): New static function.
> 	(avr_init_builtins): Use it.

This patch is the div/mod support for libgcc.

With this patch, 24-bit integers are fully supported for integer arithmetic.

During the implementation it turned out that the register footprint is smaller
than that of SImode: PSI needs 3 fewer registers, so the representation in
avr.md needs adjustment like so:


@@ -2199,6 +2294,66 @@ (define_insn "*udivmodhi4_call"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])

+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Signed 24-bit division and modulo computed by one call to __divmodpsi4.
+;; This insn is never output itself (gcc_unreachable); it is always split
+;; into moves to the fixed argument registers, the call insn below, and
+;; moves from the fixed result registers:
+;;   dividend in R24:R22, divisor in R20:R18,
+;;   quotient in R24:R22, remainder in R20:R18.
+(define_insn_and_split "divmodpsi4"
+  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
+                   (div:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
+                            (match_operand:PSI 2 "pseudo_register_operand" "")))
+              (set (match_operand:PSI 3 "pseudo_register_operand" "")
+                   (mod:PSI (match_dup 1) (match_dup 2)))
+              (clobber (reg:DI 18))
+              (clobber (reg:QI 26))])]
+  ""
+  { gcc_unreachable(); }
+  ""
+  [(set (reg:PSI 22) (match_dup 1))
+   (set (reg:PSI 18) (match_dup 2))
+   ;; The library routine uses R21 as loop counter and R25/R26 as
+   ;; remainder bytes, so the call must clobber all three, not only R26.
+   (parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
+              (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
+              (clobber (reg:QI 21))
+              (clobber (reg:QI 25))
+              (clobber (reg:QI 26))])
+   (set (match_dup 0) (reg:PSI 22))
+   (set (match_dup 3) (reg:PSI 18))])
+
+;; The actual library call.  The pattern must stay identical to the
+;; parallel generated by the split above, including the clobber list.
+(define_insn "*divmodpsi4_call"
+  [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
+   (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
+   (clobber (reg:QI 21))
+   (clobber (reg:QI 25))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __divmodpsi4"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Unsigned 24-bit division and modulo computed by one call to
+;; __udivmodpsi4.  This insn is never output itself (gcc_unreachable); it
+;; is always split into moves to the fixed argument registers, the call
+;; insn below, and moves from the fixed result registers:
+;;   dividend in R24:R22, divisor in R20:R18,
+;;   quotient in R24:R22, remainder in R20:R18.
+(define_insn_and_split "udivmodpsi4"
+  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
+                   (udiv:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
+                             (match_operand:PSI 2 "pseudo_register_operand" "")))
+              (set (match_operand:PSI 3 "pseudo_register_operand" "")
+                   (umod:PSI (match_dup 1) (match_dup 2)))
+              (clobber (reg:DI 18))
+              (clobber (reg:QI 26))])]
+  ""
+  { gcc_unreachable(); }
+  ""
+  [(set (reg:PSI 22) (match_dup 1))
+   (set (reg:PSI 18) (match_dup 2))
+   ;; The library routine uses R21 as loop counter and R25/R26 as
+   ;; remainder bytes, so the call must clobber all three, not only R26.
+   (parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
+              (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
+              (clobber (reg:QI 21))
+              (clobber (reg:QI 25))
+              (clobber (reg:QI 26))])
+   (set (match_dup 0) (reg:PSI 22))
+   (set (match_dup 3) (reg:PSI 18))])
+
+;; The actual library call.  The pattern must stay identical to the
+;; parallel generated by the split above, including the clobber list.
+(define_insn "*udivmodpsi4_call"
+  [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
+   (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
+   (clobber (reg:QI 21))
+   (clobber (reg:QI 25))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __udivmodpsi4"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+

The division routines are tested against the 32-bit division functions for lots
of (pseudo) random values and special values like 0, 1, -1, 0x7f..., 0x80...
etc. libgcc builds fine and assembles for the new objects.

Ok for trunk?

Johann

	PR target/50931
	* config/avr/t-avr (LIB1ASMFUNCS): Add _divmodpsi4, _udivmodpsi4.
	* config/avr/libgcc.S (__udivmodpsi4, __divmodpsi4): New functions.

Comments

Denis Chertykov Nov. 4, 2011, 11:17 a.m. UTC | #1
2011/11/2 Georg-Johann Lay <avr@gjlay.de>:
> Georg-Johann Lay wrote:
>> To support the upcoming named address space support in avr, a 24-bit pointer
>> type is needed. This patch adds respective support of a 24-bit integer mode
>> called PSI.
>>
>> The patch supports more than is actually needed for a pointer-only
>> implementation: is supplies almost all needed insns to render the new mode
>> efficient for use in arithmetic.
>>
>> The impact on already existing code for non-PSI part of the backend is very
>> small and just a handfull of lines:
>>
>> - avr_out_plus_1, output_reload_in_const and avr_simplify_comparison_p
>>   can handle 3-byte types now.
>>
>> - avr_libcall_value: 3-byte values will be passed in even registers.
>>
>> - TARGET_SCALAR_MODE_SUPPORTED_P reports PSI as supported scalar
>>
>> - avr_init_builtins exposes the new mode to user land as new
>>   build-in types __int24_t and __uint24_t.
>>
>> - avr_cpu_cpp_builtins adds build-in macros
>>   __INT24_MAX__, __INT24_MIN__ and __UINT24_MAX__ so that user can test
>>   if the new mode is available for arithmetic.
>>
>> The rest of the patch is PSI-specific:
>>
>> Routines for comparison, addition, rotation, and, or, xor were already generic
>> enough to support the new type without effort.
>>
>> Shifts and load/store/move are a bit lengthy routines as it is the case with
>> SI, too.
>>
>> There are some parts missing and are planned to supply them in separate patches:
>>
>> - Documentation
>> - Test cases
>> - libgcc support of __[u]divmodpsi4
>> - Perhaps more efficient MUL. At the moment, multiplication is extended to
>>   32 bits. This leads to suboptimal code because of 32-bit arithmetic and
>>   more SUBREGs than with a native mulpsi3 support.
>>
>> Patch is lightly tested and passes the test suites.
>>
>> Ok for trunk?
>>
>> Johann
>>       PR target/50931
>>       * config/avr/avr-modes.def: New file defining PSImode.
>>       * config/avr/avr-c.c (__INT24_MAX__, __INT24_MIN__,
>>       __UINT24_MAX__): New built-in defines.
>>       * config/avr/avr.md (adjust_len): Add tstpsi, mov24,  reload_in24,
>>       ashlpsi, ashrpsi, lshrpsi.
>>       (HISI, HIDI, MPUSH, rotx, rotsmode): Add PSI.
>>       (MOVMODE): New mode iterator.
>>       (movpsi): New expander.
>>       (movqi, movhi, movsi, movsf, movpsi): Write as one using MOVMODE.
>>       (*reload_inpsi, *movpsi): New insns.
>>       (*reload_inpsi): New RTL peephole.
>>       (addpsi3, *addpsi3_zero_extend.qi, *addpsi3_zero_extend.hi,
>>       *addpsi3_sign_extend.hi): New insns.
>>       (subpsi3, *subpsi3_zero_extend.qi, *subpsi3_zero_extend.hi,
>>       *subpsi3_sign_extend.hi): New insns.
>>       (divmodpsi4, udivmodpsi4): New define insn-and-split.
>>       (*divmodpsi4_call, *udivmodpsi4_call): New insns.
>>       (andpsi3, iorpsi3, xorpsi3): New insns.
>>       (*rotlpsi2.1, *rotlpsi2.23): New insns.
>>       (*rotw<mode>): Insn condition only allow even-sized modes.
>>       (*rotb<mode>): Insn condition allows odd-sized modes.
>>       (ashlpsi3, ashrpsi3, lshrpsi3): New insns.
>>       (negpsi2, one_cmplpsi2): New insns.
>>       (extendqipsi2, extendhipsi2, extendpsisi2): New insns.
>>       (zero_extendqipsi2, zero_extendhipsi2, zero_extendpsisi2): New
>>       insn-and-splits.
>>       (*cmppsi, *negated_tstpsi, *reversed_tstpsi): New insns.
>>       (cbranchpsi4): New expander.
>>       * config/avr/constraints.md (Ca3, Co3, Cx3): New constraints.
>>       * config/avr/avr-protos.h (avr_out_tstpsi, avr_out_movpsi,
>>       avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3,
>>       avr_out_reload_inpsi): New prototypes.
>>       * config/avr/avr.c (TARGET_SCALAR_MODE_SUPPORTED_P): Define to...
>>       (avr_scalar_mode_supported_p): ...this new static function.
>>       (avr_asm_len): Always return "".
>>       (avr_out_load_psi, avr_out_store_psi): New static functions.
>>       (avr_out_movpsi, avr_out_reload_inpsi): New functions.
>>       (avr_out_tstpsi): New function.
>>       (avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3): New functions.
>>       (avr_out_plus_1, output_reload_in_const): Handle 3-byte types.
>>       (avr_simplify_comparison_p): Ditto.
>>       (adjust_insn_length): Handle ADJUST_LEN_RELOAD_IN24,
>>       ADJUST_LEN_MOV24, ADJUST_LEN_TSTPSI, ADJUST_LEN_ASHLPSI,
>>       ADJUST_LEN_ASHRPSI, ADJUST_LEN_LSHRPSI.
>>       (avr_rtx_costs_1): Report PSI costs.
>>       (avr_libcall_value): Handle odd-sized parameters.
>>       (avr_init_int24): New static function.
>>       (avr_init_builtins): Use it.
>
> This patch is the div/mod support for libgcc.
>
> With this patch, 24-bit integers are fully supported for integer arithmetic.
>
> During the implementation it turned out that the register footprint is smaller
> than that of SImode: PSI need 3 registers less so that the representation in
> avr.md needs adjustment like so:
>
>
> @@ -2199,6 +2294,66 @@ (define_insn "*udivmodhi4_call"
>   [(set_attr "type" "xcall")
>    (set_attr "cc" "clobber")])
>
> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> +
> +(define_insn_and_split "divmodpsi4"
> +  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
> +                   (div:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
> +                            (match_operand:PSI 2 "pseudo_register_operand" "")))
> +              (set (match_operand:PSI 3 "pseudo_register_operand" "")
> +                   (mod:PSI (match_dup 1) (match_dup 2)))
> +              (clobber (reg:DI 18))
> +              (clobber (reg:QI 26))])]
> +  ""
> +  { gcc_unreachable(); }
> +  ""
> +  [(set (reg:PSI 22) (match_dup 1))
> +   (set (reg:PSI 18) (match_dup 2))
> +   (parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
> +              (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
> +              (clobber (reg:QI 26))])
> +   (set (match_dup 0) (reg:PSI 22))
> +   (set (match_dup 3) (reg:PSI 18))])
> +
> +(define_insn "*divmodpsi4_call"
> +  [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
> +   (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
> +   (clobber (reg:QI 26))]
> +  ""
> +  "%~call __divmodpsi4"
> +  [(set_attr "type" "xcall")
> +   (set_attr "cc" "clobber")])
> +
> +(define_insn_and_split "udivmodpsi4"
> +  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
> +                   (udiv:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
> +                             (match_operand:PSI 2 "pseudo_register_operand" "")))
> +              (set (match_operand:PSI 3 "pseudo_register_operand" "")
> +                   (umod:PSI (match_dup 1) (match_dup 2)))
> +              (clobber (reg:DI 18))
> +              (clobber (reg:QI 26))])]
> +  ""
> +  { gcc_unreachable(); }
> +  ""
> +  [(set (reg:PSI 22) (match_dup 1))
> +   (set (reg:PSI 18) (match_dup 2))
> +   (parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
> +              (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
> +              (clobber (reg:QI 26))])
> +   (set (match_dup 0) (reg:PSI 22))
> +   (set (match_dup 3) (reg:PSI 18))])
> +
> +(define_insn "*udivmodpsi4_call"
> +  [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
> +   (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
> +   (clobber (reg:QI 26))]
> +  ""
> +  "%~call __udivmodpsi4"
> +  [(set_attr "type" "xcall")
> +   (set_attr "cc" "clobber")])
> +
> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> +
>
> The division routines are tested against the 32-bit division functions for lots
> of (pseudo) random values and special values like 0, 1, -1, 0x7f..., 0x80...
> etc. libgcc builds fine and assembles for the new objects.
>
> Ok for trunk?
>
> Johann
>
>        PR target/50931
>        * config/avr/t-avr (LIB1ASMFUNCS): Add _divmodpsi4, _udivmodpsi4.
>        * config/avr/libgcc.S (__udivmodpsi4, __divmodpsi4): New functions.
>

Please commit both patches.

Denis.
Georg-Johann Lay Nov. 4, 2011, 4:48 p.m. UTC | #2
Denis Chertykov wrote:
> 2011/11/2 Georg-Johann Lay <xxx@yyy.zz>:
>> Georg-Johann Lay wrote:
>>> To support the upcoming named address space support in avr, a 24-bit pointer
>>> type is needed. This patch adds respective support of a 24-bit integer mode
>>> called PSI.
>>>
>>> The patch supports more than is actually needed for a pointer-only
>>> implementation: is supplies almost all needed insns to render the new mode
>>> efficient for use in arithmetic.
>>>
>>> The impact on already existing code for non-PSI part of the backend is very
>>> small and just a handfull of lines:
>>>
>>> - avr_out_plus_1, output_reload_in_const and avr_simplify_comparison_p
>>>   can handle 3-byte types now.
>>>
>>> - avr_libcall_value: 3-byte values will be passed in even registers.
>>>
>>> - TARGET_SCALAR_MODE_SUPPORTED_P reports PSI as supported scalar
>>>
>>> - avr_init_builtins exposes the new mode to user land as new
>>>   build-in types __int24_t and __uint24_t.
>>>
>>> - avr_cpu_cpp_builtins adds build-in macros
>>>   __INT24_MAX__, __INT24_MIN__ and __UINT24_MAX__ so that user can test
>>>   if the new mode is available for arithmetic.
>>>
>>> The rest of the patch is PSI-specific:
>>>
>>> Routines for comparison, addition, rotation, and, or, xor were already generic
>>> enough to support the new type without effort.
>>>
>>> Shifts and load/store/move are a bit lengthy routines as it is the case with
>>> SI, too.
>>>
>>> There are some parts missing and are planned to supply them in separate patches:
>>>
>>> - Documentation
>>> - Test cases
>>> - libgcc support of __[u]divmodpsi4
>>> - Perhaps more efficient MUL. At the moment, multiplication is extended to
>>>   32 bits. This leads to suboptimal code because of 32-bit arithmetic and
>>>   more SUBREGs than with a native mulpsi3 support.
>>>
>>> Patch is lightly tested and passes the test suites.
>>>
>>> Ok for trunk?
>>>
>>> Johann
>>>       PR target/50931
>>>       * config/avr/avr-modes.def: New file defining PSImode.
>>>       * config/avr/avr-c.c (__INT24_MAX__, __INT24_MIN__,
>>>       __UINT24_MAX__): New built-in defines.
>>>       * config/avr/avr.md (adjust_len): Add tstpsi, mov24,  reload_in24,
>>>       ashlpsi, ashrpsi, lshrpsi.
>>>       (HISI, HIDI, MPUSH, rotx, rotsmode): Add PSI.
>>>       (MOVMODE): New mode iterator.
>>>       (movpsi): New expander.
>>>       (movqi, movhi, movsi, movsf, movpsi): Write as one using MOVMODE.
>>>       (*reload_inpsi, *movpsi): New insns.
>>>       (*reload_inpsi): New RTL peephole.
>>>       (addpsi3, *addpsi3_zero_extend.qi, *addpsi3_zero_extend.hi,
>>>       *addpsi3_sign_extend.hi): New insns.
>>>       (subpsi3, *subpsi3_zero_extend.qi, *subpsi3_zero_extend.hi,
>>>       *subpsi3_sign_extend.hi): New insns.
>>>       (divmodpsi4, udivmodpsi4): New define insn-and-split.
>>>       (*divmodpsi4_call, *udivmodpsi4_call): New insns.
>>>       (andpsi3, iorpsi3, xorpsi3): New insns.
>>>       (*rotlpsi2.1, *rotlpsi2.23): New insns.
>>>       (*rotw<mode>): Insn condition only allow even-sized modes.
>>>       (*rotb<mode>): Insn condition allows odd-sized modes.
>>>       (ashlpsi3, ashrpsi3, lshrpsi3): New insns.
>>>       (negpsi2, one_cmplpsi2): New insns.
>>>       (extendqipsi2, extendhipsi2, extendpsisi2): New insns.
>>>       (zero_extendqipsi2, zero_extendhipsi2, zero_extendpsisi2): New
>>>       insn-and-splits.
>>>       (*cmppsi, *negated_tstpsi, *reversed_tstpsi): New insns.
>>>       (cbranchpsi4): New expander.
>>>       * config/avr/constraints.md (Ca3, Co3, Cx3): New constraints.
>>>       * config/avr/avr-protos.h (avr_out_tstpsi, avr_out_movpsi,
>>>       avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3,
>>>       avr_out_reload_inpsi): New prototypes.
>>>       * config/avr/avr.c (TARGET_SCALAR_MODE_SUPPORTED_P): Define to...
>>>       (avr_scalar_mode_supported_p): ...this new static function.
>>>       (avr_asm_len): Always return "".
>>>       (avr_out_load_psi, avr_out_store_psi): New static functions.
>>>       (avr_out_movpsi, avr_out_reload_inpsi): New functions.
>>>       (avr_out_tstpsi): New function.
>>>       (avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3): New functions.
>>>       (avr_out_plus_1, output_reload_in_const): Handle 3-byte types.
>>>       (avr_simplify_comparison_p): Ditto.
>>>       (adjust_insn_length): Handle ADJUST_LEN_RELOAD_IN24,
>>>       ADJUST_LEN_MOV24, ADJUST_LEN_TSTPSI, ADJUST_LEN_ASHLPSI,
>>>       ADJUST_LEN_ASHRPSI, ADJUST_LEN_LSHRPSI.
>>>       (avr_rtx_costs_1): Report PSI costs.
>>>       (avr_libcall_value): Handle odd-sized parameters.
>>>       (avr_init_int24): New static function.
>>>       (avr_init_builtins): Use it.
>> This patch is the div/mod support for libgcc.
>>
>> With this patch, 24-bit integers are fully supported for integer arithmetic.
>>
>> During the implementation it turned out that the register footprint is smaller
>> than that of SImode: PSI need 3 registers less so that the representation in
>> avr.md needs adjustment like so:
>>
>>
>> @@ -2199,6 +2294,66 @@ (define_insn "*udivmodhi4_call"
>>   [(set_attr "type" "xcall")
>>    (set_attr "cc" "clobber")])
>>
>> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>> +
>> +(define_insn_and_split "divmodpsi4"
>> +  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
>> +                   (div:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
>> +                            (match_operand:PSI 2 "pseudo_register_operand" "")))
>> +              (set (match_operand:PSI 3 "pseudo_register_operand" "")
>> +                   (mod:PSI (match_dup 1) (match_dup 2)))
>> +              (clobber (reg:DI 18))
>> +              (clobber (reg:QI 26))])]
>> +  ""
>> +  { gcc_unreachable(); }
>> +  ""
>> +  [(set (reg:PSI 22) (match_dup 1))
>> +   (set (reg:PSI 18) (match_dup 2))
>> +   (parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
>> +              (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
>> +              (clobber (reg:QI 26))])
>> +   (set (match_dup 0) (reg:PSI 22))
>> +   (set (match_dup 3) (reg:PSI 18))])
>> +
>> +(define_insn "*divmodpsi4_call"
>> +  [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
>> +   (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
>> +   (clobber (reg:QI 26))]
>> +  ""
>> +  "%~call __divmodpsi4"
>> +  [(set_attr "type" "xcall")
>> +   (set_attr "cc" "clobber")])
>> +
>> +(define_insn_and_split "udivmodpsi4"
>> +  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
>> +                   (udiv:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
>> +                             (match_operand:PSI 2 "pseudo_register_operand" "")))
>> +              (set (match_operand:PSI 3 "pseudo_register_operand" "")
>> +                   (umod:PSI (match_dup 1) (match_dup 2)))
>> +              (clobber (reg:DI 18))
>> +              (clobber (reg:QI 26))])]
>> +  ""
>> +  { gcc_unreachable(); }
>> +  ""
>> +  [(set (reg:PSI 22) (match_dup 1))
>> +   (set (reg:PSI 18) (match_dup 2))
>> +   (parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
>> +              (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
>> +              (clobber (reg:QI 26))])
>> +   (set (match_dup 0) (reg:PSI 22))
>> +   (set (match_dup 3) (reg:PSI 18))])
>> +
>> +(define_insn "*udivmodpsi4_call"
>> +  [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
>> +   (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
>> +   (clobber (reg:QI 26))]
>> +  ""
>> +  "%~call __udivmodpsi4"
>> +  [(set_attr "type" "xcall")
>> +   (set_attr "cc" "clobber")])
>> +
>> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>> +
>>
>> The division routines are tested against the 32-bit division functions for lots
>> of (pseudo) random values and special values like 0, 1, -1, 0x7f..., 0x80...
>> etc. libgcc builds fine and assembles for the new objects.
>>
>> Ok for trunk?
>>
>> Johann
>>
>>        PR target/50931
>>        * config/avr/t-avr (LIB1ASMFUNCS): Add _divmodpsi4, _udivmodpsi4.
>>        * config/avr/libgcc.S (__udivmodpsi4, __divmodpsi4): New functions.
>>
> 
> Please commit both patches.
> 
> Denis.


Committed with the following changes:

* The new 24-bit types are named __int24 and __uint24 (instead of
  __[u]int24_t) similar to __int128 that is supported by some targets.

* PSI is also element of QISI and QIDI iterators.  The insns that use
  these iterators are general enough to handle PSI without extension.

* __[u]divmodpsi4 call clobbers R21, R25 and R26, not only R26

* There is a combine pattern *addpsi3.lt0 similar to *addsi3.lt0.

http://gcc.gnu.org/viewcvs?view=revision&revision=180962

Johann
diff mbox

Patch

Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 180738)
+++ config/avr/libgcc.S	(working copy)
@@ -599,7 +599,142 @@  ENDF __divmodhi4
 #undef r_arg2L 
              	
 #undef r_cnt   	
-	
+
+/*******************************************************
+       Division 24 / 24 => (result + remainder)
+*******************************************************/
+
+;; A[0..2]: In: Dividend; Out: Quotient (R22 = low byte ... R24 = high byte)
+#define A0  22
+#define A1  A0+1
+#define A2  A0+2
+
+;; B[0..2]: In: Divisor;   Out: Remainder (R18 = low byte ... R20 = high byte)
+#define B0  18
+#define B1  B0+1
+#define B2  B0+2
+    
+;; C[0..2]: Working remainder during the division; low byte is __zero_reg__
+#define C0  __zero_reg__
+#define C1  26
+#define C2  25
+
+;; Loop counter, decremented once per pass of the shift/subtract loop
+#define r_cnt   21
+
+#if defined (L_udivmodpsi4)
+;; R24:R22 = R24:R22  udiv  R20:R18
+;; R20:R18 = R24:R22  umod  R20:R18
+;; Clobbers: R21, R25, R26
+
+DEFUN __udivmodpsi4
+    ; init loop counter: 24 dividend bits + 1 extra pass through the entry point
+    ldi     r_cnt, 24+1
+    ; Clear remainder and carry.  C0 is already 0
+    clr     C1
+    sub     C2, C2
+    ; jump to entry point
+    rjmp    __udivmodpsi4_start
+__udivmodpsi4_loop:
+    ; shift dividend into remainder
+    rol     C0
+    rol     C1
+    rol     C2
+    ; compare remainder & divisor
+    cp      C0, B0
+    cpc     C1, B1
+    cpc     C2, B2
+    brcs    __udivmodpsi4_start ; remainder < divisor: skip subtract, carry stays set
+    sub     C0, B0              ; remainder -= divisor (no borrow, carry ends up clear)
+    sbc     C1, B1
+    sbc     C2, B2
+__udivmodpsi4_start:
+    ; shift dividend (with CARRY); carry is the inverted quotient bit
+    rol     A0
+    rol     A1
+    rol     A2
+    ; decrement loop counter
+    dec     r_cnt
+    brne    __udivmodpsi4_loop
+    com     A0                  ; quotient bits were collected inverted: flip them
+    com     A1
+    com     A2
+    ; div/mod results to return registers
+    ; remainder: copy working registers C into B
+    mov     B0, C0
+    mov     B1, C1
+    mov     B2, C2
+    clr     __zero_reg__ ; C0 held the remainder low byte: restore it to 0
+    ret
+ENDF __udivmodpsi4
+#endif /* defined (L_udivmodpsi4) */
+
+#if defined (L_divmodpsi4)
+;; R24:R22 = R24:R22  div  R20:R18
+;; R20:R18 = R24:R22  mod  R20:R18
+;; Clobbers: T, __tmp_reg__, R21, R25, R26
+
+DEFUN __divmodpsi4
+    ; Bit 7 of __tmp_reg__ will contain the sign of the quotient:
+    ; __tmp_reg__.7 = A.sign ^ B.sign
+    mov __tmp_reg__, B2
+    ; T-flag = sign of dividend (also the sign the remainder gets below)
+    bst     A2, 7
+    brtc    0f
+    com     __tmp_reg__
+    ; Dividend is negative: make it positive
+    rcall   __divmodpsi4_negA
+0:    
+    ; Divisor is negative: make it positive
+    sbrc    B2, 7
+    rcall   __divmodpsi4_negB
+
+    ; Do the unsigned div/mod
+    XCALL   __udivmodpsi4
+
+    ; Negate the quotient if the operand signs differed
+    sbrc    __tmp_reg__, 7
+    rcall   __divmodpsi4_negA
+
+    ; Remainder is negated only if T is set; then fall through into negB
+    brtc    __divmodpsi4_end
+        
+__divmodpsi4_negB:
+    ; B = -B, used for the divisor before and the remainder after the division
+    com     B2
+    com     B1
+    neg     B0                  ; sets carry unless the result is 0
+    sbci    B1, -1              ; adds 1 - carry: the +1 only ripples up past zero bytes
+    sbci    B2, -1
+    ret
+
+    ; A = -A, used for the dividend before and the quotient after the division
+__divmodpsi4_negA:
+    com     A2
+    com     A1
+    neg     A0                  ; sets carry unless the result is 0
+    sbci    A1, -1              ; adds 1 - carry: the +1 only ripples up past zero bytes
+    sbci    A2, -1
+__divmodpsi4_end:
+    ret
+
+ENDF __divmodpsi4
+#endif /* defined (L_divmodpsi4) */
+
+#undef A0
+#undef A1
+#undef A2
+
+#undef B0
+#undef B1
+#undef B2
+
+#undef C0
+#undef C1
+#undef C2
+
+#undef r_cnt
+
 /*******************************************************
        Division 32 / 32 => (result + remainder)
 *******************************************************/
Index: config/avr/t-avr
===================================================================
--- config/avr/t-avr	(revision 180738)
+++ config/avr/t-avr	(working copy)
@@ -53,6 +53,7 @@  LIB1ASMFUNCS = \
 	_divmodqi4 \
 	_udivmodhi4 \
 	_divmodhi4 \
+	_divmodpsi4 _udivmodpsi4 \
 	_udivmodsi4 \
 	_divmodsi4 \
 	_prologue \