| Message ID | 5329699D.6020605@arm.com |
|---|---|
| State | New |
| Headers | show |
On Wed, Mar 19, 2014 at 9:55 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote: > Hi all, > > This patch adds a recogniser for the bitmask,shift,orr sequence of > instructions that can be used to reverse the bytes in 16-bit halfwords (for > the sequence itself look at the testcase included in the patch). This can be > implemented with a rev16 instruction. > Since the shifts can occur in any order and there are no canonicalisation > rules for where they appear in the expression we have to have two patterns > to match both cases. > > The rtx costs function is updated to recognise the pattern and cost it > appropriately by using the rev field of the cost tables introduced in patch > [1/3]. The rtx costs helper functions that are used to recognise those > bitwise operations are placed in config/arm/aarch-common.c so that they can > be reused by both arm and aarch64. The ARM bits of this are OK if there are no regressions. > > I've added an execute testcase but no scan-assembler tests since > conceptually in the future the combiner might decide to not use a rev > instruction due to rtx costs. We can at least test that the code generated > is functionally correct though. > > Tested aarch64-none-elf. What about arm-none-eabi :) ? > > Ok for stage1? > > [gcc/] > 2014-03-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > * config/aarch64/aarch64.md (rev16<mode>2): New pattern. > (rev16<mode>2_alt): Likewise. > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. > * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. > (aarch_rev16_shleft_mask_imm_p): Likewise. > (aarch_rev16_p_1): Likewise. > (aarch_rev16_p): Likewise. > * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. > (aarch_rev16_shright_mask_imm_p): Likewise. > (aarch_rev16_shleft_mask_imm_p): Likewise. > > [gcc/testsuite/] > 2014-03-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > * gcc.target/aarch64/rev16_1.c: New test.
On 28/03/14 14:21, Ramana Radhakrishnan wrote: > On Wed, Mar 19, 2014 at 9:55 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote: >> Hi all, >> >> This patch adds a recogniser for the bitmask,shift,orr sequence of >> instructions that can be used to reverse the bytes in 16-bit halfwords (for >> the sequence itself look at the testcase included in the patch). This can be >> implemented with a rev16 instruction. >> Since the shifts can occur in any order and there are no canonicalisation >> rules for where they appear in the expression we have to have two patterns >> to match both cases. >> >> The rtx costs function is updated to recognise the pattern and cost it >> appropriately by using the rev field of the cost tables introduced in patch >> [1/3]. The rtx costs helper functions that are used to recognise those >> bitwise operations are placed in config/arm/aarch-common.c so that they can >> be reused by both arm and aarch64. > The ARM bits of this are OK if there are no regressions. > >> I've added an execute testcase but no scan-assembler tests since >> conceptually in the future the combiner might decide to not use a rev >> instruction due to rtx costs. We can at least test that the code generated >> is functionally correct though. >> >> Tested aarch64-none-elf. > What about arm-none-eabi :) ? Tested arm-none-eabi and bootstrap on arm linux together with patch [3/3] in the series :) Kyrill > >> Ok for stage1? >> >> [gcc/] >> 2014-03-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >> >> * config/aarch64/aarch64.md (rev16<mode>2): New pattern. >> (rev16<mode>2_alt): Likewise. >> * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. >> * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. >> (aarch_rev16_shleft_mask_imm_p): Likewise. >> (aarch_rev16_p_1): Likewise. >> (aarch_rev16_p): Likewise. >> * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. >> (aarch_rev16_shright_mask_imm_p): Likewise. 
>> (aarch_rev16_shleft_mask_imm_p): Likewise. >> >> [gcc/testsuite/] >> 2014-03-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >> >> * gcc.target/aarch64/rev16_1.c: New test.
On 19 March 2014 09:55, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote: > [gcc/] > 2014-03-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > * config/aarch64/aarch64.md (rev16<mode>2): New pattern. > (rev16<mode>2_alt): Likewise. > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. > * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. > (aarch_rev16_shleft_mask_imm_p): Likewise. > (aarch_rev16_p_1): Likewise. > (aarch_rev16_p): Likewise. > * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. > (aarch_rev16_shright_mask_imm_p): Likewise. > (aarch_rev16_shleft_mask_imm_p): Likewise. > > [gcc/testsuite/] > 2014-03-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > * gcc.target/aarch64/rev16_1.c: New test. The aarch64 parts are OK for stage-1. /Marcus
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ebd58c0..41761ae 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4682,6 +4682,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, return false; case IOR: + if (aarch_rev16_p (x)) + { + *cost = COSTS_N_INSNS (1); + + if (speed) + *cost += extra_cost->alu.rev; + + return true; + } + /* Fall through. */ case XOR: case AND: cost_logic: diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 99a6ac8..a23452b 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -3173,6 +3173,38 @@ [(set_attr "type" "rev")] ) +;; There are no canonicalisation rules for the position of the lshiftrt, ashift +;; operations within an IOR/AND RTX, therefore we have two patterns matching +;; each valid permutation. + +(define_insn "rev16<mode>2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (const_int 8)) + (match_operand:GPI 3 "const_int_operand" "n")) + (and:GPI (lshiftrt:GPI (match_dup 1) + (const_int 8)) + (match_operand:GPI 2 "const_int_operand" "n"))))] + "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode) + && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)" + "rev16\\t%<w>0, %<w>1" + [(set_attr "type" "rev")] +) + +(define_insn "rev16<mode>2_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") + (const_int 8)) + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (ashift:GPI (match_dup 1) + (const_int 8)) + (match_operand:GPI 3 "const_int_operand" "n"))))] + "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode) + && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)" + "rev16\\t%<w>0, %<w>1" + [(set_attr "type" "rev")] +) + ;; zero_extend version of above (define_insn "*bswapsi2_uxtw" [(set (match_operand:DI 
0 "register_operand" "=r") diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index d97ee61..08c4c7a 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -23,6 +23,9 @@ #ifndef GCC_AARCH_COMMON_PROTOS_H #define GCC_AARCH_COMMON_PROTOS_H +extern bool aarch_rev16_p (rtx); +extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode); +extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode); extern int arm_early_load_addr_dep (rtx, rtx); extern int arm_early_store_addr_dep (rtx, rtx); extern int arm_mac_accumulator_is_mul_result (rtx, rtx); diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c index c11f7e9..75ed3fd 100644 --- a/gcc/config/arm/aarch-common.c +++ b/gcc/config/arm/aarch-common.c @@ -155,6 +155,79 @@ arm_get_set_operands (rtx producer, rtx consumer, return 0; } +bool +aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode) +{ + return CONST_INT_P (val) + && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff, mode); +} + +bool +aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode) +{ + return CONST_INT_P (val) + && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff00, mode); +} + + +static bool +aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode) +{ + if (GET_CODE (lhs) == AND + && GET_CODE (XEXP (lhs, 0)) == ASHIFT + && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) + && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 + && REG_P (XEXP (XEXP (lhs, 0), 0)) + && CONST_INT_P (XEXP (lhs, 1)) + && GET_CODE (rhs) == AND + && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT + && REG_P (XEXP (XEXP (rhs, 0), 0)) + && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) + && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 + && CONST_INT_P (XEXP (rhs, 1)) + && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) + + { + rtx lhs_mask = XEXP (lhs, 1); + rtx rhs_mask = XEXP (rhs, 1); + + return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) + && 
aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); + } + + return false; +} + +/* Recognise a sequence of bitwise operations corresponding to a rev16 operation. + These will be of the form: + ((x >> 8) & 0x00ff00ff) + | ((x << 8) & 0xff00ff00) + for SImode and with similar but wider bitmasks for DImode. + The two sub-expressions of the IOR can appear on either side so check both + permutations with the help of aarch_rev16_p_1 above. */ + +bool +aarch_rev16_p (rtx x) +{ + rtx left_sub_rtx, right_sub_rtx; + bool is_rev = false; + + if (GET_CODE (x) != IOR) + return false; + + left_sub_rtx = XEXP (x, 0); + right_sub_rtx = XEXP (x, 1); + + /* There are no canonicalisation rules for the position of the two shifts + involved in a rev, so try both permutations. */ + is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); + + if (!is_rev) + is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); + + return is_rev; +} + /* Return nonzero if the CONSUMER instruction (a load) does need PRODUCER's value to calculate the address. 
*/ int diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_1.c b/gcc/testsuite/gcc.target/aarch64/rev16_1.c new file mode 100644 index 0000000..126d3c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/rev16_1.c @@ -0,0 +1,59 @@ +/* { dg-options "-O2" } */ +/* { dg-do run } */ + +extern void abort (void); + +typedef unsigned int __u32; + +__u32 +__rev16_32_alt (__u32 x) +{ + return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) + | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); +} + +__u32 +__rev16_32 (__u32 x) +{ + return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) + | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); +} + +typedef unsigned long long __u64; + +__u64 +__rev16_64_alt (__u64 x) +{ + return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) + | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); +} + +__u64 +__rev16_64 (__u64 x) +{ + return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) + | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); +} + +int +main (void) +{ + volatile __u32 in32 = 0x12345678; + volatile __u32 expected32 = 0x34127856; + volatile __u64 in64 = 0x1234567890abcdefUL; + volatile __u64 expected64 = 0x34127856ab90efcdUL; + + if (__rev16_32 (in32) != expected32) + abort (); + + if (__rev16_32_alt (in32) != expected32) + abort (); + + if (__rev16_64 (in64) != expected64) + abort (); + + if (__rev16_64_alt (in64) != expected64) + abort (); + + return 0; +}