Patchwork [v4,08/33] tcg-aarch64: Introduce tcg_fmt_Rdnm and tcg_fmt_Rdnm_lsl

login
register
mail settings
Submitter Richard Henderson
Date Sept. 14, 2013, 9:54 p.m.
Message ID <1379195690-6509-9-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/274987/
State New
Headers show

Comments

Richard Henderson - Sept. 14, 2013, 9:54 p.m.
Now that we've converted opcode fields to pre-shifted insns, we
can merge the implementation of arithmetic and shift insns.

Simplify the left/right shift parameter to just the left shift
needed by tcg_out_tlb_read.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 40 deletions(-)
Claudio Fontana - Sept. 16, 2013, 8:41 a.m.
On 14.09.2013 23:54, Richard Henderson wrote:
> Now that we've converted opcode fields to pre-shifted insns, we
> can merge the implementation of arithmetic and shift insns.
> 
> Simplify the left/right shift parameter to just the left shift
> needed by tcg_out_tlb_read.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
>  1 file changed, 38 insertions(+), 40 deletions(-)
> 
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index cc56fe5..0e7b67b 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
>      return v;
>  }
>  
> +/*
> + * Encode various formats.
> + */
> +
> +/* This function can be used for both Arithmetic and Logical (shifted register)
> +   type insns.  Since we don't actually use the other available shifts, we only
> +   support LSL here.  */
> +static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
> +                                    TCGType sf, TCGReg rd, TCGReg rn,
> +                                    TCGReg rm, int imm6)
> +{
> +    /* Note that LSL is bits {23,22} = 0.  */
> +    tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
> +}
> +
> +/* This function can be used for most insns with 2 input registers and one
> +   output register.  This includes Arithmetic (shifted register, sans shift),
> +   Logical, Shift, Multiply, Divide, and Bit operation.  */
> +static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
> +                                TCGReg rd, TCGReg rn, TCGReg rm)
> +{
> +    tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
> +}
> +

The name of the function should reflect the fact that we are actually emitting instructions,
not only formatting them. Also I despise mixed case in functions.
So theoretically, tcg_out_rdnm.

I'd still rather have a name of the function that expresses the meaning of what we are trying to do
(tcg_out_arith seems a good name, you can merge with shiftrot_reg if you want), rather than how we are doing it, if the model fits.

I guess the question would be, are all instructions formatted exactly that way arithmetic and logical shifted register instructions of some sort?
If so, I'd go with tcg_out_arith or similar. If not, we can say tcg_out_rdnm.

Also we lose a couple things here.
The previous implementation made it impossible to pass wrong opcodes to the function, since the opcode for the arith was a separate type.
It made it obvious to the reader in which cases the function can be used.
We would lose this with this change here (combined with the INSN change).

Also we lose the ability to do right-shifted arithmetic operations, which I feel we should provide for completeness and to reduce the pain for the programmer who will eventually need them.

>  static inline void tcg_out_ldst_9(TCGContext *s,
>                                    enum aarch64_ldst_op_data op_data,
>                                    enum aarch64_ldst_op_type op_type,
> @@ -445,23 +469,6 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
>                   arg, arg1, arg2);
>  }
>  
> -static inline void tcg_out_arith(TCGContext *s, AArch64Insn insn,
> -                                 TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
> -                                 int shift_imm)
> -{
> -    /* Using shifted register arithmetic operations */
> -    /* if extended register operation (64bit) just OR with 0x80 << 24 */
> -    unsigned int shift, base = insn | (ext ? 0x80000000 : 0);
> -    if (shift_imm == 0) {
> -        shift = 0;
> -    } else if (shift_imm > 0) {
> -        shift = shift_imm << 10 | 1 << 22;
> -    } else /* (shift_imm < 0) */ {
> -        shift = (-shift_imm) << 10;
> -    }
> -    tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
> -}
> -
>  static inline void tcg_out_mul(TCGContext *s, TCGType ext,
>                                 TCGReg rd, TCGReg rn, TCGReg rm)
>  {
> @@ -470,15 +477,6 @@ static inline void tcg_out_mul(TCGContext *s, TCGType ext,
>      tcg_out32(s, base | rm << 16 | rn << 5 | rd);
>  }
>  
> -static inline void tcg_out_shiftrot_reg(TCGContext *s,
> -                                        AArch64Insn insn, TCGType ext,
> -                                        TCGReg rd, TCGReg rn, TCGReg rm)
> -{
> -    /* using 2-source data processing instructions 0x1ac02000 */
> -    unsigned int base = insn | (ext ? 0x80000000 : 0);
> -    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
> -}
> -
>  static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
>                                  TCGReg rn, unsigned int a, unsigned int b)
>  {
> @@ -546,7 +544,7 @@ static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
>                                 TCGReg rm)
>  {
>      /* Using CMP alias SUBS wzr, Wn, Wm */
> -    tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
> +    tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
>  }
>  
>  static inline void tcg_out_cset(TCGContext *s, TCGType ext,
> @@ -906,8 +904,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
>      tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
>      /* Merge the tlb index contribution into X2.
>         X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
> -    tcg_out_arith(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
> -                  TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
> +    tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
> +                     TCG_REG_X0, CPU_TLB_ENTRY_BITS);
>      /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
>         X0 = load [X2 + (tlb_offset & 0x000fff)] */
>      tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
> @@ -1183,27 +1181,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>  
>      case INDEX_op_add_i64:
>      case INDEX_op_add_i32:
> -        tcg_out_arith(s, INSN_ADD, ext, a0, a1, a2, 0);
> +        tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
>          break;
>  
>      case INDEX_op_sub_i64:
>      case INDEX_op_sub_i32:
> -        tcg_out_arith(s, INSN_SUB, ext, a0, a1, a2, 0);
> +        tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
>          break;
>  
>      case INDEX_op_and_i64:
>      case INDEX_op_and_i32:
> -        tcg_out_arith(s, INSN_AND, ext, a0, a1, a2, 0);
> +        tcg_fmt_Rdnm(s, INSN_AND, ext, a0, a1, a2);
>          break;
>  
>      case INDEX_op_or_i64:
>      case INDEX_op_or_i32:
> -        tcg_out_arith(s, INSN_ORR, ext, a0, a1, a2, 0);
> +        tcg_fmt_Rdnm(s, INSN_ORR, ext, a0, a1, a2);
>          break;
>  
>      case INDEX_op_xor_i64:
>      case INDEX_op_xor_i32:
> -        tcg_out_arith(s, INSN_EOR, ext, a0, a1, a2, 0);
> +        tcg_fmt_Rdnm(s, INSN_EOR, ext, a0, a1, a2);
>          break;
>  
>      case INDEX_op_mul_i64:
> @@ -1216,7 +1214,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (c2) {
>              tcg_out_shl(s, ext, a0, a1, a2);
>          } else {
> -            tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
> +            tcg_fmt_Rdnm(s, INSN_LSLV, ext, a0, a1, a2);
>          }
>          break;
>  
> @@ -1225,7 +1223,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (c2) {
>              tcg_out_shr(s, ext, a0, a1, a2);
>          } else {
> -            tcg_out_shiftrot_reg(s, INSN_LSRV, ext, a0, a1, a2);
> +            tcg_fmt_Rdnm(s, INSN_LSRV, ext, a0, a1, a2);
>          }
>          break;
>  
> @@ -1234,7 +1232,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (c2) {
>              tcg_out_sar(s, ext, a0, a1, a2);
>          } else {
> -            tcg_out_shiftrot_reg(s, INSN_ASRV, ext, a0, a1, a2);
> +            tcg_fmt_Rdnm(s, INSN_ASRV, ext, a0, a1, a2);
>          }
>          break;
>  
> @@ -1243,7 +1241,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (c2) {
>              tcg_out_rotr(s, ext, a0, a1, a2);
>          } else {
> -            tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, a2);
> +            tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, a2);
>          }
>          break;
>  
> @@ -1252,8 +1250,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (c2) {
>              tcg_out_rotl(s, ext, a0, a1, a2);
>          } else {
> -            tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
> -            tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
> +            tcg_fmt_Rdnm(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
> +            tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
>          }
>          break;
>  
>
Richard Henderson - Sept. 16, 2013, 3:32 p.m.
On 09/16/2013 01:41 AM, Claudio Fontana wrote:
> On 14.09.2013 23:54, Richard Henderson wrote:
>> Now that we've converted opcode fields to pre-shifted insns, we
>> can merge the implementation of arithmetic and shift insns.
>>
>> Simplify the left/right shift parameter to just the left shift
>> needed by tcg_out_tlb_read.
>>
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>> ---
>>  tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
>>  1 file changed, 38 insertions(+), 40 deletions(-)
>>
>> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
>> index cc56fe5..0e7b67b 100644
>> --- a/tcg/aarch64/tcg-target.c
>> +++ b/tcg/aarch64/tcg-target.c
>> @@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
>>      return v;
>>  }
>>  
>> +/*
>> + * Encode various formats.
>> + */
>> +
>> +/* This function can be used for both Arithmetic and Logical (shifted register)
>> +   type insns.  Since we don't actually use the other available shifts, we only
>> +   support LSL here.  */
>> +static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
>> +                                    TCGType sf, TCGReg rd, TCGReg rn,
>> +                                    TCGReg rm, int imm6)
>> +{
>> +    /* Note that LSL is bits {23,22} = 0.  */
>> +    tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
>> +}
>> +
>> +/* This function can be used for most insns with 2 input registers and one
>> +   output register.  This includes Arithmetic (shifted register, sans shift),
>> +   Logical, Shift, Multiply, Divide, and Bit operation.  */
>> +static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
>> +                                TCGReg rd, TCGReg rn, TCGReg rm)
>> +{
>> +    tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
>> +}
>> +
> 
> The name of the function should reflect the fact that we are actually emitting instructions,
> not only formatting them. Also I despise mixed case in functions.
> So theoretically, tcg_out_rdnm.

Ok.

> I guess the question would be, are all instructions formatted exactly that
> way arithmetic and logical shifted register instructions of some sort?

The function comment lists the insn groups from the manual to which the
function may be applied: Arithmetic, Logical, Shift, Multiply, Divide, Bit
operation.

> If so, I'd go with tcg_out_arith or similar. If not, we can say tcg_out_rdnm.

rdnm it is then.

> The previous implementation made it impossible to pass wrong opcodes to the
> function, since the opcode for the arith was a separate type.

No, this isn't C++.  Enumeration checks like that don't happen for C.

> It made it obvious to the reader in which cases the function can be used.
> We would lose this with this change here (combined with the INSN change).

Perhaps, perhaps not.

It would have been handy if ARM had officially assigned identifiers to the
formats, like Power, S390, and ia64 do.  Then one can build in the format ids
into both the function and enumeration names and use the preprocessor for
typechecking (c.f. the tcg_out_insn macro in  tcg/s390/tcg-target.c).

But without those format ids being official, inventing a set of format names
may be more confusing than not.  I'm not sure.

> Also we lose the ability to do right-shifted arithmetic operations, which I
>feel we should provide for completeness and to reduce the pain for the
>programmer who will eventually need them.

Nor do we provide ASR or ROR shifts; should we provide those too?  Please think
about what situations in which those would be useful.  Also think about the one
operation at a time nature of TCG.

My guess is that, beyond the one explicit use in the tlb, we could only make
use of shifted operations if TCG grew some sort of peephole optimizer so that
we can look across single operations.  And I don't ever see that happening.

Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
bloats the backend.


r~
Richard Henderson - Sept. 16, 2013, 7:11 p.m.
On 09/16/2013 08:32 AM, Richard Henderson wrote:
> Nor do we provide ASR or ROR shifts; should we provide those too?  Please think
> about what situations in which those would be useful.  Also think about the one
> operation at a time nature of TCG.
> 
> My guess is that, beyond the one explicit use in the tlb, we could only make
> use of shifted operations if TCG grew some sort of peephole optimizer so that
> we can look across single operations.  And I don't ever see that happening.
> 
> Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
> bloats the backend.


Unless: one decides NOT to use the alias definitions in the manual:

   LSR d, n, shift => UBFM d, n, shift, 63
   LSL d, n, shift => UBFM d, n, 64 - shift, 63 - shift
   ASR d, n, shift => SBFM d, n, shift, 63
   ROR d, n, shift => EXTR d, n, n, shift

and instead use

   LSR d, n, shift => ORR d, xzr, n, lsr #shift
   LSL d, n, shift => ORR d, xzr, n, lsl #shift
   ASR d, n, shift => ORR d, xzr, n, asr #shift
   ROR d, n, shift => ORR d, xzr, n, ror #shift

It's not implausible that using the same (base) insn for all of these could
result in more code sharing and therefore smaller code size within the tcg backend.


r~


PS: All rather academic, but considering that AArch64 ORR (shifted register) is
the preferred implementation for MOV, and AArch32 MOV (shifted register) is the
preferred implementation for shift immediates, I'm actually surprised that:
there is no AArch64 MOV (shifted register) alias for AArch32 compatibility, and
that ORR isn't the preferred implementation of shift immediates for AArch64.
Claudio Fontana - Sept. 17, 2013, 8:23 a.m.
On 16.09.2013 17:32, Richard Henderson wrote:
> On 09/16/2013 01:41 AM, Claudio Fontana wrote:
>> On 14.09.2013 23:54, Richard Henderson wrote:
>>> Now that we've converted opcode fields to pre-shifted insns, we
>>> can merge the implementation of arithmetic and shift insns.
>>>
>>> Simplify the left/right shift parameter to just the left shift
>>> needed by tcg_out_tlb_read.
>>>
>>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>>> ---
>>>  tcg/aarch64/tcg-target.c | 78 +++++++++++++++++++++++-------------------------
>>>  1 file changed, 38 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
>>> index cc56fe5..0e7b67b 100644
>>> --- a/tcg/aarch64/tcg-target.c
>>> +++ b/tcg/aarch64/tcg-target.c
>>> @@ -302,6 +302,30 @@ static inline uint32_t tcg_in32(TCGContext *s)
>>>      return v;
>>>  }
>>>  
>>> +/*
>>> + * Encode various formats.
>>> + */
>>> +
>>> +/* This function can be used for both Arithmetic and Logical (shifted register)
>>> +   type insns.  Since we don't actually use the other available shifts, we only
>>> +   support LSL here.  */
>>> +static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
>>> +                                    TCGType sf, TCGReg rd, TCGReg rn,
>>> +                                    TCGReg rm, int imm6)
>>> +{
>>> +    /* Note that LSL is bits {23,22} = 0.  */
>>> +    tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
>>> +}
>>> +
>>> +/* This function can be used for most insns with 2 input registers and one
>>> +   output register.  This includes Arithmetic (shifted register, sans shift),
>>> +   Logical, Shift, Multiply, Divide, and Bit operation.  */
>>> +static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
>>> +                                TCGReg rd, TCGReg rn, TCGReg rm)
>>> +{
>>> +    tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
>>> +}
>>> +
>>
>> The name of the function should reflect the fact that we are actually emitting instructions,
>> not only formatting them. Also I despise mixed case in functions.
>> So theoretically, tcg_out_rdnm.
> 
> Ok.
> 
>> I guess the question would be, are all instructions formatted exactly that
>> way arithmetic and logical shifted register instructions of some sort?
> 
> The function comment lists the insn groups from the manual to which the
> function may be applied: Arithmetic, Logical, Shift, Multiply, Divide, Bit
> operation.
> 
>> If so, I'd go with tcg_out_arith or similar. If not, we can say tcg_out_rdnm.
> 
> rdnm it is then.

Ok.

> 
>> The previous implementation made it impossible to pass wrong opcodes to the
>> function, since the opcode for the arith was a separate type.
> 
> No, this isn't C++.  Enumeration checks like that don't happen for C.

I know, I did not express it well. What I meant is that it is impossible to misunderstand what is supposed to be passed to the function.
Not that it is impossible to willingly do so. 

> 
>> It made it obvious to the reader in which cases the function can be used.
>> We would lose this with this change here (combined with the INSN change).
> 
> Perhaps, perhaps not.
> 
> It would have been handy if ARM had officially assigned identifiers to the
> formats, like Power, S390, and ia64 do.  Then one can build in the format ids
> into both the function and enumeration names and use the preprocessor for
> typechecking (c.f. the tcg_out_insn macro in  tcg/s390/tcg-target.c).

No need to do force explicit typechecking like that.
That kind of use of the preprocessor really hurts.
The only thing that needs to be addressed is to ensure that the programmer calling the function can quickly know for sure which instructions are ok to pass and which not.

Maybe we can categorize the instructions by order of appearance in the enum, coupled with an appropriate prefix.
For example INSN_RDNM_ADD, INSN_RDNM_SUB, ...

> 
> But without those format ids being official, inventing a set of format names
> may be more confusing than not.  I'm not sure.
> 
>> Also we lose the ability to do right-shifted arithmetic operations, which I
>> feel we should provide for completeness and to reduce the pain for the
>> programmer who will eventually need them.
> 
> Nor do we provide ASR or ROR shifts; should we provide those too?

No, not yet.

> Please think
> about what situations in which those would be useful.  Also think about the one
> operation at a time nature of TCG.
> 
> My guess is that, beyond the one explicit use in the tlb, we could only make
> use of shifted operations if TCG grew some sort of peephole optimizer so that
> we can look across single operations.  And I don't ever see that happening.
> 
> Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
> bloats the backend.

I agree, lets just keep only left shift and right shift.
Distinguishing the two costs one comparison per call, I think we can survive it.

C.
Richard Henderson - Sept. 17, 2013, 2:54 p.m.
On 09/17/2013 01:23 AM, Claudio Fontana wrote:
>> It would have been handy if ARM had officially assigned identifiers to the
>> formats, like Power, S390, and ia64 do.  Then one can build in the format ids
>> into both the function and enumeration names and use the preprocessor for
>> typechecking (c.f. the tcg_out_insn macro in  tcg/s390/tcg-target.c).
> 
> No need to do force explicit typechecking like that.
> That kind of use of the preprocessor really hurts.

Why do you believe this?  Have you browsed through the s390 backend?
I think it's a remarkably clean solution -- one we ought to have used
in the ia64 backend, which has even more format codes.


>> Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
>> bloats the backend.
> 
> I agree, lets just keep only left shift and right shift.
> Distinguishing the two costs one comparison per call, I think we can survive it.

I don't understand you at all.

You agree that removing the unused shift from cmp is sensible.
You agree that not adding unused asr/ror shifts is sensible.
But you insist that the unused lsr shift should be retained?

You complain about wasting Y space in my positioning of comments.
But you insist on wasting X space, and allowing the possibility of
mismatch, by requiring format names to be duplicated?


r~
Claudio Fontana - Sept. 18, 2013, 8:24 a.m.
On 17.09.2013 16:54, Richard Henderson wrote:
> On 09/17/2013 01:23 AM, Claudio Fontana wrote:
>>> It would have been handy if ARM had officially assigned identifiers to the
>>> formats, like Power, S390, and ia64 do.  Then one can build in the format ids
>>> into both the function and enumeration names and use the preprocessor for
>>> typechecking (c.f. the tcg_out_insn macro in  tcg/s390/tcg-target.c).
>>
>> No need to do force explicit typechecking like that.
>> That kind of use of the preprocessor really hurts.
> 
> Why do you believe this?  Have you browsed through the s390 backend?
> I think it's a remarkably clean solution -- one we ought to have used
> in the ia64 backend, which has even more format codes.

Why do I believe this? Because my experience tells me to let this kind of stuff go,
in order to allow developers that are not familiar with the code base yet to trace their way through the calls.
It keeps the code base discoverable, by hand and by tools.

>>> Therefore I think adding LSR, ASR and ROR shifts is both a waste of time and
>>> bloats the backend.
>>
>> I agree, lets just keep only left shift and right shift.
>> Distinguishing the two costs one comparison per call, I think we can survive it.
> 
> I don't understand you at all.
> 
> You agree that removing the unused shift from cmp is sensible.

Yes. The additional parameter to cmp (0) can be confusing, therefore it seems more sensible to remove it until there is actual demonstrable use for the shifted version of cmp.

> You agree that not adding unused asr/ror shifts is sensible.

Yep.

> But you insist that the unused lsr shift should be retained?

Yes. It's a balance thing, it's not black and white.
In this case the drawback of keeping the right shift is negligible.
No additional parameter is needed, the existing code just looks at the sign of the immediate to decide the direction to shift.
It's one comparison only, and I find that the tradeoff is acceptable.

If you _really_ want to strip the right shift functionality away for some reason, could you state it?
Is it for that one comparison? Or for consistency with something else?

> 
> You complain about wasting Y space in my positioning of comments.
> But you insist on wasting X space,

They are different resources. Y space gives us context and is a more scarce and precious resource than X space.
Of course it's a tradeoff.
We don't want the whole code on one line, we have 80 char limits and we do one thing per line.

> and allowing the possibility of
> mismatch, by requiring format names to be duplicated?

I don't understand this one.
C.
Richard Henderson - Sept. 18, 2013, 2:54 p.m.
On 09/18/2013 01:24 AM, Claudio Fontana wrote:
> If you _really_ want to strip the right shift functionality away for some reason, could you state it?

(1) It will never never ever be used.

(2) I do not find + for LSR, - for LSL intuitive at all.
    If we're to have them at all, better to pass that
    separately from the shift count, an enumeration that
    exactly maps to the "shift" field in the insn.

(3) It will never never ever be used.


r~
Claudio Fontana - Sept. 18, 2013, 3:01 p.m.
On 18.09.2013 16:54, Richard Henderson wrote:
> On 09/18/2013 01:24 AM, Claudio Fontana wrote:
>> If you _really_ want to strip the right shift functionality away for some reason, could you state it?
> 
> (1) It will never never ever be used.
> 
> (2) I do not find + for LSR, - for LSL intuitive at all.
>     If we're to have them at all, better to pass that
>     separately from the shift count, an enumeration that
>     exactly maps to the "shift" field in the insn.
> 
> (3) It will never never ever be used.

You seem to feel strongly about it, and I don't, so lets indeed drop it then.

C.

Patch

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index cc56fe5..0e7b67b 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -302,6 +302,30 @@  static inline uint32_t tcg_in32(TCGContext *s)
     return v;
 }
 
+/*
+ * Encode various formats.
+ */
+
+/* This function can be used for both Arithmetic and Logical (shifted register)
+   type insns.  Since we don't actually use the other available shifts, we only
+   support LSL here.  */
+static inline void tcg_fmt_Rdnm_lsl(TCGContext *s, AArch64Insn insn,
+                                    TCGType sf, TCGReg rd, TCGReg rn,
+                                    TCGReg rm, int imm6)
+{
+    /* Note that LSL is bits {23,22} = 0.  */
+    tcg_out32(s, insn | sf << 31 | imm6 << 10 | rm << 16 | rn << 5 | rd);
+}
+
+/* This function can be used for most insns with 2 input registers and one
+   output register.  This includes Arithmetic (shifted register, sans shift),
+   Logical, Shift, Multiply, Divide, and Bit operation.  */
+static inline void tcg_fmt_Rdnm(TCGContext *s, AArch64Insn insn, TCGType sf,
+                                TCGReg rd, TCGReg rn, TCGReg rm)
+{
+    tcg_out32(s, insn | sf << 31 | rm << 16 | rn << 5 | rd);
+}
+
 static inline void tcg_out_ldst_9(TCGContext *s,
                                   enum aarch64_ldst_op_data op_data,
                                   enum aarch64_ldst_op_type op_type,
@@ -445,23 +469,6 @@  static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                  arg, arg1, arg2);
 }
 
-static inline void tcg_out_arith(TCGContext *s, AArch64Insn insn,
-                                 TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
-                                 int shift_imm)
-{
-    /* Using shifted register arithmetic operations */
-    /* if extended register operation (64bit) just OR with 0x80 << 24 */
-    unsigned int shift, base = insn | (ext ? 0x80000000 : 0);
-    if (shift_imm == 0) {
-        shift = 0;
-    } else if (shift_imm > 0) {
-        shift = shift_imm << 10 | 1 << 22;
-    } else /* (shift_imm < 0) */ {
-        shift = (-shift_imm) << 10;
-    }
-    tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
-}
-
 static inline void tcg_out_mul(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, TCGReg rm)
 {
@@ -470,15 +477,6 @@  static inline void tcg_out_mul(TCGContext *s, TCGType ext,
     tcg_out32(s, base | rm << 16 | rn << 5 | rd);
 }
 
-static inline void tcg_out_shiftrot_reg(TCGContext *s,
-                                        AArch64Insn insn, TCGType ext,
-                                        TCGReg rd, TCGReg rn, TCGReg rm)
-{
-    /* using 2-source data processing instructions 0x1ac02000 */
-    unsigned int base = insn | (ext ? 0x80000000 : 0);
-    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
-}
-
 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                 TCGReg rn, unsigned int a, unsigned int b)
 {
@@ -546,7 +544,7 @@  static inline void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn,
                                TCGReg rm)
 {
     /* Using CMP alias SUBS wzr, Wn, Wm */
-    tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
+    tcg_fmt_Rdnm(s, INSN_SUBS, ext, TCG_REG_XZR, rn, rm);
 }
 
 static inline void tcg_out_cset(TCGContext *s, TCGType ext,
@@ -906,8 +904,8 @@  static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
     tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
     /* Merge the tlb index contribution into X2.
        X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
-    tcg_out_arith(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
-                  TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
+    tcg_fmt_Rdnm_lsl(s, INSN_ADD, 1, TCG_REG_X2, TCG_REG_X2,
+                     TCG_REG_X0, CPU_TLB_ENTRY_BITS);
     /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
        X0 = load [X2 + (tlb_offset & 0x000fff)] */
     tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
@@ -1183,27 +1181,27 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_add_i64:
     case INDEX_op_add_i32:
-        tcg_out_arith(s, INSN_ADD, ext, a0, a1, a2, 0);
+        tcg_fmt_Rdnm(s, INSN_ADD, ext, a0, a1, a2);
         break;
 
     case INDEX_op_sub_i64:
     case INDEX_op_sub_i32:
-        tcg_out_arith(s, INSN_SUB, ext, a0, a1, a2, 0);
+        tcg_fmt_Rdnm(s, INSN_SUB, ext, a0, a1, a2);
         break;
 
     case INDEX_op_and_i64:
     case INDEX_op_and_i32:
-        tcg_out_arith(s, INSN_AND, ext, a0, a1, a2, 0);
+        tcg_fmt_Rdnm(s, INSN_AND, ext, a0, a1, a2);
         break;
 
     case INDEX_op_or_i64:
     case INDEX_op_or_i32:
-        tcg_out_arith(s, INSN_ORR, ext, a0, a1, a2, 0);
+        tcg_fmt_Rdnm(s, INSN_ORR, ext, a0, a1, a2);
         break;
 
     case INDEX_op_xor_i64:
     case INDEX_op_xor_i32:
-        tcg_out_arith(s, INSN_EOR, ext, a0, a1, a2, 0);
+        tcg_fmt_Rdnm(s, INSN_EOR, ext, a0, a1, a2);
         break;
 
     case INDEX_op_mul_i64:
@@ -1216,7 +1214,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (c2) {
             tcg_out_shl(s, ext, a0, a1, a2);
         } else {
-            tcg_out_shiftrot_reg(s, INSN_LSLV, ext, a0, a1, a2);
+            tcg_fmt_Rdnm(s, INSN_LSLV, ext, a0, a1, a2);
         }
         break;
 
@@ -1225,7 +1223,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (c2) {
             tcg_out_shr(s, ext, a0, a1, a2);
         } else {
-            tcg_out_shiftrot_reg(s, INSN_LSRV, ext, a0, a1, a2);
+            tcg_fmt_Rdnm(s, INSN_LSRV, ext, a0, a1, a2);
         }
         break;
 
@@ -1234,7 +1232,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (c2) {
             tcg_out_sar(s, ext, a0, a1, a2);
         } else {
-            tcg_out_shiftrot_reg(s, INSN_ASRV, ext, a0, a1, a2);
+            tcg_fmt_Rdnm(s, INSN_ASRV, ext, a0, a1, a2);
         }
         break;
 
@@ -1243,7 +1241,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (c2) {
             tcg_out_rotr(s, ext, a0, a1, a2);
         } else {
-            tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, a2);
+            tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, a2);
         }
         break;
 
@@ -1252,8 +1250,8 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (c2) {
             tcg_out_rotl(s, ext, a0, a1, a2);
         } else {
-            tcg_out_arith(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
-            tcg_out_shiftrot_reg(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
+            tcg_fmt_Rdnm(s, INSN_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
+            tcg_fmt_Rdnm(s, INSN_RORV, ext, a0, a1, TCG_REG_TMP);
         }
         break;