diff mbox

[v2,3/5] target-arm: convert sar, shl and shr helpers to TCG

Message ID 1348256016-5077-4-git-send-email-aurelien@aurel32.net
State New
Headers show

Commit Message

Aurelien Jarno Sept. 21, 2012, 7:33 p.m. UTC
Now that the movcond TCG op is available, it's possible to replace
shl and shr helpers by TCG code. The code generated by TCG is slightly
longer than the code generated by GCC for the helper but is still worth
it as this avoid all the consequences of using an helper: globals saved
back to memory, no possible optimization, call overhead, etc.

Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-arm/helper.h    |    3 ---
 target-arm/op_helper.c |   24 ----------------------
 target-arm/translate.c |   52 ++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 33 deletions(-)

Comments

Richard Henderson Sept. 21, 2012, 11:14 p.m. UTC | #1
On 09/21/2012 12:33 PM, Aurelien Jarno wrote:
> +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> +{
> +    TCGv tmp1, tmp2, tmp3;
> +    tmp1 = tcg_temp_new_i32();
> +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> +    tmp2 = tcg_const_i32(0x1f);
> +    tmp3 = tcg_const_i32(0);
> +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> +    tcg_temp_free_i32(tmp3);
> +    tcg_temp_free_i32(tmp2);
> +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> +    tcg_gen_sar_i32(dest, t0, tmp1);
> +    tcg_temp_free_i32(tmp1);
> +}

tmp3 is unused in this version.


r~
Aurelien Jarno Sept. 22, 2012, 9:33 a.m. UTC | #2
On Fri, Sep 21, 2012 at 04:14:29PM -0700, Richard Henderson wrote:
> On 09/21/2012 12:33 PM, Aurelien Jarno wrote:
> > +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> > +{
> > +    TCGv tmp1, tmp2, tmp3;
> > +    tmp1 = tcg_temp_new_i32();
> > +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> > +    tmp2 = tcg_const_i32(0x1f);
> > +    tmp3 = tcg_const_i32(0);
> > +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> > +    tcg_temp_free_i32(tmp3);
> > +    tcg_temp_free_i32(tmp2);
> > +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> > +    tcg_gen_sar_i32(dest, t0, tmp1);
> > +    tcg_temp_free_i32(tmp1);
> > +}
> 
> tmp3 is unused in this version.
> 

Good catch, fix locally.
Peter Maydell Sept. 25, 2012, 1:57 p.m. UTC | #3
On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> +{
> +    TCGv tmp1, tmp2, tmp3;
> +    tmp1 = tcg_temp_new_i32();
> +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> +    tmp2 = tcg_const_i32(0x1f);
> +    tmp3 = tcg_const_i32(0);
> +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> +    tcg_temp_free_i32(tmp3);
> +    tcg_temp_free_i32(tmp2);
> +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> +    tcg_gen_sar_i32(dest, t0, tmp1);
> +    tcg_temp_free_i32(tmp1);
> +}

I don't think you need the "tcg_gen_andi_i32(tmp1, tmp1, 0x1f);"
for sar, do you? The movcond is doing "shift = (shift > 31) ? 31 : shift"
so we know it's going to be in [0..31] and the and will do nothing,
right?

thanks
-- PMM
Aurelien Jarno Sept. 25, 2012, 10:41 p.m. UTC | #4
On Tue, Sep 25, 2012 at 02:57:20PM +0100, Peter Maydell wrote:
> On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> > +{
> > +    TCGv tmp1, tmp2, tmp3;
> > +    tmp1 = tcg_temp_new_i32();
> > +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> > +    tmp2 = tcg_const_i32(0x1f);
> > +    tmp3 = tcg_const_i32(0);
> > +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> > +    tcg_temp_free_i32(tmp3);
> > +    tcg_temp_free_i32(tmp2);
> > +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> > +    tcg_gen_sar_i32(dest, t0, tmp1);
> > +    tcg_temp_free_i32(tmp1);
> > +}
> 
> I don't think you need the "tcg_gen_andi_i32(tmp1, tmp1, 0x1f);"
> for sar, do you? The movcond is doing "shift = (shift > 31) ? 31 : shift"
> so we know it's going to be in [0..31] and the and will do nothing,
> right?
> 

Indeed, that's a leftover from the previous version based on setcond. 
It should be removed.
diff mbox

Patch

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 7151e28..794e2b1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -145,9 +145,6 @@  DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 DEF_HELPER_3(adc_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
-DEF_HELPER_3(shl, i32, env, i32, i32)
-DEF_HELPER_3(shr, i32, env, i32, i32)
-DEF_HELPER_3(sar, i32, env, i32, i32)
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
 DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6095f24..aef592a 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -355,30 +355,6 @@  uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 
 /* Similarly for variable shift instructions.  */
 
-uint32_t HELPER(shl)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return x << shift;
-}
-
-uint32_t HELPER(shr)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return (uint32_t)x >> shift;
-}
-
-uint32_t HELPER(sar)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        shift = 31;
-    return (int32_t)x >> shift;
-}
-
 uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 19bb1e8..9abc115 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -440,6 +440,40 @@  static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_mov_i32(dest, cpu_NF);
 }
 
+#define GEN_SHIFT(name)                                               \
+static void gen_##name(TCGv dest, TCGv t0, TCGv t1)                   \
+{                                                                     \
+    TCGv tmp1, tmp2, tmp3;                                            \
+    tmp1 = tcg_temp_new_i32();                                        \
+    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
+    tmp2 = tcg_const_i32(0);                                          \
+    tmp3 = tcg_const_i32(0x1f);                                       \
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
+    tcg_temp_free_i32(tmp3);                                          \
+    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
+    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
+    tcg_temp_free_i32(tmp2);                                          \
+    tcg_temp_free_i32(tmp1);                                          \
+}
+GEN_SHIFT(shl)
+GEN_SHIFT(shr)
+#undef GEN_SHIFT
+
+static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp1, tmp2, tmp3;
+    tmp1 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(tmp1, t1, 0xff);
+    tmp2 = tcg_const_i32(0x1f);
+    tmp3 = tcg_const_i32(0);
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
+    tcg_temp_free_i32(tmp3);
+    tcg_temp_free_i32(tmp2);
+    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
+    tcg_gen_sar_i32(dest, t0, tmp1);
+    tcg_temp_free_i32(tmp1);
+}
+
 /* FIXME:  Implement this natively.  */
 #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
 
@@ -516,9 +550,15 @@  static inline void gen_arm_shift_reg(TCGv var, int shiftop,
         }
     } else {
         switch (shiftop) {
-        case 0: gen_helper_shl(var, cpu_env, var, shift); break;
-        case 1: gen_helper_shr(var, cpu_env, var, shift); break;
-        case 2: gen_helper_sar(var, cpu_env, var, shift); break;
+        case 0:
+            gen_shl(var, var, shift);
+            break;
+        case 1:
+            gen_shr(var, var, shift);
+            break;
+        case 2:
+            gen_sar(var, var, shift);
+            break;
         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
                 tcg_gen_rotr_i32(var, var, shift); break;
         }
@@ -9161,7 +9201,7 @@  static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x2: /* lsl */
             if (s->condexec_mask) {
-                gen_helper_shl(tmp2, cpu_env, tmp2, tmp);
+                gen_shl(tmp2, tmp2, tmp);
             } else {
                 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
@@ -9169,7 +9209,7 @@  static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x3: /* lsr */
             if (s->condexec_mask) {
-                gen_helper_shr(tmp2, cpu_env, tmp2, tmp);
+                gen_shr(tmp2, tmp2, tmp);
             } else {
                 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
@@ -9177,7 +9217,7 @@  static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x4: /* asr */
             if (s->condexec_mask) {
-                gen_helper_sar(tmp2, cpu_env, tmp2, tmp);
+                gen_sar(tmp2, tmp2, tmp);
             } else {
                 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);