Patchwork target-arm: Implement VMULL.P8

login
register
mail settings
Submitter Peter Maydell
Date Feb. 10, 2011, 7:07 p.m.
Message ID <1297364875-12741-1-git-send-email-peter.maydell@linaro.org>
Download mbox | patch
Permalink /patch/82657/
State New
Headers show

Comments

Peter Maydell - Feb. 10, 2011, 7:07 p.m.
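Implement VMULL.P8, the widening polynomial multiply instruction. The
helper takes two 32-bit operands, each holding four 8-bit polynomials,
and returns a 64-bit value holding the four 16-bit carry-less products
(i.e. the 32x32->64 form of the polynomial multiply).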

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helpers.h     |    1 +
 target-arm/neon_helper.c |   30 ++++++++++++++++++++++++++++++
 target-arm/translate.c   |    6 ++++--
 3 files changed, 35 insertions(+), 2 deletions(-)
Aurelien Jarno - Feb. 20, 2011, 4:29 p.m.
On Thu, Feb 10, 2011 at 07:07:55PM +0000, Peter Maydell wrote:
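> Implement VMULL.P8, the widening polynomial multiply instruction. The
> helper takes two 32-bit operands, each holding four 8-bit polynomials,
> and returns a 64-bit value holding the four 16-bit carry-less products
> (i.e. the 32x32->64 form of the polynomial multiply).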
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  target-arm/helpers.h     |    1 +
>  target-arm/neon_helper.c |   30 ++++++++++++++++++++++++++++++
>  target-arm/translate.c   |    6 ++++--
>  3 files changed, 35 insertions(+), 2 deletions(-)

Thanks, applied.

> diff --git a/target-arm/helpers.h b/target-arm/helpers.h
> index 4d0de00..0d37abe 100644
> --- a/target-arm/helpers.h
> +++ b/target-arm/helpers.h
> @@ -275,6 +275,7 @@ DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
>  DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
>  DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
>  DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
> +DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
>  
>  DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
>  DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
> diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
> index 61890dd..b59ad38 100644
> --- a/target-arm/neon_helper.c
> +++ b/target-arm/neon_helper.c
> @@ -895,6 +895,36 @@ uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
>      return result;
>  }
>  
> +uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
> +{
> +    uint64_t result = 0;
> +    uint64_t mask;
> +    uint64_t op2ex = op2;
> +    op2ex = (op2ex & 0xff) |
> +        ((op2ex & 0xff00) << 8) |
> +        ((op2ex & 0xff0000) << 16) |
> +        ((op2ex & 0xff000000) << 24);
> +    while (op1) {
> +        mask = 0;
> +        if (op1 & 1) {
> +            mask |= 0xffff;
> +        }
> +        if (op1 & (1 << 8)) {
> +            mask |= (0xffffU << 16);
> +        }
> +        if (op1 & (1 << 16)) {
> +            mask |= (0xffffULL << 32);
> +        }
> +        if (op1 & (1 << 24)) {
> +            mask |= (0xffffULL << 48);
> +        }
> +        result ^= op2ex & mask;
> +        op1 = (op1 >> 1) & 0x7f7f7f7f;
> +        op2ex <<= 1;
> +    }
> +    return result;
> +}
> +
>  #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
>  NEON_VOP(tst_u8, neon_u8, 4)
>  NEON_VOP(tst_u16, neon_u16, 2)
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 3087a5d..f640a50 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -5124,8 +5124,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                          gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
>                          break;
>                      case 14: /* Polynomial VMULL */
> -                        cpu_abort(env, "Polynomial VMULL not implemented");
> -
> +                        gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
> +                        dead_tmp(tmp2);
> +                        dead_tmp(tmp);
> +                        break;
>                      default: /* 15 is RESERVED.  */
>                          return 1;
>                      }
> -- 
> 1.7.1
> 
> 
>

Patch

diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index 4d0de00..0d37abe 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -275,6 +275,7 @@  DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
 DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
+DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
 
 DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
 DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 61890dd..b59ad38 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -895,6 +895,36 @@  uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
     return result;
 }
 
+uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
+{
+    uint64_t result = 0;
+    uint64_t mask;
+    uint64_t op2ex = op2;
+    op2ex = (op2ex & 0xff) |
+        ((op2ex & 0xff00) << 8) |
+        ((op2ex & 0xff0000) << 16) |
+        ((op2ex & 0xff000000) << 24);
+    while (op1) {
+        mask = 0;
+        if (op1 & 1) {
+            mask |= 0xffff;
+        }
+        if (op1 & (1 << 8)) {
+            mask |= (0xffffU << 16);
+        }
+        if (op1 & (1 << 16)) {
+            mask |= (0xffffULL << 32);
+        }
+        if (op1 & (1 << 24)) {
+            mask |= (0xffffULL << 48);
+        }
+        result ^= op2ex & mask;
+        op1 = (op1 >> 1) & 0x7f7f7f7f;
+        op2ex <<= 1;
+    }
+    return result;
+}
+
 #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
 NEON_VOP(tst_u8, neon_u8, 4)
 NEON_VOP(tst_u16, neon_u16, 2)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 3087a5d..f640a50 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5124,8 +5124,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
                         break;
                     case 14: /* Polynomial VMULL */
-                        cpu_abort(env, "Polynomial VMULL not implemented");
-
+                        gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
+                        dead_tmp(tmp2);
+                        dead_tmp(tmp);
+                        break;
                     default: /* 15 is RESERVED.  */
                         return 1;
                     }