diff mbox

[RFC,v0,5/6] target-ppc: add vsrv instruction

Message ID 1469561218-3067-6-git-send-email-nikunj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Nikunj A Dadhania July 26, 2016, 7:26 p.m. UTC
From: Vivek Andrew Sha <vivekandrewsha@gmail.com>

Adds Vector Shift Right Variable instruction.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h     |  1 +
 target-ppc/int_helper.c | 17 +++++++++++++++++
 target-ppc/translate.c  |  2 ++
 3 files changed, 20 insertions(+)

Comments

David Gibson July 27, 2016, 6:05 a.m. UTC | #1
On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote:
> From: Vivek Andrew Sha <vivekandrewsha@gmail.com>
> 
> Adds Vector Shift Right Variable instruction.
> 
> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/helper.h     |  1 +
>  target-ppc/int_helper.c | 17 +++++++++++++++++
>  target-ppc/translate.c  |  2 ++
>  3 files changed, 20 insertions(+)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 9703f85..8eada2f 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr)
>  DEF_HELPER_3(vsld, void, avr, avr, avr)
>  DEF_HELPER_3(vslo, void, avr, avr, avr)
>  DEF_HELPER_3(vsro, void, avr, avr, avr)
> +DEF_HELPER_3(vsrv, void, avr, avr, avr)
>  DEF_HELPER_3(vslv, void, avr, avr, avr)
>  DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
>  DEF_HELPER_3(vsubcuw, void, avr, avr, avr)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 412398f..f4776f0 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
>      }
>  }
>  
> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> +{
> +    int i;
> +    unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1];
> +
> +    src[0] = 0;
> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> +        src[i + 1] = a->u8[i];
> +    }
> +
> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> +        shift = b->u8[i] & 0x7;               /* extract shift value */
> +        bytes = (src[i] << 8) + src[i + 1];     /* extract adjacent bytes */

I think you should be able to construct bytes on the fly without
pre-generating teh whole of src, as you already did for vslv.

> +        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
> +    }
> +}
> +
>  void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
>  {
>      int sh = shift & 0xf;
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 473f21a..3382cd0 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -7457,6 +7457,7 @@ GEN_VXFORM(vsraw, 2, 14);
>  GEN_VXFORM(vsrad, 2, 15);
>  GEN_VXFORM(vslo, 6, 16);
>  GEN_VXFORM(vsro, 6, 17);
> +GEN_VXFORM(vsrv, 2, 28);
>  GEN_VXFORM(vslv, 2, 29);
>  GEN_VXFORM(vaddcuw, 0, 6);
>  GEN_VXFORM(vsubcuw, 0, 22);
> @@ -10943,6 +10944,7 @@ GEN_VXFORM(vsraw, 2, 14),
>  GEN_VXFORM_207(vsrad, 2, 15),
>  GEN_VXFORM(vslo, 6, 16),
>  GEN_VXFORM(vsro, 6, 17),
> +GEN_VXFORM(vsrv, 2, 28),
>  GEN_VXFORM(vslv, 2, 29),
>  GEN_VXFORM(vaddcuw, 0, 6),
>  GEN_VXFORM(vsubcuw, 0, 22),
Nikunj A Dadhania July 27, 2016, 6:31 a.m. UTC | #2
David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote:
>> From: Vivek Andrew Sha <vivekandrewsha@gmail.com>
>> 
>> Adds Vector Shift Right Variable instruction.
>> 
>> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  target-ppc/helper.h     |  1 +
>>  target-ppc/int_helper.c | 17 +++++++++++++++++
>>  target-ppc/translate.c  |  2 ++
>>  3 files changed, 20 insertions(+)
>> 
>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index 9703f85..8eada2f 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr)
>>  DEF_HELPER_3(vsld, void, avr, avr, avr)
>>  DEF_HELPER_3(vslo, void, avr, avr, avr)
>>  DEF_HELPER_3(vsro, void, avr, avr, avr)
>> +DEF_HELPER_3(vsrv, void, avr, avr, avr)
>>  DEF_HELPER_3(vslv, void, avr, avr, avr)
>>  DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
>>  DEF_HELPER_3(vsubcuw, void, avr, avr, avr)
>> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> index 412398f..f4776f0 100644
>> --- a/target-ppc/int_helper.c
>> +++ b/target-ppc/int_helper.c
>> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
>>      }
>>  }
>>  
>> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
>> +{
>> +    int i;
>> +    unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1];
>> +
>> +    src[0] = 0;
>> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
>> +        src[i + 1] = a->u8[i];
>> +    }
>> +
>> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
>> +        shift = b->u8[i] & 0x7;               /* extract shift value */
>> +        bytes = (src[i] << 8) + src[i + 1];     /* extract adjacent bytes */
>
> I think you should be able to construct bytes on the fly without
> pre-generating teh whole of src, as you already did for vslv.

Had done that, but that introduces a bug like this, for eg:

    vslv vra,vra,vrb

So modified vra->u8[i] is used during subsequent operation as input.

Assuming I take care or special casing "0":

bytes = ((vra->u8[i - 1]  << 8) | (vra->u8[i])) 
vra->u8[i] = (bytes >> shift) & 0xFF;

when i = 1, bytes will ((vra->u8[0]  << 8) | (vra->u8[1])). But vra->u8[0],
was changed in the previous operation.

Thats the reason src[] is needed

Regards
Nikunj
David Gibson July 27, 2016, 6:48 a.m. UTC | #3
On Wed, Jul 27, 2016 at 12:01:33PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote:
> >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com>
> >> 
> >> Adds Vector Shift Right Variable instruction.
> >> 
> >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> ---
> >>  target-ppc/helper.h     |  1 +
> >>  target-ppc/int_helper.c | 17 +++++++++++++++++
> >>  target-ppc/translate.c  |  2 ++
> >>  3 files changed, 20 insertions(+)
> >> 
> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> >> index 9703f85..8eada2f 100644
> >> --- a/target-ppc/helper.h
> >> +++ b/target-ppc/helper.h
> >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr)
> >>  DEF_HELPER_3(vsld, void, avr, avr, avr)
> >>  DEF_HELPER_3(vslo, void, avr, avr, avr)
> >>  DEF_HELPER_3(vsro, void, avr, avr, avr)
> >> +DEF_HELPER_3(vsrv, void, avr, avr, avr)
> >>  DEF_HELPER_3(vslv, void, avr, avr, avr)
> >>  DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
> >>  DEF_HELPER_3(vsubcuw, void, avr, avr, avr)
> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> >> index 412398f..f4776f0 100644
> >> --- a/target-ppc/int_helper.c
> >> +++ b/target-ppc/int_helper.c
> >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> >>      }
> >>  }
> >>  
> >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> >> +{
> >> +    int i;
> >> +    unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1];
> >> +
> >> +    src[0] = 0;
> >> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> >> +        src[i + 1] = a->u8[i];
> >> +    }
> >> +
> >> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> >> +        shift = b->u8[i] & 0x7;               /* extract shift value */
> >> +        bytes = (src[i] << 8) + src[i + 1];     /* extract adjacent bytes */
> >
> > I think you should be able to construct bytes on the fly without
> > pre-generating teh whole of src, as you already did for vslv.
> 
> Had done that, but that introduces a bug like this, for eg:
> 
>     vslv vra,vra,vrb
> 
> So modified vra->u8[i] is used during subsequent operation as input.
> 
> Assuming I take care or special casing "0":
> 
> bytes = ((vra->u8[i - 1]  << 8) | (vra->u8[i])) 
> vra->u8[i] = (bytes >> shift) & 0xFF;
> 
> when i = 1, bytes will ((vra->u8[0]  << 8) | (vra->u8[1])). But vra->u8[0],
> was changed in the previous operation.

Ah, good point.

> Thats the reason src[] is needed

It's probably possible to avoid generating all of src by instead
generating the bytes one loop iteration ahead, but that sounds fiddly,
so the current approach is fine for now.
Nikunj A Dadhania July 27, 2016, 6:57 a.m. UTC | #4
David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Wed, Jul 27, 2016 at 12:01:33PM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > [ Unknown signature status ]
>> > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote:
>> >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com>
>> >> 
>> >> Adds Vector Shift Right Variable instruction.
>> >> 
>> >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
>> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> >> ---
>> >>  target-ppc/helper.h     |  1 +
>> >>  target-ppc/int_helper.c | 17 +++++++++++++++++
>> >>  target-ppc/translate.c  |  2 ++
>> >>  3 files changed, 20 insertions(+)
>> >> 
>> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> >> index 9703f85..8eada2f 100644
>> >> --- a/target-ppc/helper.h
>> >> +++ b/target-ppc/helper.h
>> >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr)
>> >>  DEF_HELPER_3(vsld, void, avr, avr, avr)
>> >>  DEF_HELPER_3(vslo, void, avr, avr, avr)
>> >>  DEF_HELPER_3(vsro, void, avr, avr, avr)
>> >> +DEF_HELPER_3(vsrv, void, avr, avr, avr)
>> >>  DEF_HELPER_3(vslv, void, avr, avr, avr)
>> >>  DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
>> >>  DEF_HELPER_3(vsubcuw, void, avr, avr, avr)
>> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> >> index 412398f..f4776f0 100644
>> >> --- a/target-ppc/int_helper.c
>> >> +++ b/target-ppc/int_helper.c
>> >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
>> >>      }
>> >>  }
>> >>  
>> >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
>> >> +{
>> >> +    int i;
>> >> +    unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1];
>> >> +
>> >> +    src[0] = 0;
>> >> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
>> >> +        src[i + 1] = a->u8[i];
>> >> +    }
>> >> +
>> >> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
>> >> +        shift = b->u8[i] & 0x7;               /* extract shift value */
>> >> +        bytes = (src[i] << 8) + src[i + 1];     /* extract adjacent bytes */
>> >
>> > I think you should be able to construct bytes on the fly without
>> > pre-generating teh whole of src, as you already did for vslv.
>> 
>> Had done that, but that introduces a bug like this, for eg:
>> 
>>     vslv vra,vra,vrb
>> 
>> So modified vra->u8[i] is used during subsequent operation as input.
>> 
>> Assuming I take care or special casing "0":
>> 
>> bytes = ((vra->u8[i - 1]  << 8) | (vra->u8[i])) 
>> vra->u8[i] = (bytes >> shift) & 0xFF;
>> 
>> when i = 1, bytes will ((vra->u8[0]  << 8) | (vra->u8[1])). But vra->u8[0],
>> was changed in the previous operation.
>
> Ah, good point.
>
>> Thats the reason src[] is needed
>
> It's probably possible to avoid generating all of src by instead
> generating the bytes one loop iteration ahead, but that sounds fiddly,
> so the current approach is fine for now.

Or do the operation in the reverse: starting from size to 0. Let me try
that.

Regards
Nikunj
David Gibson July 27, 2016, 7:02 a.m. UTC | #5
On Wed, Jul 27, 2016 at 12:27:27PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Wed, Jul 27, 2016 at 12:01:33PM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > [ Unknown signature status ]
> >> > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote:
> >> >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com>
> >> >> 
> >> >> Adds Vector Shift Right Variable instruction.
> >> >> 
> >> >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
> >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> >> ---
> >> >>  target-ppc/helper.h     |  1 +
> >> >>  target-ppc/int_helper.c | 17 +++++++++++++++++
> >> >>  target-ppc/translate.c  |  2 ++
> >> >>  3 files changed, 20 insertions(+)
> >> >> 
> >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> >> >> index 9703f85..8eada2f 100644
> >> >> --- a/target-ppc/helper.h
> >> >> +++ b/target-ppc/helper.h
> >> >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr)
> >> >>  DEF_HELPER_3(vsld, void, avr, avr, avr)
> >> >>  DEF_HELPER_3(vslo, void, avr, avr, avr)
> >> >>  DEF_HELPER_3(vsro, void, avr, avr, avr)
> >> >> +DEF_HELPER_3(vsrv, void, avr, avr, avr)
> >> >>  DEF_HELPER_3(vslv, void, avr, avr, avr)
> >> >>  DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
> >> >>  DEF_HELPER_3(vsubcuw, void, avr, avr, avr)
> >> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> >> >> index 412398f..f4776f0 100644
> >> >> --- a/target-ppc/int_helper.c
> >> >> +++ b/target-ppc/int_helper.c
> >> >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> >> >>      }
> >> >>  }
> >> >>  
> >> >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> >> >> +{
> >> >> +    int i;
> >> >> +    unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1];
> >> >> +
> >> >> +    src[0] = 0;
> >> >> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> >> >> +        src[i + 1] = a->u8[i];
> >> >> +    }
> >> >> +
> >> >> +    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> >> >> +        shift = b->u8[i] & 0x7;               /* extract shift value */
> >> >> +        bytes = (src[i] << 8) + src[i + 1];     /* extract adjacent bytes */
> >> >
> >> > I think you should be able to construct bytes on the fly without
> >> > pre-generating teh whole of src, as you already did for vslv.
> >> 
> >> Had done that, but that introduces a bug like this, for eg:
> >> 
> >>     vslv vra,vra,vrb
> >> 
> >> So modified vra->u8[i] is used during subsequent operation as input.
> >> 
> >> Assuming I take care or special casing "0":
> >> 
> >> bytes = ((vra->u8[i - 1]  << 8) | (vra->u8[i])) 
> >> vra->u8[i] = (bytes >> shift) & 0xFF;
> >> 
> >> when i = 1, bytes will ((vra->u8[0]  << 8) | (vra->u8[1])). But vra->u8[0],
> >> was changed in the previous operation.
> >
> > Ah, good point.
> >
> >> Thats the reason src[] is needed
> >
> > It's probably possible to avoid generating all of src by instead
> > generating the bytes one loop iteration ahead, but that sounds fiddly,
> > so the current approach is fine for now.
> 
> Or do the operation in the reverse: starting from size to 0. Let me try
> that.

Good idea, that should work.
diff mbox

Patch

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 9703f85..8eada2f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -211,6 +211,7 @@  DEF_HELPER_3(vslw, void, avr, avr, avr)
 DEF_HELPER_3(vsld, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
+DEF_HELPER_3(vsrv, void, avr, avr, avr)
 DEF_HELPER_3(vslv, void, avr, avr, avr)
 DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
 DEF_HELPER_3(vsubcuw, void, avr, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 412398f..f4776f0 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1722,6 +1722,23 @@  void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
     }
 }
 
+void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+    unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1];
+
+    src[0] = 0;
+    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+        src[i + 1] = a->u8[i];
+    }
+
+    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+        shift = b->u8[i] & 0x7;               /* extract shift value */
+        bytes = (src[i] << 8) + src[i + 1];     /* extract adjacent bytes */
+        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
+    }
+}
+
 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
 {
     int sh = shift & 0xf;
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 473f21a..3382cd0 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7457,6 +7457,7 @@  GEN_VXFORM(vsraw, 2, 14);
 GEN_VXFORM(vsrad, 2, 15);
 GEN_VXFORM(vslo, 6, 16);
 GEN_VXFORM(vsro, 6, 17);
+GEN_VXFORM(vsrv, 2, 28);
 GEN_VXFORM(vslv, 2, 29);
 GEN_VXFORM(vaddcuw, 0, 6);
 GEN_VXFORM(vsubcuw, 0, 22);
@@ -10943,6 +10944,7 @@  GEN_VXFORM(vsraw, 2, 14),
 GEN_VXFORM_207(vsrad, 2, 15),
 GEN_VXFORM(vslo, 6, 16),
 GEN_VXFORM(vsro, 6, 17),
+GEN_VXFORM(vsrv, 2, 28),
 GEN_VXFORM(vslv, 2, 29),
 GEN_VXFORM(vaddcuw, 0, 6),
 GEN_VXFORM(vsubcuw, 0, 22),