diff mbox series

powerpc: Fix IFUNC for memrchr

Message ID 1507200204-23449-1-git-send-email-raji@linux.vnet.ibm.com
State New
Headers show
Series powerpc: Fix IFUNC for memrchr | expand

Commit Message

Rajalakshmi Srinivasaraghavan Oct. 5, 2017, 10:43 a.m. UTC
Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in
ifunc list.  Also handled discarding unwanted bytes for
unaligned inputs in power8 optimization.

2017-10-05  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert
	back to powerpc32 file.
	* sysdeps/powerpc/powerpc64/multiarch/memrchr.c
	(memrchr): Add __memrchr_power8 to ifunc list.
	* sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask
	extra bytes for unaligned inputs.
---
 .../powerpc/powerpc64/multiarch/memrchr-ppc64.c    | 15 +-------------
 sysdeps/powerpc/powerpc64/multiarch/memrchr.c      |  3 +++
 sysdeps/powerpc/powerpc64/power8/memrchr.S         | 24 ++++++++++++++++++++++
 3 files changed, 28 insertions(+), 14 deletions(-)

Comments

Adhemerval Zanella Oct. 5, 2017, 1:48 p.m. UTC | #1
On 05/10/2017 07:43, Rajalakshmi Srinivasaraghavan wrote:
> Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in
> ifunc list.  Also handled discarding unwanted bytes for
> unaligned inputs in power8 optimization.
> 
> 2017-10-05  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
> 
> 	* sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert
> 	back to powerpc32 file.
> 	* sysdeps/powerpc/powerpc64/multiarch/memrchr.c
> 	(memrchr): Add __memrchr_power8 to ifunc list.
> 	* sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask
> 	extra bytes for unaligned inputs.
> ---
>  .../powerpc/powerpc64/multiarch/memrchr-ppc64.c    | 15 +-------------
>  sysdeps/powerpc/powerpc64/multiarch/memrchr.c      |  3 +++
>  sysdeps/powerpc/powerpc64/power8/memrchr.S         | 24 ++++++++++++++++++++++
>  3 files changed, 28 insertions(+), 14 deletions(-)
> 
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
> index be24689336..e2d53ac0f4 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
> @@ -15,17 +15,4 @@
>     You should have received a copy of the GNU Lesser General Public
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
> -
> -#include <string.h>
> -
> -#define MEMRCHR  __memrchr_ppc
> -
> -#undef weak_alias
> -#define weak_alias(a, b)
> -
> -#undef libc_hidden_builtin_def
> -#define libc_hidden_builtin_def(name)
> -
> -extern __typeof (memrchr) __memrchr_ppc attribute_hidden;
> -
> -#include <string/memrchr.c>
> +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c>
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
> index fb09fdf89c..441598ace5 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
> @@ -23,10 +23,13 @@
>  
>  extern __typeof (__memrchr) __memrchr_ppc attribute_hidden;
>  extern __typeof (__memrchr) __memrchr_power7 attribute_hidden;
> +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden;
>  
>  /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
>     ifunc symbol properly.  */
>  libc_ifunc (__memrchr,
> +	    (hwcap2 & PPC_FEATURE2_ARCH_2_07)
> +	    ? __memrchr_power8 :
>  	    (hwcap & PPC_FEATURE_HAS_VSX)
>              ? __memrchr_power7
>              : __memrchr_ppc);

I think indentation is a little off here, compared to other ifunc implementations.
Something like.

    (hwcap2 & PPC_FEATURE2_ARCH_2_07)
    ? __memrchr_power8 :
      (hwcap & PPC_FEATURE_HAS_VSX)
      ? __memrchr_power7
    : __memrchr_ppc);

> diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
> index 521b3c84a2..22b01ec69c 100644
> --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
> +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
> @@ -233,11 +233,35 @@ L(found):
>  #endif
>  	addi	r8, r8, 63
>  	sub	r3, r8, r6	/* Compute final address.  */
> +	cmpld	cr7, r3, r10
> +	bgelr	cr7
> +	li	r3, 0
>  	blr
>  
>  	/* Found a match in last 16 bytes.  */
>  	.align	4
>  L(found_16B):
> +	cmpld	r8, r10		/* Are we on the last QW?  */
> +	bge	L(last)
> +	/* Now discard bytes before starting address.  */
> +	sub	r9, r10, r8
> +	MTVRD(v9, r9)
> +	vspltisb	v8, 3
> +	/* Mask unwanted bytes.  */
> +#ifdef __LITTLE_ENDIAN__
> +	lvsr	v7, 0, r10
> +	vperm   v6, v0, v6, v7
> +	vsldoi	v9, v0, v9, 8
> +	vsl	v9, v9, v8
> +	vslo	v6, v6, v9
> +#else
> +	lvsl	v7, 0, r10
> +	vperm   v6, v6, v0, v7
> +	vsldoi	v9, v0, v9, 8
> +	vsl	v9, v9, v8
> +	vsro	v6, v6, v9
> +#endif
> +L(last):
>  	/* Permute the first bit of each byte into bits 48-63.  */
>  	VBPERMQ(v6, v6, v10)
>  	/* Shift each component into its correct position for merging.  */

Isn't already covered by a testcase? If not, could you add one to test-memrchr?
Rajalakshmi Srinivasaraghavan Oct. 5, 2017, 1:50 p.m. UTC | #2
On 10/05/2017 07:18 PM, Adhemerval Zanella wrote:
> 
> 
> On 05/10/2017 07:43, Rajalakshmi Srinivasaraghavan wrote:
>> Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in
>> ifunc list.  Also handled discarding unwanted bytes for
>> unaligned inputs in power8 optimization.
>>
>> 2017-10-05  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
>>
>> 	* sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert
>> 	back to powerpc32 file.
>> 	* sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>> 	(memrchr): Add __memrchr_power8 to ifunc list.
>> 	* sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask
>> 	extra bytes for unaligned inputs.
>> ---
>>   .../powerpc/powerpc64/multiarch/memrchr-ppc64.c    | 15 +-------------
>>   sysdeps/powerpc/powerpc64/multiarch/memrchr.c      |  3 +++
>>   sysdeps/powerpc/powerpc64/power8/memrchr.S         | 24 ++++++++++++++++++++++
>>   3 files changed, 28 insertions(+), 14 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
>> index be24689336..e2d53ac0f4 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
>> @@ -15,17 +15,4 @@
>>      You should have received a copy of the GNU Lesser General Public
>>      License along with the GNU C Library; if not, see
>>      <http://www.gnu.org/licenses/>.  */
>> -
>> -#include <string.h>
>> -
>> -#define MEMRCHR  __memrchr_ppc
>> -
>> -#undef weak_alias
>> -#define weak_alias(a, b)
>> -
>> -#undef libc_hidden_builtin_def
>> -#define libc_hidden_builtin_def(name)
>> -
>> -extern __typeof (memrchr) __memrchr_ppc attribute_hidden;
>> -
>> -#include <string/memrchr.c>
>> +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>> index fb09fdf89c..441598ace5 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>> @@ -23,10 +23,13 @@
>>   
>>   extern __typeof (__memrchr) __memrchr_ppc attribute_hidden;
>>   extern __typeof (__memrchr) __memrchr_power7 attribute_hidden;
>> +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden;
>>   
>>   /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
>>      ifunc symbol properly.  */
>>   libc_ifunc (__memrchr,
>> +	    (hwcap2 & PPC_FEATURE2_ARCH_2_07)
>> +	    ? __memrchr_power8 :
>>   	    (hwcap & PPC_FEATURE_HAS_VSX)
>>               ? __memrchr_power7
>>               : __memrchr_ppc);
> 
> I think indentation is a little off here, compared to other ifunc implementations.
> Something like.
> 
>      (hwcap2 & PPC_FEATURE2_ARCH_2_07)
>      ? __memrchr_power8 :
>        (hwcap & PPC_FEATURE_HAS_VSX)
>        ? __memrchr_power7
>      : __memrchr_ppc);

Ack

> 
>> diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
>> index 521b3c84a2..22b01ec69c 100644
>> --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
>> +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
>> @@ -233,11 +233,35 @@ L(found):
>>   #endif
>>   	addi	r8, r8, 63
>>   	sub	r3, r8, r6	/* Compute final address.  */
>> +	cmpld	cr7, r3, r10
>> +	bgelr	cr7
>> +	li	r3, 0
>>   	blr
>>   
>>   	/* Found a match in last 16 bytes.  */
>>   	.align	4
>>   L(found_16B):
>> +	cmpld	r8, r10		/* Are we on the last QW?  */
>> +	bge	L(last)
>> +	/* Now discard bytes before starting address.  */
>> +	sub	r9, r10, r8
>> +	MTVRD(v9, r9)
>> +	vspltisb	v8, 3
>> +	/* Mask unwanted bytes.  */
>> +#ifdef __LITTLE_ENDIAN__
>> +	lvsr	v7, 0, r10
>> +	vperm   v6, v0, v6, v7
>> +	vsldoi	v9, v0, v9, 8
>> +	vsl	v9, v9, v8
>> +	vslo	v6, v6, v9
>> +#else
>> +	lvsl	v7, 0, r10
>> +	vperm   v6, v6, v0, v7
>> +	vsldoi	v9, v0, v9, 8
>> +	vsl	v9, v9, v8
>> +	vsro	v6, v6, v9
>> +#endif
>> +L(last):
>>   	/* Permute the first bit of each byte into bits 48-63.  */
>>   	VBPERMQ(v6, v6, v10)
>>   	/* Shift each component into its correct position for merging.  */
> 
> Isn't already covered by a testcase? If not, could you add one to test-memrchr?

Already covered in string/tester.c.

> 
>
Adhemerval Zanella Oct. 5, 2017, 3:10 p.m. UTC | #3
> Il giorno 05 ott 2017, alle ore 10:50, Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> ha scritto:
> 
> 
> 
>> On 10/05/2017 07:18 PM, Adhemerval Zanella wrote:
>>> On 05/10/2017 07:43, Rajalakshmi Srinivasaraghavan wrote:
>>> Recent commit 59ba2d2b5421 missed to add __memrchr_power8 in
>>> ifunc list.  Also handled discarding unwanted bytes for
>>> unaligned inputs in power8 optimization.
>>> 
>>> 2017-10-05  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
>>> 
>>>    * sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c: Revert
>>>    back to powerpc32 file.
>>>    * sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>>>    (memrchr): Add __memrchr_power8 to ifunc list.
>>>    * sysdeps/powerpc/powerpc64/power8/memrchr.S: Mask
>>>    extra bytes for unaligned inputs.
>>> ---
>>>  .../powerpc/powerpc64/multiarch/memrchr-ppc64.c    | 15 +-------------
>>>  sysdeps/powerpc/powerpc64/multiarch/memrchr.c      |  3 +++
>>>  sysdeps/powerpc/powerpc64/power8/memrchr.S         | 24 ++++++++++++++++++++++
>>>  3 files changed, 28 insertions(+), 14 deletions(-)
>>> 
>>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
>>> index be24689336..e2d53ac0f4 100644
>>> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
>>> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
>>> @@ -15,17 +15,4 @@
>>>     You should have received a copy of the GNU Lesser General Public
>>>     License along with the GNU C Library; if not, see
>>>     <http://www.gnu.org/licenses/>.  */
>>> -
>>> -#include <string.h>
>>> -
>>> -#define MEMRCHR  __memrchr_ppc
>>> -
>>> -#undef weak_alias
>>> -#define weak_alias(a, b)
>>> -
>>> -#undef libc_hidden_builtin_def
>>> -#define libc_hidden_builtin_def(name)
>>> -
>>> -extern __typeof (memrchr) __memrchr_ppc attribute_hidden;
>>> -
>>> -#include <string/memrchr.c>
>>> +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c>
>>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>>> index fb09fdf89c..441598ace5 100644
>>> --- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>>> +++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
>>> @@ -23,10 +23,13 @@
>>>    extern __typeof (__memrchr) __memrchr_ppc attribute_hidden;
>>>  extern __typeof (__memrchr) __memrchr_power7 attribute_hidden;
>>> +extern __typeof (__memrchr) __memrchr_power8 attribute_hidden;
>>>    /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
>>>     ifunc symbol properly.  */
>>>  libc_ifunc (__memrchr,
>>> +        (hwcap2 & PPC_FEATURE2_ARCH_2_07)
>>> +        ? __memrchr_power8 :
>>>          (hwcap & PPC_FEATURE_HAS_VSX)
>>>              ? __memrchr_power7
>>>              : __memrchr_ppc);
>> I think indentation is a little off here, compared to other ifunc implementations.
>> Something like.
>>     (hwcap2 & PPC_FEATURE2_ARCH_2_07)
>>     ? __memrchr_power8 :
>>       (hwcap & PPC_FEATURE_HAS_VSX)
>>       ? __memrchr_power7
>>     : __memrchr_ppc);
> 
> Ack
> 
>>> diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
>>> index 521b3c84a2..22b01ec69c 100644
>>> --- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
>>> +++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
>>> @@ -233,11 +233,35 @@ L(found):
>>>  #endif
>>>      addi    r8, r8, 63
>>>      sub    r3, r8, r6    /* Compute final address.  */
>>> +    cmpld    cr7, r3, r10
>>> +    bgelr    cr7
>>> +    li    r3, 0
>>>      blr
>>>        /* Found a match in last 16 bytes.  */
>>>      .align    4
>>>  L(found_16B):
>>> +    cmpld    r8, r10        /* Are we on the last QW?  */
>>> +    bge    L(last)
>>> +    /* Now discard bytes before starting address.  */
>>> +    sub    r9, r10, r8
>>> +    MTVRD(v9, r9)
>>> +    vspltisb    v8, 3
>>> +    /* Mask unwanted bytes.  */
>>> +#ifdef __LITTLE_ENDIAN__
>>> +    lvsr    v7, 0, r10
>>> +    vperm   v6, v0, v6, v7
>>> +    vsldoi    v9, v0, v9, 8
>>> +    vsl    v9, v9, v8
>>> +    vslo    v6, v6, v9
>>> +#else
>>> +    lvsl    v7, 0, r10
>>> +    vperm   v6, v6, v0, v7
>>> +    vsldoi    v9, v0, v9, 8
>>> +    vsl    v9, v9, v8
>>> +    vsro    v6, v6, v9
>>> +#endif
>>> +L(last):
>>>      /* Permute the first bit of each byte into bits 48-63.  */
>>>      VBPERMQ(v6, v6, v10)
>>>      /* Shift each component into its correct position for merging.  */
>> Isn't already covered by a testcase? If not, could you add one to test-memrchr?
> 
> Already covered in string/tester.c.

LGTM then.

> -- 
> Thanks
> Rajalakshmi S
>
Joseph Myers Oct. 6, 2017, 3:01 p.m. UTC | #4
The commit of this patch appears to be missing an update to the ChangeLog 
file.
Rajalakshmi Srinivasaraghavan Oct. 6, 2017, 4:14 p.m. UTC | #5
On 10/06/2017 08:31 PM, Joseph Myers wrote:
> The commit of this patch appears to be missing an update to the ChangeLog
> file.
> 

Thanks for the note. Done.
diff mbox series

Patch

diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
index be24689336..e2d53ac0f4 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c
@@ -15,17 +15,4 @@ 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-
-#define MEMRCHR  __memrchr_ppc
-
-#undef weak_alias
-#define weak_alias(a, b)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-extern __typeof (memrchr) __memrchr_ppc attribute_hidden;
-
-#include <string/memrchr.c>
+#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
index fb09fdf89c..441598ace5 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memrchr.c
@@ -23,10 +23,13 @@ 
 
 extern __typeof (__memrchr) __memrchr_ppc attribute_hidden;
 extern __typeof (__memrchr) __memrchr_power7 attribute_hidden;
+extern __typeof (__memrchr) __memrchr_power8 attribute_hidden;
 
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
 libc_ifunc (__memrchr,
+	    (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+	    ? __memrchr_power8 :
 	    (hwcap & PPC_FEATURE_HAS_VSX)
             ? __memrchr_power7
             : __memrchr_ppc);
diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
index 521b3c84a2..22b01ec69c 100644
--- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
@@ -233,11 +233,35 @@  L(found):
 #endif
 	addi	r8, r8, 63
 	sub	r3, r8, r6	/* Compute final address.  */
+	cmpld	cr7, r3, r10
+	bgelr	cr7
+	li	r3, 0
 	blr
 
 	/* Found a match in last 16 bytes.  */
 	.align	4
 L(found_16B):
+	cmpld	r8, r10		/* Are we on the last QW?  */
+	bge	L(last)
+	/* Now discard bytes before starting address.  */
+	sub	r9, r10, r8
+	MTVRD(v9, r9)
+	vspltisb	v8, 3
+	/* Mask unwanted bytes.  */
+#ifdef __LITTLE_ENDIAN__
+	lvsr	v7, 0, r10
+	vperm   v6, v0, v6, v7
+	vsldoi	v9, v0, v9, 8
+	vsl	v9, v9, v8
+	vslo	v6, v6, v9
+#else
+	lvsl	v7, 0, r10
+	vperm   v6, v6, v0, v7
+	vsldoi	v9, v0, v9, 8
+	vsl	v9, v9, v8
+	vsro	v6, v6, v9
+#endif
+L(last):
 	/* Permute the first bit of each byte into bits 48-63.  */
 	VBPERMQ(v6, v6, v10)
 	/* Shift each component into its correct position for merging.  */