@@ -92,8 +92,8 @@ ENTRY (strcmp)
/* (2 mod 4) alignment for loop instruction */
.Lunaligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 /* set up for the maximum loop count */
- loop a8, .Lretdiff /* loop forever (almost anyway) */
+ movi a11, 0 /* set up for the maximum loop count */
+ loop a11, .Lretdiff /* loop forever (almost anyway) */
#endif
.Lnextbyte:
l8ui a8, a2, 0
@@ -131,11 +131,10 @@ ENTRY (strcmp)
#if XCHAL_HAVE_LOOPS
.Laligned:
.begin no-transform
+ movi a11, 0
l32r a4, .Lmask0 /* mask for byte 0 */
l32r a7, .Lmask4
- /* Loop forever. (a4 is more than than the maximum number
- of iterations) */
- loop a4, .Laligned_done
+ loop a11, .Laligned_done /* Loop forever. */
/* First unrolled loop body. */
l32i a8, a2, 0 /* get word from s1 */
@@ -156,8 +155,6 @@ ENTRY (strcmp)
addi a2, a2, 8 /* advance s1 pointer */
addi a3, a3, 8 /* advance s2 pointer */
.Laligned_done:
- or a1, a1, a1 /* nop */
-
.Lprobeq2:
/* Adjust pointers to account for the loop unrolling. */
addi a2, a2, 4
@@ -198,7 +195,7 @@ ENTRY (strcmp)
#if XCHAL_HAVE_LOOPS
/* align (1 mod 4) */
- loop a4, .Leq /* loop forever (a4 is bigger than max iters) */
+ loop a11, .Leq /* loop forever */
.end no-transform
l32i a8, a2, 0 /* get word from s1 */
Loops with a 'loop forever' annotation inside strcmp are actually meant to
loop forever.

Falling through the end of the first loop may result in equal strings
being compared unequal, e.g.:

#include <string.h>

int main(void)
{
	char a[4096] __attribute__((aligned(4)));
	char b[4096] __attribute__((aligned(4)));

	memset(a, ' ', 258 * 8);
	memset(b, ' ', 258 * 8);
	a[255 * 8] = 0;
	a[256 * 8] = 'a';
	b[255 * 8] = 0;
	b[256 * 8] = 'b';
	return !(strcmp(a, b) == 0);
}

Falling through the end of the second loop may result in unequal strings
being compared as equal, e.g.:

#include <string.h>

int main(void)
{
	char a[4096] __attribute__((aligned(4)));
	char b[4096] __attribute__((aligned(4)));

	memset(a, ' ', 514 * 4);
	memset(b, ' ', 514 * 4);
	a[514 * 4 + 0] = 'a';
	a[514 * 4 + 1] = 0;
	b[514 * 4 + 0] = 'b';
	b[514 * 4 + 1] = 0;
	return !(strcmp(a, b) != 0);
}

Use 0 as a loop counter to make 2^32 - 1 iterations, which is enough to
cover all addressable memory. While at it, drop the useless nop at the end
of the first loop and use a11 for all loop counters.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
libc/string/xtensa/strcmp.S | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)