@@ -161,12 +161,7 @@ ENTRY(memcmp)
and tmp1, src1, #0x7
orr tmp3, xzr, #0x8
- and tmp2, src2, #0x7
- sub tmp1, tmp3, tmp1
- sub tmp2, tmp3, tmp2
- cmp tmp1, tmp2
- /* Choose the maximum. */
- csel pos, tmp1, tmp2, hi
+ sub pos, tmp3, tmp1
/* Increment SRC pointers by POS so one of the SRC pointers is
word-aligned. */
add src1, src1, pos
The performance also looks about the same when aligning to src2 instead
of src1.
That is, with this extra patch applied on top:
@@ -159,7 +159,7 @@ ENTRY(memcmp)
/* Sources are not aligned align one of the sources find max offset
from aligned boundary. */
- and tmp1, src1, #0x7
+ and tmp1, src2, #0x7
orr tmp3, xzr, #0x8
sub pos, tmp3, tmp1