[RFC] PowerPC - e6500 optimized memcpy function
diff mbox

Message ID BLUPR03MB1458BA6C211E76F11D2C2227C26B0@BLUPR03MB1458.namprd03.prod.outlook.com
State New
Headers show

Commit Message

rohitarulraj@freescale.com Aug. 31, 2015, 5:10 a.m. UTC
Hello All,

Please find below a patch providing an optimized implementation of 'memcpy' for the PowerPC e6500 (32-bit and 64-bit) targets, using AltiVec instructions.

2015-08-31  Rohit Arul Raj <rohitarulraj@freescale.com>

                * sysdeps/powerpc/powerpc32/e6500/memcpy.S: New File: optimized memcpy
                implementation using altivec instructions.
                * sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c
                (__libc_ifunc_impl_list): Add check for e6500 memcpy function.
                * sysdeps/powerpc/powerpc32/power4/multiarch/Makefile: Add
                memcpy-e6500 object.
                * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c: Add
                check for e6500 memcpy function.
                * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S: New File:
                multiarch e6500 memcpy.
                * sysdeps/powerpc/powerpc64/e6500/memcpy.S: New File: optimized memcpy
                implementation using altivec instructions.
                * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
                (__libc_ifunc_impl_list): Add check for e6500 memcpy function.
                * sysdeps/powerpc/powerpc64/multiarch/Makefile: Add
                memcpy-e6500 object.
                * sysdeps/powerpc/powerpc64/multiarch/memcpy.c: Add
                check for e6500 memcpy function.
                * sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S: New File:
                multiarch e6500 memcpy.


The patch was generated on top of the glibc v2.20 source base.
The patch was tested with DejaGnu and the glibc testsuite. There were no regressions.

The benchmark suite results (both 32-bit and 64-bit) are attached for your reference.

Please let me know your comments.

Regards,
Rohit
builtin_memcpy  simple_memcpy   __memcpy_e6500  __memcpy_ppc
Length    1, alignment  0/ 0:   37.8594 14.7344 24.875  29.6562
Length    1, alignment  0/ 0:   36.0312 14.6094 24.8906 29.5781
Length    1, alignment  0/ 0:   36.0312 14.625  24.6094 29.5781
Length    1, alignment  0/ 0:   35.8438 14.6094 24.6406 29.5938
Length    2, alignment  0/ 0:   35.9688 18.8125 25.6719 28.5781
Length    2, alignment  1/ 0:   35.7344 18.3438 25.6719 28.5781
Length    2, alignment  0/ 1:   35.8594 18.8125 25.5781 28.5781
Length    2, alignment  1/ 1:   35.7344 18.7188 25.6094 28.5781
Length    4, alignment  0/ 0:   35.75   25.8594 25.625  28.5938
Length    4, alignment  2/ 0:   35.75   25.1406 25.5938 28.5938
Length    4, alignment  0/ 2:   35.75   29.0625 25.625  28.5938
Length    4, alignment  2/ 2:   35.75   25.1094 25.625  28.5938
Length    8, alignment  0/ 0:   37.75   53.75   29.5469 26.6875
Length    8, alignment  3/ 0:   38.75   49.7188 29.5781 27.6094
Length    8, alignment  0/ 3:   37.75   37.8906 29.5781 27.6094
Length    8, alignment  3/ 3:   39.75   52.7812 29.625  29.6094
Length   16, alignment  0/ 0:   39.7656 73.5938 30.5625 37.0781
Length   16, alignment  4/ 0:   42.75   67.5938 33.0469 37.0781
Length   16, alignment  0/ 4:   41.75   67.5938 31.5938 41.0156
Length   16, alignment  4/ 4:   45.75   73.3438 35.6719 37.0625
Length   32, alignment  0/ 0:   46.7188 121.516 36.5781 38.5781
Length   32, alignment  5/ 0:   55.875  115.562 45.6406 49.0156
Length   32, alignment  0/ 5:   64.7812 115.609 54.9062 66.9375
Length   32, alignment  5/ 5:   52.7812 120.531 42.6719 49.5469
Length   64, alignment  0/ 0:   47.8281 217.547 37.5781 45.9688
Length   64, alignment  6/ 0:   59.7344 212.094 49.5781 65.375
Length   64, alignment  0/ 6:   83.8906 211.672 74.6875 78.7031
Length   64, alignment  6/ 6:   60.8906 217.188 50.6094 55.7656
Length  128, alignment  0/ 0:   55.75   409.578 45.7969 72.9844
Length  128, alignment  7/ 0:   75.7344 403.578 65.5781 97.6875
Length  128, alignment  0/ 7:   104.188 403.594 93.7031 111.953
Length  128, alignment  7/ 7:   70.8438 404.094 60.6719 73.0625
Length  256, alignment  0/ 0:   73.2656 793.594 63.2188 101.844
Length  256, alignment  8/ 0:   111.297 787.531 105.938 101.781
Length  256, alignment  0/ 8:   147.766 787.562 130.844 126.609
Length  256, alignment  8/ 8:   85.4688 793.797 75.2812 98.6094
Length  512, alignment  0/ 0:   112.875 1561.58 96.9219 168.672
Length  512, alignment  9/ 0:   178.156 1555.56 168.859 294.125
Length  512, alignment  0/ 9:   238.359 1555.73 231.562 307.656
Length  512, alignment  9/ 9:   139.656 1560.64 111.266 182.641
Length 1024, alignment  0/ 0:   182.984 3097.59 172.719 302.812
Length 1024, alignment 10/ 0:   306     3091.73 296.844 549.875
Length 1024, alignment  0/10:   401.906 3091.69 388.141 563.297
Length 1024, alignment 10/10:   210.938 3097.58 199.797 319.828
Length 2048, alignment  0/ 0:   322.859 6169.55 312.766 570.984
Length 2048, alignment 11/ 0:   561.922 6163.56 552.875 1061.86
Length 2048, alignment  0/11:   722.297 6163.56 708.297 1078.56
Length 2048, alignment 11/11:   363.188 6168.59 352.031 604.938
Length 4096, alignment  0/ 0:   602.953 12313.7 593.266 1106.95
Length 4096, alignment 12/ 0:   1074.69 12501.5 1065.22 2086.31
Length 4096, alignment  0/12:   1358.22 12307.7 1351.7  2098.3
Length 4096, alignment 12/12:   667.125 12313.6 655.625 1159.72
Length 8192, alignment  0/ 0:   1164.06 24602.5 1153.8  2179.3
Length 8192, alignment 13/ 0:   2098.98 24801.8 2089.8  4134.11
Length 8192, alignment  0/13:   2638.25 24596.2 2632.11 4146.66
Length 8192, alignment 13/13:   1274.2  24600.8 1262.89 2282.88
Length 16384, alignment  0/ 0:  2288.2  49196.8 2277.77 4346.44
Length 16384, alignment 14/ 0:  4154.19 49435   4148.66 8259.53
Length 16384, alignment  0/14:  5209.08 49193.1 5194.34 8267.16
Length 16384, alignment 14/14:  2495.05 49199   2482.92 4538.16
Length 32768, alignment  0/ 0:  5135.2  99129   5030.69 9331.12
Length 32768, alignment 15/ 0:  9067.25 99174.2 9337.7  17331.3
Length 32768, alignment  0/15:  10884.4 99096.1 10807.6 17231.7
Length 32768, alignment 15/15:  5396.95 99130.8 5302.39 9556.67
Length 65536, alignment  0/ 0:  18723.5 212345  17758.1 34053.6
Length 65536, alignment 16/ 0:  17515.3 212598  17739.8 32863.8
Length 65536, alignment  0/16:  21602.1 212394  21775.2 37975.7
Length 65536, alignment 16/16:  17513.1 212393  17489.5 33074.7
Length    0, alignment  0/ 0:   33.9219 13.1406 23.625  30.5625
Length    0, alignment  0/ 0:   33.7969 13.0469 23.5938 28.625
Length    0, alignment  0/ 0:   33.7656 13.0469 23.6719 30.5938
Length    0, alignment  0/ 0:   33.7656 13.0469 23.7031 30.5938
Length    1, alignment  0/ 0:   34.7344 15.0625 24.6094 29.5781
Length    1, alignment  1/ 0:   35.0156 15.0938 24.6094 29.5781
Length    1, alignment  0/ 1:   34.7344 14.9688 24.6562 29.5938
Length    1, alignment  1/ 1:   35.8594 14.8438 24.6094 29.5781
Length    2, alignment  0/ 0:   35.8438 19.3438 25.7812 28.5781
Length    2, alignment  2/ 0:   35.4688 17.8281 25.5781 28.5938
Length    2, alignment  0/ 2:   35.4844 18.2812 25.6719 28.5781
Length    2, alignment  2/ 2:   35.8281 18.8281 25.5781 28.5781
Length    3, alignment  0/ 0:   35.75   21.6562 25.5938 31.625
Length    3, alignment  3/ 0:   35.75   21.0938 25.5938 31.625
Length    3, alignment  0/ 3:   35.75   21.4219 25.5938 31.625
Length    3, alignment  3/ 3:   35.75   21.4219 25.5938 31.625
Length    4, alignment  0/ 0:   35.75   24.4219 25.5938 28.5938
Length    4, alignment  4/ 0:   35.875  24.4219 25.5938 28.5938
Length    4, alignment  0/ 4:   35.75   24.0938 25.5938 28.5938
Length    4, alignment  4/ 4:   35.75   25.625  25.625  28.5938
Length    5, alignment  0/ 0:   34.75   28.0156 24.6406 30.6094
Length    5, alignment  5/ 0:   38.75   27.9375 28.6094 29.0938
Length    5, alignment  0/ 5:   37.75   27.9531 27.6094 29.0938
Length    5, alignment  5/ 5:   41.75   27.9375 31.6094 30.75
Length    6, alignment  0/ 0:   34.75   31.0469 24.6094 30.6094
Length    6, alignment  6/ 0:   36.75   30.9531 26.6094 29.0938
Length    6, alignment  0/ 6:   36.75   30.9531 26.6094 29.0938
Length    6, alignment  6/ 6:   38.75   31.3125 28.6094 30.6094
Length    7, alignment  0/ 0:   34.75   33.8906 24.6094 32.6875
Length    7, alignment  7/ 0:   36.75   33.7969 26.6094 30.6094
Length    7, alignment  0/ 7:   36.75   33.8281 26.6094 30.6094
Length    7, alignment  7/ 7:   38.75   33.7969 28.6094 32.6094
Length    8, alignment  0/ 0:   39.75   53.8594 29.5625 26.5938
Length    8, alignment  8/ 0:   37.75   49.6406 29.5781 26.5938
Length    8, alignment  0/ 8:   37.75   49.6406 29.6094 26.5938
Length    8, alignment  8/ 8:   37.75   53.5781 29.6094 26.5938
Length    9, alignment  0/ 0:   38.7344 53.5938 30.6562 33.5469
Length    9, alignment  9/ 0:   40.8594 53.625  30.5938 38.5781
Length    9, alignment  0/ 9:   40      53.6094 30.5781 34.0469
Length    9, alignment  9/ 9:   41.8594 55.4688 31.5781 37.5625
Length   10, alignment  0/ 0:   40.9531 56.6562 30.7812 32.5781
Length   10, alignment 10/ 0:   40.8281 56.5    30.5781 39.5469
Length   10, alignment  0/10:   40.7344 56.5156 30.7188 32.5781
Length   10, alignment 10/10:   41.7344 58.5156 31.6406 39.5781
Length   11, alignment  0/ 0:   40.75   52.7344 30.6094 34.5781
Length   11, alignment 11/ 0:   40.75   52.7344 30.6094 36.5938
Length   11, alignment  0/11:   40.75   52.7344 30.5938 34.5938
Length   11, alignment 11/11:   41.75   52.7344 31.5938 38.5469
Length   12, alignment  0/ 0:   40.75   55.7344 30.5938 34.1875
Length   12, alignment 12/ 0:   40.75   55.5625 30.5938 34.0938
Length   12, alignment  0/12:   40.75   55.8438 30.5938 34.0938
Length   12, alignment 12/12:   41.75   55.5938 31.5938 35.5938
Length   13, alignment  0/ 0:   39.75   58.7656 29.6094 34.6719
Length   13, alignment 13/ 0:   43.8594 58.7344 33.7812 39.6562
Length   13, alignment  0/13:   45.75   58.5781 32.5938 36.7031
Length   13, alignment 13/13:   51.6562 58.5938 38.6094 39.6406
Length   14, alignment  0/ 0:   39.75   61.5938 29.6094 36.6562
Length   14, alignment 14/ 0:   41.8594 61.7344 31.7344 39.625
Length   14, alignment  0/14:   41.75   61.7344 31.7188 36.7031
Length   14, alignment 14/14:   45.75   61.7656 35.5938 40.5625
Length   15, alignment  0/ 0:   39.75   64.6875 29.5938 36.7031
Length   15, alignment 15/ 0:   41.75   64.7344 32.1719 38.5938
Length   15, alignment  0/15:   41.75   64.5625 31.6094 38.6719
Length   15, alignment 15/15:   45.75   64.5625 35.5938 38.625
Length   16, alignment  0/ 0:   39.7656 73.4688 30.5312 37.0625
Length   16, alignment 16/ 0:   39.7656 67.5625 30.9375 37.0625
Length   16, alignment  0/16:   39.7656 67.5625 30.5312 37.0625
Length   16, alignment 16/16:   39.7656 73.5    30.5625 37.0625
Length   17, alignment  0/ 0:   42.7656 70.5781 32.7812 36.5781
Length   17, alignment 17/ 0:   46.7656 70.5469 35.6875 42.6719
Length   17, alignment  0/17:   46.75   70.5625 36.6719 39.5781
Length   17, alignment 17/17:   51.75   70.5781 41.5625 42.5312
Length   18, alignment  0/ 0:   44.7656 74.0156 34.5938 37.0938
Length   18, alignment 18/ 0:   47.7656 73.5625 37.5938 45.6719
Length   18, alignment  0/18:   42.9062 73.5625 32.6094 40.0625
Length   18, alignment 18/18:   47.8906 73.5938 37.5781 42.5625
Length   19, alignment  0/ 0:   44.9062 76.5781 34.5781 39.5781
Length   19, alignment 19/ 0:   47.8906 76.5625 37.5625 44.0625
Length   19, alignment  0/19:   46.7656 76.5469 36.5156 39.5938
Length   19, alignment 19/19:   51.75   76.5781 41.5938 41.5781
Length   20, alignment  0/ 0:   45      79.8906 34.6562 35.5938
Length   20, alignment 20/ 0:   47.7656 79.5625 37.7031 35.5938
Length   20, alignment  0/20:   45.7656 79.5625 36.5312 35.6094
Length   20, alignment 20/20:   48.7656 79.5938 38.5938 35.6406
Length   21, alignment  0/ 0:   43.7969 82.5938 33.5938 39.6094
Length   21, alignment 21/ 0:   49.875  82.5625 39.5781 45.5781
Length   21, alignment  0/21:   47.9062 82.5781 39.0938 41.6094
Length   21, alignment 21/21:   50.8906 82.6094 40.7969 42.5781
Length   22, alignment  0/ 0:   43.7969 85.625  33.6875 39.6094
Length   22, alignment 22/ 0:   47.875  85.5625 37.6094 44.5938
Length   22, alignment  0/22:   47.7656 85.5625 37.5625 41.6406
Length   22, alignment 22/22:   50.8906 85.6094 40.6094 41.5938
Length   23, alignment  0/ 0:   43.7656 88.6875 33.5469 41.625
Length   23, alignment 23/ 0:   47.875  88.5625 37.5938 43.625
Length   23, alignment  0/23:   47.7656 88.5625 37.5625 43.5625
Length   23, alignment 23/23:   50.7656 88.5625 40.5781 43.625
Length   24, alignment  0/ 0:   44.8438 97.5    35.6562 40.1562
Length   24, alignment 24/ 0:   44.8438 91.8438 34.8906 40.0312
Length   24, alignment  0/24:   45.5156 91.6562 36.7656 40.0938
Length   24, alignment 24/24:   48.7812 97.5    38.7344 40.0312
Length   25, alignment  0/ 0:   44.8906 94.5781 35.1719 40.0469
Length   25, alignment 25/ 0:   50.75   94.5469 40.5781 47.6094
Length   25, alignment  0/25:   52.75   94.5469 42.5781 45.0781
Length   25, alignment 25/25:   57.875  94.5781 47.5625 45.5781
Length   26, alignment  0/ 0:   46.7656 97.6719 35.7188 38.6094
Length   26, alignment 26/ 0:   50.9062 97.5625 40.5781 50.5938
Length   26, alignment  0/26:   50.7656 97.5625 40.5625 43.5938
Length   26, alignment 26/26:   57.7656 97.5938 47.6094 45.6406
Length   27, alignment  0/ 0:   46.7656 100.234 36.5625 43.5938
Length   27, alignment 27/ 0:   50.75   100.547 39.7656 47.625
Length   27, alignment  0/27:   52.4219 100.547 42.8594 45.6406
Length   27, alignment 27/27:   56.75   100.578 46.5625 49.5938
Length   28, alignment  0/ 0:   46.7656 103.625 36.5781 39.625
Length   28, alignment 28/ 0:   50.7656 103.562 40.6875 39.625
Length   28, alignment  0/28:   51.9844 103.562 41.5781 39.6094
Length   28, alignment 28/28:   57.7656 103.594 48.1094 42.6562
Length   29, alignment  0/ 0:   45.7656 106.594 35.5781 43.5781
Length   29, alignment 29/ 0:   53.7656 106.594 43.5781 49.5938
Length   29, alignment  0/29:   53.9062 106.562 43.7344 47.5938
Length   29, alignment 29/29:   59.8906 106.594 49.7969 44.5938
Length   30, alignment  0/ 0:   45.7656 110.047 35.5938 43.5938
Length   30, alignment 30/ 0:   51.7656 109.562 41.6562 50.5938
Length   30, alignment  0/30:   53.7656 109.562 43.7344 47.5938
Length   30, alignment 30/30:   59.8906 109.594 49.7031 45.5938
Length   31, alignment  0/ 0:   45.7656 112.688 35.5938 45.5938
Length   31, alignment 31/ 0:   51.7656 113.047 41.0469 49.5938
Length   31, alignment  0/31:   53.8594 112.812 43.5938 49.5938
Length   31, alignment 31/31:   58.8594 112.812 48.5938 47.5938
Length   48, alignment  0/ 0:   49.7188 169.5   39.2812 41.5625
Length   48, alignment  3/ 0:   60.75   163.594 51.7812 57.7969
Length   48, alignment  0/ 3:   71.9062 180.625 62.6562 77.2344
Length   48, alignment  3/ 3:   58.8906 168.719 48.6719 53.6562
Length   80, alignment  0/ 0:   53.875  265.656 43.5625 48.7812
Length   80, alignment  5/ 0:   64.7656 259.672 54.5312 73.5156
Length   80, alignment  0/ 5:   72.8906 259.656 63.0312 92.3125
Length   80, alignment  5/ 5:   60      264.844 50.0156 63.0781
Length   96, alignment  0/ 0:   58.1562 313.75  47.7656 54.3281
Length   96, alignment  6/ 0:   74.8438 307.672 65.0938 81.5
Length   96, alignment  0/ 6:   83.8906 307.797 73.625  94.6094
Length   96, alignment  6/ 6:   64.7656 313.875 54.6875 64.875
Length  112, alignment  0/ 0:   60.7188 361.562 50.5469 57.0781
Length  112, alignment  7/ 0:   80.9062 355.562 70.8438 89.6094
Length  112, alignment  0/ 7:   91.8906 355.594 82      103.609
Length  112, alignment  7/ 7:   68.2031 355.781 57.5781 70.4219
Length  144, alignment  0/ 0:   63.1875 457.688 51.9375 65.0156
Length  144, alignment  9/ 0:   81.8906 451.672 71.5312 105.5
Length  144, alignment  0/ 9:   97.7344 451.656 87.6562 119.453
Length  144, alignment  9/ 9:   72.3906 456.625 61.9062 82.0469
Length  160, alignment  0/ 0:   66.3438 505.641 55.7656 80.6094
Length  160, alignment 10/ 0:   90.8125 499.672 80.5469 113.156
Length  160, alignment  0/10:   112.953 500.047 103.172 127.203
Length  160, alignment 10/10:   76.4688 505.672 65.8906 79.1094
Length  176, alignment  0/ 0:   70.1562 553.594 58.6875 83.7812
Length  176, alignment 11/ 0:   96.7656 547.625 88.1094 125.766
Length  176, alignment  0/11:   121.172 547.625 110.656 143.297
Length  176, alignment 11/11:   77.9375 552.672 67.5938 85.7031
Length  192, alignment  0/ 0:   64.3281 601.672 54.2969 82.8438
Length  192, alignment 12/ 0:   98.2031 595.641 88.9688 137.172
Length  192, alignment  0/12:   137.062 595.672 126.672 152.047
Length  192, alignment 12/12:   81.9531 601.594 72.2656 87.6875
Length  208, alignment  0/ 0:   70.625  649.547 59.7969 91.8281
Length  208, alignment 13/ 0:   106.141 643.625 95.5781 141.969
Length  208, alignment  0/13:   119.078 643.734 110.938 154.234
Length  208, alignment 13/13:   80.2812 648.594 70.4688 95.7188
Length  224, alignment  0/ 0:   81.7969 697.594 64.0469 93.75
Length  224, alignment 14/ 0:   115.953 691.562 97.1875 149.859
Length  224, alignment  0/14:   133.969 691.578 126.75  161.469
Length  224, alignment 14/14:   83.1719 697.578 73.4219 97.1719
Length  240, alignment  0/ 0:   79.2188 745.656 68.125  93.8594
Length  240, alignment 15/ 0:   122.141 739.625 111.734 158.062
Length  240, alignment  0/15:   141.969 739.75  134.734 169.453
Length  240, alignment 15/15:   86.2812 739.641 76.125  116.781
Length  272, alignment  0/ 0:   78.4688 841.766 82.2969 101.766
Length  272, alignment 17/ 0:   122.016 835.641 111.719 173.844
Length  272, alignment  0/17:   156.578 835.719 146.656 249.031
Length  272, alignment 17/17:   84.0938 840.641 74.4219 115.969
Length  288, alignment  0/ 0:   96.0781 889.594 85.6406 109.797
Length  288, alignment 18/ 0:   132.031 883.703 121.828 181.844
Length  288, alignment  0/18:   166.703 883.562 156.609 257.438
Length  288, alignment 18/18:   90.2188 889.578 80.2188 113.188
Length  304, alignment  0/ 0:   97.6562 937.641 76.6719 109.781
Length  304, alignment 19/ 0:   138.094 931.672 119.609 189.766
Length  304, alignment  0/19:   175.672 931.672 166.828 271.453
Length  304, alignment 19/19:   93.7031 936.609 83.0312 137.531
Length  320, alignment  0/ 0:   89.9688 985.625 85.5    117.641
Length  320, alignment 20/ 0:   130.047 979.75  120.844 197.75
Length  320, alignment  0/20:   190.688 979.672 179.766 281.234
Length  320, alignment 20/20:   96.0469 985.656 86.7031 119.844
Length  336, alignment  0/ 0:   95.0625 1033.59 83.7812 117.922
Length  336, alignment 21/ 0:   137.969 1027.81 127.609 205.766
Length  336, alignment  0/21:   185.688 1027.73 175.5   294.75
Length  336, alignment 21/21:   92.125  1032.48 82.0312 143.031
Length  352, alignment  0/ 0:   92.1875 1081.61 81.7344 125.828
Length  352, alignment 22/ 0:   148.094 1075.56 137.594 213.859
Length  352, alignment  0/22:   193.844 1075.73 183.641 305.344
Length  352, alignment 22/22:   97.3281 1081.69 87.0156 133.766
Length  368, alignment  0/ 0:   100.781 1129.59 95.5312 125.922
Length  368, alignment 23/ 0:   154.031 1123.53 136.438 221.953
Length  368, alignment  0/23:   204.828 1123.73 194.297 320.172
Length  368, alignment 23/23:   100.375 1123.67 90.0469 144.828
Length  384, alignment  0/ 0:   102.812 1177.61 94.9062 133.938
Length  384, alignment 24/ 0:   148.266 1171.56 138.656 133.719
Length  384, alignment  0/24:   232.812 1171.56 222.734 226.578
Length  384, alignment 24/24:   101.203 1177.64 90.9688 130.656
Length  400, alignment  0/ 0:   104.344 1225.59 93.8125 133.75
Length  400, alignment 25/ 0:   154.422 1219.67 143.844 237.953
Length  400, alignment  0/25:   218.75  1219.55 208.391 519.875
Length  400, alignment 25/25:   104.531 1224.81 94.25   155.734
Length  416, alignment  0/ 0:   108.109 1273.72 96.6562 141.969
Length  416, alignment 26/ 0:   164.141 1267.56 153.672 245.766
Length  416, alignment  0/26:   225.906 1267.72 215.688 320.016
Length  416, alignment 26/26:   108.156 1273.77 98.0156 153.672
Length  432, alignment  0/ 0:   105.125 1321.64 93.1719 145.297
Length  432, alignment 27/ 0:   170.078 1315.59 159.891 253.766
Length  432, alignment  0/27:   238.078 1315.73 227.438 332.719
Length  432, alignment 27/27:   110.125 1320.69 99.9375 162.688
Length  448, alignment  0/ 0:   105.016 1369.61 97.6875 150.016
Length  448, alignment 28/ 0:   162.266 1363.62 153     261.766
Length  448, alignment  0/28:   245.906 1363.73 235.359 335.375
Length  448, alignment 28/28:   114.422 1369.59 104.969 165.641
Length  464, alignment  0/ 0:   112.109 1417.59 102.578 156.703
Length  464, alignment 29/ 0:   170.141 1411.56 152.141 269.766
Length  464, alignment  0/29:   246.562 1411.84 236.484 352.781
Length  464, alignment 29/29:   124.844 1416.59 108.609 171.781
Length  480, alignment  0/ 0:   116     1465.61 105.781 160.719
Length  480, alignment 30/ 0:   180.016 1459.64 169.688 277.766
Length  480, alignment  0/30:   257.453 1459.73 243.766 362.078
Length  480, alignment 30/30:   120.906 1465.67 106.062 176.484
Length  496, alignment  0/ 0:   117.875 1513.59 107.609 164.672
Length  496, alignment 31/ 0:   186.578 1507.7  175.953 286
Length  496, alignment  0/31:   269.719 1507.56 255.719 368.203
Length  496, alignment 31/31:   130.75  1507.56 115.156 181.75
Length 4096, alignment  0/ 0:   605.203 12313.8 592.844 1107
builtin_memcpy  simple_memcpy   __memcpy_e6500  __memcpy_ppc
Length    1, alignment  0/ 0:   23.5312 12.5781 15.5938 21.625
Length    1, alignment  0/ 0:   19.5312 12.6094 15.5156 21.625
Length    1, alignment  0/ 0:   19.5312 12.5781 15.7188 21.8125
Length    1, alignment  0/ 0:   19.4531 12.5781 15.5156 21.7656
Length    2, alignment  0/ 0:   18.625  16.3906 16.5938 20.6094
Length    2, alignment  1/ 0:   18.625  16.3594 16.5938 23.0625
Length    2, alignment  0/ 1:   18.625  17.7969 16.9531 20.6094
Length    2, alignment  1/ 1:   18.625  16.0625 16.7344 20.6094
Length    4, alignment  0/ 0:   18.6406 22.4688 16.5938 22.5938
Length    4, alignment  2/ 0:   18.625  22.25   16.9531 22.8125
Length    4, alignment  0/ 2:   18.625  26.9219 16.5938 22.6406
Length    4, alignment  2/ 2:   18.625  22.1406 16.6094 20.6094
Length    8, alignment  0/ 0:   24.5625 33.9219 20.5781 20.5625
Length    8, alignment  3/ 0:   24.6094 34.375  21      23.5156
Length    8, alignment  0/ 3:   24.5781 35.2656 20.6094 23.5
Length    8, alignment  3/ 3:   24.6406 34.0312 20.5938 25.6094
Length   16, alignment  0/ 0:   31.9375 67.5781 24.6875 30.625
Length   16, alignment  4/ 0:   31.5781 67.5781 24.6875 30.5312
Length   16, alignment  0/ 4:   31      67.4688 28.5938 31.625
Length   16, alignment  4/ 4:   34.9844 67.5781 32.5938 29.5781
Length   32, alignment  0/ 0:   37.8438 115.469 33.625  40.75
Length   32, alignment  5/ 0:   42.75   115.828 39.7031 54.7188
Length   32, alignment  0/ 5:   59.9219 115.828 60.4844 60.5156
Length   32, alignment  5/ 5:   48.625  115.578 48.5781 48.1875
Length   64, alignment  0/ 0:   29.7344 211.469 31.4844 57.375
Length   64, alignment  6/ 0:   40.6719 211.469 41.5625 84.9062
Length   64, alignment  0/ 6:   99.6562 211.578 97.0156 93.1562
Length   64, alignment  6/ 6:   63.8125 211.578 60.7344 62.1562
Length  128, alignment  0/ 0:   37.8438 403.469 34.7188 94.5938
Length  128, alignment  7/ 0:   57.0469 403.578 53.6719 145.109
Length  128, alignment  0/ 7:   120.656 403.578 117.641 155.703
Length  128, alignment  7/ 7:   69.0938 403.578 65.5938 98.0781
Length  256, alignment  0/ 0:   54.6875 787.469 52.2656 161.5
Length  256, alignment  8/ 0:   96.9375 787.469 93.6406 161.969
Length  256, alignment  0/ 8:   160.734 787.984 158.438 278.672
Length  256, alignment  8/ 8:   84.0156 787.469 80.3594 163.25
Length  512, alignment  0/ 0:   95.6406 1555.47 93.7969 292.625
Length  512, alignment  9/ 0:   159.812 1555.47 157.578 509.828
Length  512, alignment  0/ 9:   245.438 1555.47 245.703 797.875
Length  512, alignment  9/ 9:   124.422 1555.47 121.484 298.016
Length 1024, alignment  0/ 0:   165.688 3091.47 163.797 551.703
Length 1024, alignment 10/ 0:   287.812 3091.47 285.734 989.578
Length 1024, alignment  0/10:   407.188 3091.67 403.859 1567.45
Length 1024, alignment 10/10:   210.516 3091.47 207.812 556.984
Length 2048, alignment  0/ 0:   305.859 6163.58 303.906 1072.97
Length 2048, alignment 11/ 0:   543.906 6163.47 541.859 1949.58
Length 2048, alignment  0/11:   726.953 6163.58 723.75  3101.7
Length 2048, alignment 11/11:   362.516 6163.47 359.469 1089.05
Length 4096, alignment  0/ 0:   586.016 12307.5 583.703 2112.02
Length 4096, alignment 12/ 0:   1055.89 12307.7 1054.3  2112.88
Length 4096, alignment  0/12:   1367.38 12308.1 1361.23 4119.38
Length 4096, alignment 12/12:   666.297 12308.2 659.219 2123.75
Length 8192, alignment  0/ 0:   1146.84 24601.9 1144.64 4197.48
Length 8192, alignment 13/ 0:   2397.16 24602.5 2079.22 7718.64
Length 8192, alignment  0/13:   2654.56 24601.7 2645.52 10010
Length 8192, alignment 13/13:   1278.2  24871.9 1271.98 4225.17
Length 16384, alignment  0/ 0:  2370.12 49285.9 2368.58 8508.58
Length 16384, alignment 14/ 0:  4285.86 49574.4 4283.59 15658.3
Length 16384, alignment  0/14:  5320.25 49550.2 5313.17 19189.7
Length 16384, alignment 14/14:  2588.95 49291.3 2848.22 8565.11
Length 32768, alignment  0/ 0:  9262.19 106251  9253.75 24271.7
Length 32768, alignment 15/ 0:  16449.1 106236  16666   40019.6
Length 32768, alignment  0/15:  16507.3 106212  16500   44826.7
Length 32768, alignment 15/15:  8809.66 106224  8802.69 24342.8
Length 65536, alignment  0/ 0:  18975   212372  18476   48476.1
Length 65536, alignment 16/ 0:  17684.5 212591  17458.8 48647.8
Length 65536, alignment  0/16:  21750.5 212373  21532.7 83225.2
Length 65536, alignment 16/16:  17457.6 212418  17455.5 48713.7
Length    0, alignment  0/ 0:   18.5625 9.07812 14.5781 22.625
Length    0, alignment  0/ 0:   18.5625 9.07812 14.5781 22.5781
Length    0, alignment  0/ 0:   18.5625 9.07812 14.5781 22.5938
Length    0, alignment  0/ 0:   18.5625 9.07812 14.5781 22.5781
Length    1, alignment  0/ 0:   19.5312 12.5781 15.5938 21.625
Length    1, alignment  1/ 0:   19.5312 12.9219 15.5625 21.625
Length    1, alignment  0/ 1:   19.4531 12.6875 15.5938 21.75
Length    1, alignment  1/ 1:   19.6719 12.5781 15.75   21.8438
Length    2, alignment  0/ 0:   18.625  16.8906 16.5938 20.6094
Length    2, alignment  2/ 0:   18.625  16.7344 16.5938 22.625
Length    2, alignment  0/ 2:   18.625  16.5    16.5938 20.625
Length    2, alignment  2/ 2:   18.625  16.1562 16.5938 22.7344
Length    3, alignment  0/ 0:   18.625  19.75   16.5938 27.5938
Length    3, alignment  3/ 0:   18.625  19.3438 16.5938 24.6406
Length    3, alignment  0/ 3:   18.625  19.1406 16.5938 27.5312
Length    3, alignment  3/ 3:   18.625  19.1406 16.5938 22.5938
Length    4, alignment  0/ 0:   18.625  22.1406 16.5156 20.6094
Length    4, alignment  4/ 0:   18.625  22.1406 16.5938 22.8125
Length    4, alignment  0/ 4:   18.625  22.25   16.5938 20.6094
Length    4, alignment  4/ 4:   18.7188 22.2031 16.5938 20.6094
Length    5, alignment  0/ 0:   20.5312 25.1406 16.5781 21.5625
Length    5, alignment  5/ 0:   21.625  25.25   19.5156 23.5312
Length    5, alignment  0/ 5:   21.5938 25.25   20.3594 24.1875
Length    5, alignment  5/ 5:   26.25   25.1406 23.4844 27.6406
Length    6, alignment  0/ 0:   20.5312 27.9531 16.5156 21.4844
Length    6, alignment  6/ 0:   20.5938 28.1406 17.875  25.6094
Length    6, alignment  0/ 6:   23.5    28.5    17.5938 23.5312
Length    6, alignment  6/ 6:   25.5    28.1406 19.5938 25.4531
Length    7, alignment  0/ 0:   20.5312 31.1406 16.5781 29.5312
Length    7, alignment  7/ 0:   20.5938 31.1406 17.5938 25.5312
Length    7, alignment  0/ 7:   23.5    31.25   17.5938 25.8906
Length    7, alignment  7/ 7:   25.5    31.1875 19.5938 27.5312
Length    8, alignment  0/ 0:   24.6094 34.0312 20.5938 20.5469
Length    8, alignment  8/ 0:   24.5469 34.0312 20.5781 20.5625
Length    8, alignment  0/ 8:   28.4219 34.0312 20.5781 20.9062
Length    8, alignment  8/ 8:   28.7812 33.9219 20.5781 20.5469
Length    9, alignment  0/ 0:   25.5625 52.3906 21.625  27.5
Length    9, alignment  9/ 0:   25.625  52.3906 21.7188 35.9219
Length    9, alignment  0/ 9:   29.4688 52.2812 21.6406 27.6406
Length    9, alignment  9/ 9:   29.5312 52.3906 23.1406 32.5312
Length   10, alignment  0/ 0:   25.5938 55.6094 22.5938 26.5312
Length   10, alignment 10/ 0:   25.625  56.5625 22.6406 33.5938
Length   10, alignment  0/10:   29.4688 56.4688 22.5938 28.6875
Length   10, alignment 10/10:   31.4688 56.3594 24.6094 33.625
Length   11, alignment  0/ 0:   25.5938 59.3594 22.5938 28.6875
Length   11, alignment 11/ 0:   25.625  59.3594 22.5625 32.1875
Length   11, alignment  0/11:   29.4688 59.7188 22.6094 30.9531
Length   11, alignment 11/11:   31.4688 59.3594 24.5938 34.8594
Length   12, alignment  0/ 0:   25.5938 62.8125 22.5938 28.5625
Length   12, alignment 12/ 0:   25.5938 62.3594 22.5938 27.5625
Length   12, alignment  0/12:   25.5938 62.4531 22.5938 28.5625
Length   12, alignment 12/12:   25.5938 62.3906 22.5938 28.5
Length   13, alignment  0/ 0:   29.4375 58.4688 21.6094 28.6719
Length   13, alignment 13/ 0:   33.4688 58.4688 26.5938 34.5938
Length   13, alignment  0/13:   29.5625 58.4688 26.5938 32.5
Length   13, alignment 13/13:   35.5625 58.6719 32.5156 32.6406
Length   14, alignment  0/ 0:   29.4375 61.4688 21.6094 32.5
Length   14, alignment 14/ 0:   31.4688 61.4688 24.5156 32.7031
Length   14, alignment  0/14:   27.5938 68.0938 24.5938 32.5781
Length   14, alignment 14/14:   35.5938 61.4688 32.5938 32.7344
Length   15, alignment  0/ 0:   29.4375 64.5156 21.9375 29.6562
Length   15, alignment 15/ 0:   31.4688 64.5156 24.5312 34.5
Length   15, alignment  0/15:   27.5938 64.4688 24.5938 34.5
Length   15, alignment 15/15:   35.5938 64.4688 32.5938 31.7031
Length   16, alignment  0/ 0:   31.8906 67.6719 24.6875 30.75
Length   16, alignment 16/ 0:   31.9688 67.5781 25.0312 30.5469
Length   16, alignment  0/16:   28.6719 67.5781 24.6875 30.5312
Length   16, alignment 16/16:   28.6719 67.4688 24.6875 30.5312
Length   17, alignment  0/ 0:   32.6094 70.5781 28.5938 29.5938
Length   17, alignment 17/ 0:   37.5469 70.5781 30.5781 38.8906
Length   17, alignment  0/17:   31.7656 70.4688 28.6406 32.5938
Length   17, alignment 17/17:   39.625  70.4688 36.6562 38.6875
Length   18, alignment  0/ 0:   36.6406 73.5781 28.625  28.5938
Length   18, alignment 18/ 0:   38.5469 73.4688 30.6875 36.625
Length   18, alignment  0/18:   28.7969 73.5781 25.5625 32.6875
Length   18, alignment 18/18:   35.5938 73.9375 32.5625 36.5312
Length   19, alignment  0/ 0:   35.7344 76.8281 28.0625 35.5938
Length   19, alignment 19/ 0:   37.5    76.4688 30.5469 36.9375
Length   19, alignment  0/19:   31.6719 76.9375 32.4375 35.5156
Length   19, alignment 19/19:   39.5938 76.4688 40.4375 38.7656
Length   20, alignment  0/ 0:   34.5781 79.8906 27.1094 30.9688
Length   20, alignment 20/ 0:   34.4688 79.5781 27.1094 29.5625
Length   20, alignment  0/20:   32.5469 79.4688 33.5    29.6406
Length   20, alignment 20/20:   32.6875 79.4688 33.5156 31.5938
Length   21, alignment  0/ 0:   32.6094 82.4688 27.5312 35.8125
Length   21, alignment 21/ 0:   36.6719 82.5781 33.5938 38.625
Length   21, alignment  0/21:   35.7031 82.4688 36.4844 37.5312
Length   21, alignment 21/21:   43.625  82.9375 44.4688 34.5938
Length   22, alignment  0/ 0:   32.6094 85.5781 27.5312 35.5
Length   22, alignment 22/ 0:   38.6875 85.5781 31.6562 38.7188
Length   22, alignment  0/22:   35.625  85.9375 36.5938 37.5
Length   22, alignment 22/22:   43.625  85.4688 44.5156 36.6094
Length   23, alignment  0/ 0:   32.5    88.4688 27.5312 32.7031
Length   23, alignment 23/ 0:   38.4688 88.4688 31.6562 39.8438
Length   23, alignment  0/23:   36.7969 88.8281 37.4375 39.5
Length   23, alignment 23/23:   43.625  88.4688 44.4688 35.5312
Length   24, alignment  0/ 0:   31.7031 91.5781 29.5625 33.6875
Length   24, alignment 24/ 0:   31.5938 91.5156 29.5781 33.6875
Length   24, alignment  0/24:   32.75   91.9375 33.8594 32.6719
Length   24, alignment 24/24:   39.6406 91.9375 40.5469 31.5781
Length   25, alignment  0/ 0:   32.7969 94.6719 30.5781 32.6875
Length   25, alignment 25/ 0:   37.6875 94.6719 34.6719 43.5938
Length   25, alignment  0/25:   43.7344 94.4688 40.6094 43.4688
Length   25, alignment 25/25:   52.625  94.4688 49.5625 42.375
Length   26, alignment  0/ 0:   33.6719 97.4688 30.6562 32.7344
Length   26, alignment 26/ 0:   37.6562 97.4688 35.6719 42.75
Length   26, alignment  0/26:   39.7344 97.4688 40.5781 43.375
Length   26, alignment 26/26:   52.5625 97.4688 49.6094 43.6562
Length   27, alignment  0/ 0:   33.7344 100.469 30.6719 34.5625
Length   27, alignment 27/ 0:   37.7344 100.469 35.6719 44.0312
Length   27, alignment  0/27:   43.7969 100.469 40.6094 45.375
Length   27, alignment 27/27:   51.625  100.469 48.5781 45.5
Length   28, alignment  0/ 0:   33.7031 103.469 30.6406 33.5625
Length   28, alignment 28/ 0:   33.5938 103.469 30.6719 33.5625
Length   28, alignment  0/28:   37.6875 103.578 34.625  34.75
Length   28, alignment 28/28:   45.5469 103.469 43.5625 33.5938
Length   29, alignment  0/ 0:   32.625  106.938 29.75   39.6875
Length   29, alignment 29/ 0:   42.7344 106.578 39.6562 45.5
Length   29, alignment  0/29:   45.7031 106.578 42.8281 45.6562
Length   29, alignment 29/29:   56.5469 106.938 53.625  39.7031
Length   30, alignment  0/ 0:   32.625  109.812 29.75   39.4219
Length   30, alignment 30/ 0:   40.6562 109.688 37.6719 44.5938
Length   30, alignment  0/30:   45.75   109.578 42.7969 43.5
Length   30, alignment 30/30:   56.5938 109.578 53.9219 39.6406
Length   31, alignment  0/ 0:   32.7344 112.578 29.8594 41.5
Length   31, alignment 31/ 0:   40.6719 112.469 38.0156 45.5
Length   31, alignment  0/31:   45.7031 112.469 42.75   45.8281
Length   31, alignment 31/31:   57.5938 112.469 54.4688 38.6562
Length   48, alignment  0/ 0:   41.9062 163.812 43.5    49.1719
Length   48, alignment  3/ 0:   53.7969 163.469 54.5312 69.7969
Length   48, alignment  0/ 3:   71.7031 178.203 68.5625 85.75
Length   48, alignment  3/ 3:   54.75   163.922 51.6094 66.25
Length   80, alignment  0/ 0:   38.7188 259.719 35.7969 65.5469
Length   80, alignment  5/ 0:   49.0781 259.719 46.9219 100.109
Length   80, alignment  0/ 5:   62.7031 259.672 59.5625 108.188
Length   80, alignment  5/ 5:   46.7031 259.469 43.6719 78.2969
Length   96, alignment  0/ 0:   45.75   307.422 43.5625 73.1562
Length   96, alignment  6/ 0:   62.8438 307.469 59.625  115.109
Length   96, alignment  0/ 6:   81.7188 307.578 78.5625 123.078
Length   96, alignment  6/ 6:   54.625  307.828 51.5938 78.0469
Length  112, alignment  0/ 0:   51.9062 355.469 49.5938 86.6719
Length  112, alignment  7/ 0:   73.6875 355.578 70.625  129.906
Length  112, alignment  0/ 7:   100.641 355.938 97.625  136.125
Length  112, alignment  7/ 7:   62.7188 355.578 59.6875 90.8906
Length  144, alignment  0/ 0:   47.3594 451.469 43.7031 102.719
Length  144, alignment  9/ 0:   67.2031 451.469 64.8594 164.641
Length  144, alignment  0/ 9:   85.7344 451.922 86.4219 245.766
Length  144, alignment  9/ 9:   60.9219 451.469 57.6719 109.391
Length  160, alignment  0/ 0:   53.875  499.469 51.5781 110.703
Length  160, alignment 10/ 0:   79.0781 499.469 75.7031 179.578
Length  160, alignment  0/10:   106.141 499.578 102.641 270.766
Length  160, alignment 10/10:   66.7344 499.469 63.7188 117.734
Length  176, alignment  0/ 0:   59.9062 547.469 57.6094 118.656
Length  176, alignment 11/ 0:   89.9219 547.469 88.0781 194.859
Length  176, alignment  0/11:   125.844 547.469 122.656 293.891
Length  176, alignment 11/11:   74.0312 547.469 70.7656 131.781
Length  192, alignment  0/ 0:   46.3906 595.469 44.0781 126.953
Length  192, alignment 12/ 0:   73.5312 595.469 70.1875 126.906
Length  192, alignment  0/12:   138.156 595.594 135.734 215.016
Length  192, alignment 12/12:   76.9531 595.484 73.6719 130.672
Length  208, alignment  0/ 0:   55.3125 643.469 53.25   134.922
Length  208, alignment 13/ 0:   81.1875 643.469 79.0312 224.672
Length  208, alignment  0/13:   111.578 643.938 108.812 284.062
Length  208, alignment 13/13:   69.25   643.469 65.9375 146.828
Length  224, alignment  0/ 0:   62.5625 691.469 60.5156 142.953
Length  224, alignment 14/ 0:   95.0625 691.469 92.0625 239.641
Length  224, alignment  0/14:   127.344 691.578 124.016 307.125
Length  224, alignment 14/14:   81.8125 691.938 72.9531 149.844
Length  240, alignment  0/ 0:   68.8438 739.469 66.1562 153.5
Length  240, alignment 15/ 0:   106.672 739.469 103.062 255.078
Length  240, alignment  0/15:   151.281 739.844 148     322.984
Length  240, alignment 15/15:   82.6406 739.516 79.1875 160.984
Length  272, alignment  0/ 0:   77.7031 835.469 75.4219 168.312
Length  272, alignment 17/ 0:   107.844 835.469 105.75  284.734
Length  272, alignment  0/17:   140.281 835.578 140.641 290.438
Length  272, alignment 17/17:   70.9844 835.922 67.9688 177.766
Length  288, alignment  0/ 0:   72.1562 883.578 70.2031 176.109
Length  288, alignment 18/ 0:   119.719 883.578 116.672 299.641
Length  288, alignment  0/18:   158.875 883.672 158.469 304.969
Length  288, alignment 18/18:   79.1875 883.469 76.125  184.859
Length  304, alignment  0/ 0:   79.4844 931.469 77.625  185.547
Length  304, alignment 19/ 0:   122.266 931.469 120.219 314.578
Length  304, alignment  0/19:   176.688 931.469 174.109 320.422
Length  304, alignment 19/19:   95.6094 931.812 85.125  198.828
Length  320, alignment  0/ 0:   63.3125 979.469 61.0469 193.641
Length  320, alignment 20/ 0:   111.859 979.828 109.547 193.656
Length  320, alignment  0/20:   182.578 979.484 180.625 342.656
Length  320, alignment 20/20:   91.6094 979.484 88.3594 194.688
Length  336, alignment  0/ 0:   79.8594 1027.58 77.625  201.766
Length  336, alignment 21/ 0:   123.703 1027.47 120.562 344.781
Length  336, alignment  0/21:   174.734 1027.47 171.547 350.75
Length  336, alignment 21/21:   80.1875 1027.47 76      210.875
Length  352, alignment  0/ 0:   92.8281 1075.42 90.7031 210
Length  352, alignment 22/ 0:   135.906 1075.83 132.797 359.641
Length  352, alignment  0/22:   188.688 1075.47 185.469 365.125
Length  352, alignment 22/22:   87.3594 1075.47 84.1406 213.906
Length  368, alignment  0/ 0:   88.3438 1123.47 85.7969 217.766
Length  368, alignment 23/ 0:   146.844 1123.47 143.656 374.641
Length  368, alignment  0/23:   205.719 1123.47 203.547 379.984
Length  368, alignment 23/23:   95.3281 1123.47 92.2812 224.688
Length  384, alignment  0/ 0:   84.4531 1171.41 82.4844 225.781
Length  384, alignment 24/ 0:   131.547 1171.47 128.406 225.781
Length  384, alignment  0/24:   249.438 1171.47 256.125 339.703
Length  384, alignment 24/24:   100.625 1171.47 97.0312 231.062
Length  400, alignment  0/ 0:   88.9531 1219.48 86.7188 233.859
Length  400, alignment 25/ 0:   139.984 1219.81 137.797 404.578
Length  400, alignment  0/25:   207.016 1219.92 203.656 410.891
Length  400, alignment 25/25:   100.234 1219.47 97.1719 241.703
Length  416, alignment  0/ 0:   94.3125 1267.58 94.9531 241.734
Length  416, alignment 26/ 0:   151.859 1267.69 148.859 419.672
Length  416, alignment  0/26:   219.5   1267.58 215.984 424.734
Length  416, alignment 26/26:   99.7031 1267.81 102.141 245.844
Length  432, alignment  0/ 0:   106.781 1315.47 104.625 249.953
Length  432, alignment 27/ 0:   162.969 1315.47 160.953 434.75
Length  432, alignment  0/27:   233.703 1315.81 229.281 439.625
Length  432, alignment 27/27:   105.906 1315.47 102.859 260
Length  448, alignment  0/ 0:   88.6406 1363.58 86.6094 257.875
Length  448, alignment 28/ 0:   143.984 1363.47 141.672 257.75
Length  448, alignment  0/28:   233.078 1363.47 232.359 362.734
Length  448, alignment 28/28:   109.281 1583.64 106.906 258.797
Length  464, alignment  0/ 0:   97.7031 1411.47 95.7031 265.672
Length  464, alignment 29/ 0:   156     1411.41 152.828 465.344
Length  464, alignment  0/29:   236.734 1411.47 233.844 470.578
Length  464, alignment 29/29:   101.078 1411.58 98.0781 274.938
Length  480, alignment  0/ 0:   104.984 1459.47 102.891 273.688
Length  480, alignment 30/ 0:   167.828 1459.47 164.734 479.672
Length  480, alignment  0/30:   248.062 1459.52 244.969 484.734
Length  480, alignment 30/30:   113.75  1459.47 110.641 278.094
Length  496, alignment  0/ 0:   110.953 1507.58 108.781 283.312
Length  496, alignment 31/ 0:   178.906 1507.47 175.734 495.141
Length  496, alignment  0/31:   259.953 1507.47 255.891 499.891
Length  496, alignment 31/31:   118.391 1507.47 115.656 289.031
Length 4096, alignment  0/ 0:   586.672 12307.9 584.172 2112.75

Patch
diff mbox

diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/e6500/memcpy.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/e6500/memcpy.S
--- glibc-2.20/sysdeps/powerpc/powerpc32/e6500/memcpy.S   1969-12-31 18:00:00.000000000 -0600
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/e6500/memcpy.S     2015-08-29 15:45:37.044421872 -0500
@@ -0,0 +1,212 @@ 
+/* Optimized memcpy implementation for e6500 32-bit PowerPC.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+   Returns 'dst'.
+
+             r3 = destination
+             r4 = source
+             r5 = byte count
+
+             volatile fixed point registers usable:
+             r0, r3-r12
+
+             volatile floating point registers usable:
+             f0-f13.  */
+
+EALIGN (memcpy, 5, 0)
+             cmplw   cr0, r4, r3                             /* if source==destination, return.  */
+             beqlr     cr0
+             /* Fewer than 16 bytes (optimal value TBD): skip the vector
+                path and let the scalar tail at copy_remaining copy them.  */
+             cmplwi  r5, 16
+             mr          r6, r3                               /* r6 = working dst; r3 is kept as the return value.  */
+             blt           L(copy_remaining)
+             neg        r0, r3
+             andi.      r11, r0, 15                           /* r11 = bytes needed to 16-byte align dst.  */
+             beq        L(dst_align16)                        /* dst is already 16-byte aligned.  */
+             lwz         r0, 0(r4)                                /* copy 16 bytes, but advance only by r11.  */
+             lwz         r7, 4(r4)
+             lwz         r8, 8(r4)
+             lwz         r9, 12(r4)
+             subf       r5, r11, r5                           /* len -= r11.  */
+             add        r4, r4, r11
+             stw         r0, 0(r6)
+             stw         r7, 4(r6)
+             stw         r8, 8(r6)
+             stw         r9, 12(r6)
+             add        r6, r6, r11
+L(dst_align16):
+             cmplwi  7, r5, 63                                /* fewer than 64 bytes left?  */
+             ble          7, L(copy_remaining)
+             srwi        r11, r5, 6                               /* r11 = number of 64-byte chunks.  */
+             rlwinm  r5, r5, 0, 26, 31   /* r5 = len % 64 (remaining bytes).  */
+             rlwinm. r0, r4, 0, 28, 31   /* cr0: is src 16-byte aligned?  */
+             mtctr     r11                                     /* CTR = 64-byte chunk count.  */
+             li              r7, 16                            /* r7, r8, r9 = offsets of quadwords 1, 2, 3.  */
+             li              r8, 32
+             li              r9, 48
+             bne        0, L(src_naligned)
+L(copy_salign16):
+             lvx          v14, 0, r4                              /* copy 64 bytes.  */
+             lvx          v15, r7, r4
+             lvx          v16, r8, r4
+             lvx          v17, r9, r4
+             addi       r4, r4, 64
+             stvx        v14, 0, r6
+             stvx        v15, r7, r6
+             stvx        v16, r8, r6
+             stvx        v17, r9, r6
+             addi       r6, r6, 64
+             bdnz      L(copy_salign16)                       /* then fall through to copy_remaining.  */
+L(copy_remaining):
+             srwi.      r11, r5, 3                               /* r11 = number of 8-byte units.  */
+             rlwinm  r5, r5, 0, 29, 31   /* r5 = len % 8 (remaining bytes).  */
+             beq        0, L(copy_bytes)                      /* fewer than 8 bytes left.  */
+             mtcrf     0x01, r11                               /* low 4 bits of r11 -> cr7.  */
+             bf            cr7*4+1, L(cp16b)                  /* bit of weight 4 clear: no 32-byte piece.  */
+
+             lwz         r0, 0(r4)                                /* copy 32 bytes */
+             lwz         r7, 4(r4)
+             lwz         r8, 8(r4)
+             lwz         r9, 12(r4)
+
+             stw         r0, 0(r6)
+             stw         r7, 4(r6)
+             stw         r8, 8(r6)
+             stw         r9, 12(r6)
+
+             lwz         r0, 16(r4)
+             lwz         r7, 20(r4)
+             lwz         r8, 24(r4)
+             lwz         r9, 28(r4)
+             addi       r4, r4, 32
+
+             stw         r0, 16(r6)
+             stw         r7, 20(r6)
+             stw         r8, 24(r6)
+             stw         r9, 28(r6)
+             addi r6, r6, 32
+L(cp16b):
+             bf            cr7*4+2, L(cp8b)                   /* bit of weight 2 clear: no 16-byte piece.  */
+             lwz         r0, 0(r4)                                /* copy 16 bytes */
+             lwz         r7, 4(r4)
+             lwz         r8, 8(r4)
+             lwz         r9, 12(r4)
+
+             addi       r4, r4, 16
+
+             stw         r0, 0(r6)
+             stw         r7, 4(r6)
+             stw         r8, 8(r6)
+             stw         r9, 12(r6)
+             addi       r6, r6, 16
+L(cp8b):
+             bf            cr7*4+3, L(copy_bytes)             /* bit of weight 1 clear: no 8-byte piece.  */
+             lwz         r0, 0(r4)                                /* copy 8 bytes */
+             lwz         r7, 4(r4)
+             addi       r4, r4, 8
+
+             stw         r0, 0(r6)
+             stw         r7, 4(r6)
+             addi       r6, r6, 8
+L(copy_bytes):
+             cmplwi  cr1, r5, 4                               /* r5 = 0..7 bytes still to copy.  */
+             cmplwi  cr0, r5, 1
+             bgt         cr1, L(gt4b)                         /* nb > 4?  (5, 6, 7 bytes).  */
+             ble          cr0, L(lt1b)                           /* nb <= 1? (0, 1 bytes).  */
+             addi       r0, r5, -2                               /* 2, 3, 4 bytes.  */
+             lhz          r9, 0(r4)                            /* first halfword.  */
+             lhzx        r11, r4, r0                          /* last halfword (may overlap the first).  */
+             sth          r9, 0(r6)
+             sthx       r11, r6, r0
+             blr
+L(gt4b):
+             addi       r0, r5, -4                               /* 5, 6, 7 bytes.  */
+             lwz         r9, 0(r4)                             /* first word.  */
+             lwzx       r11, r4, r0                           /* last word (overlaps the first).  */
+             stw         r9, 0(r6)
+             stwx      r11, r6, r0
+             blr
+L(lt1b):
+             mtocrf  0x1, r5                                   /* nb == 0 ? return.  */
+             bflr         31                                   /* low bit of nb clear -> nb == 0.  */
+             lbz          r0, 0(r4)                                /* nb == 1.  */
+             stb          r0, 0(r6)
+             blr
+
+L(src_naligned):                                            /* src is not 16-byte aligned.  */
+#ifndef _SOFT_FLOAT
+             rlwinm. r0, r4, 0, 29, 31   /* cr0: is src 8-byte aligned?  */
+             beq        0, L(copy_salign8)                    /* yes: use the FP doubleword loop.  */
+#endif
+L(copy_snalign):                                            /* copy 64 bytes.  */
+             lvx          v0, 0, r4                /* load MSQ.  */
+             lvsl          v18, 0, r4                              /* set permute control vector.  */
+             lvx          v19, r7, r4                            /* load LSQ.  */
+             vperm   v14, v0, v19, v18               /* align the data.  */
+             lvx          v0, r7, r4                               /* load MSQ.  */
+             lvsl          v18, r7, r4                            /* set permute control vector.  */
+             lvx          v19, r8, r4                            /* load LSQ.  */
+             vperm   v15, v0, v19, v18               /* align the data.  */
+             lvx          v0, r8, r4                               /* load MSQ.  */
+             lvsl          v18, r8, r4                            /* set permute control vector.  */
+             lvx          v19, r9, r4                            /* load LSQ.  */
+             vperm   v16, v0, v19, v18               /* align the data.  */
+             lvx          v0, r9, r4                               /* load MSQ.  */
+             lvsl          v18, r9, r4                            /* set permute control vector.  */
+             addi       r4, r4, 64                            /* advance src before the last LSQ load.  */
+             lvx          v19, 0, r4                              /* load LSQ.  */
+             vperm   v17, v0, v19, v18               /* align the data.  */
+             stvx        v14, 0, r6                            /* dst is 16-byte aligned: plain vector stores.  */
+             stvx        v15, r7, r6
+             stvx        v16, r8, r6
+             stvx        v17, r9, r6
+             addi       r6, r6, 64
+             bdnz      L(copy_snalign)
+             b             L(copy_remaining)                  /* finish the len % 64 tail.  */
+
+#ifndef _SOFT_FLOAT
+L(copy_salign8):
+             lfd           0, 0(r4)                  /* copy 64 bytes through f0-f3.  */
+             lfd           1, 8(r4)
+             lfd           2, 16(r4)
+             lfd           3, 24(r4)
+             stfd        0, 0(r6)
+             stfd        1, 8(r6)
+             stfd        2, 16(r6)
+             stfd        3, 24(r6)
+             lfd           0, 32(r4)                 /* second 32-byte half.  */
+             lfd           1, 40(r4)
+             lfd           2, 48(r4)
+             lfd           3, 56(r4)
+             addi       r4, r4, 64
+             stfd        0, 32(r6)
+             stfd        1, 40(r6)
+             stfd        2, 48(r6)
+             stfd        3, 56(r6)
+             addi       r6, r6, 64
+             bdnz      L(copy_salign8)
+             b             L(copy_remaining)                  /* finish the len % 64 tail.  */
+#endif
+
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c
--- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c 2015-08-29 15:42:09.769408236 -0500
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c   2015-08-29 15:45:37.044421872 -0500
@@ -58,6 +58,10 @@ 
                                                     __memcpy_power6)
                     IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE,
                                                     __memcpy_cell)
+                   IFUNC_IMPL_ADD (array, i, memcpy,
+                                                   (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500)
+                                                   && (hwcap2 & PPC_FEATURE2_HAS_ISEL)),
+                                                   __memcpy_e6500)
                     IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc))

   /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c.  */
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile
--- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile             2015-08-29 15:42:09.769408236 -0500
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile                2015-08-29 15:46:34.217426773 -0500
@@ -1,7 +1,7 @@ 
ifeq ($(subdir),string)
sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
-                                 memcpy-ppc32 memcmp-power7 memcmp-e6500 memcmp-ppc32 \
-                                 memset-power7 memset-power6 memset-ppc32 \
+                                memcpy-e6500 memcpy-ppc32 memcmp-power7 memcmp-e6500 \
+                                memcmp-ppc32 memset-power7 memset-power6 memset-ppc32 \
                                  bzero-power7 bzero-power6 bzero-ppc32 \
                                  mempcpy-power7 mempcpy-ppc32 memchr-power7 \
                                  memchr-ppc32 memrchr-power7 memrchr-ppc32 rawmemchr-power7 \
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c
--- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c          2015-08-29 15:41:52.333407557 -0500
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c             2015-08-29 15:45:37.044421872 -0500
@@ -25,6 +25,7 @@ 
# include "init-arch.h"

 extern __typeof (memcpy) __memcpy_ppc attribute_hidden;
+extern __typeof (memcpy) __memcpy_e6500 attribute_hidden;
extern __typeof (memcpy) __memcpy_cell attribute_hidden;
extern __typeof (memcpy) __memcpy_power6 attribute_hidden;
extern __typeof (memcpy) __memcpy_a2 attribute_hidden;
@@ -40,6 +41,9 @@ 
                               (hwcap & PPC_FEATURE_ARCH_2_05)
                               ? __memcpy_power6 :
                                 (hwcap & PPC_FEATURE_CELL_BE)
-                                ? __memcpy_cell
+                               ? __memcpy_cell :
+                                 (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500)
+                                 && (hwcap2 & PPC_FEATURE2_HAS_ISEL))
+                                 ? __memcpy_e6500
             : __memcpy_ppc);
#endif
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S
--- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S            1969-12-31 18:00:00.000000000 -0600
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S               2015-08-29 15:45:37.045421842 -0500
@@ -0,0 +1,38 @@ 
+/* Optimized memcpy implementation for PowerPC32/e6500.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#undef EALIGN                          /* Emit the included code under the symbol __memcpy_e6500.  */
+#define EALIGN(name, alignt, words)                                                  \
+ .globl C_SYMBOL_NAME(__memcpy_e6500);                                                                \
+ .type C_SYMBOL_NAME(__memcpy_e6500),@function;                          \
+ .align ALIGNARG(alignt);                                                                           \
+ EALIGN_W_##words;                                                                                                \
+ C_LABEL(__memcpy_e6500)                                                                  \
+ cfi_startproc;
+
+#undef END                             /* Size directive must name __memcpy_e6500 as well.  */
+#define END(name)                                                                                     \
+ cfi_endproc;                                                                                                   \
+ ASM_SIZE_DIRECTIVE(__memcpy_e6500)
+
+#undef libc_hidden_builtin_def         /* Expands to nothing for this multiarch variant.  */
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc32/e6500/memcpy.S>
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/e6500/memcpy.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/e6500/memcpy.S
--- glibc-2.20/sysdeps/powerpc/powerpc64/e6500/memcpy.S   1969-12-31 18:00:00.000000000 -0600
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/e6500/memcpy.S     2015-08-29 15:45:37.045421842 -0500
@@ -0,0 +1,184 @@ 
+/* Optimized memcpy implementation for e6500 64-bit PowerPC.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+   Returns 'dst'.
+
+             r3 = destination
+             r4 = source
+             r5 = byte count
+
+             volatile fixed point registers usable:
+             r0, r3-r12
+
+             volatile floating point registers usable:
+             f0-f13.  */
+
+EALIGN (memcpy, 5, 0)
+             CALL_MCOUNT 3
+             cmpld    cr0, r4, r3                             /* if source==destination, return.  */
+             beqlr     cr0
+             /* 16 bytes or fewer: skip the vector path and let the scalar
+                tail at copy_remaining finish the copy.  */
+             cmpldi   r5, 16
+             mr          r6, r3                               /* r6 = working dst; r3 is kept as the return value.  */
+             ble          L(copy_remaining)
+             neg        r0, r3
+             andi.      r11, r0, 15                           /* r11 = bytes needed to 16-byte align dst.  */
+             beq        L(dst_align)                          /* dst is already 16-byte aligned.  */
+             ld            r12, 0(r4)                         /* copy 16 bytes, but advance only by r11.  */
+             ld            r0, 8(r4)
+             subf       r5, r11, r5                           /* len -= r11.  */
+             add        r4, r4, r11
+             std          r12, 0(r6)
+             std          r0, 8(r6)
+             add        r6, r6, r11
+L(dst_align):
+             cmpldi   7, r5, 63                               /* fewer than 64 bytes left?  */
+             ble          7, L(copy_remaining)
+             srdi        r11, r5, 6                               /* No of 64 byte copy count; 64-bit shift so len >= 4 GiB is not truncated.  */
+             rlwinm  r5, r5, 0, 26, 31   /* remaining bytes (len % 64).  */
+             rlwinm. r0, r4, 0, 28, 31   /* cr0: is src 16-byte aligned?  */
+             mtctr     r11                                     /* CTR = 64-byte chunk count.  */
+             li              r7, 16                            /* r7, r8, r9 = offsets of quadwords 1, 2, 3.  */
+             li              r8, 32
+             li              r9, 48
+             bne        0, L(src_naligned)
+L(copy_salign):
+             lvx          v14, 0, r4                           /* copy 64 bytes: src and dst both 16-byte aligned.  */
+             lvx          v15, r7, r4
+             lvx          v16, r8, r4
+             lvx          v17, r9, r4
+             addi       r4, r4, 64
+             stvx        v14, 0, r6
+             stvx        v15, r7, r6
+             stvx        v16, r8, r6
+             stvx        v17, r9, r6
+             addi       r6, r6, 64
+             bdnz      L(copy_salign)                         /* then fall through to copy_remaining.  */
+L(copy_remaining):
+             srwi.      r11, r5, 3                               /* r11 = number of 8-byte units (r5 <= 63 here).  */
+             rlwinm  r5, r5, 0, 29, 31   /* r5 = len % 8 (remaining bytes).  */
+             beq        0, L(copy_bytes)                      /* fewer than 8 bytes left.  */
+             mtcrf     0x01, r11                               /* low 4 bits of r11 -> cr7.  */
+             bf            cr7*4+1, L(cp16b)                  /* bit of weight 4 clear: no 32-byte piece.  */
+             ld            r0, 0(r4)                                /* copy 32 bytes.  */
+             ld            r7, 8(r4)
+             ld            r8, 16(r4)
+             ld            r9, 24(r4)
+             addi       r4, r4, 32
+             std          r0, 0(r6)
+             std          r7, 8(r6)
+             std          r8, 16(r6)
+             std          r9, 24(r6)
+             addi       r6, r6, 32
+L(cp16b):
+             bf            cr7*4+2, L(cp8b)                   /* bit of weight 2 clear: no 16-byte piece.  */
+             ld            r7, 0(r4)                                /* copy 16 bytes.  */
+             ld            r8, 8(r4)
+             addi       r4, r4, 16
+             std          r7, 0(r6)
+             std          r8, 8(r6)
+             addi       r6, r6, 16
+L(cp8b):
+             bf            cr7*4+3, L(copy_bytes)             /* bit of weight 1 clear: no 8-byte piece.  */
+             ld            r7, 0(r4)                                /* copy 8 bytes.  */
+             addi       r4, r4, 8
+             std          r7, 0(r6)
+             addi       r6, r6, 8
+L(copy_bytes):
+             cmpldi   cr1, r5, 4                              /* r5 = 0..7 bytes still to copy.  */
+             cmpldi   cr0, r5, 1
+             bgt         cr1, L(gt4b)                         /* nb > 4?  (5, 6, 7 bytes).  */
+             ble          cr0, L(lt1b)                           /* nb <= 1? (0, 1 bytes).  */
+             addi       r0, r5, -2                               /* 2, 3, 4 bytes.  */
+             lhz          r9, 0(r4)                            /* first halfword.  */
+             lhzx        r11, r4, r0                          /* last halfword (may overlap the first).  */
+             sth          r9, 0(r6)
+             sthx       r11, r6, r0
+             blr
+L(gt4b):
+             addi       r0, r5, -4                               /* 5, 6, 7 bytes.  */
+             lwz         r9, 0(r4)                             /* first word.  */
+             lwzx       r11, r4, r0                           /* last word (overlaps the first).  */
+             stw         r9, 0(r6)
+             stwx      r11, r6, r0
+             blr
+L(lt1b):
+             mtocrf  0x1, r5                                   /* nb == 0 ? return.  */
+             bflr         31                                   /* low bit of nb clear -> nb == 0.  */
+             lbz          r0, 0(r4)                                /* nb == 1.  */
+             stb          r0, 0(r6)
+             blr
+
+L(src_naligned):                                            /* src is not 16-byte aligned.  */
+             rlwinm. r0, r4, 0, 29, 31   /* cr0: is src 8-byte aligned?  */
+             beq        0, L(copy_salign8)                    /* yes: use the doubleword loop.  */
+L(copy_snalign):                                            /* copy 64 bytes.  */
+             lvx          v0, 0, r4                /* load MSQ.  */
+             lvsl          v18, 0, r4                              /* set permute control vector.  */
+             lvx          v19, r7, r4                            /* load LSQ.  */
+             vperm   v14, v0, v19, v18               /* align the data.  */
+             lvx          v0, r7, r4                               /* load MSQ.  */
+             lvsl          v18, r7, r4                            /* set permute control vector.  */
+             lvx          v19, r8, r4                            /* load LSQ.  */
+             vperm   v15, v0, v19, v18               /* align the data.  */
+             lvx          v0, r8, r4                               /* load MSQ.  */
+             lvsl          v18, r8, r4                            /* set permute control vector.  */
+             lvx          v19, r9, r4                            /* load LSQ.  */
+             vperm   v16, v0, v19, v18               /* align the data.  */
+             lvx          v0, r9, r4                               /* load MSQ.  */
+             lvsl          v18, r9, r4                            /* set permute control vector.  */
+             addi       r4, r4, 64                            /* advance src before the last LSQ load.  */
+             lvx          v19, 0, r4                              /* load LSQ.  */
+             vperm   v17, v0, v19, v18               /* align the data.  */
+             stvx        v14, 0, r6                            /* dst is 16-byte aligned: plain vector stores.  */
+             stvx        v15, r7, r6
+             stvx        v16, r8, r6
+             stvx        v17, r9, r6
+             addi       r6, r6, 64
+             bdnz      L(copy_snalign)
+             b             L(copy_remaining)                  /* finish the len % 64 tail.  */
+
+L(copy_salign8):
+             ld            r0, 0(r4)                          /* copy 64 bytes as two groups of four doublewords.  */
+             ld            r7, 8(r4)
+             ld            r8, 16(r4)
+             ld            r9, 24(r4)
+             std          r0, 0(r6)
+             std          r7, 8(r6)
+             std          r8, 16(r6)
+             std          r9, 24(r6)
+             ld            r0, 32(r4)                         /* second 32-byte half.  */
+             ld            r7, 40(r4)
+             ld            r8, 48(r4)
+             ld            r9, 56(r4)
+             addi       r4, r4, 64
+             std          r0, 32(r6)
+             std          r7, 40(r6)
+             std          r8, 48(r6)
+             std          r9, 56(r6)
+             addi       r6, r6, 64
+             bdnz      L(copy_salign8)
+             b             L(copy_remaining)                  /* finish the len % 64 tail.  */
+
+END_GEN_TB (memcpy,TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
--- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c  2015-08-29 15:42:09.771408290 -0500
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c     2015-08-29 15:45:37.045421842 -0500
@@ -60,6 +60,10 @@ 
                                                     __memcpy_cell)
                     IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_POWER4,
                                                     __memcpy_power4)
+                   IFUNC_IMPL_ADD (array, i, memcpy,
+                                                   (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500)
+                                                   && (hwcap2 & PPC_FEATURE2_HAS_ISEL)),
+                                                   __memcpy_e6500)
                     IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc))

   /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c.  */
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/Makefile glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/Makefile
--- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/Makefile               2015-08-29 15:42:09.771408290 -0500
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/Makefile  2015-08-29 15:47:51.985430863 -0500
@@ -1,7 +1,7 @@ 
ifeq ($(subdir),string)
sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
-                                 memcpy-power4 memcpy-ppc64 memcmp-power7 memcmp-power4 \
-                                 memcmp-e6500 memcmp-ppc64 \
+                                memcpy-power4 memcpy-e6500 memcpy-ppc64 memcmp-power7 \
+                                memcmp-power4 memcmp-e6500 memcmp-ppc64 \
                                  memset-power7 memset-power6 memset-power4 \
                                  memset-ppc64 bzero-power4 bzero-power6 bzero-power7 \
                                  mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
--- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy.c            2015-08-29 15:41:52.354407558 -0500
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy.c               2015-08-29 15:45:37.045421842 -0500
@@ -30,6 +30,7 @@ 
extern __typeof (__redirect_memcpy) __libc_memcpy;

 extern __typeof (__redirect_memcpy) __memcpy_ppc attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_e6500 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_power4 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden;
@@ -46,7 +47,10 @@ 
                                 (hwcap & PPC_FEATURE_CELL_BE)
                                 ? __memcpy_cell :
                                   (hwcap & PPC_FEATURE_POWER4)
-                                  ? __memcpy_power4
+                                 ? __memcpy_power4 :
+                                   (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500)
+                                   && (hwcap2 & PPC_FEATURE2_HAS_ISEL))
+                                   ? __memcpy_e6500
             : __memcpy_ppc);

 #undef memcpy
diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S
--- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S              1969-12-31 18:00:00.000000000 -0600
+++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S 2015-08-29 15:45:37.045421842 -0500
@@ -0,0 +1,40 @@ 
+/* Optimized memcpy implementation for PowerPC64/e6500.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#undef EALIGN  /* Redefined so the function opened by the included file is emitted as __memcpy_e6500; the NAME argument is ignored.  */
+#define EALIGN(name, alignt, words)                                                  \
+  .section ".text";                                                                                           \
+  ENTRY_2(__memcpy_e6500)                                                                \
+  .align ALIGNARG(alignt);                                                                          \
+  EALIGN_W_##words;                                                                                               \
+  BODY_LABEL(__memcpy_e6500):                                                                       \
+  cfi_startproc;                                                                                \
+  LOCALENTRY(__memcpy_e6500)
+
+#undef END_GEN_TB  /* Redefined so the included file's END_GEN_TB closes __memcpy_e6500; the NAME argument is unused.  */
+#define END_GEN_TB(name, mask)                                                                    \
+  cfi_endproc;                                                                                                  \
+  TRACEBACK_MASK(__memcpy_e6500,mask)                                                               \
+  END_2(__memcpy_e6500)
+
+#undef libc_hidden_builtin_def  /* Redefined as empty so any libc_hidden_builtin_def use in the included file expands to nothing here.  */
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/e6500/memcpy.S>