Message ID | 20220603044229.2180216-4-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1,1/8] x86: Create header for VEC classes in x86 strings library | expand |
On Thu, Jun 2, 2022 at 11:42 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > The new code: > 1. prioritizes smaller lengths more. > 2. optimizes target placement more carefully. > 3. reuses logic more. > 4. fixes up various inefficiencies in the logic. > > The total code size saving is: 394 bytes > Geometric Mean of all benchmarks New / Old: 0.874 > > Regressions: > 1. The page cross case is now colder, especially re-entry from the > page cross case if a match is not found in the first VEC > (roughly 50%). My general opinion with this patch is this is > acceptable given the "coldness" of this case (less than 4%) and > generally performance improvement in the other far more common > cases. > > 2. There are some regressions 5-15% for medium/large user-arg > lengths that have a match in the first VEC. This is because the > logic was rewritten to optimize finds in the first VEC if the > user-arg length is shorter (where we see roughly 20-50% > performance improvements). It is not always the case this is a > regression. My intuition is some frontend quirk is partially > explaining the data although I haven't been able to find the > root cause. > > Full xcheck passes on x86_64. > --- I am least confident in the numbers in this patch. Results below are the geometric mean of N = 30 runs. 
Benchmarked on Tigerlake: https://ark.intel.com/content/www/us/en/ark/products/208921/intel-core-i71165g7-processor-12m-cache-up-to-4-70-ghz-with-ipu.html Aggregate Geometric Mean of New / Old: 0.8743388468654057 Results For: memrchr len, align, pos, seek_char, invert_pos, New / Old 2048, 0, 32, 23, 0, 0.993 256, 1, 64, 23, 0, 0.903 2048, 0, 32, 0, 0, 0.89 256, 1, 64, 0, 0, 0.904 256, 4081, 64, 0, 0, 0.907 256, 0, 1, 23, 0, 0.95 256, 0, 1, 0, 0, 0.95 256, 1, 1, 23, 0, 0.885 256, 1, 1, 0, 0, 0.883 2048, 0, 64, 23, 0, 0.8 256, 2, 64, 23, 0, 0.905 2048, 0, 64, 0, 0, 0.795 256, 2, 64, 0, 0, 0.905 256, 0, 2, 23, 0, 0.949 256, 0, 2, 0, 0, 0.949 256, 2, 2, 23, 0, 0.885 256, 2, 2, 0, 0, 0.886 2048, 0, 128, 23, 0, 0.781 256, 3, 64, 23, 0, 0.904 2048, 0, 128, 0, 0, 0.804 256, 3, 64, 0, 0, 0.904 256, 0, 3, 23, 0, 0.948 256, 0, 3, 0, 0, 0.948 256, 3, 3, 23, 0, 0.886 256, 3, 3, 0, 0, 0.881 2048, 0, 256, 23, 0, 0.715 256, 4, 64, 23, 0, 0.896 2048, 0, 256, 0, 0, 0.747 256, 4, 64, 0, 0, 0.897 256, 0, 4, 23, 0, 0.948 256, 0, 4, 0, 0, 0.95 256, 4, 4, 23, 0, 0.884 256, 4, 4, 0, 0, 0.885 2048, 0, 512, 23, 0, 0.66 256, 5, 64, 23, 0, 0.905 2048, 0, 512, 0, 0, 0.674 256, 5, 64, 0, 0, 0.905 256, 0, 5, 23, 0, 0.951 256, 0, 5, 0, 0, 0.95 256, 5, 5, 23, 0, 0.885 256, 5, 5, 0, 0, 0.883 2048, 0, 1024, 23, 0, 0.952 256, 6, 64, 23, 0, 0.905 2048, 0, 1024, 0, 0, 0.952 256, 6, 64, 0, 0, 0.904 256, 0, 6, 23, 0, 0.95 256, 0, 6, 0, 0, 0.95 256, 6, 6, 23, 0, 0.884 256, 6, 6, 0, 0, 0.884 2048, 0, 2048, 23, 0, 0.843 256, 7, 64, 23, 0, 0.904 2048, 0, 2048, 0, 0, 0.839 256, 7, 64, 0, 0, 0.906 256, 0, 7, 23, 0, 0.951 256, 0, 7, 0, 0, 0.951 256, 7, 7, 23, 0, 0.887 256, 7, 7, 0, 0, 0.885 192, 1, 32, 23, 0, 0.867 192, 1, 32, 0, 0, 0.866 256, 1, 32, 23, 0, 0.888 256, 1, 32, 0, 0, 0.888 512, 1, 32, 23, 0, 1.103 512, 1, 32, 0, 0, 1.102 256, 4081, 32, 23, 0, 0.924 192, 2, 64, 23, 0, 1.081 192, 2, 64, 0, 0, 1.081 512, 2, 64, 23, 0, 1.131 512, 2, 64, 0, 0, 1.129 256, 4081, 64, 23, 0, 0.905 192, 3, 96, 23, 0, 1.174 
192, 3, 96, 0, 0, 1.174 256, 3, 96, 23, 0, 0.73 256, 3, 96, 0, 0, 0.73 512, 3, 96, 23, 0, 0.755 512, 3, 96, 0, 0, 0.757 256, 4081, 96, 23, 0, 0.835 192, 4, 128, 23, 0, 0.898 192, 4, 128, 0, 0, 0.895 256, 4, 128, 23, 0, 1.081 256, 4, 128, 0, 0, 1.082 512, 4, 128, 23, 0, 1.088 512, 4, 128, 0, 0, 1.087 256, 4081, 128, 23, 0, 1.252 192, 5, 160, 23, 0, 0.894 192, 5, 160, 0, 0, 0.894 256, 5, 160, 23, 0, 1.174 256, 5, 160, 0, 0, 1.174 512, 5, 160, 23, 0, 1.093 512, 5, 160, 0, 0, 1.097 256, 4081, 160, 23, 0, 1.255 192, 6, 192, 23, 0, 0.869 192, 6, 192, 0, 0, 0.869 256, 6, 192, 23, 0, 0.903 256, 6, 192, 0, 0, 0.899 512, 6, 192, 23, 0, 0.999 512, 6, 192, 0, 0, 1.0 256, 4081, 192, 23, 0, 0.91 192, 7, 224, 23, 0, 0.869 192, 7, 224, 0, 0, 0.868 256, 7, 224, 23, 0, 0.893 256, 7, 224, 0, 0, 0.893 512, 7, 224, 23, 0, 0.718 512, 7, 224, 0, 0, 0.718 256, 4081, 224, 23, 0, 0.903 2, 0, 1, 23, 0, 1.026 2, 0, 1, 0, 0, 1.029 2, 1, 1, 23, 0, 0.874 2, 1, 1, 0, 0, 0.875 0, 0, 1, 23, 0, 0.583 0, 0, 1, 0, 0, 0.583 0, 1, 1, 23, 0, 0.539 0, 1, 1, 0, 0, 0.538 2, 2048, 1, 23, 0, 0.751 2, 2048, 1, 0, 0, 0.749 2, 2049, 1, 23, 0, 0.638 2, 2049, 1, 0, 0, 0.638 0, 2048, 1, 23, 0, 0.5 0, 2048, 1, 0, 0, 0.5 0, 2049, 1, 23, 0, 0.462 0, 2049, 1, 0, 0, 0.462 0, 4081, 1, 23, 0, 0.462 0, 4081, 1, 0, 0, 0.462 2, 4081, 1, 23, 0, 0.61 2, 4081, 1, 0, 0, 0.609 2, 0, 2, 0, 0, 0.889 3, 0, 2, 23, 0, 1.05 3, 0, 2, 0, 0, 1.034 3, 2, 2, 23, 0, 0.9 3, 2, 2, 0, 0, 0.887 1, 0, 2, 23, 0, 0.942 1, 0, 2, 0, 0, 0.941 1, 2, 2, 23, 0, 1.043 1, 2, 2, 0, 0, 1.11 3, 2048, 2, 23, 0, 0.75 3, 2048, 2, 0, 0, 0.75 3, 2050, 2, 23, 0, 0.638 3, 2050, 2, 0, 0, 0.639 1, 2048, 2, 23, 0, 0.666 1, 2048, 2, 0, 0, 0.668 1, 2050, 2, 23, 0, 0.734 1, 2050, 2, 0, 0, 0.727 1, 4081, 2, 23, 0, 0.725 1, 4081, 2, 0, 0, 0.726 3, 4081, 2, 23, 0, 0.614 3, 4081, 2, 0, 0, 0.619 3, 0, 1, 23, 0, 1.043 4, 0, 3, 23, 0, 1.04 4, 0, 3, 0, 0, 1.043 4, 3, 3, 23, 0, 0.886 4, 3, 3, 0, 0, 0.901 2, 0, 3, 23, 0, 0.923 2, 0, 3, 0, 0, 0.933 2, 3, 3, 23, 0, 1.01 2, 3, 3, 0, 
0, 1.083 4, 2048, 3, 23, 0, 0.751 4, 2048, 3, 0, 0, 0.75 4, 2051, 3, 23, 0, 0.638 4, 2051, 3, 0, 0, 0.641 2, 2048, 3, 23, 0, 0.67 2, 2048, 3, 0, 0, 0.67 2, 2051, 3, 23, 0, 0.728 2, 2051, 3, 0, 0, 0.73 2, 4081, 3, 23, 0, 0.727 2, 4081, 3, 0, 0, 0.726 4, 4081, 3, 23, 0, 0.613 4, 4081, 3, 0, 0, 0.63 4, 0, 1, 23, 0, 1.073 4, 0, 2, 0, 0, 1.055 5, 0, 4, 23, 0, 1.055 5, 0, 4, 0, 0, 1.066 5, 4, 4, 23, 0, 0.893 5, 4, 4, 0, 0, 0.892 3, 0, 4, 23, 0, 0.911 3, 0, 4, 0, 0, 0.913 3, 4, 4, 23, 0, 0.988 3, 4, 4, 0, 0, 1.055 5, 2048, 4, 23, 0, 0.751 5, 2048, 4, 0, 0, 0.752 5, 2052, 4, 23, 0, 0.64 5, 2052, 4, 0, 0, 0.639 3, 2048, 4, 23, 0, 0.668 3, 2048, 4, 0, 0, 0.669 3, 2052, 4, 23, 0, 0.73 3, 2052, 4, 0, 0, 0.731 3, 4081, 4, 23, 0, 0.726 3, 4081, 4, 0, 0, 0.73 5, 4081, 4, 23, 0, 0.62 5, 4081, 4, 0, 0, 0.611 5, 0, 1, 23, 0, 1.044 5, 0, 2, 0, 0, 1.048 6, 0, 5, 23, 0, 1.062 6, 0, 5, 0, 0, 1.064 6, 5, 5, 23, 0, 0.898 6, 5, 5, 0, 0, 0.896 4, 0, 5, 23, 0, 0.894 4, 0, 5, 0, 0, 0.894 4, 5, 5, 23, 0, 0.974 4, 5, 5, 0, 0, 1.042 6, 2048, 5, 23, 0, 0.752 6, 2048, 5, 0, 0, 0.751 6, 2053, 5, 23, 0, 0.639 6, 2053, 5, 0, 0, 0.638 4, 2048, 5, 23, 0, 0.667 4, 2048, 5, 0, 0, 0.668 4, 2053, 5, 23, 0, 0.73 4, 2053, 5, 0, 0, 0.729 4, 4081, 5, 23, 0, 0.726 4, 4081, 5, 0, 0, 0.727 6, 4081, 5, 23, 0, 0.626 6, 4081, 5, 0, 0, 0.619 6, 0, 1, 23, 0, 1.045 6, 0, 2, 0, 0, 1.049 7, 0, 6, 23, 0, 1.032 7, 0, 6, 0, 0, 1.038 7, 6, 6, 23, 0, 0.889 7, 6, 6, 0, 0, 0.894 5, 0, 6, 23, 0, 0.89 5, 0, 6, 0, 0, 0.891 5, 6, 6, 23, 0, 0.971 5, 6, 6, 0, 0, 0.997 7, 2048, 6, 23, 0, 0.751 7, 2048, 6, 0, 0, 0.747 7, 2054, 6, 23, 0, 0.639 7, 2054, 6, 0, 0, 0.64 5, 2048, 6, 23, 0, 0.667 5, 2048, 6, 0, 0, 0.669 5, 2054, 6, 23, 0, 0.732 5, 2054, 6, 0, 0, 0.728 5, 4081, 6, 23, 0, 0.729 5, 4081, 6, 0, 0, 0.727 7, 4081, 6, 23, 0, 0.631 7, 4081, 6, 0, 0, 0.619 7, 0, 1, 23, 0, 1.042 7, 0, 2, 0, 0, 1.039 8, 0, 7, 23, 0, 1.034 8, 0, 7, 0, 0, 1.04 8, 7, 7, 23, 0, 0.876 8, 7, 7, 0, 0, 0.883 6, 0, 7, 23, 0, 0.891 6, 0, 7, 0, 0, 0.895 6, 7, 7, 
23, 0, 0.986 6, 7, 7, 0, 0, 0.996 8, 2048, 7, 23, 0, 0.754 8, 2048, 7, 0, 0, 0.754 8, 2055, 7, 23, 0, 0.638 8, 2055, 7, 0, 0, 0.638 6, 2048, 7, 23, 0, 0.667 6, 2048, 7, 0, 0, 0.67 6, 2055, 7, 23, 0, 0.73 6, 2055, 7, 0, 0, 0.729 6, 4081, 7, 23, 0, 0.726 6, 4081, 7, 0, 0, 0.727 8, 4081, 7, 23, 0, 0.61 8, 4081, 7, 0, 0, 0.616 8, 0, 1, 23, 0, 1.031 8, 0, 2, 0, 0, 1.032 9, 0, 8, 23, 0, 1.044 9, 0, 8, 0, 0, 1.037 9, 8, 8, 23, 0, 0.652 9, 8, 8, 0, 0, 0.643 7, 0, 8, 23, 0, 0.897 7, 0, 8, 0, 0, 0.889 7, 8, 8, 23, 0, 0.969 7, 8, 8, 0, 0, 1.015 9, 2048, 8, 23, 0, 0.753 9, 2048, 8, 0, 0, 0.75 9, 2056, 8, 23, 0, 0.645 9, 2056, 8, 0, 0, 0.655 7, 2048, 8, 23, 0, 0.667 7, 2048, 8, 0, 0, 0.671 7, 2056, 8, 23, 0, 0.731 7, 2056, 8, 0, 0, 0.731 7, 4081, 8, 23, 0, 0.723 7, 4081, 8, 0, 0, 0.724 9, 4081, 8, 23, 0, 0.653 9, 4081, 8, 0, 0, 0.638 9, 0, 1, 23, 0, 1.037 9, 0, 2, 0, 0, 1.032 10, 0, 9, 23, 0, 1.033 10, 0, 9, 0, 0, 1.03 10, 9, 9, 23, 0, 0.66 10, 9, 9, 0, 0, 0.657 8, 0, 9, 23, 0, 0.888 8, 0, 9, 0, 0, 0.891 8, 9, 9, 23, 0, 0.631 8, 9, 9, 0, 0, 0.632 10, 2048, 9, 23, 0, 0.767 10, 2048, 9, 0, 0, 0.759 10, 2057, 9, 23, 0, 0.666 10, 2057, 9, 0, 0, 0.647 8, 2048, 9, 23, 0, 0.669 8, 2048, 9, 0, 0, 0.668 8, 2057, 9, 23, 0, 0.629 8, 2057, 9, 0, 0, 0.641 8, 4081, 9, 23, 0, 0.727 8, 4081, 9, 0, 0, 0.764 10, 4081, 9, 23, 0, 0.642 10, 4081, 9, 0, 0, 0.653 10, 0, 1, 23, 0, 1.031 10, 0, 2, 0, 0, 1.038 11, 0, 10, 23, 0, 1.032 11, 0, 10, 0, 0, 1.029 11, 10, 10, 23, 0, 0.652 11, 10, 10, 0, 0, 0.656 9, 0, 10, 23, 0, 0.893 9, 0, 10, 0, 0, 0.894 9, 10, 10, 23, 0, 0.629 9, 10, 10, 0, 0, 0.707 11, 2048, 10, 23, 0, 0.753 11, 2048, 10, 0, 0, 0.749 11, 2058, 10, 23, 0, 0.662 11, 2058, 10, 0, 0, 0.661 9, 2048, 10, 23, 0, 0.673 9, 2048, 10, 0, 0, 0.666 9, 2058, 10, 23, 0, 0.629 9, 2058, 10, 0, 0, 0.663 9, 4081, 10, 23, 0, 0.727 9, 4081, 10, 0, 0, 0.779 11, 4081, 10, 23, 0, 0.624 11, 4081, 10, 0, 0, 0.619 11, 0, 1, 23, 0, 1.03 11, 0, 2, 0, 0, 1.03 12, 0, 11, 23, 0, 1.039 12, 0, 11, 0, 0, 1.03 12, 11, 11, 23, 
0, 0.653 12, 11, 11, 0, 0, 0.652 10, 0, 11, 23, 0, 0.896 10, 0, 11, 0, 0, 0.889 10, 11, 11, 23, 0, 0.628 10, 11, 11, 0, 0, 0.696 12, 2048, 11, 23, 0, 0.752 12, 2048, 11, 0, 0, 0.754 12, 2059, 11, 23, 0, 0.657 12, 2059, 11, 0, 0, 0.652 10, 2048, 11, 23, 0, 0.67 10, 2048, 11, 0, 0, 0.668 10, 2059, 11, 23, 0, 0.627 10, 2059, 11, 0, 0, 0.677 10, 4081, 11, 23, 0, 0.726 10, 4081, 11, 0, 0, 0.771 12, 4081, 11, 23, 0, 0.648 12, 4081, 11, 0, 0, 0.624 12, 0, 1, 23, 0, 1.047 12, 0, 2, 0, 0, 1.042 13, 0, 12, 23, 0, 1.043 13, 0, 12, 0, 0, 1.04 13, 12, 12, 23, 0, 0.66 13, 12, 12, 0, 0, 0.647 11, 0, 12, 23, 0, 0.891 11, 0, 12, 0, 0, 0.895 11, 12, 12, 23, 0, 0.629 11, 12, 12, 0, 0, 0.655 13, 2048, 12, 23, 0, 0.749 13, 2048, 12, 0, 0, 0.748 13, 2060, 12, 23, 0, 0.647 13, 2060, 12, 0, 0, 0.636 11, 2048, 12, 23, 0, 0.669 11, 2048, 12, 0, 0, 0.668 11, 2060, 12, 23, 0, 0.627 11, 2060, 12, 0, 0, 0.664 11, 4081, 12, 23, 0, 0.725 11, 4081, 12, 0, 0, 0.766 13, 4081, 12, 23, 0, 0.674 13, 4081, 12, 0, 0, 0.633 13, 0, 1, 23, 0, 1.036 13, 0, 2, 0, 0, 1.029 14, 0, 13, 23, 0, 1.029 14, 0, 13, 0, 0, 1.032 14, 13, 13, 23, 0, 0.646 14, 13, 13, 0, 0, 0.655 12, 0, 13, 23, 0, 0.889 12, 0, 13, 0, 0, 0.89 12, 13, 13, 23, 0, 0.628 12, 13, 13, 0, 0, 0.684 14, 2048, 13, 23, 0, 0.748 14, 2048, 13, 0, 0, 0.749 14, 2061, 13, 23, 0, 0.644 14, 2061, 13, 0, 0, 0.651 12, 2048, 13, 23, 0, 0.67 12, 2048, 13, 0, 0, 0.667 12, 2061, 13, 23, 0, 0.627 12, 2061, 13, 0, 0, 0.655 12, 4081, 13, 23, 0, 0.725 12, 4081, 13, 0, 0, 0.758 14, 4081, 13, 23, 0, 0.645 14, 4081, 13, 0, 0, 0.638 14, 0, 1, 23, 0, 1.046 14, 0, 2, 0, 0, 1.029 15, 0, 14, 23, 0, 1.028 15, 0, 14, 0, 0, 1.029 15, 14, 14, 23, 0, 0.65 15, 14, 14, 0, 0, 0.671 13, 0, 14, 23, 0, 0.891 13, 0, 14, 0, 0, 0.89 13, 14, 14, 23, 0, 0.637 13, 14, 14, 0, 0, 0.628 15, 2048, 14, 23, 0, 0.75 15, 2048, 14, 0, 0, 0.751 15, 2062, 14, 23, 0, 0.647 15, 2062, 14, 0, 0, 0.655 13, 2048, 14, 23, 0, 0.667 13, 2048, 14, 0, 0, 0.667 13, 2062, 14, 23, 0, 0.658 13, 2062, 14, 0, 0, 0.655 
13, 4081, 14, 23, 0, 0.726 13, 4081, 14, 0, 0, 0.778 15, 4081, 14, 23, 0, 0.872 15, 4081, 14, 0, 0, 0.872 15, 0, 1, 23, 0, 1.052 15, 0, 2, 0, 0, 1.028 16, 0, 15, 23, 0, 0.724 16, 0, 15, 0, 0, 0.724 16, 15, 15, 23, 0, 0.65 16, 15, 15, 0, 0, 0.65 14, 0, 15, 23, 0, 0.889 14, 0, 15, 0, 0, 0.889 14, 15, 15, 23, 0, 0.626 14, 15, 15, 0, 0, 0.665 16, 2048, 15, 23, 0, 0.735 16, 2048, 15, 0, 0, 0.717 16, 2063, 15, 23, 0, 0.648 16, 2063, 15, 0, 0, 0.651 14, 2048, 15, 23, 0, 0.667 14, 2048, 15, 0, 0, 0.667 14, 2063, 15, 23, 0, 0.627 14, 2063, 15, 0, 0, 0.694 14, 4081, 15, 23, 0, 0.725 14, 4081, 15, 0, 0, 0.801 16, 4081, 15, 23, 0, 0.999 16, 4081, 15, 0, 0, 0.999 16, 0, 1, 23, 0, 0.751 16, 0, 2, 0, 0, 0.731 17, 0, 16, 23, 0, 1.167 17, 0, 16, 0, 0, 1.165 17, 16, 16, 23, 0, 1.167 17, 16, 16, 0, 0, 1.167 15, 0, 16, 23, 0, 0.889 15, 0, 16, 0, 0, 0.889 15, 16, 16, 23, 0, 0.666 15, 16, 16, 0, 0, 0.712 17, 2048, 16, 23, 0, 1.167 17, 2048, 16, 0, 0, 1.167 17, 2064, 16, 23, 0, 1.167 17, 2064, 16, 0, 0, 1.167 15, 2048, 16, 23, 0, 0.667 15, 2048, 16, 0, 0, 0.667 15, 2064, 16, 23, 0, 0.667 15, 2064, 16, 0, 0, 0.696 15, 4081, 16, 23, 0, 0.956 15, 4081, 16, 0, 0, 1.098 17, 4081, 16, 23, 0, 1.5 17, 4081, 16, 0, 0, 1.5 17, 0, 1, 23, 0, 1.167 17, 0, 2, 0, 0, 1.167 18, 0, 17, 23, 0, 1.167 18, 0, 17, 0, 0, 1.167 18, 17, 17, 23, 0, 1.167 18, 17, 17, 0, 0, 1.167 16, 0, 17, 23, 0, 0.667 16, 0, 17, 0, 0, 0.667 16, 17, 17, 23, 0, 0.627 16, 17, 17, 0, 0, 0.627 18, 2048, 17, 23, 0, 1.167 18, 2048, 17, 0, 0, 1.167 18, 2065, 17, 23, 0, 1.167 18, 2065, 17, 0, 0, 1.167 16, 2048, 17, 23, 0, 0.667 16, 2048, 17, 0, 0, 0.667 16, 2065, 17, 23, 0, 0.627 16, 2065, 17, 0, 0, 0.627 16, 4081, 17, 23, 0, 1.046 16, 4081, 17, 0, 0, 1.095 18, 4081, 17, 23, 0, 1.5 18, 4081, 17, 0, 0, 1.5 18, 0, 1, 23, 0, 0.852 18, 0, 2, 0, 0, 1.167 19, 0, 18, 23, 0, 1.167 19, 0, 18, 0, 0, 1.167 19, 18, 18, 23, 0, 1.167 19, 18, 18, 0, 0, 1.167 17, 0, 18, 23, 0, 0.889 17, 0, 18, 0, 0, 0.889 17, 18, 18, 23, 0, 0.889 17, 18, 18, 0, 0, 0.8 19, 
2048, 18, 23, 0, 1.167 19, 2048, 18, 0, 0, 1.167 19, 2066, 18, 23, 0, 1.167 19, 2066, 18, 0, 0, 1.167 17, 2048, 18, 23, 0, 0.889 17, 2048, 18, 0, 0, 0.889 17, 2066, 18, 23, 0, 0.889 17, 2066, 18, 0, 0, 0.8 17, 4081, 18, 23, 0, 1.11 17, 4081, 18, 0, 0, 1.047 19, 4081, 18, 23, 0, 1.5 19, 4081, 18, 0, 0, 1.5 19, 0, 1, 23, 0, 0.897 19, 0, 2, 0, 0, 0.878 20, 0, 19, 23, 0, 1.167 20, 0, 19, 0, 0, 1.167 20, 19, 19, 23, 0, 1.167 20, 19, 19, 0, 0, 1.167 18, 0, 19, 23, 0, 0.889 18, 0, 19, 0, 0, 0.889 18, 19, 19, 23, 0, 0.889 18, 19, 19, 0, 0, 0.8 20, 2048, 19, 23, 0, 1.167 20, 2048, 19, 0, 0, 1.167 20, 2067, 19, 23, 0, 1.167 20, 2067, 19, 0, 0, 1.167 18, 2048, 19, 23, 0, 0.889 18, 2048, 19, 0, 0, 0.889 18, 2067, 19, 23, 0, 0.889 18, 2067, 19, 0, 0, 0.8 18, 4081, 19, 23, 0, 1.11 18, 4081, 19, 0, 0, 1.047 20, 4081, 19, 23, 0, 1.5 20, 4081, 19, 0, 0, 1.5 20, 0, 1, 23, 0, 0.906 20, 0, 2, 0, 0, 0.899 21, 0, 20, 23, 0, 1.167 21, 0, 20, 0, 0, 1.167 21, 20, 20, 23, 0, 1.167 21, 20, 20, 0, 0, 1.167 19, 0, 20, 23, 0, 0.889 19, 0, 20, 0, 0, 0.889 19, 20, 20, 23, 0, 0.889 19, 20, 20, 0, 0, 0.8 21, 2048, 20, 23, 0, 1.167 21, 2048, 20, 0, 0, 1.167 21, 2068, 20, 23, 0, 1.167 21, 2068, 20, 0, 0, 1.167 19, 2048, 20, 23, 0, 0.889 19, 2048, 20, 0, 0, 0.889 19, 2068, 20, 23, 0, 0.889 19, 2068, 20, 0, 0, 0.8 19, 4081, 20, 23, 0, 1.11 19, 4081, 20, 0, 0, 1.047 21, 4081, 20, 23, 0, 1.5 21, 4081, 20, 0, 0, 1.5 21, 0, 1, 23, 0, 0.902 21, 0, 2, 0, 0, 0.891 22, 0, 21, 23, 0, 1.167 22, 0, 21, 0, 0, 1.167 22, 21, 21, 23, 0, 1.167 22, 21, 21, 0, 0, 1.167 20, 0, 21, 23, 0, 0.889 20, 0, 21, 0, 0, 0.889 20, 21, 21, 23, 0, 0.889 20, 21, 21, 0, 0, 0.8 22, 2048, 21, 23, 0, 1.167 22, 2048, 21, 0, 0, 1.167 22, 2069, 21, 23, 0, 1.167 22, 2069, 21, 0, 0, 1.167 20, 2048, 21, 23, 0, 0.889 20, 2048, 21, 0, 0, 0.889 20, 2069, 21, 23, 0, 0.889 20, 2069, 21, 0, 0, 0.8 20, 4081, 21, 23, 0, 1.11 20, 4081, 21, 0, 0, 1.06 22, 4081, 21, 23, 0, 1.5 22, 4081, 21, 0, 0, 1.5 22, 0, 1, 23, 0, 0.915 22, 0, 2, 0, 0, 0.906 23, 0, 22, 
23, 0, 1.167 23, 0, 22, 0, 0, 1.167 23, 22, 22, 23, 0, 1.167 23, 22, 22, 0, 0, 1.167 21, 0, 22, 23, 0, 0.889 21, 0, 22, 0, 0, 0.889 21, 22, 22, 23, 0, 0.889 21, 22, 22, 0, 0, 0.8 23, 2048, 22, 23, 0, 1.167 23, 2048, 22, 0, 0, 1.167 23, 2070, 22, 23, 0, 1.167 23, 2070, 22, 0, 0, 1.167 21, 2048, 22, 23, 0, 0.889 21, 2048, 22, 0, 0, 0.889 21, 2070, 22, 23, 0, 0.889 21, 2070, 22, 0, 0, 0.8 21, 4081, 22, 23, 0, 1.11 21, 4081, 22, 0, 0, 1.059 23, 4081, 22, 23, 0, 1.5 23, 4081, 22, 0, 0, 1.5 23, 0, 1, 23, 0, 0.914 23, 0, 2, 0, 0, 0.907 24, 0, 23, 23, 0, 1.167 24, 0, 23, 0, 0, 1.167 24, 23, 23, 23, 0, 1.167 24, 23, 23, 0, 0, 1.167 22, 0, 23, 23, 0, 0.889 22, 0, 23, 0, 0, 0.889 22, 23, 23, 23, 0, 0.889 22, 23, 23, 0, 0, 0.8 24, 2048, 23, 23, 0, 1.167 24, 2048, 23, 0, 0, 1.167 24, 2071, 23, 23, 0, 1.167 24, 2071, 23, 0, 0, 1.167 22, 2048, 23, 23, 0, 0.889 22, 2048, 23, 0, 0, 0.889 22, 2071, 23, 23, 0, 0.889 22, 2071, 23, 0, 0, 0.8 22, 4081, 23, 23, 0, 1.11 22, 4081, 23, 0, 0, 1.049 24, 4081, 23, 23, 0, 1.5 24, 4081, 23, 0, 0, 1.5 24, 0, 1, 23, 0, 0.915 24, 0, 2, 0, 0, 0.915 25, 0, 24, 23, 0, 1.167 25, 0, 24, 0, 0, 1.167 25, 24, 24, 23, 0, 1.167 25, 24, 24, 0, 0, 1.167 23, 0, 24, 23, 0, 0.889 23, 0, 24, 0, 0, 0.889 23, 24, 24, 23, 0, 0.889 23, 24, 24, 0, 0, 0.8 25, 2048, 24, 23, 0, 1.167 25, 2048, 24, 0, 0, 1.167 25, 2072, 24, 23, 0, 1.167 25, 2072, 24, 0, 0, 1.167 23, 2048, 24, 23, 0, 0.889 23, 2048, 24, 0, 0, 0.889 23, 2072, 24, 23, 0, 0.889 23, 2072, 24, 0, 0, 0.8 23, 4081, 24, 23, 0, 1.11 23, 4081, 24, 0, 0, 1.05 25, 4081, 24, 23, 0, 1.5 25, 4081, 24, 0, 0, 1.5 25, 0, 1, 23, 0, 0.917 25, 0, 2, 0, 0, 0.918 26, 0, 25, 23, 0, 1.167 26, 0, 25, 0, 0, 1.167 26, 25, 25, 23, 0, 1.167 26, 25, 25, 0, 0, 1.167 24, 0, 25, 23, 0, 0.889 24, 0, 25, 0, 0, 0.889 24, 25, 25, 23, 0, 0.898 24, 25, 25, 0, 0, 0.832 26, 2048, 25, 23, 0, 1.167 26, 2048, 25, 0, 0, 1.167 26, 2073, 25, 23, 0, 1.167 26, 2073, 25, 0, 0, 1.167 24, 2048, 25, 23, 0, 0.889 24, 2048, 25, 0, 0, 0.889 24, 2073, 25, 23, 0, 
0.879 24, 2073, 25, 0, 0, 0.814 24, 4081, 25, 23, 0, 1.11 24, 4081, 25, 0, 0, 1.049 26, 4081, 25, 23, 0, 1.5 26, 4081, 25, 0, 0, 1.5 26, 0, 1, 23, 0, 0.869 26, 0, 2, 0, 0, 0.869 27, 0, 26, 23, 0, 1.167 27, 0, 26, 0, 0, 1.167 27, 26, 26, 23, 0, 1.167 27, 26, 26, 0, 0, 1.167 25, 0, 26, 23, 0, 0.889 25, 0, 26, 0, 0, 0.889 25, 26, 26, 23, 0, 0.871 25, 26, 26, 0, 0, 0.827 27, 2048, 26, 23, 0, 1.167 27, 2048, 26, 0, 0, 1.167 27, 2074, 26, 23, 0, 1.167 27, 2074, 26, 0, 0, 1.167 25, 2048, 26, 23, 0, 0.889 25, 2048, 26, 0, 0, 0.889 25, 2074, 26, 23, 0, 0.88 25, 2074, 26, 0, 0, 0.823 25, 4081, 26, 23, 0, 1.11 25, 4081, 26, 0, 0, 1.047 27, 4081, 26, 23, 0, 1.5 27, 4081, 26, 0, 0, 1.5 27, 0, 1, 23, 0, 0.865 27, 0, 2, 0, 0, 0.857 28, 0, 27, 23, 0, 1.167 28, 0, 27, 0, 0, 1.167 28, 27, 27, 23, 0, 1.167 28, 27, 27, 0, 0, 1.167 26, 0, 27, 23, 0, 0.889 26, 0, 27, 0, 0, 0.889 26, 27, 27, 23, 0, 0.884 26, 27, 27, 0, 0, 0.82 28, 2048, 27, 23, 0, 1.167 28, 2048, 27, 0, 0, 1.167 28, 2075, 27, 23, 0, 1.167 28, 2075, 27, 0, 0, 1.167 26, 2048, 27, 23, 0, 0.889 26, 2048, 27, 0, 0, 0.889 26, 2075, 27, 23, 0, 0.892 26, 2075, 27, 0, 0, 0.83 26, 4081, 27, 23, 0, 1.11 26, 4081, 27, 0, 0, 1.054 28, 4081, 27, 23, 0, 1.5 28, 4081, 27, 0, 0, 1.5 28, 0, 1, 23, 0, 0.866 28, 0, 2, 0, 0, 0.867 29, 0, 28, 23, 0, 1.167 29, 0, 28, 0, 0, 1.167 29, 28, 28, 23, 0, 1.167 29, 28, 28, 0, 0, 1.167 27, 0, 28, 23, 0, 0.889 27, 0, 28, 0, 0, 0.889 27, 28, 28, 23, 0, 0.892 27, 28, 28, 0, 0, 0.825 29, 2048, 28, 23, 0, 1.167 29, 2048, 28, 0, 0, 1.167 29, 2076, 28, 23, 0, 1.167 29, 2076, 28, 0, 0, 1.167 27, 2048, 28, 23, 0, 0.889 27, 2048, 28, 0, 0, 0.888 27, 2076, 28, 23, 0, 0.898 27, 2076, 28, 0, 0, 0.821 27, 4081, 28, 23, 0, 1.11 27, 4081, 28, 0, 0, 1.052 29, 4081, 28, 23, 0, 1.5 29, 4081, 28, 0, 0, 1.5 29, 0, 1, 23, 0, 0.854 29, 0, 2, 0, 0, 0.86 30, 0, 29, 23, 0, 1.166 30, 0, 29, 0, 0, 1.167 30, 29, 29, 23, 0, 1.167 30, 29, 29, 0, 0, 1.167 28, 0, 29, 23, 0, 0.887 28, 0, 29, 0, 0, 0.888 28, 29, 29, 23, 0, 0.891 28, 29, 
29, 0, 0, 0.843 30, 2048, 29, 23, 0, 1.166 30, 2048, 29, 0, 0, 1.167 30, 2077, 29, 23, 0, 1.167 30, 2077, 29, 0, 0, 1.165 28, 2048, 29, 23, 0, 0.886 28, 2048, 29, 0, 0, 0.887 28, 2077, 29, 23, 0, 0.891 28, 2077, 29, 0, 0, 0.836 28, 4081, 29, 23, 0, 1.106 28, 4081, 29, 0, 0, 1.063 30, 4081, 29, 23, 0, 1.496 30, 4081, 29, 0, 0, 1.496 30, 0, 1, 23, 0, 0.874 30, 0, 2, 0, 0, 0.873 31, 0, 30, 23, 0, 1.164 31, 0, 30, 0, 0, 1.161 31, 30, 30, 23, 0, 1.162 31, 30, 30, 0, 0, 1.163 29, 0, 30, 23, 0, 0.884 29, 0, 30, 0, 0, 0.884 29, 30, 30, 23, 0, 0.893 29, 30, 30, 0, 0, 0.847 31, 2048, 30, 23, 0, 1.163 31, 2048, 30, 0, 0, 1.162 31, 2078, 30, 23, 0, 1.161 31, 2078, 30, 0, 0, 1.161 29, 2048, 30, 23, 0, 0.884 29, 2048, 30, 0, 0, 0.884 29, 2078, 30, 23, 0, 0.894 29, 2078, 30, 0, 0, 0.848 29, 4081, 30, 23, 0, 1.102 29, 4081, 30, 0, 0, 1.074 31, 4081, 30, 23, 0, 1.159 31, 4081, 30, 0, 0, 1.16 31, 0, 1, 23, 0, 0.859 31, 0, 2, 0, 0, 0.858 32, 0, 31, 23, 0, 1.161 32, 0, 31, 0, 0, 1.161 32, 31, 31, 23, 0, 1.161 32, 31, 31, 0, 0, 1.161 30, 0, 31, 23, 0, 0.882 30, 0, 31, 0, 0, 0.883 30, 31, 31, 23, 0, 0.897 30, 31, 31, 0, 0, 0.854 32, 2048, 31, 23, 0, 1.161 32, 2048, 31, 0, 0, 1.161 32, 2079, 31, 23, 0, 1.159 32, 2079, 31, 0, 0, 1.158 30, 2048, 31, 23, 0, 0.881 30, 2048, 31, 0, 0, 0.882 30, 2079, 31, 23, 0, 0.891 30, 2079, 31, 0, 0, 0.851 30, 4081, 31, 23, 0, 1.1 30, 4081, 31, 0, 0, 1.066 32, 4081, 31, 23, 0, 1.157 32, 4081, 31, 0, 0, 1.157 32, 0, 1, 23, 0, 0.798 32, 0, 2, 0, 0, 0.798 2048, 0, 32, 23, 1, 0.993 256, 1, 64, 23, 1, 0.89 2048, 0, 32, 0, 1, 0.992 256, 1, 64, 0, 1, 0.894 256, 4081, 64, 0, 1, 0.903 256, 0, 1, 23, 1, 1.158 256, 0, 1, 0, 1, 1.157 256, 1, 1, 23, 1, 1.158 256, 1, 1, 0, 1, 1.158 2048, 0, 64, 23, 1, 0.79 256, 2, 64, 23, 1, 0.894 2048, 0, 64, 0, 1, 0.79 256, 2, 64, 0, 1, 0.894 256, 0, 2, 23, 1, 1.161 256, 0, 2, 0, 1, 1.161 256, 2, 2, 23, 1, 1.161 256, 2, 2, 0, 1, 1.161 2048, 0, 128, 23, 1, 1.319 256, 3, 64, 23, 1, 0.897 2048, 0, 128, 0, 1, 1.323 256, 3, 64, 0, 1, 0.9 
256, 0, 3, 23, 1, 1.166 256, 0, 3, 0, 1, 1.167 256, 3, 3, 23, 1, 1.167 256, 3, 3, 0, 1, 1.167 2048, 0, 256, 23, 1, 0.995 256, 4, 64, 23, 1, 0.902 2048, 0, 256, 0, 1, 0.993 256, 4, 64, 0, 1, 0.901 256, 0, 4, 23, 1, 1.167 256, 0, 4, 0, 1, 1.167 256, 4, 4, 23, 1, 1.167 256, 4, 4, 0, 1, 1.167 2048, 0, 512, 23, 1, 1.109 256, 5, 64, 23, 1, 0.903 2048, 0, 512, 0, 1, 1.109 256, 5, 64, 0, 1, 0.897 256, 0, 5, 23, 1, 1.167 256, 0, 5, 0, 1, 1.167 256, 5, 5, 23, 1, 1.167 256, 5, 5, 0, 1, 1.167 2048, 0, 1024, 23, 1, 0.951 256, 6, 64, 23, 1, 0.902 2048, 0, 1024, 0, 1, 0.953 256, 6, 64, 0, 1, 0.9 256, 0, 6, 23, 1, 1.167 256, 0, 6, 0, 1, 1.167 256, 6, 6, 23, 1, 1.167 256, 6, 6, 0, 1, 1.167 2048, 0, 2048, 23, 1, 0.896 256, 7, 64, 23, 1, 0.901 2048, 0, 2048, 0, 1, 0.845 256, 7, 64, 0, 1, 0.9 256, 0, 7, 23, 1, 1.165 256, 0, 7, 0, 1, 1.165 256, 7, 7, 23, 1, 1.165 256, 7, 7, 0, 1, 1.165 192, 1, 32, 23, 1, 0.892 192, 1, 32, 0, 1, 0.892 256, 1, 32, 23, 1, 0.892 256, 1, 32, 0, 1, 0.892 512, 1, 32, 23, 1, 0.892 512, 1, 32, 0, 1, 0.892 256, 4081, 32, 23, 1, 0.902 192, 2, 64, 23, 1, 0.902 192, 2, 64, 0, 1, 0.898 512, 2, 64, 23, 1, 0.9 512, 2, 64, 0, 1, 0.899 256, 4081, 64, 23, 1, 0.908 192, 3, 96, 23, 1, 1.174 192, 3, 96, 0, 1, 1.174 256, 3, 96, 23, 1, 1.174 256, 3, 96, 0, 1, 1.174 512, 3, 96, 23, 1, 1.174 512, 3, 96, 0, 1, 1.174 256, 4081, 96, 23, 1, 1.255 192, 4, 128, 23, 1, 1.08 192, 4, 128, 0, 1, 1.081 256, 4, 128, 23, 1, 1.081 256, 4, 128, 0, 1, 1.081 512, 4, 128, 23, 1, 1.08 512, 4, 128, 0, 1, 1.081 256, 4081, 128, 23, 1, 1.25 192, 5, 160, 23, 1, 0.862 192, 5, 160, 0, 1, 0.864 256, 5, 160, 23, 1, 0.729 256, 5, 160, 0, 1, 0.73 512, 5, 160, 23, 1, 0.73 512, 5, 160, 0, 1, 0.729 256, 4081, 160, 23, 1, 0.834 192, 6, 192, 23, 1, 0.868 192, 6, 192, 0, 1, 0.868 256, 6, 192, 23, 1, 0.903 256, 6, 192, 0, 1, 0.903 512, 6, 192, 23, 1, 0.902 512, 6, 192, 0, 1, 0.902 256, 4081, 192, 23, 1, 0.902 192, 7, 224, 23, 1, 0.866 192, 7, 224, 0, 1, 0.865 256, 7, 224, 23, 1, 0.885 256, 7, 224, 0, 1, 0.885 512, 
7, 224, 23, 1, 0.948 512, 7, 224, 0, 1, 0.95 256, 4081, 224, 23, 1, 0.921 2, 0, 1, 23, 1, 1.02 2, 0, 1, 0, 1, 1.026 2, 1, 1, 23, 1, 0.873 2, 1, 1, 0, 1, 0.873 0, 0, 1, 23, 1, 0.581 0, 0, 1, 0, 1, 0.581 0, 1, 1, 23, 1, 0.537 0, 1, 1, 0, 1, 0.537 2, 2048, 1, 23, 1, 0.749 2, 2048, 1, 0, 1, 0.747 2, 2049, 1, 23, 1, 0.636 2, 2049, 1, 0, 1, 0.636 0, 2048, 1, 23, 1, 0.498 0, 2048, 1, 0, 1, 0.498 0, 2049, 1, 23, 1, 0.46 0, 2049, 1, 0, 1, 0.46 0, 4081, 1, 23, 1, 0.46 0, 4081, 1, 0, 1, 0.46 2, 4081, 1, 23, 1, 0.611 2, 4081, 1, 0, 1, 0.608 2, 0, 2, 0, 1, 0.894 3, 0, 2, 23, 1, 1.06 3, 0, 2, 0, 1, 1.027 3, 2, 2, 23, 1, 0.893 3, 2, 2, 0, 1, 0.892 1, 0, 2, 23, 1, 0.916 1, 0, 2, 0, 1, 0.916 1, 2, 2, 23, 1, 1.004 1, 2, 2, 0, 1, 1.094 3, 2048, 2, 23, 1, 0.749 3, 2048, 2, 0, 1, 0.749 3, 2050, 2, 23, 1, 0.636 3, 2050, 2, 0, 1, 0.636 1, 2048, 2, 23, 1, 0.665 1, 2048, 2, 0, 1, 0.664 1, 2050, 2, 23, 1, 0.723 1, 2050, 2, 0, 1, 0.724 1, 4081, 2, 23, 1, 0.72 1, 4081, 2, 0, 1, 0.74 3, 4081, 2, 23, 1, 0.618 3, 4081, 2, 0, 1, 0.597 3, 0, 1, 23, 1, 1.032 4, 0, 3, 23, 1, 1.036 4, 0, 3, 0, 1, 1.05 4, 3, 3, 23, 1, 0.889 4, 3, 3, 0, 1, 0.881 2, 0, 3, 23, 1, 0.907 2, 0, 3, 0, 1, 0.902 2, 3, 3, 23, 1, 0.982 2, 3, 3, 0, 1, 1.059 4, 2048, 3, 23, 1, 0.748 4, 2048, 3, 0, 1, 0.748 4, 2051, 3, 23, 1, 0.637 4, 2051, 3, 0, 1, 0.637 2, 2048, 3, 23, 1, 0.664 2, 2048, 3, 0, 1, 0.665 2, 2051, 3, 23, 1, 0.726 2, 2051, 3, 0, 1, 0.726 2, 4081, 3, 23, 1, 0.727 2, 4081, 3, 0, 1, 0.727 4, 4081, 3, 23, 1, 0.647 4, 4081, 3, 0, 1, 0.622 4, 0, 1, 23, 1, 1.043 4, 0, 2, 0, 1, 1.039 5, 0, 4, 23, 1, 1.043 5, 0, 4, 0, 1, 1.055 5, 4, 4, 23, 1, 0.889 5, 4, 4, 0, 1, 0.878 3, 0, 4, 23, 1, 0.889 3, 0, 4, 0, 1, 0.894 3, 4, 4, 23, 1, 0.958 3, 4, 4, 0, 1, 1.033 5, 2048, 4, 23, 1, 0.75 5, 2048, 4, 0, 1, 0.75 5, 2052, 4, 23, 1, 0.638 5, 2052, 4, 0, 1, 0.637 3, 2048, 4, 23, 1, 0.666 3, 2048, 4, 0, 1, 0.666 3, 2052, 4, 23, 1, 0.706 3, 2052, 4, 0, 1, 0.713 3, 4081, 4, 23, 1, 0.712 3, 4081, 4, 0, 1, 0.711 5, 4081, 4, 23, 1, 0.66 5, 4081, 4, 
0, 1, 0.629 5, 0, 1, 23, 1, 1.026 5, 0, 2, 0, 1, 1.057 6, 0, 5, 23, 1, 1.031 6, 0, 5, 0, 1, 1.029 6, 5, 5, 23, 1, 0.894 6, 5, 5, 0, 1, 0.887 4, 0, 5, 23, 1, 0.889 4, 0, 5, 0, 1, 0.889 4, 5, 5, 23, 1, 0.932 4, 5, 5, 0, 1, 1.026 6, 2048, 5, 23, 1, 0.749 6, 2048, 5, 0, 1, 0.75 6, 2053, 5, 23, 1, 0.638 6, 2053, 5, 0, 1, 0.638 4, 2048, 5, 23, 1, 0.667 4, 2048, 5, 0, 1, 0.667 4, 2053, 5, 23, 1, 0.692 4, 2053, 5, 0, 1, 0.699 4, 4081, 5, 23, 1, 0.698 4, 4081, 5, 0, 1, 0.698 6, 4081, 5, 23, 1, 0.639 6, 4081, 5, 0, 1, 0.619 6, 0, 1, 23, 1, 1.027 6, 0, 2, 0, 1, 1.026 7, 0, 6, 23, 1, 1.028 7, 0, 6, 0, 1, 1.028 7, 6, 6, 23, 1, 0.874 7, 6, 6, 0, 1, 0.882 5, 0, 6, 23, 1, 0.888 5, 0, 6, 0, 1, 0.888 5, 6, 6, 23, 1, 0.942 5, 6, 6, 0, 1, 1.014 7, 2048, 6, 23, 1, 0.75 7, 2048, 6, 0, 1, 0.749 7, 2054, 6, 23, 1, 0.637 7, 2054, 6, 0, 1, 0.638 5, 2048, 6, 23, 1, 0.667 5, 2048, 6, 0, 1, 0.666 5, 2054, 6, 23, 1, 0.706 5, 2054, 6, 0, 1, 0.702 5, 4081, 6, 23, 1, 0.705 5, 4081, 6, 0, 1, 0.705 7, 4081, 6, 23, 1, 0.659 7, 4081, 6, 0, 1, 0.638 7, 0, 1, 23, 1, 1.042 7, 0, 2, 0, 1, 1.035 8, 0, 7, 23, 1, 1.033 8, 0, 7, 0, 1, 1.027 8, 7, 7, 23, 1, 0.886 8, 7, 7, 0, 1, 0.875 6, 0, 7, 23, 1, 0.889 6, 0, 7, 0, 1, 0.889 6, 7, 7, 23, 1, 0.912 6, 7, 7, 0, 1, 0.982 8, 2048, 7, 23, 1, 0.755 8, 2048, 7, 0, 1, 0.749 8, 2055, 7, 23, 1, 0.638 8, 2055, 7, 0, 1, 0.638 6, 2048, 7, 23, 1, 0.667 6, 2048, 7, 0, 1, 0.667 6, 2055, 7, 23, 1, 0.692 6, 2055, 7, 0, 1, 0.693 6, 4081, 7, 23, 1, 0.689 6, 4081, 7, 0, 1, 0.723 8, 4081, 7, 23, 1, 0.64 8, 4081, 7, 0, 1, 0.631 8, 0, 1, 23, 1, 1.028 8, 0, 2, 0, 1, 1.039 9, 0, 8, 23, 1, 1.029 9, 0, 8, 0, 1, 1.028 9, 8, 8, 23, 1, 0.55 9, 8, 8, 0, 1, 0.542 7, 0, 8, 23, 1, 0.889 7, 0, 8, 0, 1, 0.889 7, 8, 8, 23, 1, 0.934 7, 8, 8, 0, 1, 1.011 9, 2048, 8, 23, 1, 0.751 9, 2048, 8, 0, 1, 0.75 9, 2056, 8, 23, 1, 0.553 9, 2056, 8, 0, 1, 0.542 7, 2048, 8, 23, 1, 0.667 7, 2048, 8, 0, 1, 0.667 7, 2056, 8, 23, 1, 0.712 7, 2056, 8, 0, 1, 0.73 7, 4081, 8, 23, 1, 0.716 7, 4081, 8, 0, 1, 0.76 9, 
4081, 8, 23, 1, 0.632 9, 4081, 8, 0, 1, 0.624 9, 0, 1, 23, 1, 1.028 9, 0, 2, 0, 1, 1.028 10, 0, 9, 23, 1, 1.027 10, 0, 9, 0, 1, 1.028 10, 9, 9, 23, 1, 0.545 10, 9, 9, 0, 1, 0.536 8, 0, 9, 23, 1, 0.889 8, 0, 9, 0, 1, 0.889 8, 9, 9, 23, 1, 0.627 8, 9, 9, 0, 1, 0.637 10, 2048, 9, 23, 1, 0.751 10, 2048, 9, 0, 1, 0.75 10, 2057, 9, 23, 1, 0.545 10, 2057, 9, 0, 1, 0.547 8, 2048, 9, 23, 1, 0.667 8, 2048, 9, 0, 1, 0.667 8, 2057, 9, 23, 1, 0.627 8, 2057, 9, 0, 1, 0.633 8, 4081, 9, 23, 1, 0.726 8, 4081, 9, 0, 1, 0.775 10, 4081, 9, 23, 1, 0.657 10, 4081, 9, 0, 1, 0.642 10, 0, 1, 23, 1, 1.03 10, 0, 2, 0, 1, 1.033 11, 0, 10, 23, 1, 1.029 11, 0, 10, 0, 1, 1.03 11, 10, 10, 23, 1, 0.542 11, 10, 10, 0, 1, 0.549 9, 0, 10, 23, 1, 0.889 9, 0, 10, 0, 1, 0.889 9, 10, 10, 23, 1, 0.627 9, 10, 10, 0, 1, 0.646 11, 2048, 10, 23, 1, 0.751 11, 2048, 10, 0, 1, 0.75 11, 2058, 10, 23, 1, 0.553 11, 2058, 10, 0, 1, 0.538 9, 2048, 10, 23, 1, 0.667 9, 2048, 10, 0, 1, 0.667 9, 2058, 10, 23, 1, 0.627 9, 2058, 10, 0, 1, 0.656 9, 4081, 10, 23, 1, 0.726 9, 4081, 10, 0, 1, 0.773 11, 4081, 10, 23, 1, 0.625 11, 4081, 10, 0, 1, 0.613 11, 0, 1, 23, 1, 1.029 11, 0, 2, 0, 1, 1.029 12, 0, 11, 23, 1, 1.028 12, 0, 11, 0, 1, 1.028 12, 11, 11, 23, 1, 0.545 12, 11, 11, 0, 1, 0.537 10, 0, 11, 23, 1, 0.889 10, 0, 11, 0, 1, 0.889 10, 11, 11, 23, 1, 0.627 10, 11, 11, 0, 1, 0.655 12, 2048, 11, 23, 1, 0.757 12, 2048, 11, 0, 1, 0.75 12, 2059, 11, 23, 1, 0.536 12, 2059, 11, 0, 1, 0.545 10, 2048, 11, 23, 1, 0.672 10, 2048, 11, 0, 1, 0.667 10, 2059, 11, 23, 1, 0.627 10, 2059, 11, 0, 1, 0.66 10, 4081, 11, 23, 1, 0.726 10, 4081, 11, 0, 1, 0.793 12, 4081, 11, 23, 1, 0.627 12, 4081, 11, 0, 1, 0.633 12, 0, 1, 23, 1, 1.028 12, 0, 2, 0, 1, 1.029 13, 0, 12, 23, 1, 1.028 13, 0, 12, 0, 1, 1.028 13, 12, 12, 23, 1, 0.547 13, 12, 12, 0, 1, 0.542 11, 0, 12, 23, 1, 0.889 11, 0, 12, 0, 1, 0.889 11, 12, 12, 23, 1, 0.627 11, 12, 12, 0, 1, 0.69 13, 2048, 12, 23, 1, 0.75 13, 2048, 12, 0, 1, 0.75 13, 2060, 12, 23, 1, 0.55 13, 2060, 12, 0, 1, 0.542 
11, 2048, 12, 23, 1, 0.667 11, 2048, 12, 0, 1, 0.667 11, 2060, 12, 23, 1, 0.627 11, 2060, 12, 0, 1, 0.646 11, 4081, 12, 23, 1, 0.726 11, 4081, 12, 0, 1, 0.78 13, 4081, 12, 23, 1, 0.632 13, 4081, 12, 0, 1, 0.619 13, 0, 1, 23, 1, 1.028 13, 0, 2, 0, 1, 1.028 14, 0, 13, 23, 1, 1.032 14, 0, 13, 0, 1, 1.038 14, 13, 13, 23, 1, 0.55 14, 13, 13, 0, 1, 0.539 12, 0, 13, 23, 1, 0.889 12, 0, 13, 0, 1, 0.889 12, 13, 13, 23, 1, 0.627 12, 13, 13, 0, 1, 0.655 14, 2048, 13, 23, 1, 0.751 14, 2048, 13, 0, 1, 0.751 14, 2061, 13, 23, 1, 0.542 14, 2061, 13, 0, 1, 0.547 12, 2048, 13, 23, 1, 0.667 12, 2048, 13, 0, 1, 0.667 12, 2061, 13, 23, 1, 0.627 12, 2061, 13, 0, 1, 0.646 12, 4081, 13, 23, 1, 0.726 12, 4081, 13, 0, 1, 0.769 14, 4081, 13, 23, 1, 0.627 14, 4081, 13, 0, 1, 0.62 14, 0, 1, 23, 1, 1.035 14, 0, 2, 0, 1, 1.033 15, 0, 14, 23, 1, 1.028 15, 0, 14, 0, 1, 1.028 15, 14, 14, 23, 1, 0.545 15, 14, 14, 0, 1, 0.531 13, 0, 14, 23, 1, 0.889 13, 0, 14, 0, 1, 0.889 13, 14, 14, 23, 1, 0.628 13, 14, 14, 0, 1, 0.628 15, 2048, 14, 23, 1, 0.751 15, 2048, 14, 0, 1, 0.75 15, 2062, 14, 23, 1, 0.542 15, 2062, 14, 0, 1, 0.536 13, 2048, 14, 23, 1, 0.667 13, 2048, 14, 0, 1, 0.667 13, 2062, 14, 23, 1, 0.627 13, 2062, 14, 0, 1, 0.628 13, 4081, 14, 23, 1, 0.726 13, 4081, 14, 0, 1, 0.747 15, 4081, 14, 23, 1, 0.874 15, 4081, 14, 0, 1, 0.879 15, 0, 1, 23, 1, 1.028 15, 0, 2, 0, 1, 1.028 16, 0, 15, 23, 1, 0.728 16, 0, 15, 0, 1, 0.735 16, 15, 15, 23, 1, 0.647 16, 15, 15, 0, 1, 0.647 14, 0, 15, 23, 1, 0.889 14, 0, 15, 0, 1, 0.889 14, 15, 15, 23, 1, 0.627 14, 15, 15, 0, 1, 0.647 16, 2048, 15, 23, 1, 0.732 16, 2048, 15, 0, 1, 0.714 16, 2063, 15, 23, 1, 0.65 16, 2063, 15, 0, 1, 0.65 14, 2048, 15, 23, 1, 0.667 14, 2048, 15, 0, 1, 0.667 14, 2063, 15, 23, 1, 0.627 14, 2063, 15, 0, 1, 0.674 14, 4081, 15, 23, 1, 0.724 14, 4081, 15, 0, 1, 0.777 16, 4081, 15, 23, 1, 1.01 16, 4081, 15, 0, 1, 0.997 16, 0, 1, 23, 1, 0.722 16, 0, 2, 0, 1, 0.725 17, 0, 16, 23, 1, 1.167 17, 0, 16, 0, 1, 1.167 17, 16, 16, 23, 1, 1.167 17, 16, 16, 
0, 1, 1.167 15, 0, 16, 23, 1, 0.891 15, 0, 16, 0, 1, 0.892 15, 16, 16, 23, 1, 0.668 15, 16, 16, 0, 1, 0.699 17, 2048, 16, 23, 1, 1.167 17, 2048, 16, 0, 1, 1.167 17, 2064, 16, 23, 1, 1.167 17, 2064, 16, 0, 1, 1.167 15, 2048, 16, 23, 1, 0.668 15, 2048, 16, 0, 1, 0.667 15, 2064, 16, 23, 1, 0.667 15, 2064, 16, 0, 1, 0.771 15, 4081, 16, 23, 1, 0.933 15, 4081, 16, 0, 1, 1.056 17, 4081, 16, 23, 1, 1.78 17, 4081, 16, 0, 1, 1.789 17, 0, 1, 23, 1, 1.17 17, 0, 2, 0, 1, 1.169 18, 0, 17, 23, 1, 0.859 18, 0, 17, 0, 1, 0.857 18, 17, 17, 23, 1, 0.857 18, 17, 17, 0, 1, 0.857 16, 0, 17, 23, 1, 0.673 16, 0, 17, 0, 1, 0.672 16, 17, 17, 23, 1, 0.628 16, 17, 17, 0, 1, 0.628 18, 2048, 17, 23, 1, 0.861 18, 2048, 17, 0, 1, 0.859 18, 2065, 17, 23, 1, 0.86 18, 2065, 17, 0, 1, 0.857 16, 2048, 17, 23, 1, 0.668 16, 2048, 17, 0, 1, 0.668 16, 2065, 17, 23, 1, 0.627 16, 2065, 17, 0, 1, 0.627 16, 4081, 17, 23, 1, 1.049 16, 4081, 17, 0, 1, 1.174 18, 4081, 17, 23, 1, 1.068 18, 4081, 17, 0, 1, 1.064 18, 0, 1, 23, 1, 1.172 18, 0, 2, 0, 1, 1.172 19, 0, 18, 23, 1, 0.865 19, 0, 18, 0, 1, 0.864 19, 18, 18, 23, 1, 0.86 19, 18, 18, 0, 1, 0.861 17, 0, 18, 23, 1, 0.895 17, 0, 18, 0, 1, 0.895 17, 18, 18, 23, 1, 0.896 17, 18, 18, 0, 1, 0.836 19, 2048, 18, 23, 1, 0.866 19, 2048, 18, 0, 1, 0.866 19, 2066, 18, 23, 1, 0.866 19, 2066, 18, 0, 1, 0.863 17, 2048, 18, 23, 1, 0.896 17, 2048, 18, 0, 1, 0.895 17, 2066, 18, 23, 1, 0.895 17, 2066, 18, 0, 1, 0.877 17, 4081, 18, 23, 1, 1.115 17, 4081, 18, 0, 1, 1.07 19, 4081, 18, 23, 1, 1.061 19, 4081, 18, 0, 1, 1.06 19, 0, 1, 23, 1, 1.168 19, 0, 2, 0, 1, 1.168 20, 0, 19, 23, 1, 0.855 20, 0, 19, 0, 1, 0.858 20, 19, 19, 23, 1, 0.856 20, 19, 19, 0, 1, 0.855 18, 0, 19, 23, 1, 0.89 18, 0, 19, 0, 1, 0.89 18, 19, 19, 23, 1, 0.89 18, 19, 19, 0, 1, 0.875 20, 2048, 19, 23, 1, 0.859 20, 2048, 19, 0, 1, 0.855 20, 2067, 19, 23, 1, 0.854 20, 2067, 19, 0, 1, 0.856 18, 2048, 19, 23, 1, 0.889 18, 2048, 19, 0, 1, 0.889 18, 2067, 19, 23, 1, 0.889 18, 2067, 19, 0, 1, 0.893 18, 4081, 19, 23, 1, 
1.109 18, 4081, 19, 0, 1, 1.067 20, 4081, 19, 23, 1, 1.053 20, 4081, 19, 0, 1, 1.052 20, 0, 1, 23, 1, 1.165 20, 0, 2, 0, 1, 1.166 21, 0, 20, 23, 1, 0.855 21, 0, 20, 0, 1, 0.856 21, 20, 20, 23, 1, 0.854 21, 20, 20, 0, 1, 0.854 19, 0, 20, 23, 1, 0.888 19, 0, 20, 0, 1, 0.888 19, 20, 20, 23, 1, 0.888 19, 20, 20, 0, 1, 0.868 21, 2048, 20, 23, 1, 0.853 21, 2048, 20, 0, 1, 0.857 21, 2068, 20, 23, 1, 0.855 21, 2068, 20, 0, 1, 0.854 19, 2048, 20, 23, 1, 0.889 19, 2048, 20, 0, 1, 0.889 19, 2068, 20, 23, 1, 0.889 19, 2068, 20, 0, 1, 0.894 19, 4081, 20, 23, 1, 1.112 19, 4081, 20, 0, 1, 1.103 21, 4081, 20, 23, 1, 1.054 21, 4081, 20, 0, 1, 1.051 21, 0, 1, 23, 1, 1.169 21, 0, 2, 0, 1, 1.168 22, 0, 21, 23, 1, 0.853 22, 0, 21, 0, 1, 0.855 22, 21, 21, 23, 1, 0.856 22, 21, 21, 0, 1, 0.853 20, 0, 21, 23, 1, 0.889 20, 0, 21, 0, 1, 0.889 20, 21, 21, 23, 1, 0.889 20, 21, 21, 0, 1, 0.91 22, 2048, 21, 23, 1, 0.852 22, 2048, 21, 0, 1, 0.855 22, 2069, 21, 23, 1, 0.853 22, 2069, 21, 0, 1, 0.854 20, 2048, 21, 23, 1, 0.889 20, 2048, 21, 0, 1, 0.889 20, 2069, 21, 23, 1, 0.889 20, 2069, 21, 0, 1, 0.925 20, 4081, 21, 23, 1, 1.111 20, 4081, 21, 0, 1, 1.111 22, 4081, 21, 23, 1, 1.053 22, 4081, 21, 0, 1, 1.051 22, 0, 1, 23, 1, 1.167 22, 0, 2, 0, 1, 1.165 23, 0, 22, 23, 1, 0.853 23, 0, 22, 0, 1, 0.853 23, 22, 22, 23, 1, 0.853 23, 22, 22, 0, 1, 0.853 21, 0, 22, 23, 1, 0.888 21, 0, 22, 0, 1, 0.888 21, 22, 22, 23, 1, 0.889 21, 22, 22, 0, 1, 0.931 23, 2048, 22, 23, 1, 0.854 23, 2048, 22, 0, 1, 0.854 23, 2070, 22, 23, 1, 0.853 23, 2070, 22, 0, 1, 0.852 21, 2048, 22, 23, 1, 0.887 21, 2048, 22, 0, 1, 0.887 21, 2070, 22, 23, 1, 0.887 21, 2070, 22, 0, 1, 0.901 21, 4081, 22, 23, 1, 1.107 21, 4081, 22, 0, 1, 1.11 23, 4081, 22, 23, 1, 1.047 23, 4081, 22, 0, 1, 1.049 23, 0, 1, 23, 1, 1.163 23, 0, 2, 0, 1, 1.163 24, 0, 23, 23, 1, 0.851 24, 0, 23, 0, 1, 0.852 24, 23, 23, 23, 1, 0.852 24, 23, 23, 0, 1, 0.854 22, 0, 23, 23, 1, 0.888 22, 0, 23, 0, 1, 0.888 22, 23, 23, 23, 1, 0.888 22, 23, 23, 0, 1, 0.908 24, 2048, 23, 
23, 1, 0.853 24, 2048, 23, 0, 1, 0.851 24, 2071, 23, 23, 1, 0.851 24, 2071, 23, 0, 1, 0.851 22, 2048, 23, 23, 1, 0.888 22, 2048, 23, 0, 1, 0.888 22, 2071, 23, 23, 1, 0.888 22, 2071, 23, 0, 1, 0.882 22, 4081, 23, 23, 1, 1.109 22, 4081, 23, 0, 1, 1.084 24, 4081, 23, 23, 1, 1.049 24, 4081, 23, 0, 1, 1.049 24, 0, 1, 23, 1, 1.164 24, 0, 2, 0, 1, 1.164 25, 0, 24, 23, 1, 0.855 25, 0, 24, 0, 1, 0.849 25, 24, 24, 23, 1, 0.859 25, 24, 24, 0, 1, 0.861 23, 0, 24, 23, 1, 0.885 23, 0, 24, 0, 1, 0.885 23, 24, 24, 23, 1, 0.887 23, 24, 24, 0, 1, 0.898 25, 2048, 24, 23, 1, 0.851 25, 2048, 24, 0, 1, 0.852 25, 2072, 24, 23, 1, 0.852 25, 2072, 24, 0, 1, 0.852 23, 2048, 24, 23, 1, 0.886 23, 2048, 24, 0, 1, 0.886 23, 2072, 24, 23, 1, 0.886 23, 2072, 24, 0, 1, 0.916 23, 4081, 24, 23, 1, 1.106 23, 4081, 24, 0, 1, 1.078 25, 4081, 24, 23, 1, 1.044 25, 4081, 24, 0, 1, 1.045 25, 0, 1, 23, 1, 1.163 25, 0, 2, 0, 1, 1.163 26, 0, 25, 23, 1, 0.849 26, 0, 25, 0, 1, 0.851 26, 25, 25, 23, 1, 0.844 26, 25, 25, 0, 1, 0.849 24, 0, 25, 23, 1, 0.885 24, 0, 25, 0, 1, 0.886 24, 25, 25, 23, 1, 0.875 24, 25, 25, 0, 1, 0.845 26, 2048, 25, 23, 1, 0.85 26, 2048, 25, 0, 1, 0.849 26, 2073, 25, 23, 1, 0.862 26, 2073, 25, 0, 1, 0.861 24, 2048, 25, 23, 1, 0.886 24, 2048, 25, 0, 1, 0.885 24, 2073, 25, 23, 1, 0.862 24, 2073, 25, 0, 1, 0.836 24, 4081, 25, 23, 1, 1.105 24, 4081, 25, 0, 1, 1.088 26, 4081, 25, 23, 1, 1.047 26, 4081, 25, 0, 1, 1.045 26, 0, 1, 23, 1, 1.163 26, 0, 2, 0, 1, 1.163 27, 0, 26, 23, 1, 0.853 27, 0, 26, 0, 1, 0.853 27, 26, 26, 23, 1, 0.85 27, 26, 26, 0, 1, 0.86 25, 0, 26, 23, 1, 0.888 25, 0, 26, 0, 1, 0.887 25, 26, 26, 23, 1, 0.867 25, 26, 26, 0, 1, 0.844 27, 2048, 26, 23, 1, 0.852 27, 2048, 26, 0, 1, 0.851 27, 2074, 26, 23, 1, 0.872 27, 2074, 26, 0, 1, 0.878 25, 2048, 26, 23, 1, 0.889 25, 2048, 26, 0, 1, 0.888 25, 2074, 26, 23, 1, 0.868 25, 2074, 26, 0, 1, 0.854 25, 4081, 26, 23, 1, 1.109 25, 4081, 26, 0, 1, 1.102 27, 4081, 26, 23, 1, 1.046 27, 4081, 26, 0, 1, 1.049 27, 0, 1, 23, 1, 1.165 27, 0, 2, 
0, 1, 1.165 28, 0, 27, 23, 1, 0.853 28, 0, 27, 0, 1, 0.854 28, 27, 27, 23, 1, 0.873 28, 27, 27, 0, 1, 0.878 26, 0, 27, 23, 1, 0.887 26, 0, 27, 0, 1, 0.888 26, 27, 27, 23, 1, 0.875 26, 27, 27, 0, 1, 0.851 28, 2048, 27, 23, 1, 0.851 28, 2048, 27, 0, 1, 0.851 28, 2075, 27, 23, 1, 0.879 28, 2075, 27, 0, 1, 0.883 26, 2048, 27, 23, 1, 0.888 26, 2048, 27, 0, 1, 0.888 26, 2075, 27, 23, 1, 0.876 26, 2075, 27, 0, 1, 0.86 26, 4081, 27, 23, 1, 1.109 26, 4081, 27, 0, 1, 1.105 28, 4081, 27, 23, 1, 1.048 28, 4081, 27, 0, 1, 1.048 28, 0, 1, 23, 1, 1.164 28, 0, 2, 0, 1, 1.165 29, 0, 28, 23, 1, 0.854 29, 0, 28, 0, 1, 0.852 29, 28, 28, 23, 1, 0.887 29, 28, 28, 0, 1, 0.884 27, 0, 28, 23, 1, 0.887 27, 0, 28, 0, 1, 0.889 27, 28, 28, 23, 1, 0.885 27, 28, 28, 0, 1, 0.866 29, 2048, 28, 23, 1, 0.853 29, 2048, 28, 0, 1, 0.852 29, 2076, 28, 23, 1, 0.879 29, 2076, 28, 0, 1, 0.876 27, 2048, 28, 23, 1, 0.889 27, 2048, 28, 0, 1, 0.891 27, 2076, 28, 23, 1, 0.883 27, 2076, 28, 0, 1, 0.86 27, 4081, 28, 23, 1, 1.11 27, 4081, 28, 0, 1, 1.106 29, 4081, 28, 23, 1, 1.051 29, 4081, 28, 0, 1, 1.052 29, 0, 1, 23, 1, 1.168 29, 0, 2, 0, 1, 1.168 30, 0, 29, 23, 1, 0.856 30, 0, 29, 0, 1, 0.854 30, 29, 29, 23, 1, 0.873 30, 29, 29, 0, 1, 0.874 28, 0, 29, 23, 1, 0.891 28, 0, 29, 0, 1, 0.891 28, 29, 29, 23, 1, 0.884 28, 29, 29, 0, 1, 0.872 30, 2048, 29, 23, 1, 0.859 30, 2048, 29, 0, 1, 0.856 30, 2077, 29, 23, 1, 0.879 30, 2077, 29, 0, 1, 0.878 28, 2048, 29, 23, 1, 0.891 28, 2048, 29, 0, 1, 0.891 28, 2077, 29, 23, 1, 0.889 28, 2077, 29, 0, 1, 0.863 28, 4081, 29, 23, 1, 1.109 28, 4081, 29, 0, 1, 1.122 30, 4081, 29, 23, 1, 1.054 30, 4081, 29, 0, 1, 1.052 30, 0, 1, 23, 1, 1.163 30, 0, 2, 0, 1, 1.161 31, 0, 30, 23, 1, 0.851 31, 0, 30, 0, 1, 0.849 31, 30, 30, 23, 1, 0.871 31, 30, 30, 0, 1, 0.874 29, 0, 30, 23, 1, 0.884 29, 0, 30, 0, 1, 0.885 29, 30, 30, 23, 1, 0.888 29, 30, 30, 0, 1, 0.864 31, 2048, 30, 23, 1, 0.854 31, 2048, 30, 0, 1, 0.852 31, 2078, 30, 23, 1, 0.874 31, 2078, 30, 0, 1, 0.882 29, 2048, 30, 23, 1, 0.888 
29, 2048, 30, 0, 1, 0.889 29, 2078, 30, 23, 1, 0.895 29, 2078, 30, 0, 1, 0.878 29, 4081, 30, 23, 1, 1.109 29, 4081, 30, 0, 1, 1.128 31, 4081, 30, 23, 1, 0.804 31, 4081, 30, 0, 1, 0.803 31, 0, 1, 23, 1, 1.167 31, 0, 2, 0, 1, 1.167 32, 0, 31, 23, 1, 0.802 32, 0, 31, 0, 1, 0.802 32, 31, 31, 23, 1, 0.798 32, 31, 31, 0, 1, 0.797 30, 0, 31, 23, 1, 0.88 30, 0, 31, 0, 1, 0.888 30, 31, 31, 23, 1, 0.96 30, 31, 31, 0, 1, 0.869 32, 2048, 31, 23, 1, 0.802 32, 2048, 31, 0, 1, 0.802 32, 2079, 31, 23, 1, 0.843 32, 2079, 31, 0, 1, 0.835 30, 2048, 31, 23, 1, 0.889 30, 2048, 31, 0, 1, 0.889 30, 2079, 31, 23, 1, 0.937 30, 2079, 31, 0, 1, 0.872 30, 4081, 31, 23, 1, 1.11 30, 4081, 31, 0, 1, 1.142 32, 4081, 31, 23, 1, 0.864 32, 4081, 31, 0, 1, 0.872 32, 0, 1, 23, 1, 1.167 32, 0, 2, 0, 1, 1.167 > sysdeps/x86_64/memrchr.S | 613 +++++++++++++++++++-------------------- > 1 file changed, 292 insertions(+), 321 deletions(-) > > diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S > index d1a9f47911..b0dffd2ae2 100644 > --- a/sysdeps/x86_64/memrchr.S > +++ b/sysdeps/x86_64/memrchr.S > @@ -18,362 +18,333 @@ > <https://www.gnu.org/licenses/>. */ > > #include <sysdep.h> > +#define VEC_SIZE 16 > +#define PAGE_SIZE 4096 > > .text > -ENTRY (__memrchr) > - movd %esi, %xmm1 > - > - sub $16, %RDX_LP > - jbe L(length_less16) > - > - punpcklbw %xmm1, %xmm1 > - punpcklbw %xmm1, %xmm1 > - > - add %RDX_LP, %RDI_LP > - pshufd $0, %xmm1, %xmm1 > - > - movdqu (%rdi), %xmm0 > - pcmpeqb %xmm1, %xmm0 > - > -/* Check if there is a match. 
*/ > - pmovmskb %xmm0, %eax > - test %eax, %eax > - jnz L(matches0) > - > - sub $64, %rdi > - mov %edi, %ecx > - and $15, %ecx > - jz L(loop_prolog) > - > - add $16, %rdi > - add $16, %rdx > - and $-16, %rdi > - sub %rcx, %rdx > - > - .p2align 4 > -L(loop_prolog): > - sub $64, %rdx > - jbe L(exit_loop) > - > - movdqa 48(%rdi), %xmm0 > - pcmpeqb %xmm1, %xmm0 > - pmovmskb %xmm0, %eax > - test %eax, %eax > - jnz L(matches48) > - > - movdqa 32(%rdi), %xmm2 > - pcmpeqb %xmm1, %xmm2 > - pmovmskb %xmm2, %eax > - test %eax, %eax > - jnz L(matches32) > - > - movdqa 16(%rdi), %xmm3 > - pcmpeqb %xmm1, %xmm3 > - pmovmskb %xmm3, %eax > - test %eax, %eax > - jnz L(matches16) > - > - movdqa (%rdi), %xmm4 > - pcmpeqb %xmm1, %xmm4 > - pmovmskb %xmm4, %eax > - test %eax, %eax > - jnz L(matches0) > - > - sub $64, %rdi > - sub $64, %rdx > - jbe L(exit_loop) > - > - movdqa 48(%rdi), %xmm0 > - pcmpeqb %xmm1, %xmm0 > - pmovmskb %xmm0, %eax > - test %eax, %eax > - jnz L(matches48) > - > - movdqa 32(%rdi), %xmm2 > - pcmpeqb %xmm1, %xmm2 > - pmovmskb %xmm2, %eax > - test %eax, %eax > - jnz L(matches32) > - > - movdqa 16(%rdi), %xmm3 > - pcmpeqb %xmm1, %xmm3 > - pmovmskb %xmm3, %eax > - test %eax, %eax > - jnz L(matches16) > - > - movdqa (%rdi), %xmm3 > - pcmpeqb %xmm1, %xmm3 > - pmovmskb %xmm3, %eax > - test %eax, %eax > - jnz L(matches0) > - > - mov %edi, %ecx > - and $63, %ecx > - jz L(align64_loop) > - > - add $64, %rdi > - add $64, %rdx > - and $-64, %rdi > - sub %rcx, %rdx > - > - .p2align 4 > -L(align64_loop): > - sub $64, %rdi > - sub $64, %rdx > - jbe L(exit_loop) > - > - movdqa (%rdi), %xmm0 > - movdqa 16(%rdi), %xmm2 > - movdqa 32(%rdi), %xmm3 > - movdqa 48(%rdi), %xmm4 > - > - pcmpeqb %xmm1, %xmm0 > - pcmpeqb %xmm1, %xmm2 > - pcmpeqb %xmm1, %xmm3 > - pcmpeqb %xmm1, %xmm4 > - > - pmaxub %xmm3, %xmm0 > - pmaxub %xmm4, %xmm2 > - pmaxub %xmm0, %xmm2 > - pmovmskb %xmm2, %eax > - > - test %eax, %eax > - jz L(align64_loop) > - > - pmovmskb %xmm4, %eax > - test %eax, %eax > - jnz 
L(matches48) > - > - pmovmskb %xmm3, %eax > - test %eax, %eax > - jnz L(matches32) > - > - movdqa 16(%rdi), %xmm2 > - > - pcmpeqb %xmm1, %xmm2 > - pcmpeqb (%rdi), %xmm1 > - > - pmovmskb %xmm2, %eax > - test %eax, %eax > - jnz L(matches16) > - > - pmovmskb %xmm1, %eax > - bsr %eax, %eax > - > - add %rdi, %rax > +ENTRY_P2ALIGN(__memrchr, 6) > +#ifdef __ILP32__ > + /* Clear upper bits. */ > + mov %RDX_LP, %RDX_LP > +#endif > + movd %esi, %xmm0 > + > + /* Get end pointer. */ > + leaq (%rdx, %rdi), %rcx > + > + punpcklbw %xmm0, %xmm0 > + punpcklwd %xmm0, %xmm0 > + pshufd $0, %xmm0, %xmm0 > + > + /* Check if we can load 1x VEC without cross a page. */ > + testl $(PAGE_SIZE - VEC_SIZE), %ecx > + jz L(page_cross) > + > + /* NB: This load happens regardless of whether rdx (len) is zero. Since > + it doesn't cross a page and the standard gurantees any pointer have > + at least one-valid byte this load must be safe. For the entire > + history of the x86 memrchr implementation this has been possible so > + no code "should" be relying on a zero-length check before this load. > + The zero-length check is moved to the page cross case because it is > + 1) pretty cold and including it pushes the hot case len <= VEC_SIZE > + into 2-cache lines. */ > + movups -(VEC_SIZE)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + > + subq $VEC_SIZE, %rdx > + ja L(more_1x_vec) > +L(ret_vec_x0_test): > + /* Zero-flag set if eax (src) is zero. Destination unchanged if src is > + zero. */ > + bsrl %eax, %eax > + jz L(ret_0) > + /* Check if the CHAR match is in bounds. Need to truly zero `eax` here > + if out of bounds. */ > + addl %edx, %eax > + jl L(zero_0) > + /* Since we subtracted VEC_SIZE from rdx earlier we can just add to base > + ptr. 
*/ > + addq %rdi, %rax > +L(ret_0): > ret > > - .p2align 4 > -L(exit_loop): > - add $64, %edx > - cmp $32, %edx > - jbe L(exit_loop_32) > - > - movdqa 48(%rdi), %xmm0 > - pcmpeqb %xmm1, %xmm0 > - pmovmskb %xmm0, %eax > - test %eax, %eax > - jnz L(matches48) > - > - movdqa 32(%rdi), %xmm2 > - pcmpeqb %xmm1, %xmm2 > - pmovmskb %xmm2, %eax > - test %eax, %eax > - jnz L(matches32) > - > - movdqa 16(%rdi), %xmm3 > - pcmpeqb %xmm1, %xmm3 > - pmovmskb %xmm3, %eax > - test %eax, %eax > - jnz L(matches16_1) > - cmp $48, %edx > - jbe L(return_null) > - > - pcmpeqb (%rdi), %xmm1 > - pmovmskb %xmm1, %eax > - test %eax, %eax > - jnz L(matches0_1) > - xor %eax, %eax > + .p2align 4,, 5 > +L(ret_vec_x0): > + bsrl %eax, %eax > + leaq -(VEC_SIZE)(%rcx, %rax), %rax > ret > > - .p2align 4 > -L(exit_loop_32): > - movdqa 48(%rdi), %xmm0 > - pcmpeqb %xmm1, %xmm0 > - pmovmskb %xmm0, %eax > - test %eax, %eax > - jnz L(matches48_1) > - cmp $16, %edx > - jbe L(return_null) > - > - pcmpeqb 32(%rdi), %xmm1 > - pmovmskb %xmm1, %eax > - test %eax, %eax > - jnz L(matches32_1) > - xor %eax, %eax > + .p2align 4,, 2 > +L(zero_0): > + xorl %eax, %eax > ret > > - .p2align 4 > -L(matches0): > - bsr %eax, %eax > - add %rdi, %rax > - ret > - > - .p2align 4 > -L(matches16): > - bsr %eax, %eax > - lea 16(%rax, %rdi), %rax > - ret > > - .p2align 4 > -L(matches32): > - bsr %eax, %eax > - lea 32(%rax, %rdi), %rax > + .p2align 4,, 8 > +L(more_1x_vec): > + testl %eax, %eax > + jnz L(ret_vec_x0) > + > + /* Align rcx (pointer to string). */ > + decq %rcx > + andq $-VEC_SIZE, %rcx > + > + movq %rcx, %rdx > + /* NB: We could consistenyl save 1-byte in this pattern with `movaps > + %xmm0, %xmm1; pcmpeq IMM8(r), %xmm1; ...`. The reason against it is > + it adds more frontend uops (even if the moves can be eliminated) and > + some percentage of the time actual backend uops. 
*/ > + movaps -(VEC_SIZE)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + subq %rdi, %rdx > + pmovmskb %xmm1, %eax > + > + cmpq $(VEC_SIZE * 2), %rdx > + ja L(more_2x_vec) > +L(last_2x_vec): > + subl $VEC_SIZE, %edx > + jbe L(ret_vec_x0_test) > + > + testl %eax, %eax > + jnz L(ret_vec_x0) > + > + movaps -(VEC_SIZE * 2)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + > + subl $VEC_SIZE, %edx > + bsrl %eax, %eax > + jz L(ret_1) > + addl %edx, %eax > + jl L(zero_0) > + addq %rdi, %rax > +L(ret_1): > ret > > - .p2align 4 > -L(matches48): > - bsr %eax, %eax > - lea 48(%rax, %rdi), %rax > + /* Don't align. Otherwise lose 2-byte encoding in jump to L(page_cross) > + causes the hot pause (length <= VEC_SIZE) to span multiple cache > + lines. Naturally aligned % 16 to 8-bytes. */ > +L(page_cross): > + /* Zero length check. */ > + testq %rdx, %rdx > + jz L(zero_0) > + > + leaq -1(%rcx), %r8 > + andq $-(VEC_SIZE), %r8 > + > + movaps (%r8), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %esi > + /* Shift out negative alignment (because we are starting from endptr and > + working backwards). */ > + negl %ecx > + /* 32-bit shift but VEC_SIZE=16 so need to mask the shift count > + explicitly. */ > + andl $(VEC_SIZE - 1), %ecx > + shl %cl, %esi > + movzwl %si, %eax > + leaq (%rdi, %rdx), %rcx > + cmpq %rdi, %r8 > + ja L(more_1x_vec) > + subl $VEC_SIZE, %edx > + bsrl %eax, %eax > + jz L(ret_2) > + addl %edx, %eax > + jl L(zero_1) > + addq %rdi, %rax > +L(ret_2): > ret > > - .p2align 4 > -L(matches0_1): > - bsr %eax, %eax > - sub $64, %rdx > - add %rax, %rdx > - jl L(return_null) > - add %rdi, %rax > + /* Fits in aliging bytes. 
*/ > +L(zero_1): > + xorl %eax, %eax > ret > > - .p2align 4 > -L(matches16_1): > - bsr %eax, %eax > - sub $48, %rdx > - add %rax, %rdx > - jl L(return_null) > - lea 16(%rdi, %rax), %rax > + .p2align 4,, 5 > +L(ret_vec_x1): > + bsrl %eax, %eax > + leaq -(VEC_SIZE * 2)(%rcx, %rax), %rax > ret > > - .p2align 4 > -L(matches32_1): > - bsr %eax, %eax > - sub $32, %rdx > - add %rax, %rdx > - jl L(return_null) > - lea 32(%rdi, %rax), %rax > - ret > + .p2align 4,, 8 > +L(more_2x_vec): > + testl %eax, %eax > + jnz L(ret_vec_x0) > > - .p2align 4 > -L(matches48_1): > - bsr %eax, %eax > - sub $16, %rdx > - add %rax, %rdx > - jl L(return_null) > - lea 48(%rdi, %rax), %rax > - ret > + movaps -(VEC_SIZE * 2)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + testl %eax, %eax > + jnz L(ret_vec_x1) > > - .p2align 4 > -L(return_null): > - xor %eax, %eax > - ret > > - .p2align 4 > -L(length_less16_offset0): > - test %edx, %edx > - jz L(return_null) > + movaps -(VEC_SIZE * 3)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > > - mov %dl, %cl > - pcmpeqb (%rdi), %xmm1 > + subq $(VEC_SIZE * 4), %rdx > + ja L(more_4x_vec) > > - mov $1, %edx > - sal %cl, %edx > - sub $1, %edx > + addl $(VEC_SIZE), %edx > + jle L(ret_vec_x2_test) > > - pmovmskb %xmm1, %eax > +L(last_vec): > + testl %eax, %eax > + jnz L(ret_vec_x2) > > - and %edx, %eax > - test %eax, %eax > - jz L(return_null) > + movaps -(VEC_SIZE * 4)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > > - bsr %eax, %eax > - add %rdi, %rax > + subl $(VEC_SIZE), %edx > + bsrl %eax, %eax > + jz L(ret_3) > + addl %edx, %eax > + jl L(zero_2) > + addq %rdi, %rax > +L(ret_3): > ret > > - .p2align 4 > -L(length_less16): > - punpcklbw %xmm1, %xmm1 > - punpcklbw %xmm1, %xmm1 > - > - add $16, %edx > - > - pshufd $0, %xmm1, %xmm1 > - > - mov %edi, %ecx > - and $15, %ecx > - jz L(length_less16_offset0) > - > - mov %cl, %dh > - mov %ecx, %esi > - add %dl, %dh > - and $-16, %rdi > - > - sub $16, %dh > - ja 
L(length_less16_part2) > - > - pcmpeqb (%rdi), %xmm1 > - pmovmskb %xmm1, %eax > - > - sar %cl, %eax > - mov %dl, %cl > - > - mov $1, %edx > - sal %cl, %edx > - sub $1, %edx > - > - and %edx, %eax > - test %eax, %eax > - jz L(return_null) > - > - bsr %eax, %eax > - add %rdi, %rax > - add %rsi, %rax > + .p2align 4,, 6 > +L(ret_vec_x2_test): > + bsrl %eax, %eax > + jz L(zero_2) > + addl %edx, %eax > + jl L(zero_2) > + addq %rdi, %rax > ret > > - .p2align 4 > -L(length_less16_part2): > - movdqa 16(%rdi), %xmm2 > - pcmpeqb %xmm1, %xmm2 > - pmovmskb %xmm2, %eax > - > - mov %dh, %cl > - mov $1, %edx > - sal %cl, %edx > - sub $1, %edx > - > - and %edx, %eax > +L(zero_2): > + xorl %eax, %eax > + ret > > - test %eax, %eax > - jnz L(length_less16_part2_return) > > - pcmpeqb (%rdi), %xmm1 > - pmovmskb %xmm1, %eax > + .p2align 4,, 5 > +L(ret_vec_x2): > + bsrl %eax, %eax > + leaq -(VEC_SIZE * 3)(%rcx, %rax), %rax > + ret > > - mov %esi, %ecx > - sar %cl, %eax > - test %eax, %eax > - jz L(return_null) > + .p2align 4,, 5 > +L(ret_vec_x3): > + bsrl %eax, %eax > + leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax > + ret > > - bsr %eax, %eax > - add %rdi, %rax > - add %rsi, %rax > + .p2align 4,, 8 > +L(more_4x_vec): > + testl %eax, %eax > + jnz L(ret_vec_x2) > + > + movaps -(VEC_SIZE * 4)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + > + testl %eax, %eax > + jnz L(ret_vec_x3) > + > + addq $-(VEC_SIZE * 4), %rcx > + cmpq $(VEC_SIZE * 4), %rdx > + jbe L(last_4x_vec) > + > + /* Offset everything by 4x VEC_SIZE here to save a few bytes at the end > + keeping the code from spilling to the next cache line. 
*/ > + addq $(VEC_SIZE * 4 - 1), %rcx > + andq $-(VEC_SIZE * 4), %rcx > + leaq (VEC_SIZE * 4)(%rdi), %rdx > + andq $-(VEC_SIZE * 4), %rdx > + > + .p2align 4,, 11 > +L(loop_4x_vec): > + movaps (VEC_SIZE * -1)(%rcx), %xmm1 > + movaps (VEC_SIZE * -2)(%rcx), %xmm2 > + movaps (VEC_SIZE * -3)(%rcx), %xmm3 > + movaps (VEC_SIZE * -4)(%rcx), %xmm4 > + pcmpeqb %xmm0, %xmm1 > + pcmpeqb %xmm0, %xmm2 > + pcmpeqb %xmm0, %xmm3 > + pcmpeqb %xmm0, %xmm4 > + > + por %xmm1, %xmm2 > + por %xmm3, %xmm4 > + por %xmm2, %xmm4 > + > + pmovmskb %xmm4, %esi > + testl %esi, %esi > + jnz L(loop_end) > + > + addq $-(VEC_SIZE * 4), %rcx > + cmpq %rdx, %rcx > + jne L(loop_4x_vec) > + > + subl %edi, %edx > + > + /* Ends up being 1-byte nop. */ > + .p2align 4,, 2 > +L(last_4x_vec): > + movaps -(VEC_SIZE)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + > + cmpl $(VEC_SIZE * 2), %edx > + jbe L(last_2x_vec) > + > + testl %eax, %eax > + jnz L(ret_vec_x0) > + > + > + movaps -(VEC_SIZE * 2)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + > + testl %eax, %eax > + jnz L(ret_vec_end) > + > + movaps -(VEC_SIZE * 3)(%rcx), %xmm1 > + pcmpeqb %xmm0, %xmm1 > + pmovmskb %xmm1, %eax > + > + subl $(VEC_SIZE * 3), %edx > + ja L(last_vec) > + bsrl %eax, %eax > + jz L(ret_4) > + addl %edx, %eax > + jl L(zero_3) > + addq %rdi, %rax > +L(ret_4): > ret > > - .p2align 4 > -L(length_less16_part2_return): > - bsr %eax, %eax > - lea 16(%rax, %rdi), %rax > + /* Ends up being 1-byte nop. */ > + .p2align 4,, 3 > +L(loop_end): > + pmovmskb %xmm1, %eax > + sall $16, %eax > + jnz L(ret_vec_end) > + > + pmovmskb %xmm2, %eax > + testl %eax, %eax > + jnz L(ret_vec_end) > + > + pmovmskb %xmm3, %eax > + /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3) > + then it won't affect the result in esi (VEC4). If ecx is non-zero > + then CHAR in VEC3 and bsrq will use that position. 
*/ > + sall $16, %eax > + orl %esi, %eax > + bsrl %eax, %eax > + leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax > ret > > -END (__memrchr) > +L(ret_vec_end): > + bsrl %eax, %eax > + leaq (VEC_SIZE * -2)(%rax, %rcx), %rax > + ret > + /* Use in L(last_4x_vec). In the same cache line. This is just a spare > + aligning bytes. */ > +L(zero_3): > + xorl %eax, %eax > + ret > + /* 2-bytes from next cache line. */ > +END(__memrchr) > weak_alias (__memrchr, memrchr) > -- > 2.34.1 >
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S index d1a9f47911..b0dffd2ae2 100644 --- a/sysdeps/x86_64/memrchr.S +++ b/sysdeps/x86_64/memrchr.S @@ -18,362 +18,333 @@ <https://www.gnu.org/licenses/>. */ #include <sysdep.h> +#define VEC_SIZE 16 +#define PAGE_SIZE 4096 .text -ENTRY (__memrchr) - movd %esi, %xmm1 - - sub $16, %RDX_LP - jbe L(length_less16) - - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - - add %RDX_LP, %RDI_LP - pshufd $0, %xmm1, %xmm1 - - movdqu (%rdi), %xmm0 - pcmpeqb %xmm1, %xmm0 - -/* Check if there is a match. */ - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches0) - - sub $64, %rdi - mov %edi, %ecx - and $15, %ecx - jz L(loop_prolog) - - add $16, %rdi - add $16, %rdx - and $-16, %rdi - sub %rcx, %rdx - - .p2align 4 -L(loop_prolog): - sub $64, %rdx - jbe L(exit_loop) - - movdqa 48(%rdi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%rdi), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%rdi), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%rdi), %xmm4 - pcmpeqb %xmm1, %xmm4 - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches0) - - sub $64, %rdi - sub $64, %rdx - jbe L(exit_loop) - - movdqa 48(%rdi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%rdi), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%rdi), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%rdi), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches0) - - mov %edi, %ecx - and $63, %ecx - jz L(align64_loop) - - add $64, %rdi - add $64, %rdx - and $-64, %rdi - sub %rcx, %rdx - - .p2align 4 -L(align64_loop): - sub $64, %rdi - sub $64, %rdx - jbe L(exit_loop) - - movdqa (%rdi), %xmm0 - movdqa 
16(%rdi), %xmm2 - movdqa 32(%rdi), %xmm3 - movdqa 48(%rdi), %xmm4 - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm3, %xmm0 - pmaxub %xmm4, %xmm2 - pmaxub %xmm0, %xmm2 - pmovmskb %xmm2, %eax - - test %eax, %eax - jz L(align64_loop) - - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches48) - - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%rdi), %xmm2 - - pcmpeqb %xmm1, %xmm2 - pcmpeqb (%rdi), %xmm1 - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - pmovmskb %xmm1, %eax - bsr %eax, %eax - - add %rdi, %rax +ENTRY_P2ALIGN(__memrchr, 6) +#ifdef __ILP32__ + /* Clear upper bits. */ + mov %RDX_LP, %RDX_LP +#endif + movd %esi, %xmm0 + + /* Get end pointer. */ + leaq (%rdx, %rdi), %rcx + + punpcklbw %xmm0, %xmm0 + punpcklwd %xmm0, %xmm0 + pshufd $0, %xmm0, %xmm0 + + /* Check if we can load 1x VEC without cross a page. */ + testl $(PAGE_SIZE - VEC_SIZE), %ecx + jz L(page_cross) + + /* NB: This load happens regardless of whether rdx (len) is zero. Since + it doesn't cross a page and the standard gurantees any pointer have + at least one-valid byte this load must be safe. For the entire + history of the x86 memrchr implementation this has been possible so + no code "should" be relying on a zero-length check before this load. + The zero-length check is moved to the page cross case because it is + 1) pretty cold and including it pushes the hot case len <= VEC_SIZE + into 2-cache lines. */ + movups -(VEC_SIZE)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + + subq $VEC_SIZE, %rdx + ja L(more_1x_vec) +L(ret_vec_x0_test): + /* Zero-flag set if eax (src) is zero. Destination unchanged if src is + zero. */ + bsrl %eax, %eax + jz L(ret_0) + /* Check if the CHAR match is in bounds. Need to truly zero `eax` here + if out of bounds. */ + addl %edx, %eax + jl L(zero_0) + /* Since we subtracted VEC_SIZE from rdx earlier we can just add to base + ptr. 
*/ + addq %rdi, %rax +L(ret_0): ret - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa 48(%rdi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%rdi), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%rdi), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb (%rdi), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches0_1) - xor %eax, %eax + .p2align 4,, 5 +L(ret_vec_x0): + bsrl %eax, %eax + leaq -(VEC_SIZE)(%rcx, %rax), %rax ret - .p2align 4 -L(exit_loop_32): - movdqa 48(%rdi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 32(%rdi), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches32_1) - xor %eax, %eax + .p2align 4,, 2 +L(zero_0): + xorl %eax, %eax ret - .p2align 4 -L(matches0): - bsr %eax, %eax - add %rdi, %rax - ret - - .p2align 4 -L(matches16): - bsr %eax, %eax - lea 16(%rax, %rdi), %rax - ret - .p2align 4 -L(matches32): - bsr %eax, %eax - lea 32(%rax, %rdi), %rax + .p2align 4,, 8 +L(more_1x_vec): + testl %eax, %eax + jnz L(ret_vec_x0) + + /* Align rcx (pointer to string). */ + decq %rcx + andq $-VEC_SIZE, %rcx + + movq %rcx, %rdx + /* NB: We could consistenyl save 1-byte in this pattern with `movaps + %xmm0, %xmm1; pcmpeq IMM8(r), %xmm1; ...`. The reason against it is + it adds more frontend uops (even if the moves can be eliminated) and + some percentage of the time actual backend uops. 
*/ + movaps -(VEC_SIZE)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + subq %rdi, %rdx + pmovmskb %xmm1, %eax + + cmpq $(VEC_SIZE * 2), %rdx + ja L(more_2x_vec) +L(last_2x_vec): + subl $VEC_SIZE, %edx + jbe L(ret_vec_x0_test) + + testl %eax, %eax + jnz L(ret_vec_x0) + + movaps -(VEC_SIZE * 2)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + + subl $VEC_SIZE, %edx + bsrl %eax, %eax + jz L(ret_1) + addl %edx, %eax + jl L(zero_0) + addq %rdi, %rax +L(ret_1): ret - .p2align 4 -L(matches48): - bsr %eax, %eax - lea 48(%rax, %rdi), %rax + /* Don't align. Otherwise lose 2-byte encoding in jump to L(page_cross) + causes the hot pause (length <= VEC_SIZE) to span multiple cache + lines. Naturally aligned % 16 to 8-bytes. */ +L(page_cross): + /* Zero length check. */ + testq %rdx, %rdx + jz L(zero_0) + + leaq -1(%rcx), %r8 + andq $-(VEC_SIZE), %r8 + + movaps (%r8), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + /* Shift out negative alignment (because we are starting from endptr and + working backwards). */ + negl %ecx + /* 32-bit shift but VEC_SIZE=16 so need to mask the shift count + explicitly. */ + andl $(VEC_SIZE - 1), %ecx + shl %cl, %esi + movzwl %si, %eax + leaq (%rdi, %rdx), %rcx + cmpq %rdi, %r8 + ja L(more_1x_vec) + subl $VEC_SIZE, %edx + bsrl %eax, %eax + jz L(ret_2) + addl %edx, %eax + jl L(zero_1) + addq %rdi, %rax +L(ret_2): ret - .p2align 4 -L(matches0_1): - bsr %eax, %eax - sub $64, %rdx - add %rax, %rdx - jl L(return_null) - add %rdi, %rax + /* Fits in aliging bytes. 
*/ +L(zero_1): + xorl %eax, %eax ret - .p2align 4 -L(matches16_1): - bsr %eax, %eax - sub $48, %rdx - add %rax, %rdx - jl L(return_null) - lea 16(%rdi, %rax), %rax + .p2align 4,, 5 +L(ret_vec_x1): + bsrl %eax, %eax + leaq -(VEC_SIZE * 2)(%rcx, %rax), %rax ret - .p2align 4 -L(matches32_1): - bsr %eax, %eax - sub $32, %rdx - add %rax, %rdx - jl L(return_null) - lea 32(%rdi, %rax), %rax - ret + .p2align 4,, 8 +L(more_2x_vec): + testl %eax, %eax + jnz L(ret_vec_x0) - .p2align 4 -L(matches48_1): - bsr %eax, %eax - sub $16, %rdx - add %rax, %rdx - jl L(return_null) - lea 48(%rdi, %rax), %rax - ret + movaps -(VEC_SIZE * 2)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + testl %eax, %eax + jnz L(ret_vec_x1) - .p2align 4 -L(return_null): - xor %eax, %eax - ret - .p2align 4 -L(length_less16_offset0): - test %edx, %edx - jz L(return_null) + movaps -(VEC_SIZE * 3)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax - mov %dl, %cl - pcmpeqb (%rdi), %xmm1 + subq $(VEC_SIZE * 4), %rdx + ja L(more_4x_vec) - mov $1, %edx - sal %cl, %edx - sub $1, %edx + addl $(VEC_SIZE), %edx + jle L(ret_vec_x2_test) - pmovmskb %xmm1, %eax +L(last_vec): + testl %eax, %eax + jnz L(ret_vec_x2) - and %edx, %eax - test %eax, %eax - jz L(return_null) + movaps -(VEC_SIZE * 4)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax - bsr %eax, %eax - add %rdi, %rax + subl $(VEC_SIZE), %edx + bsrl %eax, %eax + jz L(ret_3) + addl %edx, %eax + jl L(zero_2) + addq %rdi, %rax +L(ret_3): ret - .p2align 4 -L(length_less16): - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - - add $16, %edx - - pshufd $0, %xmm1, %xmm1 - - mov %edi, %ecx - and $15, %ecx - jz L(length_less16_offset0) - - mov %cl, %dh - mov %ecx, %esi - add %dl, %dh - and $-16, %rdi - - sub $16, %dh - ja L(length_less16_part2) - - pcmpeqb (%rdi), %xmm1 - pmovmskb %xmm1, %eax - - sar %cl, %eax - mov %dl, %cl - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %eax - test %eax, %eax - jz L(return_null) - - bsr %eax, 
%eax - add %rdi, %rax - add %rsi, %rax + .p2align 4,, 6 +L(ret_vec_x2_test): + bsrl %eax, %eax + jz L(zero_2) + addl %edx, %eax + jl L(zero_2) + addq %rdi, %rax ret - .p2align 4 -L(length_less16_part2): - movdqa 16(%rdi), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - - mov %dh, %cl - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %eax +L(zero_2): + xorl %eax, %eax + ret - test %eax, %eax - jnz L(length_less16_part2_return) - pcmpeqb (%rdi), %xmm1 - pmovmskb %xmm1, %eax + .p2align 4,, 5 +L(ret_vec_x2): + bsrl %eax, %eax + leaq -(VEC_SIZE * 3)(%rcx, %rax), %rax + ret - mov %esi, %ecx - sar %cl, %eax - test %eax, %eax - jz L(return_null) + .p2align 4,, 5 +L(ret_vec_x3): + bsrl %eax, %eax + leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax + ret - bsr %eax, %eax - add %rdi, %rax - add %rsi, %rax + .p2align 4,, 8 +L(more_4x_vec): + testl %eax, %eax + jnz L(ret_vec_x2) + + movaps -(VEC_SIZE * 4)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + + testl %eax, %eax + jnz L(ret_vec_x3) + + addq $-(VEC_SIZE * 4), %rcx + cmpq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec) + + /* Offset everything by 4x VEC_SIZE here to save a few bytes at the end + keeping the code from spilling to the next cache line. */ + addq $(VEC_SIZE * 4 - 1), %rcx + andq $-(VEC_SIZE * 4), %rcx + leaq (VEC_SIZE * 4)(%rdi), %rdx + andq $-(VEC_SIZE * 4), %rdx + + .p2align 4,, 11 +L(loop_4x_vec): + movaps (VEC_SIZE * -1)(%rcx), %xmm1 + movaps (VEC_SIZE * -2)(%rcx), %xmm2 + movaps (VEC_SIZE * -3)(%rcx), %xmm3 + movaps (VEC_SIZE * -4)(%rcx), %xmm4 + pcmpeqb %xmm0, %xmm1 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm0, %xmm3 + pcmpeqb %xmm0, %xmm4 + + por %xmm1, %xmm2 + por %xmm3, %xmm4 + por %xmm2, %xmm4 + + pmovmskb %xmm4, %esi + testl %esi, %esi + jnz L(loop_end) + + addq $-(VEC_SIZE * 4), %rcx + cmpq %rdx, %rcx + jne L(loop_4x_vec) + + subl %edi, %edx + + /* Ends up being 1-byte nop. 
*/ + .p2align 4,, 2 +L(last_4x_vec): + movaps -(VEC_SIZE)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + + cmpl $(VEC_SIZE * 2), %edx + jbe L(last_2x_vec) + + testl %eax, %eax + jnz L(ret_vec_x0) + + + movaps -(VEC_SIZE * 2)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + + testl %eax, %eax + jnz L(ret_vec_end) + + movaps -(VEC_SIZE * 3)(%rcx), %xmm1 + pcmpeqb %xmm0, %xmm1 + pmovmskb %xmm1, %eax + + subl $(VEC_SIZE * 3), %edx + ja L(last_vec) + bsrl %eax, %eax + jz L(ret_4) + addl %edx, %eax + jl L(zero_3) + addq %rdi, %rax +L(ret_4): ret - .p2align 4 -L(length_less16_part2_return): - bsr %eax, %eax - lea 16(%rax, %rdi), %rax + /* Ends up being 1-byte nop. */ + .p2align 4,, 3 +L(loop_end): + pmovmskb %xmm1, %eax + sall $16, %eax + jnz L(ret_vec_end) + + pmovmskb %xmm2, %eax + testl %eax, %eax + jnz L(ret_vec_end) + + pmovmskb %xmm3, %eax + /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3) + then it won't affect the result in esi (VEC4). If ecx is non-zero + then CHAR in VEC3 and bsrq will use that position. */ + sall $16, %eax + orl %esi, %eax + bsrl %eax, %eax + leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax ret -END (__memrchr) +L(ret_vec_end): + bsrl %eax, %eax + leaq (VEC_SIZE * -2)(%rax, %rcx), %rax + ret + /* Use in L(last_4x_vec). In the same cache line. This is just a spare + aligning bytes. */ +L(zero_3): + xorl %eax, %eax + ret + /* 2-bytes from next cache line. */ +END(__memrchr) weak_alias (__memrchr, memrchr)