@@ -1541,9 +1541,11 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (4), /* DI */
COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (8), /* HI */
- COSTS_N_INSNS (11), /* SI */
+ /* Expanding div/mod currently doesn't consider parallelism. So the cost
+ model is not realistic. We compensate by increasing the latencies a bit. */
+ {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (14), /* SI */
COSTS_N_INSNS (76), /* DI */
COSTS_N_INSNS (76)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
@@ -2342,11 +2344,11 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (4), /* DI */
COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (8), /* HI */
- /* 8-11 */
- COSTS_N_INSNS (11), /* SI */
- /* 24-81 */
+ /* Expanding div/mod currently doesn't consider parallelism. So the cost
+ model is not realistic. We compensate by increasing the latencies a bit. */
+ {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (14), /* SI */
COSTS_N_INSNS (81), /* DI */
COSTS_N_INSNS (81)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) { /* Write VAL as 20 zero-padded decimal digits plus a NUL into DST (21 bytes). */
+ const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000; /* 10^10; every divisor in this function is a constant. */
+ int64_t hix = val / POW10_10; /* Decimal digits of VAL above the low ten. */
+ int64_t lox = val % POW10_10; /* Low ten decimal digits of VAL. */
+ int32_t v0 = hix / 100000; /* v0..v3 each hold one five-digit group, most significant first. */
+ int32_t v1 = hix % 100000;
+ int32_t v2 = lox / 100000;
+ int32_t v3 = lox % 100000;
+ for (int i = 4; i != 0; --i) { /* Emit the four low digits of all four groups, right to left. */
+ dst[i + 0 * 5] = v0 % 10 + '0';
+ v0 /= 10;
+ dst[i + 1 * 5] = v1 % 10 + '0';
+ v1 /= 10;
+ dst[i + 2 * 5] = v2 % 10 + '0';
+ v2 /= 10;
+ dst[i + 3 * 5] = v3 % 10 + '0';
+ v3 /= 10;
+ }
+ dst[0 * 5] = v0 + '0'; /* What remains in each vN is that group's leading digit. */
+ dst[1 * 5] = v1 + '0';
+ dst[2 * 5] = v2 + '0';
+ dst[3 * 5] = v3 + '0';
+ dst[4 * 5] = 0; /* NUL terminator at index 20. */
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=skylake-avx512" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) { /* Write VAL as 20 zero-padded decimal digits plus a NUL into DST (21 bytes). */
+ const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000; /* 10^10; every divisor in this function is a constant. */
+ int64_t hix = val / POW10_10; /* Decimal digits of VAL above the low ten. */
+ int64_t lox = val % POW10_10; /* Low ten decimal digits of VAL. */
+ int32_t v0 = hix / 100000; /* v0..v3 each hold one five-digit group, most significant first. */
+ int32_t v1 = hix % 100000;
+ int32_t v2 = lox / 100000;
+ int32_t v3 = lox % 100000;
+ for (int i = 4; i != 0; --i) { /* Emit the four low digits of all four groups, right to left. */
+ dst[i + 0 * 5] = v0 % 10 + '0';
+ v0 /= 10;
+ dst[i + 1 * 5] = v1 % 10 + '0';
+ v1 /= 10;
+ dst[i + 2 * 5] = v2 % 10 + '0';
+ v2 /= 10;
+ dst[i + 3 * 5] = v3 % 10 + '0';
+ v3 /= 10;
+ }
+ dst[0 * 5] = v0 + '0'; /* What remains in each vN is that group's leading digit. */
+ dst[1 * 5] = v1 + '0';
+ dst[2 * 5] = v2 + '0';
+ dst[3 * 5] = v3 + '0';
+ dst[4 * 5] = 0; /* NUL terminator at index 20. */
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */