@@ -880,6 +880,37 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
}
}
+ if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
+ {
+ if ((value % 9) == 0
+ && (alt_cost = riscv_build_integer_1 (alt_codes, value / 9, mode) + 1) < cost)
+ {
+ alt_codes[alt_cost - 1].code = FMA;
+ alt_codes[alt_cost - 1].value = 9;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ if ((value % 5) == 0
+ && (alt_cost = riscv_build_integer_1 (alt_codes, value / 5, mode) + 1) < cost)
+ {
+ alt_codes[alt_cost - 1].code = FMA;
+ alt_codes[alt_cost - 1].value = 5;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ if ((value % 3) == 0
+ && (alt_cost = riscv_build_integer_1 (alt_codes, value / 3, mode) + 1) < cost)
+ {
+ alt_codes[alt_cost - 1].code = FMA;
+ alt_codes[alt_cost - 1].value = 3;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ }
+
/* Final cases, particularly focused on bseti. */
if (cost > 2 && TARGET_ZBS)
{
@@ -2542,6 +2573,14 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
x = riscv_emit_set (t, x);
}
+ else if (codes[i].code == FMA)
+ {
+ HOST_WIDE_INT value = exact_log2 (codes[i].value - 1);
+ rtx ashift = gen_rtx_fmt_ee (ASHIFT, mode, x, GEN_INT (value));
+ x = gen_rtx_fmt_ee (PLUS, mode, ashift, x);
+ rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
+ x = riscv_emit_set (t, x);
+ }
else
x = gen_rtx_fmt_ee (codes[i].code, mode,
x, GEN_INT (codes[i].value));
@@ -12,7 +12,7 @@
total number of instructions.
This isn't expected to change much and any change is worthy of a look. */
-/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|slli)" 5822 } } */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 5822 } } */
unsigned long foo_0x3(void) { return 0x3UL; }
unsigned long foo_0x5(void) { return 0x5UL; }
new file mode 100644
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+ which shouldn't vary based on the optimization level. -O1 seems to work
+ and eliminates the usual sources of extraneous dead code that would throw
+ off the counts. */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+ having thousands of tests each testing one variant, we just test the
+ total number of instructions.
+
+ This isn't expected to change much and any change is worthy of a look. */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 248 } } */
+
+unsigned long foo_0x1200000120000000 (void) { return 0x1200000120000000UL; }
+unsigned long foo_0x120000402000000 (void) { return 0x120000402000000UL; }
+unsigned long foo_0x1200100020000 (void) { return 0x1200100020000UL; }
+unsigned long foo_0x14000000a0000000 (void) { return 0x14000000a0000000UL; }
+unsigned long foo_0x140000082000000 (void) { return 0x140000082000000UL; }
+unsigned long foo_0x14000080200000 (void) { return 0x14000080200000UL; }
+unsigned long foo_0x1400080020000 (void) { return 0x1400080020000UL; }
+unsigned long foo_0x140080002000 (void) { return 0x140080002000UL; }
+unsigned long foo_0x1800000120000000 (void) { return 0x1800000120000000UL; }
+unsigned long foo_0x180000102000000 (void) { return 0x180000102000000UL; }
+unsigned long foo_0x180000801 (void) { return 0x180000801UL; }
+unsigned long foo_0x180000802 (void) { return 0x180000802UL; }
+unsigned long foo_0x180001001 (void) { return 0x180001001UL; }
+unsigned long foo_0x18000100200000 (void) { return 0x18000100200000UL; }
+unsigned long foo_0x180001002 (void) { return 0x180001002UL; }
+unsigned long foo_0x180003000 (void) { return 0x180003000UL; }
+unsigned long foo_0x1800100020000 (void) { return 0x1800100020000UL; }
+unsigned long foo_0x180081000 (void) { return 0x180081000UL; }
+unsigned long foo_0x182001000 (void) { return 0x182001000UL; }
+unsigned long foo_0x2400000240000000 (void) { return 0x2400000240000000UL; }
+unsigned long foo_0x24000080400000 (void) { return 0x24000080400000UL; }
+unsigned long foo_0x2400200040000 (void) { return 0x2400200040000UL; }
+unsigned long foo_0x2800000140000000 (void) { return 0x2800000140000000UL; }
+unsigned long foo_0x280000104000000 (void) { return 0x280000104000000UL; }
+unsigned long foo_0x28000100400000 (void) { return 0x28000100400000UL; }
+unsigned long foo_0x2800100040000 (void) { return 0x2800100040000UL; }
+unsigned long foo_0x280011000 (void) { return 0x280011000UL; }
+unsigned long foo_0x280100004000 (void) { return 0x280100004000UL; }
+unsigned long foo_0x280401000 (void) { return 0x280401000UL; }
+unsigned long foo_0x290001000 (void) { return 0x290001000UL; }
+unsigned long foo_0x30000000c0000000 (void) { return 0x30000000c0000000UL; }
+unsigned long foo_0x300000084000000 (void) { return 0x300000084000000UL; }
+unsigned long foo_0x300000801 (void) { return 0x300000801UL; }
+unsigned long foo_0x30000080400000 (void) { return 0x30000080400000UL; }
+unsigned long foo_0x300002004 (void) { return 0x300002004UL; }
+unsigned long foo_0x300006000 (void) { return 0x300006000UL; }
+unsigned long foo_0x3000080040000 (void) { return 0x3000080040000UL; }
+unsigned long foo_0x300021000 (void) { return 0x300021000UL; }
+unsigned long foo_0x300080004000 (void) { return 0x300080004000UL; }
+unsigned long foo_0x300102000 (void) { return 0x300102000UL; }
+unsigned long foo_0x300801000 (void) { return 0x300801000UL; }
+unsigned long foo_0x304002000 (void) { return 0x304002000UL; }
+unsigned long foo_0x320001000 (void) { return 0x320001000UL; }
+unsigned long foo_0x4800000480000000 (void) { return 0x4800000480000000UL; }
+unsigned long foo_0x48000100800000 (void) { return 0x48000100800000UL; }
+unsigned long foo_0x4800400080000 (void) { return 0x4800400080000UL; }
+unsigned long foo_0x5000000280000000 (void) { return 0x5000000280000000UL; }
+unsigned long foo_0x500000208000000 (void) { return 0x500000208000000UL; }
+unsigned long foo_0x50000200800000 (void) { return 0x50000200800000UL; }
+unsigned long foo_0x5000200080000 (void) { return 0x5000200080000UL; }
+unsigned long foo_0x500200008000 (void) { return 0x500200008000UL; }
+unsigned long foo_0x6000000180000000 (void) { return 0x6000000180000000UL; }
+unsigned long foo_0x600000108000000 (void) { return 0x600000108000000UL; }
+unsigned long foo_0x60000100800000 (void) { return 0x60000100800000UL; }
+unsigned long foo_0x6000100080000 (void) { return 0x6000100080000UL; }
+unsigned long foo_0x600100008000 (void) { return 0x600100008000UL; }
+unsigned long foo_0x900000090000000 (void) { return 0x900000090000000UL; }
+unsigned long foo_0x90000201000000 (void) { return 0x90000201000000UL; }
+unsigned long foo_0x900080010000 (void) { return 0x900080010000UL; }
+unsigned long foo_0x90200001000 (void) { return 0x90200001000UL; }
+unsigned long foo_0xc00000090000000 (void) { return 0xc00000090000000UL; }
+unsigned long foo_0xc0000081000000 (void) { return 0xc0000081000000UL; }
+unsigned long foo_0xc000080100000 (void) { return 0xc000080100000UL; }
+unsigned long foo_0xc00080010000 (void) { return 0xc00080010000UL; }
+unsigned long foo_0xc0080001000 (void) { return 0xc0080001000UL; }