[to-be-committed,RISC-V] Try inverting for constant synthesis

Message ID a18924ac-5654-4225-9169-a2e73a4a40b9@ventanamicro.com

Commit Message

Jeff Law May 26, 2024, 4:18 p.m. UTC
So there's another class of constants we're failing to synthesize well:
those where we can invert our original constant C into C', and C' takes
at least 2 fewer instructions to synthesize than C.  In that case we can
generate C' first, then use xori with the constant -1 to flip all the
bits, giving us our target constant.

I've only seen this trigger when the final synthesis is li+srli+xori.
The original synthesis took various 4- or 5-instruction forms.
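
As a concrete illustration, here's the first constant from the new test,
worked out by hand (the exact sequence the compiler picks may differ):

   C  = 0xc0000000000077ff           one of those 4- or 5-instruction forms
   C' = ~C = 0x3fffffffffff8800      reachable with just li+srli

so the whole synthesis becomes something like:

	li	a0,-122880	# 0xfffffffffffe2000
	srli	a0,a0,2		# 0x3fffffffffff8800 == ~C
	xori	a0,a0,-1	# 0xc0000000000077ff == C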

Most of the methods we use to improve constant synthesis are in
riscv_build_integer_1.  I originally tried to put this code in there,
but that ends up in infinite recursion due to some other ADDI-related
code in there which also wants to flip bits and try synthesis.

So this was put into riscv_build_integer and recurses back into
riscv_build_integer.  That isn't unprecedented, just a bit different
from most of the other synthesis implementation bits.

This doesn't depend on any extensions, so it should help any rv64 system.

Tested on my tester; obviously I'll wait for a verdict from the CI
system before moving forward.

Jeff
gcc/

	* config/riscv/riscv.cc (riscv_build_integer_1): Verify there
	are no bits left to set in the constant when generating bseti.
	(riscv_build_integer): Synthesize ~value and if that is cheaper,
	use it with a trailing xori with -1.

gcc/testsuite/

	* gcc.target/riscv/synthesis-8.c: New test.

Patch

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 560a454d7ab..1cca288cb57 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1023,7 +1023,7 @@  riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
 
       /* If LUI+ADDI+BSETI resulted in a more efficient
 	 sequence, then use it.  */
-      if (i < cost)
+      if (value == 0 && i < cost)
 	{
 	  memcpy (codes, alt_codes, sizeof (alt_codes));
 	  cost = i;
@@ -1075,6 +1075,31 @@  riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
 	}
     }
 
+  /* See if we can generate the inverted constant, then use
+     not to get the desired constant.
+
+     This can't be in riscv_build_integer_1 as it'll mutually
+     recurse with another case in there.  And it has to recurse
+     into riscv_build_integer so we get the trailing 0s case
+     above.  */
+  if (cost > 2 && value < 0)
+    {
+      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+      int alt_cost;
+
+      HOST_WIDE_INT nval = ~value;
+      alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
+      if (alt_cost < cost)
+	{
+	  alt_codes[alt_cost - 1].code = XOR;
+	  alt_codes[alt_cost - 1].value = -1;
+	  alt_codes[alt_cost - 1].use_uw = false;
+	  memcpy (codes, alt_codes, sizeof (alt_codes));
+	  cost = alt_cost;
+	}
+    }
+
+
   if (!TARGET_64BIT
       && (value > INT32_MAX || value < INT32_MIN))
     {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-8.c b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
new file mode 100644
index 00000000000..2bcdb4e774d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 72 } } */
+
+unsigned long foo_0xc0000000000077ff(void) { return 0xc0000000000077ffUL; }
+unsigned long foo_0xc00000000000b7ff(void) { return 0xc00000000000b7ffUL; }
+unsigned long foo_0xc0000000000137ff(void) { return 0xc0000000000137ffUL; }
+unsigned long foo_0xc0000000000237ff(void) { return 0xc0000000000237ffUL; }
+unsigned long foo_0xc0000000000437ff(void) { return 0xc0000000000437ffUL; }
+unsigned long foo_0xc0000000000837ff(void) { return 0xc0000000000837ffUL; }
+unsigned long foo_0xc0000000001037ff(void) { return 0xc0000000001037ffUL; }
+unsigned long foo_0xc0000000002037ff(void) { return 0xc0000000002037ffUL; }
+unsigned long foo_0xc0000000004037ff(void) { return 0xc0000000004037ffUL; }
+unsigned long foo_0xc0000000008037ff(void) { return 0xc0000000008037ffUL; }
+unsigned long foo_0xc0000000010037ff(void) { return 0xc0000000010037ffUL; }
+unsigned long foo_0xc0000000020037ff(void) { return 0xc0000000020037ffUL; }
+unsigned long foo_0xc0000000040037ff(void) { return 0xc0000000040037ffUL; }
+unsigned long foo_0xc0000000080037ff(void) { return 0xc0000000080037ffUL; }
+unsigned long foo_0xc0000000100037ff(void) { return 0xc0000000100037ffUL; }
+unsigned long foo_0xe0000000000037ff(void) { return 0xe0000000000037ffUL; }
+unsigned long foo_0xc00000000000d7ff(void) { return 0xc00000000000d7ffUL; }
+unsigned long foo_0xc0000000000157ff(void) { return 0xc0000000000157ffUL; }