diff mbox series

c++/modules: use optimized crc32 from zlib

Message ID 20240212214912.2550529-1-ppalka@redhat.com
State New
Headers show
Series c++/modules: use optimized crc32 from zlib | expand

Commit Message

Patrick Palka Feb. 12, 2024, 9:49 p.m. UTC
Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?

-- >8 --

The current implementation of bytes::calc_crc computes the checksum one
byte at a time, and turns out to be quite slow, taking 5% and 15% of
time compiling and streaming in the std module respectively.  We have
a crc32_unsigned routine that handles 4 bytes at a time which could
speed up this hot function, but we also have a bundled zlib with highly
optimized crc routines that can handle up to 32 bytes at a time.

So this patch makes us use zlib's crc32 in this hot code path.
According to some perf experiments this reduces the overhead of calc_crc
from 15% of total time to 3% when streaming in the std module.

gcc/cp/ChangeLog:

	* Make-lang.in (CFLAGS-cp/module.o): Add $(ZLIBINC).
	* module.cc: Include <zlib.h>.
	(bytes::calc_crc): Use crc32 from zlib.
	(bytes_out::set_crc): Use crc32_combine from zlib.
---
 gcc/cp/Make-lang.in | 2 +-
 gcc/cp/module.cc    | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

Comments

Jason Merrill Feb. 13, 2024, 3:33 a.m. UTC | #1
On 2/12/24 16:49, Patrick Palka wrote:
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
> for trunk?

OK.

> -- >8 --
> 
> The current implementation of bytes::calc_crc computes the checksum one
> byte at a time, and turns out to be quite slow, taking 5% and 15% of
> time compiling and streaming in the std module respectively.  We have
> a crc32_unsigned routine that handles 4 bytes at a time which could
> speed up this hot function, but we also have a bundled zlib with highly
> optimized crc routines that can handle up to 32 bytes at a time.
> 
> So this patch makes us use zlib's crc32 in this hot code path.
> According to some perf experiments this reduces the overhead of calc_crc
> from 15% of total time to 3% when streaming in the std module.
> 
> gcc/cp/ChangeLog:
> 
> 	* Make-lang.in (CFLAGS-cp/module.o): Add $(ZLIBINC).
> 	* module.cc: Include <zlib.h>.
> 	(bytes::calc_crc): Use crc32 from zlib.
> 	(bytes_out::set_crc): Use crc32_combine from zlib.
> ---
>   gcc/cp/Make-lang.in | 2 +-
>   gcc/cp/module.cc    | 8 +++-----
>   2 files changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in
> index 630db41d87e..f153891a1ef 100644
> --- a/gcc/cp/Make-lang.in
> +++ b/gcc/cp/Make-lang.in
> @@ -55,7 +55,7 @@ c++.serial = cc1plus$(exeext)
>   CFLAGS-cp/g++spec.o += $(DRIVER_DEFINES)
>   
>   CFLAGS-cp/module.o += -DHOST_MACHINE=\"$(host)\" \
> -	-DTARGET_MACHINE=\"$(target)\"
> +	-DTARGET_MACHINE=\"$(target)\" $(ZLIBINC)
>   
>   # In non-release builds, use a date-related module version.
>   ifneq ($(DEVPHASE_c),)
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index 86e43aee542..c94f4a257de 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -233,6 +233,7 @@ Classes used:
>   /* This TU doesn't need or want to see the networking.  */
>   #define CODY_NETWORKING 0
>   #include "mapper-client.h"
> +#include <zlib.h> // for crc32, crc32_combine
>   
>   #if 0 // 1 for testing no mmap
>   #define MAPPED_READING 0
> @@ -487,10 +488,7 @@ protected:
>   unsigned
>   bytes::calc_crc (unsigned l) const
>   {
> -  unsigned crc = 0;
> -  for (size_t ix = 4; ix < l; ix++)
> -    crc = crc32_byte (crc, buffer[ix]);
> -  return crc;
> +  return crc32 (0, (unsigned char *)buffer + 4, l - 4);
>   }
>   
>   class elf_in;
> @@ -717,7 +715,7 @@ bytes_out::set_crc (unsigned *crc_ptr)
>         unsigned crc = calc_crc (pos);
>         unsigned accum = *crc_ptr;
>         /* Only mix the existing *CRC_PTR if it is non-zero.  */
> -      accum = accum ? crc32_unsigned (accum, crc) : crc;
> +      accum = accum ? crc32_combine (accum, crc, pos - 4) : crc;
>         *crc_ptr = accum;
>   
>         /* Buffer will be sufficiently aligned.  */
diff mbox series

Patch

diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in
index 630db41d87e..f153891a1ef 100644
--- a/gcc/cp/Make-lang.in
+++ b/gcc/cp/Make-lang.in
@@ -55,7 +55,7 @@  c++.serial = cc1plus$(exeext)
 CFLAGS-cp/g++spec.o += $(DRIVER_DEFINES)
 
 CFLAGS-cp/module.o += -DHOST_MACHINE=\"$(host)\" \
-	-DTARGET_MACHINE=\"$(target)\"
+	-DTARGET_MACHINE=\"$(target)\" $(ZLIBINC)
 
 # In non-release builds, use a date-related module version.
 ifneq ($(DEVPHASE_c),)
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 86e43aee542..c94f4a257de 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -233,6 +233,7 @@  Classes used:
 /* This TU doesn't need or want to see the networking.  */
 #define CODY_NETWORKING 0
 #include "mapper-client.h"
+#include <zlib.h> // for crc32, crc32_combine
 
 #if 0 // 1 for testing no mmap
 #define MAPPED_READING 0
@@ -487,10 +488,7 @@  protected:
 unsigned
 bytes::calc_crc (unsigned l) const
 {
-  unsigned crc = 0;
-  for (size_t ix = 4; ix < l; ix++)
-    crc = crc32_byte (crc, buffer[ix]);
-  return crc;
+  return crc32 (0, (unsigned char *)buffer + 4, l - 4);
 }
 
 class elf_in;
@@ -717,7 +715,7 @@  bytes_out::set_crc (unsigned *crc_ptr)
       unsigned crc = calc_crc (pos);
       unsigned accum = *crc_ptr;
       /* Only mix the existing *CRC_PTR if it is non-zero.  */
-      accum = accum ? crc32_unsigned (accum, crc) : crc;
+      accum = accum ? crc32_combine (accum, crc, pos - 4) : crc;
       *crc_ptr = accum;
 
       /* Buffer will be sufficiently aligned.  */