diff mbox series

options, lto: Optimize streaming of optimization nodes

Message ID 20200913083327.GG21814@tucnak
State New
Headers show
Series options, lto: Optimize streaming of optimization nodes | expand

Commit Message

Jakub Jelinek Sept. 13, 2020, 8:33 a.m. UTC
Hi!

When working on the previous patch, I've noticed that all cl_optimization
fields appart from strings are streamed with bp_pack_value (..., 64); so we
waste quite a lot of space, given that many of the options are just booleans
or char options and there are 450-ish of them.

Fixed by streaming the number of bits the corresponding fields have.
While for char fields we have also range information, except for 3
it is either -128, 127 or 0, 255, so it didn't seem worth it to bother
with using range-ish packing.

Bootstrapped/regtested on x86_64-linux, i686-linux, armv7hl-linux-gnueabi,
aarch64-linux, powerpc64le-linux and lto bootstrapped on x86_64-linux, ok
for trunk?

2020-09-13  Jakub Jelinek  <jakub@redhat.com>

	* optc-save-gen.awk: In cl_optimization_stream_out and
	cl_optimization_stream_in, stream int, enum, short and char
	variables with corresponding HOST_BITS_PER_* rather than
	hardcoded 64.


	Jakub

Comments

Richard Biener Sept. 14, 2020, 6:39 a.m. UTC | #1
On Sun, 13 Sep 2020, Jakub Jelinek wrote:

> Hi!
> 
> When working on the previous patch, I've noticed that all cl_optimization
> fields appart from strings are streamed with bp_pack_value (..., 64); so we
> waste quite a lot of space, given that many of the options are just booleans
> or char options and there are 450-ish of them.
> 
> Fixed by streaming the number of bits the corresponding fields have.
> While for char fields we have also range information, except for 3
> it is either -128, 127 or 0, 255, so it didn't seem worth it to bother
> with using range-ish packing.
> 
> Bootstrapped/regtested on x86_64-linux, i686-linux, armv7hl-linux-gnueabi,
> aarch64-linux, powerpc64le-linux and lto bootstrapped on x86_64-linux, ok
> for trunk?

Hmm, in principle the LTO bytecode format should be not dependent
on the host, while it probably doesn't work right now to move
32bit host to 64bit host targeting the same target bytecode files
the following makes explicit use of HOST_* and that might have been
the reason we're using 64bit encodings for everything.  Note
using 64bits isn't too bad in practice since we uleb encode the
bitpack words and outputing 64bits basically ensures we're outputting
a stream of uleb encoded uint64_t.

Richard.

> 2020-09-13  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* optc-save-gen.awk: In cl_optimization_stream_out and
> 	cl_optimization_stream_in, stream int, enum, short and char
> 	variables with corresponding HOST_BITS_PER_* rather than
> 	hardcoded 64.
> 
> --- gcc/optc-save-gen.awk.jj	2020-09-11 17:38:31.524230167 +0200
> +++ gcc/optc-save-gen.awk	2020-09-11 22:12:59.948580827 +0200
> @@ -1252,13 +1252,23 @@ print "cl_optimization_stream_out (struc
>  print "                            struct bitpack_d *bp,";
>  print "                            struct cl_optimization *ptr)";
>  print "{";
> -for (i = 0; i < n_opt_val; i++) {
> -	name = var_opt_val[i]
> -	otype = var_opt_val_type[i];
> -	if (otype ~ "^const char \\**$")
> -		print "  bp_pack_string (ob, bp, ptr->" name", true);";
> -	else
> -		print "  bp_pack_value (bp, ptr->" name", 64);";
> +for (i = 0; i < n_opt_other; i++) {
> +	print "  bp_pack_value (bp, ptr->x_" var_opt_other[i] ", HOST_BITS_PER_WIDE_INT);";
> +}
> +for (i = 0; i < n_opt_int; i++) {
> +	print "  bp_pack_value (bp, static_cast<unsigned int>(ptr->x_" var_opt_int[i] "), HOST_BITS_PER_INT);";
> +}
> +for (i = 0; i < n_opt_enum; i++) {
> +	print "  bp_pack_value (bp, static_cast<unsigned int>(ptr->x_" var_opt_enum[i] "), HOST_BITS_PER_INT);";
> +}
> +for (i = 0; i < n_opt_short; i++) {
> +	print "  bp_pack_value (bp, static_cast<unsigned short>(ptr->x_" var_opt_short[i] "), HOST_BITS_PER_SHORT);";
> +}
> +for (i = 0; i < n_opt_char; i++) {
> +	print "  bp_pack_value (bp, static_cast<unsigned char>(ptr->x_" var_opt_char[i] "), HOST_BITS_PER_CHAR);";
> +}
> +for (i = 0; i < n_opt_string; i++) {
> +	print "  bp_pack_string (ob, bp, ptr->x_" var_opt_string[i] ", true);";
>  }
>  print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
>  print "    bp_pack_value (bp, ptr->explicit_mask[i], 64);";
> @@ -1271,17 +1281,26 @@ print "cl_optimization_stream_in (struct
>  print "                           struct bitpack_d *bp ATTRIBUTE_UNUSED,";
>  print "                           struct cl_optimization *ptr ATTRIBUTE_UNUSED)";
>  print "{";
> -for (i = 0; i < n_opt_val; i++) {
> -	name = var_opt_val[i]
> -	otype = var_opt_val_type[i];
> -	if (otype ~ "^const char \\**$")
> -	{
> -	      print "  ptr->" name" = bp_unpack_string (data_in, bp);";
> -	      print "  if (ptr->" name")";
> -	      print "    ptr->" name" = xstrdup (ptr->" name");";
> -	}
> -	else
> -	      print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value (bp, 64);";
> +for (i = 0; i < n_opt_other; i++) {
> +	print "  ptr->x_" var_opt_other[i] " = bp_unpack_value (bp, HOST_BITS_PER_WIDE_INT);";
> +}
> +for (i = 0; i < n_opt_int; i++) {
> +	print "  ptr->x_" var_opt_int[i] " = bp_unpack_value (bp, HOST_BITS_PER_INT);";
> +}
> +for (i = 0; i < n_opt_enum; i++) {
> +	print "  ptr->x_" var_opt_enum[i] " = static_cast<" var_opt_enum_type[i] ">(bp_unpack_value (bp, HOST_BITS_PER_INT));";
> +}
> +for (i = 0; i < n_opt_short; i++) {
> +	print "  ptr->x_" var_opt_short[i] " = bp_unpack_value (bp, HOST_BITS_PER_SHORT);";
> +}
> +for (i = 0; i < n_opt_char; i++) {
> +	print "  ptr->x_" var_opt_char[i] " = bp_unpack_value (bp, HOST_BITS_PER_CHAR);";
> +}
> +for (i = 0; i < n_opt_string; i++) {
> +	name = var_opt_string[i];
> +	print "  ptr->x_" name" = bp_unpack_string (data_in, bp);";
> +	print "  if (ptr->x_" name")";
> +	print "    ptr->x_" name" = xstrdup (ptr->x_" name");";
>  }
>  print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
>  print "    ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";
> 
> 	Jakub
> 
>
Jakub Jelinek Sept. 14, 2020, 7 a.m. UTC | #2
On Mon, Sep 14, 2020 at 08:39:22AM +0200, Richard Biener wrote:
> > When working on the previous patch, I've noticed that all cl_optimization
> > fields appart from strings are streamed with bp_pack_value (..., 64); so we
> > waste quite a lot of space, given that many of the options are just booleans
> > or char options and there are 450-ish of them.
> > 
> > Fixed by streaming the number of bits the corresponding fields have.
> > While for char fields we have also range information, except for 3
> > it is either -128, 127 or 0, 255, so it didn't seem worth it to bother
> > with using range-ish packing.
> > 
> > Bootstrapped/regtested on x86_64-linux, i686-linux, armv7hl-linux-gnueabi,
> > aarch64-linux, powerpc64le-linux and lto bootstrapped on x86_64-linux, ok
> > for trunk?
> 
> Hmm, in principle the LTO bytecode format should be not dependent
> on the host, while it probably doesn't work right now to move
> 32bit host to 64bit host targeting the same target bytecode files
> the following makes explicit use of HOST_* and that might have been
> the reason we're using 64bit encodings for everything.  Note
> using 64bits isn't too bad in practice since we uleb encode the
> bitpack words and outputing 64bits basically ensures we're outputting
> a stream of uleb encoded uint64_t.

So would using hardcoded 8, 16, 32 and 64 be better?
I mean, we certainly assume the -128, 127 or 0, 255 ranges e.g. for chars.
There are ~ 4 strings, 226 chars, 222 ints/enums in my options-save.c
bp_pack_* calls and 39 64-bit (which includes the target stuff) on
x86_64-linux.

Another option is using the variable length
bp_pack_var_len_unsigned/bp_pack_var_len_int
for everything, I guess most of the char options are 0/1/2, most of the
params are smallish too (though there are some 100s, 1000s and 10000s and
even higher, but most of them are small).
Either the awk script would need to figure out which are guaranteed to be
unsigned and use *_unsigned for them and use *_int for rest, or we'd need to
use *_int everywhere.

	Jakub
Richard Biener Sept. 14, 2020, 7:31 a.m. UTC | #3
On Mon, 14 Sep 2020, Jakub Jelinek wrote:

> On Mon, Sep 14, 2020 at 08:39:22AM +0200, Richard Biener wrote:
> > > When working on the previous patch, I've noticed that all cl_optimization
> > > fields appart from strings are streamed with bp_pack_value (..., 64); so we
> > > waste quite a lot of space, given that many of the options are just booleans
> > > or char options and there are 450-ish of them.
> > > 
> > > Fixed by streaming the number of bits the corresponding fields have.
> > > While for char fields we have also range information, except for 3
> > > it is either -128, 127 or 0, 255, so it didn't seem worth it to bother
> > > with using range-ish packing.
> > > 
> > > Bootstrapped/regtested on x86_64-linux, i686-linux, armv7hl-linux-gnueabi,
> > > aarch64-linux, powerpc64le-linux and lto bootstrapped on x86_64-linux, ok
> > > for trunk?
> > 
> > Hmm, in principle the LTO bytecode format should be not dependent
> > on the host, while it probably doesn't work right now to move
> > 32bit host to 64bit host targeting the same target bytecode files
> > the following makes explicit use of HOST_* and that might have been
> > the reason we're using 64bit encodings for everything.  Note
> > using 64bits isn't too bad in practice since we uleb encode the
> > bitpack words and outputing 64bits basically ensures we're outputting
> > a stream of uleb encoded uint64_t.
> 
> So would using hardcoded 8, 16, 32 and 64 be better?
> I mean, we certainly assume the -128, 127 or 0, 255 ranges e.g. for chars.
> There are ~ 4 strings, 226 chars, 222 ints/enums in my options-save.c
> bp_pack_* calls and 39 64-bit (which includes the target stuff) on
> x86_64-linux.
>
> Another option is using the variable length
> bp_pack_var_len_unsigned/bp_pack_var_len_int
> for everything, I guess most of the char options are 0/1/2, most of the
> params are smallish too (though there are some 100s, 1000s and 10000s and
> even higher, but most of them are small).
> Either the awk script would need to figure out which are guaranteed to be
> unsigned and use *_unsigned for them and use *_int for rest, or we'd need to
> use *_int everywhere.

But does it make any noticable difference in the end?  Using
bp_pack_var_len_unsigned just causes us to [u]leb encode half-bytes
rather than full bytes.  Using hardcoded 8/16/32/64 makes it still
dependent on what 'int' is at maximum on the host.

That is, I'd indeed prefer bp_pack_var_len_unsigned over hard-coding
8, 16, etc., but can you share a size comparison of the bitpack?
I guess with bp_pack_var_len_unsigned it might shrink in half
compared to the current code and streaming standard -O2?

Richard.
 
> 	Jakub
> 
>
Jakub Jelinek Sept. 14, 2020, 8:48 a.m. UTC | #4
On Mon, Sep 14, 2020 at 09:31:52AM +0200, Richard Biener wrote:
> But does it make any noticable difference in the end?  Using

Yes.

> bp_pack_var_len_unsigned just causes us to [u]leb encode half-bytes
> rather than full bytes.  Using hardcoded 8/16/32/64 makes it still
> dependent on what 'int' is at maximum on the host.
> 
> That is, I'd indeed prefer bp_pack_var_len_unsigned over hard-coding
> 8, 16, etc., but can you share a size comparison of the bitpack?
> I guess with bp_pack_var_len_unsigned it might shrink in half
> compared to the current code and streaming standard -O2?

So, I've tried
--- gcc/tree-streamer-out.c.jj	2020-07-28 15:39:10.079755251 +0200
+++ gcc/tree-streamer-out.c	2020-09-14 10:31:29.106957258 +0200
@@ -489,7 +489,11 @@ streamer_write_tree_bitfields (struct ou
     pack_ts_translation_unit_decl_value_fields (ob, &bp, expr);
 
   if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
+{
+long ts = ob->main_stream->total_size;
     cl_optimization_stream_out (ob, &bp, TREE_OPTIMIZATION (expr));
+fprintf (stderr, "total_size %ld\n", (long) (ob->main_stream->total_size - ts));
+}
 
   if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR))
     bp_pack_var_len_unsigned (&bp, CONSTRUCTOR_NELTS (expr));
hack without and with the following patch on a simple small testcase with
-O2 -flto.
Got 574 bytes without the opc-save-gen.awk change and 454 bytes with it,
that is ~ 21% saving on the TREE_OPTIMIZATION streaming.

2020-09-14  Jakub Jelinek  <jakub@redhat.com>

	* optc-save-gen.awk: In cl_optimization_stream_out use
	bp_pack_var_len_{int,unsigned} instead of bp_pack_value.  In
	cl_optimization_stream_in use bp_unpack_var_len_{int,unsigned}
	instead of bp_unpack_value.  Formatting fix.

--- gcc/optc-save-gen.awk.jj	2020-09-14 09:04:35.879854156 +0200
+++ gcc/optc-save-gen.awk	2020-09-14 10:38:47.722424942 +0200
@@ -1257,8 +1257,10 @@ for (i = 0; i < n_opt_val; i++) {
 	otype = var_opt_val_type[i];
 	if (otype ~ "^const char \\**$")
 		print "  bp_pack_string (ob, bp, ptr->" name", true);";
+	else if (otype ~ "^unsigned")
+		print "  bp_pack_var_len_unsigned (bp, ptr->" name");";
 	else
-		print "  bp_pack_value (bp, ptr->" name", 64);";
+		print "  bp_pack_var_len_int (bp, ptr->" name");";
 }
 print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
 print "    bp_pack_value (bp, ptr->explicit_mask[i], 64);";
@@ -1274,14 +1276,15 @@ print "{";
 for (i = 0; i < n_opt_val; i++) {
 	name = var_opt_val[i]
 	otype = var_opt_val_type[i];
-	if (otype ~ "^const char \\**$")
-	{
-	      print "  ptr->" name" = bp_unpack_string (data_in, bp);";
-	      print "  if (ptr->" name")";
-	      print "    ptr->" name" = xstrdup (ptr->" name");";
+	if (otype ~ "^const char \\**$") {
+		print "  ptr->" name" = bp_unpack_string (data_in, bp);";
+		print "  if (ptr->" name")";
+		print "    ptr->" name" = xstrdup (ptr->" name");";
 	}
+	else if (otype ~ "^unsigned")
+		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_unsigned (bp);";
 	else
-	      print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value (bp, 64);";
+		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_int (bp);";
 }
 print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
 print "    ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";


	Jakub
Jan Hubicka Sept. 14, 2020, 9:02 a.m. UTC | #5
> On Mon, Sep 14, 2020 at 09:31:52AM +0200, Richard Biener wrote:
> > But does it make any noticable difference in the end?  Using
> 
> Yes.
> 
> > bp_pack_var_len_unsigned just causes us to [u]leb encode half-bytes
> > rather than full bytes.  Using hardcoded 8/16/32/64 makes it still
> > dependent on what 'int' is at maximum on the host.
> > 
> > That is, I'd indeed prefer bp_pack_var_len_unsigned over hard-coding
> > 8, 16, etc., but can you share a size comparison of the bitpack?
> > I guess with bp_pack_var_len_unsigned it might shrink in half
> > compared to the current code and streaming standard -O2?
> 
> So, I've tried
> --- gcc/tree-streamer-out.c.jj	2020-07-28 15:39:10.079755251 +0200
> +++ gcc/tree-streamer-out.c	2020-09-14 10:31:29.106957258 +0200
> @@ -489,7 +489,11 @@ streamer_write_tree_bitfields (struct ou
>      pack_ts_translation_unit_decl_value_fields (ob, &bp, expr);
>  
>    if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
> +{
> +long ts = ob->main_stream->total_size;
>      cl_optimization_stream_out (ob, &bp, TREE_OPTIMIZATION (expr));
> +fprintf (stderr, "total_size %ld\n", (long) (ob->main_stream->total_size - ts));
> +}

You should be able to read the sizes from streaming dump file as well.
>  
>    if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR))
>      bp_pack_var_len_unsigned (&bp, CONSTRUCTOR_NELTS (expr));
> hack without and with the following patch on a simple small testcase with
> -O2 -flto.
> Got 574 bytes without the opc-save-gen.awk change and 454 bytes with it,
> that is ~ 21% saving on the TREE_OPTIMIZATION streaming.
> 
> 2020-09-14  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* optc-save-gen.awk: In cl_optimization_stream_out use
> 	bp_pack_var_len_{int,unsigned} instead of bp_pack_value.  In
> 	cl_optimization_stream_in use bp_unpack_var_len_{int,unsigned}
> 	instead of bp_unpack_value.  Formatting fix.
> 
> --- gcc/optc-save-gen.awk.jj	2020-09-14 09:04:35.879854156 +0200
> +++ gcc/optc-save-gen.awk	2020-09-14 10:38:47.722424942 +0200
> @@ -1257,8 +1257,10 @@ for (i = 0; i < n_opt_val; i++) {
>  	otype = var_opt_val_type[i];
>  	if (otype ~ "^const char \\**$")
>  		print "  bp_pack_string (ob, bp, ptr->" name", true);";
> +	else if (otype ~ "^unsigned")
> +		print "  bp_pack_var_len_unsigned (bp, ptr->" name");";
>  	else
> -		print "  bp_pack_value (bp, ptr->" name", 64);";
> +		print "  bp_pack_var_len_int (bp, ptr->" name");";
>  }
>  print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
>  print "    bp_pack_value (bp, ptr->explicit_mask[i], 64);";
> @@ -1274,14 +1276,15 @@ print "{";
>  for (i = 0; i < n_opt_val; i++) {
>  	name = var_opt_val[i]
>  	otype = var_opt_val_type[i];
> -	if (otype ~ "^const char \\**$")
> -	{
> -	      print "  ptr->" name" = bp_unpack_string (data_in, bp);";
> -	      print "  if (ptr->" name")";
> -	      print "    ptr->" name" = xstrdup (ptr->" name");";
> +	if (otype ~ "^const char \\**$") {
> +		print "  ptr->" name" = bp_unpack_string (data_in, bp);";
> +		print "  if (ptr->" name")";
> +		print "    ptr->" name" = xstrdup (ptr->" name");";
>  	}
> +	else if (otype ~ "^unsigned")
> +		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_unsigned (bp);";
>  	else
> -	      print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value (bp, 64);";
> +		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_int (bp);";

Not making difference between signed/unsigned was my implementation
lazyness at the time code was added. So this looks like nice cleanup.

Especially for the new param machinery, most of streamed values are
probably going to be the default values.  Perhaps somehow we could
stream them more effectively.

Overall we sould not get much more than 1 optimize/target node per unit
so the size should show up only when you stream a lot of very small .o
files.

Honza
>  }
>  print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
>  print "    ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";
> 
> 
> 	Jakub
>
Richard Biener Sept. 14, 2020, 9:23 a.m. UTC | #6
On Mon, 14 Sep 2020, Jakub Jelinek wrote:

> On Mon, Sep 14, 2020 at 09:31:52AM +0200, Richard Biener wrote:
> > But does it make any noticable difference in the end?  Using
> 
> Yes.
> 
> > bp_pack_var_len_unsigned just causes us to [u]leb encode half-bytes
> > rather than full bytes.  Using hardcoded 8/16/32/64 makes it still
> > dependent on what 'int' is at maximum on the host.
> > 
> > That is, I'd indeed prefer bp_pack_var_len_unsigned over hard-coding
> > 8, 16, etc., but can you share a size comparison of the bitpack?
> > I guess with bp_pack_var_len_unsigned it might shrink in half
> > compared to the current code and streaming standard -O2?
> 
> So, I've tried
> --- gcc/tree-streamer-out.c.jj	2020-07-28 15:39:10.079755251 +0200
> +++ gcc/tree-streamer-out.c	2020-09-14 10:31:29.106957258 +0200
> @@ -489,7 +489,11 @@ streamer_write_tree_bitfields (struct ou
>      pack_ts_translation_unit_decl_value_fields (ob, &bp, expr);
>  
>    if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
> +{
> +long ts = ob->main_stream->total_size;
>      cl_optimization_stream_out (ob, &bp, TREE_OPTIMIZATION (expr));
> +fprintf (stderr, "total_size %ld\n", (long) (ob->main_stream->total_size - ts));
> +}
>  
>    if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR))
>      bp_pack_var_len_unsigned (&bp, CONSTRUCTOR_NELTS (expr));
> hack without and with the following patch on a simple small testcase with
> -O2 -flto.
> Got 574 bytes without the opc-save-gen.awk change and 454 bytes with it,
> that is ~ 21% saving on the TREE_OPTIMIZATION streaming.

OK, not spectacular but I guess good enough.  So I'd say the below
patch using bp_pack_var_len_{int,unsigned} is OK.

Thanks,
Richard.

> 2020-09-14  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* optc-save-gen.awk: In cl_optimization_stream_out use
> 	bp_pack_var_len_{int,unsigned} instead of bp_pack_value.  In
> 	cl_optimization_stream_in use bp_unpack_var_len_{int,unsigned}
> 	instead of bp_unpack_value.  Formatting fix.
> 
> --- gcc/optc-save-gen.awk.jj	2020-09-14 09:04:35.879854156 +0200
> +++ gcc/optc-save-gen.awk	2020-09-14 10:38:47.722424942 +0200
> @@ -1257,8 +1257,10 @@ for (i = 0; i < n_opt_val; i++) {
>  	otype = var_opt_val_type[i];
>  	if (otype ~ "^const char \\**$")
>  		print "  bp_pack_string (ob, bp, ptr->" name", true);";
> +	else if (otype ~ "^unsigned")
> +		print "  bp_pack_var_len_unsigned (bp, ptr->" name");";
>  	else
> -		print "  bp_pack_value (bp, ptr->" name", 64);";
> +		print "  bp_pack_var_len_int (bp, ptr->" name");";
>  }
>  print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
>  print "    bp_pack_value (bp, ptr->explicit_mask[i], 64);";
> @@ -1274,14 +1276,15 @@ print "{";
>  for (i = 0; i < n_opt_val; i++) {
>  	name = var_opt_val[i]
>  	otype = var_opt_val_type[i];
> -	if (otype ~ "^const char \\**$")
> -	{
> -	      print "  ptr->" name" = bp_unpack_string (data_in, bp);";
> -	      print "  if (ptr->" name")";
> -	      print "    ptr->" name" = xstrdup (ptr->" name");";
> +	if (otype ~ "^const char \\**$") {
> +		print "  ptr->" name" = bp_unpack_string (data_in, bp);";
> +		print "  if (ptr->" name")";
> +		print "    ptr->" name" = xstrdup (ptr->" name");";
>  	}
> +	else if (otype ~ "^unsigned")
> +		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_unsigned (bp);";
>  	else
> -	      print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value (bp, 64);";
> +		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_int (bp);";
>  }
>  print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
>  print "    ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";
> 
> 
> 	Jakub
> 
>
Jakub Jelinek Sept. 14, 2020, 9:47 a.m. UTC | #7
On Mon, Sep 14, 2020 at 11:02:26AM +0200, Jan Hubicka wrote:
> Especially for the new param machinery, most of streamed values are
> probably going to be the default values.  Perhaps somehow we could
> stream them more effectively.

Ah, that seems like a good idea, that brings further savings, the size
goes down from 574 bytes to 273 bytes, i.e. less than half.
Here is an updated version that does that.  Not trying to handle
enums because the code doesn't know if (enum ...) 10 is even valid,
similarly non-parameters because those really generally don't have large
initializers, and params without Init (those are 0 initialized and thus
don't need to be handled).

2020-09-14  Jakub Jelinek  <jakub@redhat.com>

	* optc-save-gen.awk: Initialize var_opt_init.  In
	cl_optimization_stream_out use bp_pack_var_len_{int,unsigned} instead
	of bp_pack_value and for params with default values larger than 10, xor
	the default value with the actual parameter value.  In
	cl_optimization_stream_in use bp_unpack_var_len_{int,unsigned}
	instead of bp_unpack_value and repeat the above xor.  Formatting fix.

--- gcc/optc-save-gen.awk.jj	2020-09-14 10:51:54.493740942 +0200
+++ gcc/optc-save-gen.awk	2020-09-14 11:39:39.441602594 +0200
@@ -1186,6 +1186,7 @@ for (i = 0; i < n_opts; i++) {
 		var_opt_val_type[n_opt_val] = otype;
 		var_opt_val[n_opt_val] = "x_" name;
 		var_opt_hash[n_opt_val] = flag_set_p("Optimization", flags[i]);
+		var_opt_init[n_opt_val] = opt_args("Init", flags[i]);
 		n_opt_val++;
 	}
 }
@@ -1257,8 +1258,21 @@ for (i = 0; i < n_opt_val; i++) {
 	otype = var_opt_val_type[i];
 	if (otype ~ "^const char \\**$")
 		print "  bp_pack_string (ob, bp, ptr->" name", true);";
-	else
-		print "  bp_pack_value (bp, ptr->" name", 64);";
+	else {
+		if (otype ~ "^unsigned") {
+			sgn = "unsigned";
+		} else {
+			sgn = "int";
+		}
+		if (name ~ "^x_param" && !(otype ~ "^enum ") && var_opt_init[i]) {
+			print "  if (" var_opt_init[i] " > (" var_opt_val_type[i] ") 10)";
+			print "    bp_pack_var_len_" sgn " (bp, ptr->" name" ^ " var_opt_init[i] ");";
+			print "  else";
+			print "    bp_pack_var_len_" sgn " (bp, ptr->" name");";
+		} else {
+			print "  bp_pack_var_len_" sgn " (bp, ptr->" name");";
+		}
+	}
 }
 print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
 print "    bp_pack_value (bp, ptr->explicit_mask[i], 64);";
@@ -1274,14 +1288,23 @@ print "{";
 for (i = 0; i < n_opt_val; i++) {
 	name = var_opt_val[i]
 	otype = var_opt_val_type[i];
-	if (otype ~ "^const char \\**$")
-	{
-	      print "  ptr->" name" = bp_unpack_string (data_in, bp);";
-	      print "  if (ptr->" name")";
-	      print "    ptr->" name" = xstrdup (ptr->" name");";
+	if (otype ~ "^const char \\**$") {
+		print "  ptr->" name" = bp_unpack_string (data_in, bp);";
+		print "  if (ptr->" name")";
+		print "    ptr->" name" = xstrdup (ptr->" name");";
+	}
+	else {
+		if (otype ~ "^unsigned") {
+			sgn = "unsigned";
+		} else {
+			sgn = "int";
+		}
+		print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_var_len_" sgn " (bp);";
+		if (name ~ "^x_param" && !(otype ~ "^enum ") && var_opt_init[i]) {
+			print "  if (" var_opt_init[i] " > (" var_opt_val_type[i] ") 10)";
+			print "    ptr->" name" ^= " var_opt_init[i] ";";
+		}
 	}
-	else
-	      print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value (bp, 64);";
 }
 print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
 print "    ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";


	Jakub
diff mbox series

Patch

--- gcc/optc-save-gen.awk.jj	2020-09-11 17:38:31.524230167 +0200
+++ gcc/optc-save-gen.awk	2020-09-11 22:12:59.948580827 +0200
@@ -1252,13 +1252,23 @@  print "cl_optimization_stream_out (struc
 print "                            struct bitpack_d *bp,";
 print "                            struct cl_optimization *ptr)";
 print "{";
-for (i = 0; i < n_opt_val; i++) {
-	name = var_opt_val[i]
-	otype = var_opt_val_type[i];
-	if (otype ~ "^const char \\**$")
-		print "  bp_pack_string (ob, bp, ptr->" name", true);";
-	else
-		print "  bp_pack_value (bp, ptr->" name", 64);";
+for (i = 0; i < n_opt_other; i++) {
+	print "  bp_pack_value (bp, ptr->x_" var_opt_other[i] ", HOST_BITS_PER_WIDE_INT);";
+}
+for (i = 0; i < n_opt_int; i++) {
+	print "  bp_pack_value (bp, static_cast<unsigned int>(ptr->x_" var_opt_int[i] "), HOST_BITS_PER_INT);";
+}
+for (i = 0; i < n_opt_enum; i++) {
+	print "  bp_pack_value (bp, static_cast<unsigned int>(ptr->x_" var_opt_enum[i] "), HOST_BITS_PER_INT);";
+}
+for (i = 0; i < n_opt_short; i++) {
+	print "  bp_pack_value (bp, static_cast<unsigned short>(ptr->x_" var_opt_short[i] "), HOST_BITS_PER_SHORT);";
+}
+for (i = 0; i < n_opt_char; i++) {
+	print "  bp_pack_value (bp, static_cast<unsigned char>(ptr->x_" var_opt_char[i] "), HOST_BITS_PER_CHAR);";
+}
+for (i = 0; i < n_opt_string; i++) {
+	print "  bp_pack_string (ob, bp, ptr->x_" var_opt_string[i] ", true);";
 }
 print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
 print "    bp_pack_value (bp, ptr->explicit_mask[i], 64);";
@@ -1271,17 +1281,26 @@  print "cl_optimization_stream_in (struct
 print "                           struct bitpack_d *bp ATTRIBUTE_UNUSED,";
 print "                           struct cl_optimization *ptr ATTRIBUTE_UNUSED)";
 print "{";
-for (i = 0; i < n_opt_val; i++) {
-	name = var_opt_val[i]
-	otype = var_opt_val_type[i];
-	if (otype ~ "^const char \\**$")
-	{
-	      print "  ptr->" name" = bp_unpack_string (data_in, bp);";
-	      print "  if (ptr->" name")";
-	      print "    ptr->" name" = xstrdup (ptr->" name");";
-	}
-	else
-	      print "  ptr->" name" = (" var_opt_val_type[i] ") bp_unpack_value (bp, 64);";
+for (i = 0; i < n_opt_other; i++) {
+	print "  ptr->x_" var_opt_other[i] " = bp_unpack_value (bp, HOST_BITS_PER_WIDE_INT);";
+}
+for (i = 0; i < n_opt_int; i++) {
+	print "  ptr->x_" var_opt_int[i] " = bp_unpack_value (bp, HOST_BITS_PER_INT);";
+}
+for (i = 0; i < n_opt_enum; i++) {
+	print "  ptr->x_" var_opt_enum[i] " = static_cast<" var_opt_enum_type[i] ">(bp_unpack_value (bp, HOST_BITS_PER_INT));";
+}
+for (i = 0; i < n_opt_short; i++) {
+	print "  ptr->x_" var_opt_short[i] " = bp_unpack_value (bp, HOST_BITS_PER_SHORT);";
+}
+for (i = 0; i < n_opt_char; i++) {
+	print "  ptr->x_" var_opt_char[i] " = bp_unpack_value (bp, HOST_BITS_PER_CHAR);";
+}
+for (i = 0; i < n_opt_string; i++) {
+	name = var_opt_string[i];
+	print "  ptr->x_" name" = bp_unpack_string (data_in, bp);";
+	print "  if (ptr->x_" name")";
+	print "    ptr->x_" name" = xstrdup (ptr->x_" name");";
 }
 print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)";
 print "    ptr->explicit_mask[i] = bp_unpack_value (bp, 64);";