Submitter | Richard Sandiford |
---|---|

Date | July 7, 2010, 9:21 p.m. |

Message ID | <87ocejrnsd.fsf@firetop.home> |

Download | mbox | patch |

Permalink | /patch/58187/ |

State | New |

Headers | show |

## Comments

## Patch

Index: gcc/expmed.h =================================================================== --- gcc/expmed.h 2010-07-07 21:52:27.000000000 +0100 +++ gcc/expmed.h 2010-07-07 22:18:22.000000000 +0100 @@ -22,8 +22,118 @@ Software Foundation; either version 3, o #ifndef EXPMED_H #define EXPMED_H 1 +enum alg_code { + alg_unknown, + alg_zero, + alg_m, alg_shift, + alg_add_t_m2, + alg_sub_t_m2, + alg_add_factor, + alg_sub_factor, + alg_add_t2_m, + alg_sub_t2_m, + alg_impossible +}; + +/* This structure holds the "cost" of a multiply sequence. The + "cost" field holds the total rtx_cost of every operator in the + synthetic multiplication sequence, hence cost(a op b) is defined + as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero. + The "latency" field holds the minimum possible latency of the + synthetic multiply, on a hypothetical infinitely parallel CPU. + This is the critical path, or the maximum height, of the expression + tree which is the sum of rtx_costs on the most expensive path from + any leaf to the root. Hence latency(a op b) is defined as zero for + leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */ + +struct mult_cost { + short cost; /* Total rtx_cost of the multiplication sequence. */ + short latency; /* The latency of the multiplication sequence. */ +}; + +/* This macro is used to compare a pointer to a mult_cost against an + single integer "rtx_cost" value. This is equivalent to the macro + CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */ +#define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \ + || ((X)->cost == (Y) && (X)->latency < (Y))) + +/* This macro is used to compare two pointers to mult_costs against + each other. The macro returns true if X is cheaper than Y. + Currently, the cheaper of two mult_costs is the one with the + lower "cost". If "cost"s are tied, the lower latency is cheaper. */ +#define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \ + || ((X)->cost == (Y)->cost \ + && (X)->latency < (Y)->latency)) + +/* This structure records a sequence of operations. + `ops' is the number of operations recorded. + `cost' is their total cost. + The operations are stored in `op' and the corresponding + logarithms of the integer coefficients in `log'. + + These are the operations: + alg_zero total := 0; + alg_m total := multiplicand; + alg_shift total := total * coeff + alg_add_t_m2 total := total + multiplicand * coeff; + alg_sub_t_m2 total := total - multiplicand * coeff; + alg_add_factor total := total * coeff + total; + alg_sub_factor total := total * coeff - total; + alg_add_t2_m total := total * coeff + multiplicand; + alg_sub_t2_m total := total * coeff - multiplicand; + + The first operand must be either alg_zero or alg_m. */ + +struct algorithm +{ + struct mult_cost cost; + short ops; + /* The size of the OP and LOG fields are not directly related to the + word size, but the worst-case algorithms will be if we have few + consecutive ones or zeros, i.e., a multiplicand like 10101010101... + In that case we will generate shift-by-2, add, shift-by-2, add,..., + in total wordsize operations. */ + enum alg_code op[MAX_BITS_PER_WORD]; + char log[MAX_BITS_PER_WORD]; +}; + +/* The entry for our multiplication cache/hash table. */ +struct alg_hash_entry { + /* The number we are multiplying by. */ + unsigned HOST_WIDE_INT t; + + /* The mode in which we are multiplying something by T. */ + enum machine_mode mode; + + /* The best multiplication algorithm for t. */ + enum alg_code alg; + + /* The cost of multiplication if ALG_CODE is not alg_impossible. + Otherwise, the cost within which multiplication by T is + impossible. */ + struct mult_cost cost; + + /* Optimized for speed? */ + bool speed; +}; + +/* The number of cache/hash entries. */ +#if HOST_BITS_PER_WIDE_INT == 64 +#define NUM_ALG_HASH_ENTRIES 1031 +#else +#define NUM_ALG_HASH_ENTRIES 307 +#endif + /* Target-dependent globals. */ struct target_expmed { + /* Each entry of ALG_HASH caches alg_code for some integer. This is + actually a hash table. If we have a collision, that the older + entry is kicked out. */ + struct alg_hash_entry x_alg_hash[NUM_ALG_HASH_ENTRIES]; + + /* True if x_alg_hash might already have been used. */ + bool x_alg_hash_used_p; + /* Nonzero means divides or modulus operations are relatively cheap for powers of two, so don't use branches; emit the operation instead. Usually, this will mean that the MD file will emit non-branch @@ -54,6 +164,10 @@ struct target_expmed { #define this_target_expmed (&default_target_expmed) #endif +#define alg_hash \ + (this_target_expmed->x_alg_hash) +#define alg_hash_used_p \ + (this_target_expmed->x_alg_hash_used_p) #define sdiv_pow2_cheap \ (this_target_expmed->x_sdiv_pow2_cheap) #define smod_pow2_cheap \ Index: gcc/expmed.c =================================================================== --- gcc/expmed.c 2010-07-07 21:52:27.000000000 +0100 +++ gcc/expmed.c 2010-07-07 22:17:49.000000000 +0100 @@ -259,6 +259,10 @@ init_expmed (void) } } } + if (alg_hash_used_p) + memset (alg_hash, 0, sizeof (alg_hash)); + else + alg_hash_used_p = true; default_rtl_profile (); } @@ -2282,113 +2286,6 @@ expand_shift (enum tree_code code, enum return temp; } -enum alg_code { - alg_unknown, - alg_zero, - alg_m, alg_shift, - alg_add_t_m2, - alg_sub_t_m2, - alg_add_factor, - alg_sub_factor, - alg_add_t2_m, - alg_sub_t2_m, - alg_impossible -}; - -/* This structure holds the "cost" of a multiply sequence. The - "cost" field holds the total rtx_cost of every operator in the - synthetic multiplication sequence, hence cost(a op b) is defined - as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero. - The "latency" field holds the minimum possible latency of the - synthetic multiply, on a hypothetical infinitely parallel CPU. - This is the critical path, or the maximum height, of the expression - tree which is the sum of rtx_costs on the most expensive path from - any leaf to the root. Hence latency(a op b) is defined as zero for - leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */ - -struct mult_cost { - short cost; /* Total rtx_cost of the multiplication sequence. */ - short latency; /* The latency of the multiplication sequence. */ -}; - -/* This macro is used to compare a pointer to a mult_cost against an - single integer "rtx_cost" value. This is equivalent to the macro - CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */ -#define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \ - || ((X)->cost == (Y) && (X)->latency < (Y))) - -/* This macro is used to compare two pointers to mult_costs against - each other. The macro returns true if X is cheaper than Y. - Currently, the cheaper of two mult_costs is the one with the - lower "cost". If "cost"s are tied, the lower latency is cheaper. */ -#define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \ - || ((X)->cost == (Y)->cost \ - && (X)->latency < (Y)->latency)) - -/* This structure records a sequence of operations. - `ops' is the number of operations recorded. - `cost' is their total cost. - The operations are stored in `op' and the corresponding - logarithms of the integer coefficients in `log'. - - These are the operations: - alg_zero total := 0; - alg_m total := multiplicand; - alg_shift total := total * coeff - alg_add_t_m2 total := total + multiplicand * coeff; - alg_sub_t_m2 total := total - multiplicand * coeff; - alg_add_factor total := total * coeff + total; - alg_sub_factor total := total * coeff - total; - alg_add_t2_m total := total * coeff + multiplicand; - alg_sub_t2_m total := total * coeff - multiplicand; - - The first operand must be either alg_zero or alg_m. */ - -struct algorithm -{ - struct mult_cost cost; - short ops; - /* The size of the OP and LOG fields are not directly related to the - word size, but the worst-case algorithms will be if we have few - consecutive ones or zeros, i.e., a multiplicand like 10101010101... - In that case we will generate shift-by-2, add, shift-by-2, add,..., - in total wordsize operations. */ - enum alg_code op[MAX_BITS_PER_WORD]; - char log[MAX_BITS_PER_WORD]; -}; - -/* The entry for our multiplication cache/hash table. */ -struct alg_hash_entry { - /* The number we are multiplying by. */ - unsigned HOST_WIDE_INT t; - - /* The mode in which we are multiplying something by T. */ - enum machine_mode mode; - - /* The best multiplication algorithm for t. */ - enum alg_code alg; - - /* The cost of multiplication if ALG_CODE is not alg_impossible. - Otherwise, the cost within which multiplication by T is - impossible. */ - struct mult_cost cost; - - /* OPtimized for speed? */ - bool speed; -}; - -/* The number of cache/hash entries. */ -#if HOST_BITS_PER_WIDE_INT == 64 -#define NUM_ALG_HASH_ENTRIES 1031 -#else -#define NUM_ALG_HASH_ENTRIES 307 -#endif - -/* Each entry of ALG_HASH caches alg_code for some integer. This is - actually a hash table. If we have a collision, that the older - entry is kicked out. */ -static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES]; - /* Indicates the type of fixup needed after a constant multiplication. BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that the result should be negated, and ADD_VARIANT means that the