Message ID | 1379620717-20358-1-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
Ping? r~ On 09/20/2013 05:58 AM, Richard Henderson wrote: > We previously allocated 32-bits per temp for the next_free_temp entry. > We now allocate 4 bits per temp across the 4 bitmaps. > > Using a linked list meant that if a translator is tweeked, resulting in > temps being freed in a different order, that would have follow-on effects > throughout the TB. Always allocating the lowest free temp means that > follow-on effects are minimized, which can make it easier to diff output > when debugging the translators. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/tcg.c | 32 +++++++++++++++----------------- > tcg/tcg.h | 11 ++++++----- > 2 files changed, 21 insertions(+), 22 deletions(-) > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index fd7fb6b..41ed8b0 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -316,11 +316,12 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) > > void tcg_func_start(TCGContext *s) > { > - int i; > tcg_pool_reset(s); > s->nb_temps = s->nb_globals; > - for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) > - s->first_free_temp[i] = -1; > + > + /* No temps have been previously allocated for size or locality. */ > + memset(s->free_temps, 0, sizeof(s->free_temps)); > + > s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); > s->nb_labels = 0; > s->current_frame_offset = s->frame_start; > @@ -468,16 +469,15 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local) > TCGTemp *ts; > int idx, k; > > - k = type; > - if (temp_local) > - k += TCG_TYPE_COUNT; > - idx = s->first_free_temp[k]; > - if (idx != -1) { > - /* There is already an available temp with the > - right type */ > + k = type + (temp_local ? TCG_TYPE_COUNT : 0); > + idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); > + if (idx < TCG_MAX_TEMPS) { > + /* There is already an available temp with the right type. */ > + clear_bit(idx, s->free_temps[k].l); > + > ts = &s->temps[idx]; > - s->first_free_temp[k] = ts->next_free_temp; > ts->temp_allocated = 1; > + assert(ts->base_type == type); > assert(ts->temp_local == temp_local); > } else { > idx = s->nb_temps; > @@ -533,7 +533,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local) > return MAKE_TCGV_I64(idx); > } > > -static inline void tcg_temp_free_internal(int idx) > +static void tcg_temp_free_internal(int idx) > { > TCGContext *s = &tcg_ctx; > TCGTemp *ts; > @@ -550,11 +550,9 @@ static inline void tcg_temp_free_internal(int idx) > ts = &s->temps[idx]; > assert(ts->temp_allocated != 0); > ts->temp_allocated = 0; > - k = ts->base_type; > - if (ts->temp_local) > - k += TCG_TYPE_COUNT; > - ts->next_free_temp = s->first_free_temp[k]; > - s->first_free_temp[k] = idx; > + > + k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0); > + set_bit(idx, s->free_temps[k].l); > } > > void tcg_temp_free_i32(TCGv_i32 arg) > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 902c751..c45e41b 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -26,7 +26,7 @@ > #define TCG_H > > #include "qemu-common.h" > - > +#include "qemu/bitops.h" > #include "tcg-target.h" > > /* Default target word size to pointer size. */ > @@ -400,8 +400,6 @@ typedef struct TCGTemp { > basic blocks. Otherwise, it is not > preserved across basic blocks. */ > unsigned int temp_allocated:1; /* never used for code gen */ > - /* index of next free temp of same base type, -1 if end */ > - int next_free_temp; > const char *name; > } TCGTemp; > > @@ -412,6 +410,10 @@ typedef struct TCGHelperInfo { > > typedef struct TCGContext TCGContext; > > +typedef struct TCGTempSet { > + unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; > +} TCGTempSet; > + > struct TCGContext { > uint8_t *pool_cur, *pool_end; > TCGPool *pool_first, *pool_current, *pool_first_large; > @@ -419,8 +421,6 @@ struct TCGContext { > int nb_labels; > int nb_globals; > int nb_temps; > - /* index of free temps, -1 if none */ > - int first_free_temp[TCG_TYPE_COUNT * 2]; > > /* goto_tb support */ > uint8_t *code_buf; > @@ -446,6 +446,7 @@ struct TCGContext { > > uint8_t *code_ptr; > TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ > + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; > > TCGHelperInfo *helpers; > int nb_helpers; >
Ping. r~ On 11/19/2013 07:57 PM, Richard Henderson wrote: > Ping? > > r~ > > On 09/20/2013 05:58 AM, Richard Henderson wrote: >> We previously allocated 32-bits per temp for the next_free_temp entry. >> We now allocate 4 bits per temp across the 4 bitmaps. >> >> Using a linked list meant that if a translator is tweeked, resulting in >> temps being freed in a different order, that would have follow-on effects >> throughout the TB. Always allocating the lowest free temp means that >> follow-on effects are minimized, which can make it easier to diff output >> when debugging the translators. >> >> Signed-off-by: Richard Henderson <rth@twiddle.net> >> --- >> tcg/tcg.c | 32 +++++++++++++++----------------- >> tcg/tcg.h | 11 ++++++----- >> 2 files changed, 21 insertions(+), 22 deletions(-) >> >> diff --git a/tcg/tcg.c b/tcg/tcg.c >> index fd7fb6b..41ed8b0 100644 >> --- a/tcg/tcg.c >> +++ b/tcg/tcg.c >> @@ -316,11 +316,12 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) >> >> void tcg_func_start(TCGContext *s) >> { >> - int i; >> tcg_pool_reset(s); >> s->nb_temps = s->nb_globals; >> - for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) >> - s->first_free_temp[i] = -1; >> + >> + /* No temps have been previously allocated for size or locality. */ >> + memset(s->free_temps, 0, sizeof(s->free_temps)); >> + >> s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); >> s->nb_labels = 0; >> s->current_frame_offset = s->frame_start; >> @@ -468,16 +469,15 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local) >> TCGTemp *ts; >> int idx, k; >> >> - k = type; >> - if (temp_local) >> - k += TCG_TYPE_COUNT; >> - idx = s->first_free_temp[k]; >> - if (idx != -1) { >> - /* There is already an available temp with the >> - right type */ >> + k = type + (temp_local ? TCG_TYPE_COUNT : 0); >> + idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); >> + if (idx < TCG_MAX_TEMPS) { >> + /* There is already an available temp with the right type. */ >> + clear_bit(idx, s->free_temps[k].l); >> + >> ts = &s->temps[idx]; >> - s->first_free_temp[k] = ts->next_free_temp; >> ts->temp_allocated = 1; >> + assert(ts->base_type == type); >> assert(ts->temp_local == temp_local); >> } else { >> idx = s->nb_temps; >> @@ -533,7 +533,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local) >> return MAKE_TCGV_I64(idx); >> } >> >> -static inline void tcg_temp_free_internal(int idx) >> +static void tcg_temp_free_internal(int idx) >> { >> TCGContext *s = &tcg_ctx; >> TCGTemp *ts; >> @@ -550,11 +550,9 @@ static inline void tcg_temp_free_internal(int idx) >> ts = &s->temps[idx]; >> assert(ts->temp_allocated != 0); >> ts->temp_allocated = 0; >> - k = ts->base_type; >> - if (ts->temp_local) >> - k += TCG_TYPE_COUNT; >> - ts->next_free_temp = s->first_free_temp[k]; >> - s->first_free_temp[k] = idx; >> + >> + k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0); >> + set_bit(idx, s->free_temps[k].l); >> } >> >> void tcg_temp_free_i32(TCGv_i32 arg) >> diff --git a/tcg/tcg.h b/tcg/tcg.h >> index 902c751..c45e41b 100644 >> --- a/tcg/tcg.h >> +++ b/tcg/tcg.h >> @@ -26,7 +26,7 @@ >> #define TCG_H >> >> #include "qemu-common.h" >> - >> +#include "qemu/bitops.h" >> #include "tcg-target.h" >> >> /* Default target word size to pointer size. */ >> @@ -400,8 +400,6 @@ typedef struct TCGTemp { >> basic blocks. Otherwise, it is not >> preserved across basic blocks. */ >> unsigned int temp_allocated:1; /* never used for code gen */ >> - /* index of next free temp of same base type, -1 if end */ >> - int next_free_temp; >> const char *name; >> } TCGTemp; >> >> @@ -412,6 +410,10 @@ typedef struct TCGHelperInfo { >> >> typedef struct TCGContext TCGContext; >> >> +typedef struct TCGTempSet { >> + unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; >> +} TCGTempSet; >> + >> struct TCGContext { >> uint8_t *pool_cur, *pool_end; >> TCGPool *pool_first, *pool_current, *pool_first_large; >> @@ -419,8 +421,6 @@ struct TCGContext { >> int nb_labels; >> int nb_globals; >> int nb_temps; >> - /* index of free temps, -1 if none */ >> - int first_free_temp[TCG_TYPE_COUNT * 2]; >> >> /* goto_tb support */ >> uint8_t *code_buf; >> @@ -446,6 +446,7 @@ struct TCGContext { >> >> uint8_t *code_ptr; >> TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ >> + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; >> >> TCGHelperInfo *helpers; >> int nb_helpers; >> >
On 19 September 2013 20:58, Richard Henderson <rth@twiddle.net> wrote: > We previously allocated 32-bits per temp for the next_free_temp entry. > We now allocate 4 bits per temp across the 4 bitmaps. > > Using a linked list meant that if a translator is tweeked, resulting in > temps being freed in a different order, that would have follow-on effects > throughout the TB. Always allocating the lowest free temp means that > follow-on effects are minimized, which can make it easier to diff output > when debugging the translators. > > Signed-off-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Peter Maydell <peter.maydell@linaro.org> -- PMM
On Thu, Sep 19, 2013 at 12:58:37PM -0700, Richard Henderson wrote: > We previously allocated 32-bits per temp for the next_free_temp entry. > We now allocate 4 bits per temp across the 4 bitmaps. > > Using a linked list meant that if a translator is tweeked, resulting in > temps being freed in a different order, that would have follow-on effects > throughout the TB. Always allocating the lowest free temp means that > follow-on effects are minimized, which can make it easier to diff output > when debugging the translators. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/tcg.c | 32 +++++++++++++++----------------- > tcg/tcg.h | 11 ++++++----- > 2 files changed, 21 insertions(+), 22 deletions(-) > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index fd7fb6b..41ed8b0 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -316,11 +316,12 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) > > void tcg_func_start(TCGContext *s) > { > - int i; > tcg_pool_reset(s); > s->nb_temps = s->nb_globals; > - for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) > - s->first_free_temp[i] = -1; > + > + /* No temps have been previously allocated for size or locality. */ > + memset(s->free_temps, 0, sizeof(s->free_temps)); > + > s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); > s->nb_labels = 0; > s->current_frame_offset = s->frame_start; > @@ -468,16 +469,15 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local) > TCGTemp *ts; > int idx, k; > > - k = type; > - if (temp_local) > - k += TCG_TYPE_COUNT; > - idx = s->first_free_temp[k]; > - if (idx != -1) { > - /* There is already an available temp with the > - right type */ > + k = type + (temp_local ? TCG_TYPE_COUNT : 0); > + idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); > + if (idx < TCG_MAX_TEMPS) { > + /* There is already an available temp with the right type. */ > + clear_bit(idx, s->free_temps[k].l); > + > ts = &s->temps[idx]; > - s->first_free_temp[k] = ts->next_free_temp; > ts->temp_allocated = 1; > + assert(ts->base_type == type); > assert(ts->temp_local == temp_local); > } else { > idx = s->nb_temps; > @@ -533,7 +533,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local) > return MAKE_TCGV_I64(idx); > } > > -static inline void tcg_temp_free_internal(int idx) > +static void tcg_temp_free_internal(int idx) > { > TCGContext *s = &tcg_ctx; > TCGTemp *ts; > @@ -550,11 +550,9 @@ static inline void tcg_temp_free_internal(int idx) > ts = &s->temps[idx]; > assert(ts->temp_allocated != 0); > ts->temp_allocated = 0; > - k = ts->base_type; > - if (ts->temp_local) > - k += TCG_TYPE_COUNT; > - ts->next_free_temp = s->first_free_temp[k]; > - s->first_free_temp[k] = idx; > + > + k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0); > + set_bit(idx, s->free_temps[k].l); > } > > void tcg_temp_free_i32(TCGv_i32 arg) > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 902c751..c45e41b 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -26,7 +26,7 @@ > #define TCG_H > > #include "qemu-common.h" > - > +#include "qemu/bitops.h" > #include "tcg-target.h" > > /* Default target word size to pointer size. */ > @@ -400,8 +400,6 @@ typedef struct TCGTemp { > basic blocks. Otherwise, it is not > preserved across basic blocks. */ > unsigned int temp_allocated:1; /* never used for code gen */ > - /* index of next free temp of same base type, -1 if end */ > - int next_free_temp; > const char *name; > } TCGTemp; > > @@ -412,6 +410,10 @@ typedef struct TCGHelperInfo { > > typedef struct TCGContext TCGContext; > > +typedef struct TCGTempSet { > + unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; > +} TCGTempSet; > + > struct TCGContext { > uint8_t *pool_cur, *pool_end; > TCGPool *pool_first, *pool_current, *pool_first_large; > @@ -419,8 +421,6 @@ struct TCGContext { > int nb_labels; > int nb_globals; > int nb_temps; > - /* index of free temps, -1 if none */ > - int first_free_temp[TCG_TYPE_COUNT * 2]; > > /* goto_tb support */ > uint8_t *code_buf; > @@ -446,6 +446,7 @@ struct TCGContext { > > uint8_t *code_ptr; > TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ > + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; > > TCGHelperInfo *helpers; > int nb_helpers; It looks fine to me, though I don't know if it has any impact on the performances. find_first_bit() is calling find_next_bit() which is 87 instructions longs on x86_64. Maybe implementing find_first_bit() directly would help a bit there. That said: Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
On 12/08/2013 05:48 AM, Aurelien Jarno wrote: > It looks fine to me, though I don't know if it has any impact on the > performances. I couldn't measure any performance difference at all. But for me, the improvement in debugging for the translators is important. r~
On Mon, Dec 09, 2013 at 11:23:14AM -0800, Richard Henderson wrote: > On 12/08/2013 05:48 AM, Aurelien Jarno wrote: > > It looks fine to me, though I don't know if it has any impact on the > > performances. > > I couldn't measure any performance difference at all. Ok great. Still in the long term we should provide a dedicated find_first_bit function that can be inlined, so that the compiler can do numerous optimizations in the case the size is known at compile time. > But for me, the improvement in debugging for the translators is important. > Yeah, I understand the point, that's why I gave a reviewed-by: Aurelien
diff --git a/tcg/tcg.c b/tcg/tcg.c index fd7fb6b..41ed8b0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -316,11 +316,12 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) void tcg_func_start(TCGContext *s) { - int i; tcg_pool_reset(s); s->nb_temps = s->nb_globals; - for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) - s->first_free_temp[i] = -1; + + /* No temps have been previously allocated for size or locality. */ + memset(s->free_temps, 0, sizeof(s->free_temps)); + s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); s->nb_labels = 0; s->current_frame_offset = s->frame_start; @@ -468,16 +469,15 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local) TCGTemp *ts; int idx, k; - k = type; - if (temp_local) - k += TCG_TYPE_COUNT; - idx = s->first_free_temp[k]; - if (idx != -1) { - /* There is already an available temp with the - right type */ + k = type + (temp_local ? TCG_TYPE_COUNT : 0); + idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); + if (idx < TCG_MAX_TEMPS) { + /* There is already an available temp with the right type. */ + clear_bit(idx, s->free_temps[k].l); + ts = &s->temps[idx]; - s->first_free_temp[k] = ts->next_free_temp; ts->temp_allocated = 1; + assert(ts->base_type == type); assert(ts->temp_local == temp_local); } else { idx = s->nb_temps; @@ -533,7 +533,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local) return MAKE_TCGV_I64(idx); } -static inline void tcg_temp_free_internal(int idx) +static void tcg_temp_free_internal(int idx) { TCGContext *s = &tcg_ctx; TCGTemp *ts; @@ -550,11 +550,9 @@ static inline void tcg_temp_free_internal(int idx) ts = &s->temps[idx]; assert(ts->temp_allocated != 0); ts->temp_allocated = 0; - k = ts->base_type; - if (ts->temp_local) - k += TCG_TYPE_COUNT; - ts->next_free_temp = s->first_free_temp[k]; - s->first_free_temp[k] = idx; + + k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0); + set_bit(idx, s->free_temps[k].l); } void tcg_temp_free_i32(TCGv_i32 arg) diff --git a/tcg/tcg.h b/tcg/tcg.h index 902c751..c45e41b 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -26,7 +26,7 @@ #define TCG_H #include "qemu-common.h" - +#include "qemu/bitops.h" #include "tcg-target.h" /* Default target word size to pointer size. */ @@ -400,8 +400,6 @@ typedef struct TCGTemp { basic blocks. Otherwise, it is not preserved across basic blocks. */ unsigned int temp_allocated:1; /* never used for code gen */ - /* index of next free temp of same base type, -1 if end */ - int next_free_temp; const char *name; } TCGTemp; @@ -412,6 +410,10 @@ typedef struct TCGHelperInfo { typedef struct TCGContext TCGContext; +typedef struct TCGTempSet { + unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; +} TCGTempSet; + struct TCGContext { uint8_t *pool_cur, *pool_end; TCGPool *pool_first, *pool_current, *pool_first_large; @@ -419,8 +421,6 @@ struct TCGContext { int nb_labels; int nb_globals; int nb_temps; - /* index of free temps, -1 if none */ - int first_free_temp[TCG_TYPE_COUNT * 2]; /* goto_tb support */ uint8_t *code_buf; @@ -446,6 +446,7 @@ struct TCGContext { uint8_t *code_ptr; TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; TCGHelperInfo *helpers; int nb_helpers;
We previously allocated 32-bits per temp for the next_free_temp entry. We now allocate 4 bits per temp across the 4 bitmaps. Using a linked list meant that if a translator is tweeked, resulting in temps being freed in a different order, that would have follow-on effects throughout the TB. Always allocating the lowest free temp means that follow-on effects are minimized, which can make it easier to diff output when debugging the translators. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/tcg.c | 32 +++++++++++++++----------------- tcg/tcg.h | 11 ++++++----- 2 files changed, 21 insertions(+), 22 deletions(-)