Message ID: 20170720205041.GA8754@flamenco
State: New
On 07/20/2017 10:50 AM, Emilio G. Cota wrote:
> On Wed, Jul 19, 2017 at 22:04:50 -1000, Richard Henderson wrote:
>> On 07/19/2017 05:09 PM, Emilio G. Cota wrote:
>>> +    /* We do not yet support multiple TCG contexts, so use one region for now */
>>> +    n_regions = 1;
>>> +
>>> +    /* start on a page-aligned address */
>>> +    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
>>> +    g_assert(buf < tcg_init_ctx.code_gen_buffer + size);
>>> +
>>> +    /* discard that initial portion */
>>> +    size -= buf - tcg_init_ctx.code_gen_buffer;
>>
>> It seems pointless wasting most of a page after the prologue when
>> n_regions == 1.  We don't really need to start on a page boundary
>> in that case.
>>
>>> +    /* make region_size a multiple of page_size */
>>> +    region_size = size / n_regions;
>>> +    region_size = QEMU_ALIGN_DOWN(region_size, qemu_real_host_page_size);
>>
>> This division can result in a number of pages at the end of the region
>> being unused.  Is it worthwhile freeing them?  Or marking them
>> mprotect_none along with the last guard page?
>
> Perhaps we should then enlarge both the first and last regions so that we
> fully use the buffer.

I really like the idea.  That's a lot of space recovered for 64k page hosts.

I do think we can make the computation clearer.  How about

> static void tcg_region_assign(TCGContext *s, size_t curr_region)
> {
> +    void *aligned = QEMU_ALIGN_PTR_UP(region.start, qemu_real_host_page_size);
>      void *buf;
> +    size_t size = region.size;
>
> -    buf = region.buf + curr_region * (region.size + qemu_real_host_page_size);
> +    /* The beginning of the first region might not be page-aligned */
> +    if (curr_region == 0) {
> +        buf = region.start;
> +        size += aligned - buf;
> +    } else {
> +        buf = aligned + curr_region * (region.size + qemu_real_host_page_size);
> +    }
> +    /* the last region might be larger than region.size */
> +    if (curr_region == region.n - 1) {
> +        void *aligned_end = buf + size;
> +
> +        size += region.end - qemu_real_host_page_size - aligned_end;
> +    }
>      s->code_gen_buffer = buf;
>      s->code_gen_ptr = buf;
> -    s->code_gen_buffer_size = region.size;
> -    s->code_gen_highwater = buf + region.size - TCG_HIGHWATER;
> +    s->code_gen_buffer_size = size;
> +    s->code_gen_highwater = buf + size - TCG_HIGHWATER;
> }

static inline void tcg_region_bounds(TCGContext *s, size_t curr_region,
                                     void **pstart, void **pend)
{
    void *start, *end;

    /* ??? Maybe store "aligned" precomputed.  */
    start = QEMU_ALIGN_PTR_UP(region.start, qemu_real_host_page_size);
    /* ??? Maybe store "stride" precomputed.  */
    start += curr_region * (region.size + qemu_real_host_page_size);
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(s, curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

> @@ -409,44 +424,50 @@ static size_t tcg_n_regions(void)
>  void tcg_region_init(void)
>  {
>      void *buf = tcg_init_ctx.code_gen_buffer;
> +    void *aligned;
>      size_t size = tcg_init_ctx.code_gen_buffer_size;
> +    size_t page_size = qemu_real_host_page_size;
>      size_t region_size;
>      size_t n_regions;
>      size_t i;
> +    int rc;
>
>      n_regions = tcg_n_regions();
>
> -    /* start on a page-aligned address */
> -    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
> -    g_assert(buf < tcg_init_ctx.code_gen_buffer + size);
> -
> -    /* discard that initial portion */
> -    size -= buf - tcg_init_ctx.code_gen_buffer;
> -
> -    /* make region_size a multiple of page_size */
> -    region_size = size / n_regions;
> -    region_size = QEMU_ALIGN_DOWN(region_size, qemu_real_host_page_size);
> +    /* The first region will be 'aligned - buf' bytes larger than the others */
> +    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
> +    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
> +    /*
> +     * Make region_size a multiple of page_size, using aligned as the start.
> +     * As a result of this we might end up with a few extra pages at the end
> +     * of the buffer; we will assign those to the last region.
> +     */
> +    region_size = (size - (aligned - buf)) / n_regions;
> +    region_size = QEMU_ALIGN_DOWN(region_size, page_size);
>
>      /* A region must have at least 2 pages; one code, one guard */
> -    g_assert(region_size >= 2 * qemu_real_host_page_size);
> +    g_assert(region_size >= 2 * page_size);
>
>      /* init the region struct */
>      qemu_mutex_init(&region.lock);
>      region.n = n_regions;
> -    region.buf = buf;
> +    region.start = buf;
> +    /* page-align the end, since its last page will be a guard page */
> +    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
>      /* do not count the guard page in region.size */
> -    region.size = region_size - qemu_real_host_page_size;
> +    region.size = region_size - page_size;
>
> -    /* set guard pages */
> -    for (i = 0; i < region.n; i++) {
> +    /* set guard pages for the first n-1 regions */
> +    for (i = 0; i < region.n - 1; i++) {
>          void *guard;
> -        int rc;
>
> -        guard = region.buf + region.size;
> -        guard += i * (region.size + qemu_real_host_page_size);
> -        rc = qemu_mprotect_none(guard, qemu_real_host_page_size);
> +        guard = aligned + region.size + i * (region.size + page_size);
> +        rc = qemu_mprotect_none(guard, page_size);
>          g_assert(!rc);
>      }
> +    /* the last region gets the rest of the buffer */
> +    rc = qemu_mprotect_none(region.end - page_size, page_size);
> +    g_assert(!rc);

    for (i = region.n; i-- > 0; ) {
        void *start, *end;

        tcg_region_bounds(s, i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(rc == 0);
    }


r~
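[For readers following along, here is a minimal standalone sketch (not QEMU code) of the carve-up being discussed: the first region absorbs the unaligned head of the buffer, the last region absorbs the leftover tail, and one page per stride is reserved as a guard. PAGE_SIZE, N_REGIONS, the malloc'd buffer, and the +123 offset are invented for illustration, and the QEMU_ALIGN_PTR_UP/QEMU_ALIGN_PTR_DOWN macros are replaced by plain mask arithmetic:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL   /* assumed host page size, for illustration only */
#define N_REGIONS 4UL

int main(void)
{
    size_t size = 64 * PAGE_SIZE;
    char *buf = malloc(size + PAGE_SIZE);
    if (buf == NULL) {
        return 1;
    }
    char *start = buf + 123;   /* deliberately unaligned, like the post-prologue pointer */
    /* equivalent of QEMU_ALIGN_PTR_UP(start, PAGE_SIZE) */
    char *aligned = (char *)(((uintptr_t)start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
    /* page-align the end; the final page is the last region's guard */
    char *end = (char *)((uintptr_t)(start + size) & ~(PAGE_SIZE - 1));
    /* per-region stride, a multiple of the page size, including the guard page */
    size_t stride = ((size_t)(end - aligned) / N_REGIONS) & ~(PAGE_SIZE - 1);
    size_t region_size = stride - PAGE_SIZE;   /* usable bytes per region */

    for (size_t i = 0; i < N_REGIONS; i++) {
        /* same shape as tcg_region_bounds(): compute the aligned grid slot,
         * then widen the first and last regions to cover the whole buffer */
        char *rstart = aligned + i * stride;
        char *rend = rstart + region_size;     /* guard page begins at rend */
        if (i == 0) {
            rstart = start;                    /* absorb the unaligned head */
        }
        if (i == N_REGIONS - 1) {
            rend = end - PAGE_SIZE;            /* absorb the leftover tail pages */
        }
        printf("region %zu: code [%p, %p), %zu usable bytes, guard at %p\n",
               i, (void *)rstart, (void *)rend,
               (size_t)(rend - rstart), (void *)rend);
    }
    free(buf);
    return 0;
}

Since region_size is rounded down to a page multiple, the tail left over by the division can approach N_REGIONS pages; with 64 KiB pages that is the "lot of space recovered" mentioned above.]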
On Thu, Jul 20, 2017 at 11:22:10 -1000, Richard Henderson wrote:
> > Perhaps we should then enlarge both the first and last regions so that we
> > fully use the buffer.
>
> I really like the idea.  That's a lot of space recovered for 64k page hosts.
>
> I do think we can make the computation clearer.  How about
(snip)
> static inline void tcg_region_bounds(TCGContext *s, size_t curr_region,
>                                      void **pstart, void **pend)
> {
>     void *start, *end;
>
>     /* ??? Maybe store "aligned" precomputed.  */
>     start = QEMU_ALIGN_PTR_UP(region.start, qemu_real_host_page_size);
>     /* ??? Maybe store "stride" precomputed.  */
>     start += curr_region * (region.size + qemu_real_host_page_size);
>     end = start + region.size;
>
>     if (curr_region == 0) {
>         start = region.start;
>     }
>     if (curr_region == region.n - 1) {
>         end = region.end;
>     }
>
>     *pstart = start;
>     *pend = end;
> }

That's a nice helper -- will do it this way.

For v4, should I send all patches again, or just the handful of patches
that are changing from v3?

		E.
On 07/20/2017 01:23 PM, Emilio G. Cota wrote:
> That's a nice helper -- will do it this way.
>
> For v4, should I send all patches again, or just the handful of patches
> that are changing from v3?

I don't mind if you just send the changed patches.  Just be sure to
reference the tree in the cover letter.


r~
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a5c01be..8bf8bca 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -129,7 +129,8 @@ static unsigned int n_tcg_ctxs;
  */
 struct tcg_region_state {
     QemuMutex lock;
-    void *buf;      /* set at init time */
+    void *start;    /* set at init time */
+    void *end;      /* set at init time */
     size_t n;       /* set at init time */
     size_t size;    /* size of one region; set at init time */
     size_t current; /* protected by the lock */
@@ -277,13 +278,27 @@ TCGLabel *gen_new_label(void)
 
 static void tcg_region_assign(TCGContext *s, size_t curr_region)
 {
+    void *aligned = QEMU_ALIGN_PTR_UP(region.start, qemu_real_host_page_size);
     void *buf;
+    size_t size = region.size;
 
-    buf = region.buf + curr_region * (region.size + qemu_real_host_page_size);
+    /* The beginning of the first region might not be page-aligned */
+    if (curr_region == 0) {
+        buf = region.start;
+        size += aligned - buf;
+    } else {
+        buf = aligned + curr_region * (region.size + qemu_real_host_page_size);
+    }
+    /* the last region might be larger than region.size */
+    if (curr_region == region.n - 1) {
+        void *aligned_end = buf + size;
+
+        size += region.end - qemu_real_host_page_size - aligned_end;
+    }
     s->code_gen_buffer = buf;
     s->code_gen_ptr = buf;
-    s->code_gen_buffer_size = region.size;
-    s->code_gen_highwater = buf + region.size - TCG_HIGHWATER;
+    s->code_gen_buffer_size = size;
+    s->code_gen_highwater = buf + size - TCG_HIGHWATER;
 }
 
 static bool tcg_region_alloc__locked(TCGContext *s)
@@ -409,44 +424,50 @@ static size_t tcg_n_regions(void)
 void tcg_region_init(void)
 {
     void *buf = tcg_init_ctx.code_gen_buffer;
+    void *aligned;
     size_t size = tcg_init_ctx.code_gen_buffer_size;
+    size_t page_size = qemu_real_host_page_size;
     size_t region_size;
     size_t n_regions;
     size_t i;
+    int rc;
 
     n_regions = tcg_n_regions();
 
-    /* start on a page-aligned address */
-    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
-    g_assert(buf < tcg_init_ctx.code_gen_buffer + size);
-
-    /* discard that initial portion */
-    size -= buf - tcg_init_ctx.code_gen_buffer;
-
-    /* make region_size a multiple of page_size */
-    region_size = size / n_regions;
-    region_size = QEMU_ALIGN_DOWN(region_size, qemu_real_host_page_size);
+    /* The first region will be 'aligned - buf' bytes larger than the others */
+    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
+    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
+    /*
+     * Make region_size a multiple of page_size, using aligned as the start.
+     * As a result of this we might end up with a few extra pages at the end
+     * of the buffer; we will assign those to the last region.
+     */
+    region_size = (size - (aligned - buf)) / n_regions;
+    region_size = QEMU_ALIGN_DOWN(region_size, page_size);
 
     /* A region must have at least 2 pages; one code, one guard */
-    g_assert(region_size >= 2 * qemu_real_host_page_size);
+    g_assert(region_size >= 2 * page_size);
 
     /* init the region struct */
     qemu_mutex_init(&region.lock);
     region.n = n_regions;
-    region.buf = buf;
+    region.start = buf;
+    /* page-align the end, since its last page will be a guard page */
+    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
     /* do not count the guard page in region.size */
-    region.size = region_size - qemu_real_host_page_size;
+    region.size = region_size - page_size;
 
-    /* set guard pages */
-    for (i = 0; i < region.n; i++) {
+    /* set guard pages for the first n-1 regions */
+    for (i = 0; i < region.n - 1; i++) {
         void *guard;
-        int rc;
 
-        guard = region.buf + region.size;
-        guard += i * (region.size + qemu_real_host_page_size);
-        rc = qemu_mprotect_none(guard, qemu_real_host_page_size);
+        guard = aligned + region.size + i * (region.size + page_size);
+        rc = qemu_mprotect_none(guard, page_size);
         g_assert(!rc);
     }
+    /* the last region gets the rest of the buffer */
+    rc = qemu_mprotect_none(region.end - page_size, page_size);
+    g_assert(!rc);
 
     /* In user-mode we support only one ctx, so do the initial allocation now */
 #ifdef CONFIG_USER_ONLY
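[As a footnote on the guard pages themselves: on POSIX hosts qemu_mprotect_none() boils down to mprotect(..., PROT_NONE), so a code-generation overrun that slips past the highwater check faults on the guard page instead of silently overwriting the next region. Below is a minimal standalone demonstration under assumptions of my own: the mmap'd toy buffer, its size, and the NOP fill are invented for illustration, and the PROT_EXEC request may be refused on hardened hosts:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    size_t size = 4 * page;            /* three code pages plus one guard */
    char *buf = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* install the guard, as tcg_region_init() does via qemu_mprotect_none() */
    if (mprotect(buf + size - page, page, PROT_NONE) != 0) {
        perror("mprotect");
        return 1;
    }

    memset(buf, 0x90, size - page);    /* filling the code pages is fine */
    printf("guard page installed at %p\n", (void *)(buf + size - page));
    /* buf[size - page] = 0x90;          would SIGSEGV on the guard page */

    munmap(buf, size);
    return 0;
}

This is also why region.size excludes the guard page in the patch above: the usable area and the trapping page are accounted for separately.]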