@@ -135,6 +135,7 @@ typedef struct PhysPageMap {
struct AddressSpaceDispatch {
struct rcu_head rcu;
+ MemoryRegionSection *mru_section;
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
@@ -342,14 +343,26 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
hwaddr addr,
bool resolve_subpage)
{
- MemoryRegionSection *section;
+ MemoryRegionSection *section = atomic_read(&d->mru_section);
subpage_t *subpage;
+ bool update;

- section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
+ if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
+ range_covers_byte(section->offset_within_address_space,
+ section->size.lo, addr)) {
+ update = false;
+ } else {
+ section = phys_page_find(d->phys_map, addr, d->map.nodes,
+ d->map.sections);
+ update = true;
+ }
if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem);
section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
}
+ if (update) {
+ atomic_set(&d->mru_section, section);
+ }
return section;
}
Under heavy workloads the lookup will likely end up with the same
MemoryRegionSection as last time. Using a pointer to cache the result,
like ram_list.mru_block, significantly reduces the computation cost of
address_space_translate.

During an address space topology update, as->dispatch is reallocated,
so the pointer is invalidated automatically.

Perf reports a visible drop in CPU usage.

Before:

  +   2.06%  phys_page_find
  +   0.95%  address_space_translate_internal
  +   0.80%  address_space_translate

After:

  +   0.78%  address_space_translate
  +   0.77%  address_space_translate_internal
  +   0.69%  address_space_lookup_region

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 exec.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)
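For readers outside the QEMU tree, the pattern is small enough to show in
isolation. Below is a minimal, self-contained sketch of the same one-entry
MRU cache, using C11 atomics in place of QEMU's atomic_read()/atomic_set()
macros. Section, Dispatch, slow_page_find() and lookup_region() are
hypothetical stand-ins for MemoryRegionSection, AddressSpaceDispatch,
phys_page_find() and address_space_lookup_region(); the patch's extra
check against the shared "unassigned" section is omitted here.

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdint.h>

  /* Hypothetical stand-ins; only the fields the cache needs are shown. */
  typedef struct Section {
      uint64_t start;               /* offset_within_address_space */
      uint64_t size;
  } Section;

  typedef struct Dispatch {
      _Atomic(Section *) mru_section;   /* one-entry MRU cache */
      /* ... the multi-level phys map would live here too ... */
  } Dispatch;

  Section *slow_page_find(Dispatch *d, uint64_t addr);  /* full tree walk */

  /* Overflow-safe "addr in [start, start + size)", in the spirit of
   * range_covers_byte(): if addr < start, the subtraction wraps to a
   * huge value that cannot be below size. */
  static inline bool section_covers(const Section *s, uint64_t addr)
  {
      return addr - s->start < s->size;
  }

  Section *lookup_region(Dispatch *d, uint64_t addr)
  {
      /* Fast path: reuse the last result if it still covers addr.  A
       * relaxed load suffices because the cache is only a hint; a stale
       * or concurrently published value is rejected by the range check. */
      Section *s = atomic_load_explicit(&d->mru_section,
                                        memory_order_relaxed);

      if (s && section_covers(s, addr)) {
          return s;
      }

      /* Slow path: full lookup, then publish the result for next time. */
      s = slow_page_find(d, addr);
      atomic_store_explicit(&d->mru_section, s, memory_order_relaxed);
      return s;
  }

Because the cache lives inside the dispatch structure itself, replacing
the dispatch on a topology change (under RCU, in QEMU's case) discards
the cached pointer together with everything it points into, which is why
no explicit invalidation step is needed.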