diff mbox

[10/21] memory: make section size a 128-bit integer

Message ID 1369948629-2833-11-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini May 30, 2013, 9:16 p.m. UTC
So far, the size of all regions passed to listeners could fit in 64 bits,
because artificial regions (containers and aliases) are eliminated by
the memory core, leaving only device regions which have reasonable sizes.

An IOMMU however cannot be eliminated by the memory core, and may have
an artificial size, hence we may need 65 bits to represent its size.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                        | 37 +++++++++++++++++++++----------------
 hw/core/loader.c              |  2 +-
 hw/display/exynos4210_fimd.c  |  4 ++--
 hw/display/framebuffer.c      |  3 ++-
 hw/misc/vfio.c                |  4 ++--
 hw/virtio/dataplane/hostmem.c |  2 +-
 hw/virtio/vhost.c             |  4 ++--
 hw/virtio/virtio-balloon.c    |  2 +-
 include/exec/memory.h         |  5 ++++-
 include/qemu/int128.h         | 10 ++++++++++
 kvm-all.c                     | 23 ++++++++++++++---------
 memory.c                      | 14 +++++++-------
 xen-all.c                     |  6 +++---
 13 files changed, 70 insertions(+), 46 deletions(-)

Comments

Alexey Kardashevskiy May 31, 2013, 6:56 a.m. UTC | #1
On 05/31/2013 07:16 AM, Paolo Bonzini wrote:
> So far, the size of all regions passed to listeners could fit in 64 bits,
> because artificial regions (containers and aliases) are eliminated by
> the memory core, leaving only device regions which have reasonable sizes
> 
> An IOMMU however cannot be eliminated by the memory core, and may have
> an artificial size, hence we may need 65 bits to represent its size.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

[...]

> diff --git a/kvm-all.c b/kvm-all.c
> index 8222729..86c9af3 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -316,7 +316,7 @@ static void kvm_log_start(MemoryListener *listener,
>      int r;
>  
>      r = kvm_dirty_pages_log_change(section->offset_within_address_space,
> -                                   section->size, true);
> +                                   int128_get64(section->size), true);
>      if (r < 0) {
>          abort();
>      }
> @@ -328,7 +328,7 @@ static void kvm_log_stop(MemoryListener *listener,
>      int r;
>  
>      r = kvm_dirty_pages_log_change(section->offset_within_address_space,
> -                                   section->size, false);
> +                                   int128_get64(section->size), false);
>      if (r < 0) {
>          abort();
>      }
> @@ -366,7 +366,8 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
>      unsigned int i, j;
>      unsigned long page_number, c;
>      hwaddr addr, addr1;
> -    unsigned int len = ((section->size / getpagesize()) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
> +    unsigned int pages = int128_get64(section->size) / getpagesize();
> +    unsigned int len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
>      unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
>  
>      /*
> @@ -409,7 +410,7 @@ static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
>      KVMSlot *mem;
>      int ret = 0;
>      hwaddr start_addr = section->offset_within_address_space;
> -    hwaddr end_addr = start_addr + section->size;
> +    hwaddr end_addr = start_addr + int128_get64(section->size);
>  
>      d.dirty_bitmap = NULL;
>      while (start_addr < end_addr) {
> @@ -619,7 +620,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>      MemoryRegion *mr = section->mr;
>      bool log_dirty = memory_region_is_logging(mr);
>      hwaddr start_addr = section->offset_within_address_space;
> -    ram_addr_t size = section->size;
> +    ram_addr_t size = int128_get64(section->size);
>      void *ram = NULL;
>      unsigned delta;
>  

I tried to replay part2 and part3 on qemu.org/master (I assume part1 is
already there); part2 applied cleanly, but part3 failed. What did I do wrong? Thanks.


alexey@ka1:~/pcipassthru/qemu-impreza$ git am ~/bonzini-iommu3.mbox
Applying: memory: Introduce address_space_lookup_region
Applying: memory: move private types to exec.c
Applying: exec: Allow unaligned address_space_rw
Applying: exec: Resolve subpages in one step except for IOTLB fills
Applying: exec: Implement subpage_read/write via address_space_rw
Applying: exec: return MemoryRegion from address_space_translate
Applying: Revert "memory: limit sections in the radix tree to the actual
address space size"
Applying: Revert "s390x: reduce TARGET_PHYS_ADDR_SPACE_BITS to 62"
Applying: exec: reorganize mem_add to match Int128 version
Applying: memory: make section size a 128-bit integer
error: patch failed: kvm-all.c:619
error: kvm-all.c: patch does not apply
Patch failed at 0010 memory: make section size a 128-bit integer
When you have resolved this problem run "git am --resolved".
If you would prefer to skip this patch, instead run "git am --skip".
To restore the original branch and stop patching run "git am --abort".
Paolo Bonzini May 31, 2013, 7:12 a.m. UTC | #2
Il 31/05/2013 08:56, Alexey Kardashevskiy ha scritto:
> On 05/31/2013 07:16 AM, Paolo Bonzini wrote:
>> So far, the size of all regions passed to listeners could fit in 64 bits,
>> because artificial regions (containers and aliases) are eliminated by
>> the memory core, leaving only device regions which have reasonable sizes
>>
>> An IOMMU however cannot be eliminated by the memory core, and may have
>> an artificial size, hence we may need 65 bits to represent its size.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> [...]
> 
>> diff --git a/kvm-all.c b/kvm-all.c
>> index 8222729..86c9af3 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -316,7 +316,7 @@ static void kvm_log_start(MemoryListener *listener,
>>      int r;
>>  
>>      r = kvm_dirty_pages_log_change(section->offset_within_address_space,
>> -                                   section->size, true);
>> +                                   int128_get64(section->size), true);
>>      if (r < 0) {
>>          abort();
>>      }
>> @@ -328,7 +328,7 @@ static void kvm_log_stop(MemoryListener *listener,
>>      int r;
>>  
>>      r = kvm_dirty_pages_log_change(section->offset_within_address_space,
>> -                                   section->size, false);
>> +                                   int128_get64(section->size), false);
>>      if (r < 0) {
>>          abort();
>>      }
>> @@ -366,7 +366,8 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
>>      unsigned int i, j;
>>      unsigned long page_number, c;
>>      hwaddr addr, addr1;
>> -    unsigned int len = ((section->size / getpagesize()) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
>> +    unsigned int pages = int128_get64(section->size) / getpagesize();
>> +    unsigned int len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
>>      unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
>>  
>>      /*
>> @@ -409,7 +410,7 @@ static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
>>      KVMSlot *mem;
>>      int ret = 0;
>>      hwaddr start_addr = section->offset_within_address_space;
>> -    hwaddr end_addr = start_addr + section->size;
>> +    hwaddr end_addr = start_addr + int128_get64(section->size);
>>  
>>      d.dirty_bitmap = NULL;
>>      while (start_addr < end_addr) {
>> @@ -619,7 +620,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>>      MemoryRegion *mr = section->mr;
>>      bool log_dirty = memory_region_is_logging(mr);
>>      hwaddr start_addr = section->offset_within_address_space;
>> -    ram_addr_t size = section->size;
>> +    ram_addr_t size = int128_get64(section->size);
>>      void *ram = NULL;
>>      unsigned delta;
>>  
> 
> Tried to replay part2 and part3 on qemu.org/master (I assume part1 is
> already there), part2 played well, part3 failed. What did I do wrong? Thanks.

Needs a rebase, it seems.  I'll push to the iommu branch later today and
repost this patch.

Paolo
Richard Henderson May 31, 2013, 10:18 p.m. UTC | #3
On 05/30/2013 02:16 PM, Paolo Bonzini wrote:
> +static inline Int128 int128_rshift(Int128 a, int n)
> +{
> +    return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), (a.hi >> n) };
> +}

Produces wrong results for n == 0, since (a.hi << 64) is undefined.


r~
Paolo Bonzini June 2, 2013, 2:03 p.m. UTC | #4
Il 01/06/2013 00:18, Richard Henderson ha scritto:
> On 05/30/2013 02:16 PM, Paolo Bonzini wrote:
>> +static inline Int128 int128_rshift(Int128 a, int n)
>> +{
>> +    return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), (a.hi >> n) };
>> +}
> 
> Produces wrong results for n == 0, since (a.hi << 64) is undefined.

Good catch, I'm adding an


    if (!n) {
        return a;
    }

before.

Paolo
Peter Maydell June 2, 2013, 2:18 p.m. UTC | #5
On 31 May 2013 23:18, Richard Henderson <rth@twiddle.net> wrote:
> On 05/30/2013 02:16 PM, Paolo Bonzini wrote:
>> +static inline Int128 int128_rshift(Int128 a, int n)
>> +{
>> +    return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), (a.hi >> n) };
>> +}
>
> Produces wrong results for n == 0, since (a.hi << 64) is undefined.

It produces wrong results for shifts by more than 64,
for that matter.

thanks
-- PMM
Paolo Bonzini June 2, 2013, 2:36 p.m. UTC | #6
Il 02/06/2013 16:18, Peter Maydell ha scritto:
> On 31 May 2013 23:18, Richard Henderson <rth@twiddle.net> wrote:
>> On 05/30/2013 02:16 PM, Paolo Bonzini wrote:
>>> +static inline Int128 int128_rshift(Int128 a, int n)
>>> +{
>>> +    return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), (a.hi >> n) };
>>> +}
>>
>> Produces wrong results for n == 0, since (a.hi << 64) is undefined.
> 
> It produces wrong results for shifts by more than 64,
> for that matter.

This should work:

    int64_t h;
    if (!n) {
        return a;
    }
    h = a.hi >> n;
    if (n >= 64) {
        return (Int128) { h, h >> 63 };
    } else {
       return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), h };
    }

Paolo
Peter Maydell June 2, 2013, 2:50 p.m. UTC | #7
On 2 June 2013 15:36, Paolo Bonzini <pbonzini@redhat.com> wrote:
> This should work:
>
>     int64_t h;
>     if (!n) {
>         return a;
>     }
>     h = a.hi >> n;

This is undefined for n >= 64.

>     if (n >= 64) {
>         return (Int128) { h, h >> 63 };
>     } else {
>        return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), h };
>     }

I would suggest looking at fpu/softfloat-macros.h:shift128Right()
except that that has at least one clearly dubious thing in it
(a check for "count < 64" in an else case that can't be reached
if count < 64)...

thanks
-- PMM
Paolo Bonzini June 2, 2013, 7:52 p.m. UTC | #8
Il 02/06/2013 16:50, Peter Maydell ha scritto:
> On 2 June 2013 15:36, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> This should work:
>>
>>     int64_t h;
>>     if (!n) {
>>         return a;
>>     }
>>     h = a.hi >> n;
> 
> This is undefined for n >= 64.

Yes, it has to be a.hi >> (n & 63).

> I would suggest looking at fpu/softfloat-macros.h:shift128Right()
> except that that has at least one clearly dubious thing in it
> (a check for "count < 64" in an else case that can't be reached
> if count < 64)...

It's a bit different in that I want an arithmetic right shift.

Paolo
Alexey Kardashevskiy June 6, 2013, 8:36 a.m. UTC | #9
On 05/31/2013 07:16 AM, Paolo Bonzini wrote:
> So far, the size of all regions passed to listeners could fit in 64 bits,
> because artificial regions (containers and aliases) are eliminated by
> the memory core, leaving only device regions which have reasonable sizes
> 
> An IOMMU however cannot be eliminated by the memory core, and may have
> an artificial size, hence we may need 65 bits to represent its size.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  exec.c                        | 37 +++++++++++++++++++++----------------
>  hw/core/loader.c              |  2 +-
>  hw/display/exynos4210_fimd.c  |  4 ++--
>  hw/display/framebuffer.c      |  3 ++-
>  hw/misc/vfio.c                |  4 ++--
>  hw/virtio/dataplane/hostmem.c |  2 +-
>  hw/virtio/vhost.c             |  4 ++--
>  hw/virtio/virtio-balloon.c    |  2 +-
>  include/exec/memory.h         |  5 ++++-
>  include/qemu/int128.h         | 10 ++++++++++
>  kvm-all.c                     | 23 ++++++++++++++---------
>  memory.c                      | 14 +++++++-------
>  xen-all.c                     |  6 +++---
>  13 files changed, 70 insertions(+), 46 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index cf3ea6c..b86f0cc 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -801,7 +801,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
>      MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
> -        .size = TARGET_PAGE_SIZE,
> +        .size = int128_make64(TARGET_PAGE_SIZE),
>      };
>      hwaddr start, end;
>  
> @@ -816,16 +816,18 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
> -    end = start + section->size - 1;
> +    end = start + int128_get64(section->size) - 1;
>      subpage_register(subpage, start, end, phys_section_add(section));
>  }
>  
>  
> -static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
> +static void register_multipage(AddressSpaceDispatch *d,
> +                               MemoryRegionSection *section)
>  {
>      hwaddr start_addr = section->offset_within_address_space;
>      uint16_t section_index = phys_section_add(section);
> -    uint64_t num_pages = section->size >> TARGET_PAGE_BITS;
> +    uint64_t num_pages = int128_get64(int128_rshift(section->size,
> +                                                    TARGET_PAGE_BITS));
>  
>      assert(num_pages);
>      phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
> @@ -835,28 +837,29 @@ static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
>  {
>      AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
>      MemoryRegionSection now = *section, remain = *section;
> +    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
>  
>      if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
>          uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space;
>  
> -        now.size = MIN(left, now.size);
> +        now.size = int128_min(int128_make64(left), now.size);
>          register_subpage(d, &now);
>      } else {
> -        now.size = 0;
> +        now.size = int128_zero();
>      }
> -    while (remain.size != now.size) {
> -        remain.size -= now.size;
> -        remain.offset_within_address_space += now.size;
> -        remain.offset_within_region += now.size;
> +    while (int128_ne(remain.size, now.size)) {
> +        remain.size = int128_sub(remain.size, now.size);
> +        remain.offset_within_address_space += int128_get64(now.size);
> +        remain.offset_within_region += int128_get64(now.size);
>          now = remain;
> -        if (remain.size < TARGET_PAGE_SIZE) {
> +        if (int128_lt(remain.size, page_size)) {
>              register_subpage(d, &now);
>          } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
> -            now.size = TARGET_PAGE_SIZE;
> +            now.size = page_size;
>              register_subpage(d, &now);
>          } else {
> -            now.size &= -TARGET_PAGE_SIZE;
> +            now.size = int128_and(now.size, int128_neg(page_size));
>              register_multipage(d, &now);
>          }
>      }
> @@ -1666,7 +1669,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .mr = mr,
>          .offset_within_address_space = 0,
>          .offset_within_region = 0,
> -        .size = UINT64_MAX,
> +        .size = int128_2_64(),
>      };
>  
>      return phys_section_add(&section);
> @@ -1735,14 +1738,16 @@ static void io_region_add(MemoryListener *listener,
>      mrio->mr = section->mr;
>      mrio->offset = section->offset_within_region;
>      iorange_init(&mrio->iorange, &memory_region_iorange_ops,
> -                 section->offset_within_address_space, section->size);
> +                 section->offset_within_address_space,
> +                 int128_get64(section->size));
>      ioport_register(&mrio->iorange);
>  }
>  
>  static void io_region_del(MemoryListener *listener,
>                            MemoryRegionSection *section)
>  {
> -    isa_unassign_ioport(section->offset_within_address_space, section->size);
> +    isa_unassign_ioport(section->offset_within_address_space,
> +                        int128_get64(section->size));
>  }
>  
>  static MemoryListener core_memory_listener = {
> diff --git a/hw/core/loader.c b/hw/core/loader.c
> index 7507914..3a60cbe 100644
> --- a/hw/core/loader.c
> +++ b/hw/core/loader.c
> @@ -726,7 +726,7 @@ int rom_load_all(void)
>          addr  = rom->addr;
>          addr += rom->romsize;
>          section = memory_region_find(get_system_memory(), rom->addr, 1);
> -        rom->isrom = section.size && memory_region_is_rom(section.mr);
> +        rom->isrom = int128_nz(section.size) && memory_region_is_rom(section.mr);
>      }
>      qemu_register_reset(rom_reset, NULL);
>      roms_loaded = 1;
> diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
> index 6cb5016..0da00a9 100644
> --- a/hw/display/exynos4210_fimd.c
> +++ b/hw/display/exynos4210_fimd.c
> @@ -1133,7 +1133,7 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
>      DPRINT_TRACE("Window %u framebuffer changed: address=0x%08x, len=0x%x\n",
>              win, fb_start_addr, w->fb_len);
>  
> -    if (w->mem_section.size != w->fb_len ||
> +    if (int128_get64(w->mem_section.size) != w->fb_len ||
>              !memory_region_is_ram(w->mem_section.mr)) {
>          DPRINT_ERROR("Failed to find window %u framebuffer region\n", win);
>          goto error_return;
> @@ -1155,7 +1155,7 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
>  
>  error_return:
>      w->mem_section.mr = NULL;
> -    w->mem_section.size = 0;
> +    w->mem_section.size = int128_zero();
>      w->host_fb_addr = NULL;
>      w->fb_len = 0;
>  }
> diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c
> index 6be31db..49c9e59 100644
> --- a/hw/display/framebuffer.c
> +++ b/hw/display/framebuffer.c
> @@ -54,7 +54,8 @@ void framebuffer_update_display(
>      src_len = src_width * rows;
>  
>      mem_section = memory_region_find(address_space, base, src_len);
> -    if (mem_section.size != src_len || !memory_region_is_ram(mem_section.mr)) {
> +    if (int128_get64(mem_section.size) != src_len ||
> +            !memory_region_is_ram(mem_section.mr)) {
>          return;
>      }
>      mem = mem_section.mr;
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 693a9ff..c89676b 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -1953,7 +1953,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>      }
>  
>      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> -    end = (section->offset_within_address_space + section->size) &
> +    end = (section->offset_within_address_space + int128_get64(section->size)) &
>            TARGET_PAGE_MASK;



Another problem with this patch. Here is some more context (***):

===
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
-    end = (section->offset_within_address_space + section->size) &
+    end = (section->offset_within_address_space +
int128_get64(section->size)) &
           TARGET_PAGE_MASK;

     if (iova >= end) {
         return;
     }

     vaddr = memory_region_get_ram_ptr(section->mr) +
             section->offset_within_region +
             (iova - section->offset_within_address_space);

     DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
             iova, end - 1, vaddr);

     ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);

===

What happens:

1. "spapr: use memory core for iommu support"  patch calls
memory_region_init_iommu() with size=UINT64_MAX.

2. The "memory: use 128-bit integers for sizes and intermediates" patch converts
such values to UINT64_MAX+1:

void memory_region_init(MemoryRegion *mr,
                         const char *name,
                         uint64_t size)
 {
     mr->ops = NULL;
     mr->parent = NULL;
-    mr->size = size;
+    mr->size = int128_make64(size);
+    if (size == UINT64_MAX) {
+        mr->size = int128_2_64();
+    }

3. The (***) patch calls int128_get64(), which fails its assertion because the
size is now UINT64_MAX+1 (2^64) and no longer fits in 64 bits.


At the moment I fixed it by calling memory_region_init_iommu(...,
(UINT64_MAX >> 1) + 1), which makes me happy (it could also be INT64_MAX+1),
but I am not sure it is canonically right :)

What would be the right fix?
diff mbox

Patch

diff --git a/exec.c b/exec.c
index cf3ea6c..b86f0cc 100644
--- a/exec.c
+++ b/exec.c
@@ -801,7 +801,7 @@  static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
-        .size = TARGET_PAGE_SIZE,
+        .size = int128_make64(TARGET_PAGE_SIZE),
     };
     hwaddr start, end;
 
@@ -816,16 +816,18 @@  static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
         subpage = container_of(existing->mr, subpage_t, iomem);
     }
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
-    end = start + section->size - 1;
+    end = start + int128_get64(section->size) - 1;
     subpage_register(subpage, start, end, phys_section_add(section));
 }
 
 
-static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
+static void register_multipage(AddressSpaceDispatch *d,
+                               MemoryRegionSection *section)
 {
     hwaddr start_addr = section->offset_within_address_space;
     uint16_t section_index = phys_section_add(section);
-    uint64_t num_pages = section->size >> TARGET_PAGE_BITS;
+    uint64_t num_pages = int128_get64(int128_rshift(section->size,
+                                                    TARGET_PAGE_BITS));
 
     assert(num_pages);
     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
@@ -835,28 +837,29 @@  static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
     MemoryRegionSection now = *section, remain = *section;
+    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
 
     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space;
 
-        now.size = MIN(left, now.size);
+        now.size = int128_min(int128_make64(left), now.size);
         register_subpage(d, &now);
     } else {
-        now.size = 0;
+        now.size = int128_zero();
     }
-    while (remain.size != now.size) {
-        remain.size -= now.size;
-        remain.offset_within_address_space += now.size;
-        remain.offset_within_region += now.size;
+    while (int128_ne(remain.size, now.size)) {
+        remain.size = int128_sub(remain.size, now.size);
+        remain.offset_within_address_space += int128_get64(now.size);
+        remain.offset_within_region += int128_get64(now.size);
         now = remain;
-        if (remain.size < TARGET_PAGE_SIZE) {
+        if (int128_lt(remain.size, page_size)) {
             register_subpage(d, &now);
         } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
-            now.size = TARGET_PAGE_SIZE;
+            now.size = page_size;
             register_subpage(d, &now);
         } else {
-            now.size &= -TARGET_PAGE_SIZE;
+            now.size = int128_and(now.size, int128_neg(page_size));
             register_multipage(d, &now);
         }
     }
@@ -1666,7 +1669,7 @@  static uint16_t dummy_section(MemoryRegion *mr)
         .mr = mr,
         .offset_within_address_space = 0,
         .offset_within_region = 0,
-        .size = UINT64_MAX,
+        .size = int128_2_64(),
     };
 
     return phys_section_add(&section);
@@ -1735,14 +1738,16 @@  static void io_region_add(MemoryListener *listener,
     mrio->mr = section->mr;
     mrio->offset = section->offset_within_region;
     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
-                 section->offset_within_address_space, section->size);
+                 section->offset_within_address_space,
+                 int128_get64(section->size));
     ioport_register(&mrio->iorange);
 }
 
 static void io_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
 {
-    isa_unassign_ioport(section->offset_within_address_space, section->size);
+    isa_unassign_ioport(section->offset_within_address_space,
+                        int128_get64(section->size));
 }
 
 static MemoryListener core_memory_listener = {
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 7507914..3a60cbe 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -726,7 +726,7 @@  int rom_load_all(void)
         addr  = rom->addr;
         addr += rom->romsize;
         section = memory_region_find(get_system_memory(), rom->addr, 1);
-        rom->isrom = section.size && memory_region_is_rom(section.mr);
+        rom->isrom = int128_nz(section.size) && memory_region_is_rom(section.mr);
     }
     qemu_register_reset(rom_reset, NULL);
     roms_loaded = 1;
diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index 6cb5016..0da00a9 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1133,7 +1133,7 @@  static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
     DPRINT_TRACE("Window %u framebuffer changed: address=0x%08x, len=0x%x\n",
             win, fb_start_addr, w->fb_len);
 
-    if (w->mem_section.size != w->fb_len ||
+    if (int128_get64(w->mem_section.size) != w->fb_len ||
             !memory_region_is_ram(w->mem_section.mr)) {
         DPRINT_ERROR("Failed to find window %u framebuffer region\n", win);
         goto error_return;
@@ -1155,7 +1155,7 @@  static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
 
 error_return:
     w->mem_section.mr = NULL;
-    w->mem_section.size = 0;
+    w->mem_section.size = int128_zero();
     w->host_fb_addr = NULL;
     w->fb_len = 0;
 }
diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c
index 6be31db..49c9e59 100644
--- a/hw/display/framebuffer.c
+++ b/hw/display/framebuffer.c
@@ -54,7 +54,8 @@  void framebuffer_update_display(
     src_len = src_width * rows;
 
     mem_section = memory_region_find(address_space, base, src_len);
-    if (mem_section.size != src_len || !memory_region_is_ram(mem_section.mr)) {
+    if (int128_get64(mem_section.size) != src_len ||
+            !memory_region_is_ram(mem_section.mr)) {
         return;
     }
     mem = mem_section.mr;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 693a9ff..c89676b 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1953,7 +1953,7 @@  static void vfio_listener_region_add(MemoryListener *listener,
     }
 
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
-    end = (section->offset_within_address_space + section->size) &
+    end = (section->offset_within_address_space + int128_get64(section->size)) &
           TARGET_PAGE_MASK;
 
     if (iova >= end) {
@@ -1997,7 +1997,7 @@  static void vfio_listener_region_del(MemoryListener *listener,
     }
 
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
-    end = (section->offset_within_address_space + section->size) &
+    end = (section->offset_within_address_space + int128_get64(section->size)) &
           TARGET_PAGE_MASK;
 
     if (iova >= end) {
diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
index 37292ff..7e46723 100644
--- a/hw/virtio/dataplane/hostmem.c
+++ b/hw/virtio/dataplane/hostmem.c
@@ -90,7 +90,7 @@  static void hostmem_append_new_region(HostMem *hostmem,
     hostmem->new_regions[num] = (HostMemRegion){
         .host_addr = ram_ptr + section->offset_within_region,
         .guest_addr = section->offset_within_address_space,
-        .size = section->size,
+        .size = int128_get64(section->size),
         .readonly = section->readonly,
     };
     hostmem->num_new_regions++;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index fbabf99..baf84ea 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -81,7 +81,7 @@  static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
         return 0;
     }
     start_addr = section->offset_within_address_space;
-    end_addr = range_get_last(start_addr, section->size);
+    end_addr = range_get_last(start_addr, int128_get64(section->size));
     start_addr = MAX(first, start_addr);
     end_addr = MIN(last, end_addr);
 
@@ -379,7 +379,7 @@  static void vhost_set_memory(MemoryListener *listener,
     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                          memory_listener);
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
+    ram_addr_t size = int128_get64(section->size);
     bool log_dirty = memory_region_is_logging(section->mr);
     int s = offsetof(struct vhost_memory, regions) +
         (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d669756..a27051c 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -197,7 +197,8 @@  static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 
             /* FIXME: remove get_system_memory(), but how? */
             section = memory_region_find(get_system_memory(), pa, 1);
-            if (!section.size || !memory_region_is_ram(section.mr))
+            if (!int128_nz(section.size) || !memory_region_is_ram(section.mr)) {
                 continue;
+            }
 
             /* Using memory_region_get_ram_ptr is bending the rules a bit, but
                should be OK because we only want a single page.  */
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c11a3f8..fddc6ad 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -26,6 +26,9 @@ 
 #include "exec/ioport.h"
 #include "qemu/int128.h"
 
+#define MAX_PHYS_ADDR_SPACE_BITS 62
+#define MAX_PHYS_ADDR            (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
+
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegionPortio MemoryRegionPortio;
 typedef struct MemoryRegionMmio MemoryRegionMmio;
@@ -185,7 +188,7 @@  struct MemoryRegionSection {
     MemoryRegion *mr;
     AddressSpace *address_space;
     hwaddr offset_within_region;
-    uint64_t size;
+    Int128 size;
     hwaddr offset_within_address_space;
     bool readonly;
 };
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index b3864b6..f094c5c 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -34,6 +34,16 @@  static inline Int128 int128_2_64(void)
     return (Int128) { 0, 1 };
 }
 
+static inline Int128 int128_and(Int128 a, Int128 b)
+{
+    return (Int128) { a.lo & b.lo, a.hi & b.hi };
+}
+
+static inline Int128 int128_rshift(Int128 a, int n)
+{
+    return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), (a.hi >> n) };
+}
+
 static inline Int128 int128_add(Int128 a, Int128 b)
 {
     Int128 r = { a.lo + b.lo, a.hi + b.hi };
diff --git a/kvm-all.c b/kvm-all.c
index 8222729..86c9af3 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -316,7 +316,7 @@  static void kvm_log_start(MemoryListener *listener,
     int r;
 
     r = kvm_dirty_pages_log_change(section->offset_within_address_space,
-                                   section->size, true);
+                                   int128_get64(section->size), true);
     if (r < 0) {
         abort();
     }
@@ -328,7 +328,7 @@  static void kvm_log_stop(MemoryListener *listener,
     int r;
 
     r = kvm_dirty_pages_log_change(section->offset_within_address_space,
-                                   section->size, false);
+                                   int128_get64(section->size), false);
     if (r < 0) {
         abort();
     }
@@ -366,7 +366,8 @@  static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
     unsigned int i, j;
     unsigned long page_number, c;
     hwaddr addr, addr1;
-    unsigned int len = ((section->size / getpagesize()) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
+    unsigned int pages = int128_get64(section->size) / getpagesize();
+    unsigned int len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
     unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
 
     /*
@@ -409,7 +410,7 @@  static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
     KVMSlot *mem;
     int ret = 0;
     hwaddr start_addr = section->offset_within_address_space;
-    hwaddr end_addr = start_addr + section->size;
+    hwaddr end_addr = start_addr + int128_get64(section->size);
 
     d.dirty_bitmap = NULL;
     while (start_addr < end_addr) {
@@ -619,7 +620,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
     MemoryRegion *mr = section->mr;
     bool log_dirty = memory_region_is_logging(mr);
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
+    ram_addr_t size = int128_get64(section->size);
     void *ram = NULL;
     unsigned delta;
 
@@ -811,7 +812,8 @@  static void kvm_mem_ioeventfd_add(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
-                               data, true, section->size, match_data);
+                               data, true, int128_get64(section->size),
+                               match_data);
     if (r < 0) {
         abort();
     }
@@ -826,7 +828,8 @@  static void kvm_mem_ioeventfd_del(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
-                               data, false, section->size, match_data);
+                               data, false, int128_get64(section->size),
+                               match_data);
     if (r < 0) {
         abort();
     }
@@ -841,7 +844,8 @@  static void kvm_io_ioeventfd_add(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
-                              data, true, section->size, match_data);
+                              data, true, int128_get64(section->size),
+                              match_data);
     if (r < 0) {
         abort();
     }
@@ -857,7 +861,8 @@  static void kvm_io_ioeventfd_del(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
-                              data, false, section->size, match_data);
+                              data, false, int128_get64(section->size),
+                              match_data);
     if (r < 0) {
         abort();
     }
diff --git a/memory.c b/memory.c
index 5cb8f4a..5be2d7b 100644
--- a/memory.c
+++ b/memory.c
@@ -152,7 +152,7 @@  static bool memory_listener_match(MemoryListener *listener,
         .mr = (fr)->mr,                                                 \
         .address_space = (as),                                          \
         .offset_within_region = (fr)->offset_in_region,                 \
-        .size = int128_get64((fr)->addr.size),                          \
+        .size = (fr)->addr.size,                                        \
         .offset_within_address_space = int128_get64((fr)->addr.start),  \
         .readonly = (fr)->readonly,                                     \
               }))
@@ -634,7 +634,7 @@  static void address_space_add_del_ioeventfds(AddressSpace *as,
             section = (MemoryRegionSection) {
                 .address_space = as,
                 .offset_within_address_space = int128_get64(fd->addr.start),
-                .size = int128_get64(fd->addr.size),
+                .size = fd->addr.size,
             };
             MEMORY_LISTENER_CALL(eventfd_del, Forward, &section,
                                  fd->match_data, fd->data, fd->e);
@@ -647,7 +647,7 @@  static void address_space_add_del_ioeventfds(AddressSpace *as,
             section = (MemoryRegionSection) {
                 .address_space = as,
                 .offset_within_address_space = int128_get64(fd->addr.start),
-                .size = int128_get64(fd->addr.size),
+                .size = fd->addr.size,
             };
             MEMORY_LISTENER_CALL(eventfd_add, Reverse, &section,
                                  fd->match_data, fd->data, fd->e);
@@ -1215,7 +1215,7 @@  static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa
             section = (MemoryRegionSection) {
                 .address_space = as,
                 .offset_within_address_space = int128_get64(fr->addr.start),
-                .size = int128_get64(fr->addr.size),
+                .size = fr->addr.size,
             };
 
             MEMORY_LISTENER_CALL(coalesced_mmio_del, Reverse, &section,
@@ -1506,7 +1506,7 @@  static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
 MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size)
 {
-    MemoryRegionSection ret = { .mr = NULL, .size = 0 };
+    MemoryRegionSection ret = { .mr = NULL };
     MemoryRegion *root;
     AddressSpace *as;
     AddrRange range;
@@ -1536,7 +1536,7 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
     ret.offset_within_region = fr->offset_in_region;
     ret.offset_within_region += int128_get64(int128_sub(range.start,
                                                         fr->addr.start));
-    ret.size = int128_get64(range.size);
+    ret.size = range.size;
     ret.offset_within_address_space = int128_get64(range.start);
     ret.readonly = fr->readonly;
     return ret;
@@ -1584,7 +1584,7 @@  static void listener_add_address_space(MemoryListener *listener,
             .mr = fr->mr,
             .address_space = as,
             .offset_within_region = fr->offset_in_region,
-            .size = int128_get64(fr->addr.size),
+            .size = fr->addr.size,
             .offset_within_address_space = int128_get64(fr->addr.start),
             .readonly = fr->readonly,
         };
diff --git a/xen-all.c b/xen-all.c
index 539a154..cd520b1 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -418,7 +418,7 @@  static void xen_set_memory(struct MemoryListener *listener,
 {
     XenIOState *state = container_of(listener, XenIOState, memory_listener);
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
+    ram_addr_t size = int128_get64(section->size);
     bool log_dirty = memory_region_is_logging(section->mr);
     hvmmem_type_t mem_type;
 
@@ -522,7 +522,7 @@  static void xen_log_start(MemoryListener *listener,
     XenIOState *state = container_of(listener, XenIOState, memory_listener);
 
     xen_sync_dirty_bitmap(state, section->offset_within_address_space,
-                          section->size);
+                          int128_get64(section->size));
 }
 
 static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section)
@@ -539,7 +539,7 @@  static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
     XenIOState *state = container_of(listener, XenIOState, memory_listener);
 
     xen_sync_dirty_bitmap(state, section->offset_within_address_space,
-                          section->size);
+                          int128_get64(section->size));
 }
 
 static void xen_log_global_start(MemoryListener *listener)