Patchwork [v2,3/3] CPUPhysMemoryClient: Batch contiguous addresses when playing catchup

login
register
mail settings
Submitter Alex Williamson
Date May 3, 2011, 6:36 p.m.
Message ID <20110503183652.28430.83897.stgit@s20.home>
Download mbox | patch
Permalink /patch/93881/
State New
Headers show

Comments

Alex Williamson - May 3, 2011, 6:36 p.m.
When a phys memory client registers and we play catchup by walking
the page tables, we can make a huge improvement in the number of
times the set_memory callback is called by batching contiguous
pages together.  With a 4G guest, this reduces the number of callbacks
at registration from 1048866 to 296.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 exec.c |   38 ++++++++++++++++++++++++++++++++------
 1 files changed, 32 insertions(+), 6 deletions(-)
Michael S. Tsirkin - May 5, 2011, 1:21 p.m.
On Tue, May 03, 2011 at 12:36:58PM -0600, Alex Williamson wrote:
> When a phys memory client registers and we play catchup by walking
> the page tables, we can make a huge improvement in the number of
> times the set_memory callback is called by batching contiguous
> pages together.  With a 4G guest, this reduces the number of callbacks
> at registration from 1048866 to 296.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
> 
>  exec.c |   38 ++++++++++++++++++++++++++++++++------
>  1 files changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index bbd5c86..a0678a4 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1741,14 +1741,21 @@ static int cpu_notify_migration_log(int enable)
>      return 0;
>  }
>  
> +struct last_map {
> +    target_phys_addr_t start_addr;
> +    ram_addr_t size;

A bit worried that ram_addr_t size might conceivably overflow
(it's just a long, could be a 4G ram). Break it out when it fills up?

> +    ram_addr_t phys_offset;
> +};
> +
>  /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
>   * address.  Each intermediate table provides the next L2_BITs of guest
>   * physical address space.  The number of levels vary based on host and
>   * guest configuration, making it efficient to build the final guest
>   * physical address by seeding the L1 offset and shifting and adding in
>   * each L2 offset as we recurse through them. */
> -static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> -                                 int level, void **lp, target_phys_addr_t addr)
> +static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
> +                                 void **lp, target_phys_addr_t addr,
> +                                 struct last_map *map)
>  {
>      int i;
>  
> @@ -1760,15 +1767,29 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
>          addr <<= L2_BITS + TARGET_PAGE_BITS;
>          for (i = 0; i < L2_SIZE; ++i) {
>              if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
> -                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
> -                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
> +                target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
> +
> +                if (map->size &&
> +                    start_addr == map->start_addr + map->size &&
> +                    pd[i].phys_offset == map->phys_offset + map->size) {
> +
> +                    map->size += TARGET_PAGE_SIZE;
> +                    continue;
> +                } else if (map->size) {
> +                    client->set_memory(client, map->start_addr,
> +                                       map->size, map->phys_offset);
> +                }
> +
> +                map->start_addr = start_addr;
> +                map->size = TARGET_PAGE_SIZE;
> +                map->phys_offset = pd[i].phys_offset;
>              }
>          }
>      } else {
>          void **pp = *lp;
>          for (i = 0; i < L2_SIZE; ++i) {
>              phys_page_for_each_1(client, level - 1, pp + i,
> -                                 (addr << L2_BITS) | i);
> +                                 (addr << L2_BITS) | i, map);
>          }
>      }
>  }
> @@ -1776,9 +1797,14 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
>  static void phys_page_for_each(CPUPhysMemoryClient *client)
>  {
>      int i;
> +    struct last_map map = { 0 };
> +

Nit: just {} is enough.

>      for (i = 0; i < P_L1_SIZE; ++i) {
>          phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
> -                             l1_phys_map + i, i);
> +                             l1_phys_map + i, i, &map);
> +    }
> +    if (map.size) {
> +        client->set_memory(client, map.start_addr, map.size, map.phys_offset);
>      }
>  }
>
Alex Williamson - May 5, 2011, 2:21 p.m.
On Thu, 2011-05-05 at 16:21 +0300, Michael S. Tsirkin wrote:
> On Tue, May 03, 2011 at 12:36:58PM -0600, Alex Williamson wrote:
> > When a phys memory client registers and we play catchup by walking
> > the page tables, we can make a huge improvement in the number of
> > times the set_memory callback is called by batching contiguous
> > pages together.  With a 4G guest, this reduces the number of callbacks
> > at registration from 1048866 to 296.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> > 
> >  exec.c |   38 ++++++++++++++++++++++++++++++++------
> >  1 files changed, 32 insertions(+), 6 deletions(-)
> > 
> > diff --git a/exec.c b/exec.c
> > index bbd5c86..a0678a4 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -1741,14 +1741,21 @@ static int cpu_notify_migration_log(int enable)
> >      return 0;
> >  }
> >  
> > +struct last_map {
> > +    target_phys_addr_t start_addr;
> > +    ram_addr_t size;
> 
> A bit worried that ram_addr_t size might thinkably overflow
> (it's just a long, could be a 4G ram). Break it out when it fills up?

struct CPUPhysMemoryClient {
    void (*set_memory)(struct CPUPhysMemoryClient *client,
                       target_phys_addr_t start_addr,
                       ram_addr_t size,
                       ram_addr_t phys_offset);

ram_addr_t seems to be the standard for describing these types of
things.  It's an unsigned long, so 4G is only a concern for 32b builds,
which don't support that much memory anyway.  Please apply.  Thanks,

Alex

> > +    ram_addr_t phys_offset;
> > +};
> > +
> >  /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
> >   * address.  Each intermediate table provides the next L2_BITs of guest
> >   * physical address space.  The number of levels vary based on host and
> >   * guest configuration, making it efficient to build the final guest
> >   * physical address by seeding the L1 offset and shifting and adding in
> >   * each L2 offset as we recurse through them. */
> > -static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > -                                 int level, void **lp, target_phys_addr_t addr)
> > +static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
> > +                                 void **lp, target_phys_addr_t addr,
> > +                                 struct last_map *map)
> >  {
> >      int i;
> >  
> > @@ -1760,15 +1767,29 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> >          addr <<= L2_BITS + TARGET_PAGE_BITS;
> >          for (i = 0; i < L2_SIZE; ++i) {
> >              if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
> > -                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
> > -                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
> > +                target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
> > +
> > +                if (map->size &&
> > +                    start_addr == map->start_addr + map->size &&
> > +                    pd[i].phys_offset == map->phys_offset + map->size) {
> > +
> > +                    map->size += TARGET_PAGE_SIZE;
> > +                    continue;
> > +                } else if (map->size) {
> > +                    client->set_memory(client, map->start_addr,
> > +                                       map->size, map->phys_offset);
> > +                }
> > +
> > +                map->start_addr = start_addr;
> > +                map->size = TARGET_PAGE_SIZE;
> > +                map->phys_offset = pd[i].phys_offset;
> >              }
> >          }
> >      } else {
> >          void **pp = *lp;
> >          for (i = 0; i < L2_SIZE; ++i) {
> >              phys_page_for_each_1(client, level - 1, pp + i,
> > -                                 (addr << L2_BITS) | i);
> > +                                 (addr << L2_BITS) | i, map);
> >          }
> >      }
> >  }
> > @@ -1776,9 +1797,14 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> >  static void phys_page_for_each(CPUPhysMemoryClient *client)
> >  {
> >      int i;
> > +    struct last_map map = { 0 };
> > +
> 
> Nit: just {} is enough.
> 
> >      for (i = 0; i < P_L1_SIZE; ++i) {
> >          phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
> > -                             l1_phys_map + i, i);
> > +                             l1_phys_map + i, i, &map);
> > +    }
> > +    if (map.size) {
> > +        client->set_memory(client, map.start_addr, map.size, map.phys_offset);
> >      }
> >  }
> >
Jes Sorensen - May 5, 2011, 2:30 p.m.
On 05/05/11 16:21, Alex Williamson wrote:
>> > A bit worried that ram_addr_t size might thinkably overflow
>> > (it's just a long, could be a 4G ram). Break it out when it fills up?
> struct CPUPhysMemoryClient {
>     void (*set_memory)(struct CPUPhysMemoryClient *client,
>                        target_phys_addr_t start_addr,
>                        ram_addr_t size,
>                        ram_addr_t phys_offset);
> 
> ram_addr_t seems to be the standard for describing these types of
> things.  It's an unsigned long, so 4G is only  concern for 32b builds,
> which don't support that much memory anyway.  Please apply.  Thanks,

A memory size can obviously not be bigger than the maximum physical
address, so I find it really hard to see how this could overflow.

It seems fair to use it for the size here.

Acked-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Michael S. Tsirkin - May 5, 2011, 3:18 p.m.
On Thu, May 05, 2011 at 04:30:57PM +0200, Jes Sorensen wrote:
> On 05/05/11 16:21, Alex Williamson wrote:
> >> > A bit worried that ram_addr_t size might thinkably overflow
> >> > (it's just a long, could be a 4G ram). Break it out when it fills up?
> > struct CPUPhysMemoryClient {
> >     void (*set_memory)(struct CPUPhysMemoryClient *client,
> >                        target_phys_addr_t start_addr,
> >                        ram_addr_t size,
> >                        ram_addr_t phys_offset);
> > 
> > ram_addr_t seems to be the standard for describing these types of
> > things.  It's an unsigned long, so 4G is only  concern for 32b builds,
> > which don't support that much memory anyway.  Please apply.  Thanks,
> 
> A memory size can obviously not be bigger than the maximum physical
> address, so I find it really hard to see how this could overflow.

For example, a 4G size does not fit in 32 bits.


> It seems fair to use it for the size here.
> 
> Acked-by: Jes Sorensen <Jes.Sorensen@redhat.com>
>
Michael S. Tsirkin - May 5, 2011, 3:21 p.m.
On Thu, May 05, 2011 at 08:21:06AM -0600, Alex Williamson wrote:
> On Thu, 2011-05-05 at 16:21 +0300, Michael S. Tsirkin wrote:
> > On Tue, May 03, 2011 at 12:36:58PM -0600, Alex Williamson wrote:
> > > When a phys memory client registers and we play catchup by walking
> > > the page tables, we can make a huge improvement in the number of
> > > times the set_memory callback is called by batching contiguous
> > > pages together.  With a 4G guest, this reduces the number of callbacks
> > > at registration from 1048866 to 296.
> > > 
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > ---
> > > 
> > >  exec.c |   38 ++++++++++++++++++++++++++++++++------
> > >  1 files changed, 32 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/exec.c b/exec.c
> > > index bbd5c86..a0678a4 100644
> > > --- a/exec.c
> > > +++ b/exec.c
> > > @@ -1741,14 +1741,21 @@ static int cpu_notify_migration_log(int enable)
> > >      return 0;
> > >  }
> > >  
> > > +struct last_map {
> > > +    target_phys_addr_t start_addr;
> > > +    ram_addr_t size;
> > 
> > A bit worried that ram_addr_t size might thinkably overflow
> > (it's just a long, could be a 4G ram). Break it out when it fills up?
> 
> struct CPUPhysMemoryClient {
>     void (*set_memory)(struct CPUPhysMemoryClient *client,
>                        target_phys_addr_t start_addr,
>                        ram_addr_t size,
>                        ram_addr_t phys_offset);
> 
> ram_addr_t seems to be the standard for describing these types of
> things.  It's an unsigned long, so 4G is only  concern for 32b builds,
> which don't support that much memory anyway.  Please apply.  Thanks,
> 
> Alex

OK, I don't think it's a problem in practice.
I dislike the use of _addr for sizes, we should
have _size_t, but that's a separate problem,
this patch is consistent.

I'll give people a bit of time to review and reply though,
there seems to be no rush.

> > > +    ram_addr_t phys_offset;
> > > +};
> > > +
> > >  /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
> > >   * address.  Each intermediate table provides the next L2_BITs of guest
> > >   * physical address space.  The number of levels vary based on host and
> > >   * guest configuration, making it efficient to build the final guest
> > >   * physical address by seeding the L1 offset and shifting and adding in
> > >   * each L2 offset as we recurse through them. */
> > > -static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > -                                 int level, void **lp, target_phys_addr_t addr)
> > > +static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
> > > +                                 void **lp, target_phys_addr_t addr,
> > > +                                 struct last_map *map)
> > >  {
> > >      int i;
> > >  
> > > @@ -1760,15 +1767,29 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > >          addr <<= L2_BITS + TARGET_PAGE_BITS;
> > >          for (i = 0; i < L2_SIZE; ++i) {
> > >              if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
> > > -                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
> > > -                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
> > > +                target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
> > > +
> > > +                if (map->size &&
> > > +                    start_addr == map->start_addr + map->size &&
> > > +                    pd[i].phys_offset == map->phys_offset + map->size) {
> > > +
> > > +                    map->size += TARGET_PAGE_SIZE;
> > > +                    continue;
> > > +                } else if (map->size) {
> > > +                    client->set_memory(client, map->start_addr,
> > > +                                       map->size, map->phys_offset);
> > > +                }
> > > +
> > > +                map->start_addr = start_addr;
> > > +                map->size = TARGET_PAGE_SIZE;
> > > +                map->phys_offset = pd[i].phys_offset;
> > >              }
> > >          }
> > >      } else {
> > >          void **pp = *lp;
> > >          for (i = 0; i < L2_SIZE; ++i) {
> > >              phys_page_for_each_1(client, level - 1, pp + i,
> > > -                                 (addr << L2_BITS) | i);
> > > +                                 (addr << L2_BITS) | i, map);
> > >          }
> > >      }
> > >  }
> > > @@ -1776,9 +1797,14 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > >  static void phys_page_for_each(CPUPhysMemoryClient *client)
> > >  {
> > >      int i;
> > > +    struct last_map map = { 0 };
> > > +
> > 
> > Nit: just {} is enough.
> > 
> > >      for (i = 0; i < P_L1_SIZE; ++i) {
> > >          phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
> > > -                             l1_phys_map + i, i);
> > > +                             l1_phys_map + i, i, &map);
> > > +    }
> > > +    if (map.size) {
> > > +        client->set_memory(client, map.start_addr, map.size, map.phys_offset);
> > >      }
> > >  }
> > >  
> 
>
Jes Sorensen - May 5, 2011, 3:36 p.m.
On 05/05/11 17:18, Michael S. Tsirkin wrote:
>> > A memory size can obviously not be bigger than the maximum physical
>> > address, so I find it really hard to see how this could overflow.
> For example, a 4G size does not fit in 32 bits.

That is the only corner case - you can handle that by -1 if you like.

Jes
Michael S. Tsirkin - May 5, 2011, 3:38 p.m.
On Thu, May 05, 2011 at 05:36:04PM +0200, Jes Sorensen wrote:
> On 05/05/11 17:18, Michael S. Tsirkin wrote:
> >> > A memory size can obviously not be bigger than the maximum physical
> >> > address, so I find it really hard to see how this could overflow.
> > For example, a 4G size does not fit in 32 bits.
> 
> That is the only corner case

True.

> you can handle that by -1 if you like.

But then all users need to be updated.
Seems easier to break out of the loop.
It's likely not a real problem, certainly not on a pc,
don't know about other systems.

> Jes
>
Jes Sorensen - May 5, 2011, 3:40 p.m.
On 05/05/11 17:38, Michael S. Tsirkin wrote:
> On Thu, May 05, 2011 at 05:36:04PM +0200, Jes Sorensen wrote:
>> > On 05/05/11 17:18, Michael S. Tsirkin wrote:
>>>>> > >> > A memory size can obviously not be bigger than the maximum physical
>>>>> > >> > address, so I find it really hard to see how this could overflow.
>>> > > For example, a 4G size does not fit in 32 bits.
>> > 
>> > That is the only corner case
> True.
> 
>> > you can handle that by -1 if you like.
> But then all users need to be updated.
> Seems easier to break out of the loop easier.
> It's likely not a real problem, certainly not on a pc,
> don't know about other systems.

I think it is quite fair to limit the amount of memory we support when
running 32 bit qemu binaries. I would expect more things to break than
just this if we tried to support 4GB of RAM on a 32 bit host.

Cheers,
Jes
Michael S. Tsirkin - May 5, 2011, 3:41 p.m.
On Thu, May 05, 2011 at 05:40:19PM +0200, Jes Sorensen wrote:
> On 05/05/11 17:38, Michael S. Tsirkin wrote:
> > On Thu, May 05, 2011 at 05:36:04PM +0200, Jes Sorensen wrote:
> >> > On 05/05/11 17:18, Michael S. Tsirkin wrote:
> >>>>> > >> > A memory size can obviously not be bigger than the maximum physical
> >>>>> > >> > address, so I find it really hard to see how this could overflow.
> >>> > > For example, a 4G size does not fit in 32 bits.
> >> > 
> >> > That is the only corner case
> > True.
> > 
> >> > you can handle that by -1 if you like.
> > But then all users need to be updated.
> > Seems easier to break out of the loop easier.
> > It's likely not a real problem, certainly not on a pc,
> > don't know about other systems.
> 
> I think it is quite fair to limit the amount of memory we support when
> running 32 bit qemu binaries. I would expect more things to break than
> just this if we tried to support 4GB of RAM on a 32 bit host.
> 
> Cheers,
> Jes

Fair enough.
Alex Williamson - May 25, 2011, 3:47 a.m.
On Thu, 2011-05-05 at 18:21 +0300, Michael S. Tsirkin wrote:
> On Thu, May 05, 2011 at 08:21:06AM -0600, Alex Williamson wrote:
> > On Thu, 2011-05-05 at 16:21 +0300, Michael S. Tsirkin wrote:
> > > On Tue, May 03, 2011 at 12:36:58PM -0600, Alex Williamson wrote:
> > > > When a phys memory client registers and we play catchup by walking
> > > > the page tables, we can make a huge improvement in the number of
> > > > times the set_memory callback is called by batching contiguous
> > > > pages together.  With a 4G guest, this reduces the number of callbacks
> > > > at registration from 1048866 to 296.
> > > > 
> > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > ---
> > > > 
> > > >  exec.c |   38 ++++++++++++++++++++++++++++++++------
> > > >  1 files changed, 32 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/exec.c b/exec.c
> > > > index bbd5c86..a0678a4 100644
> > > > --- a/exec.c
> > > > +++ b/exec.c
> > > > @@ -1741,14 +1741,21 @@ static int cpu_notify_migration_log(int enable)
> > > >      return 0;
> > > >  }
> > > >  
> > > > +struct last_map {
> > > > +    target_phys_addr_t start_addr;
> > > > +    ram_addr_t size;
> > > 
> > > A bit worried that ram_addr_t size might thinkably overflow
> > > (it's just a long, could be a 4G ram). Break it out when it fills up?
> > 
> > struct CPUPhysMemoryClient {
> >     void (*set_memory)(struct CPUPhysMemoryClient *client,
> >                        target_phys_addr_t start_addr,
> >                        ram_addr_t size,
> >                        ram_addr_t phys_offset);
> > 
> > ram_addr_t seems to be the standard for describing these types of
> > things.  It's an unsigned long, so 4G is only  concern for 32b builds,
> > which don't support that much memory anyway.  Please apply.  Thanks,
> > 
> > Alex
> 
> OK, I don't think it's a problem in practice.
> I dislike the use of _addr for sizes, we should
> have _size_t, but that's a separate problem,
> this patch is consistent.
> 
> I'll give people a bit of time to review and reply though,
> there seems to be no rush.

Bump.  I didn't see anything come out of the discussion that would
suggest a respin.  Please apply.  Thanks,

Alex

> > > > +    ram_addr_t phys_offset;
> > > > +};
> > > > +
> > > >  /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
> > > >   * address.  Each intermediate table provides the next L2_BITs of guest
> > > >   * physical address space.  The number of levels vary based on host and
> > > >   * guest configuration, making it efficient to build the final guest
> > > >   * physical address by seeding the L1 offset and shifting and adding in
> > > >   * each L2 offset as we recurse through them. */
> > > > -static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > > -                                 int level, void **lp, target_phys_addr_t addr)
> > > > +static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
> > > > +                                 void **lp, target_phys_addr_t addr,
> > > > +                                 struct last_map *map)
> > > >  {
> > > >      int i;
> > > >  
> > > > @@ -1760,15 +1767,29 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > >          addr <<= L2_BITS + TARGET_PAGE_BITS;
> > > >          for (i = 0; i < L2_SIZE; ++i) {
> > > >              if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
> > > > -                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
> > > > -                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
> > > > +                target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
> > > > +
> > > > +                if (map->size &&
> > > > +                    start_addr == map->start_addr + map->size &&
> > > > +                    pd[i].phys_offset == map->phys_offset + map->size) {
> > > > +
> > > > +                    map->size += TARGET_PAGE_SIZE;
> > > > +                    continue;
> > > > +                } else if (map->size) {
> > > > +                    client->set_memory(client, map->start_addr,
> > > > +                                       map->size, map->phys_offset);
> > > > +                }
> > > > +
> > > > +                map->start_addr = start_addr;
> > > > +                map->size = TARGET_PAGE_SIZE;
> > > > +                map->phys_offset = pd[i].phys_offset;
> > > >              }
> > > >          }
> > > >      } else {
> > > >          void **pp = *lp;
> > > >          for (i = 0; i < L2_SIZE; ++i) {
> > > >              phys_page_for_each_1(client, level - 1, pp + i,
> > > > -                                 (addr << L2_BITS) | i);
> > > > +                                 (addr << L2_BITS) | i, map);
> > > >          }
> > > >      }
> > > >  }
> > > > @@ -1776,9 +1797,14 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > >  static void phys_page_for_each(CPUPhysMemoryClient *client)
> > > >  {
> > > >      int i;
> > > > +    struct last_map map = { 0 };
> > > > +
> > > 
> > > Nit: just {} is enough.
> > > 
> > > >      for (i = 0; i < P_L1_SIZE; ++i) {
> > > >          phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
> > > > -                             l1_phys_map + i, i);
> > > > +                             l1_phys_map + i, i, &map);
> > > > +    }
> > > > +    if (map.size) {
> > > > +        client->set_memory(client, map.start_addr, map.size, map.phys_offset);
> > > >      }
> > > >  }
> > > >  
> > 
> >
Michael S. Tsirkin - May 25, 2011, 6:08 a.m.
On Tue, May 24, 2011 at 09:47:57PM -0600, Alex Williamson wrote:
> On Thu, 2011-05-05 at 18:21 +0300, Michael S. Tsirkin wrote:
> > On Thu, May 05, 2011 at 08:21:06AM -0600, Alex Williamson wrote:
> > > On Thu, 2011-05-05 at 16:21 +0300, Michael S. Tsirkin wrote:
> > > > On Tue, May 03, 2011 at 12:36:58PM -0600, Alex Williamson wrote:
> > > > > When a phys memory client registers and we play catchup by walking
> > > > > the page tables, we can make a huge improvement in the number of
> > > > > times the set_memory callback is called by batching contiguous
> > > > > pages together.  With a 4G guest, this reduces the number of callbacks
> > > > > at registration from 1048866 to 296.
> > > > > 
> > > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > > ---
> > > > > 
> > > > >  exec.c |   38 ++++++++++++++++++++++++++++++++------
> > > > >  1 files changed, 32 insertions(+), 6 deletions(-)
> > > > > 
> > > > > diff --git a/exec.c b/exec.c
> > > > > index bbd5c86..a0678a4 100644
> > > > > --- a/exec.c
> > > > > +++ b/exec.c
> > > > > @@ -1741,14 +1741,21 @@ static int cpu_notify_migration_log(int enable)
> > > > >      return 0;
> > > > >  }
> > > > >  
> > > > > +struct last_map {
> > > > > +    target_phys_addr_t start_addr;
> > > > > +    ram_addr_t size;
> > > > 
> > > > A bit worried that ram_addr_t size might thinkably overflow
> > > > (it's just a long, could be a 4G ram). Break it out when it fills up?
> > > 
> > > struct CPUPhysMemoryClient {
> > >     void (*set_memory)(struct CPUPhysMemoryClient *client,
> > >                        target_phys_addr_t start_addr,
> > >                        ram_addr_t size,
> > >                        ram_addr_t phys_offset);
> > > 
> > > ram_addr_t seems to be the standard for describing these types of
> > > things.  It's an unsigned long, so 4G is only  concern for 32b builds,
> > > which don't support that much memory anyway.  Please apply.  Thanks,
> > > 
> > > Alex
> > 
> > OK, I don't think it's a problem in practice.
> > I dislike the use of _addr for sizes, we should
> > have _size_t, but that's a separate problem,
> > this patch is consistent.
> > 
> > I'll give people a bit of time to review and reply though,
> > there seems to be no rush.
> 
> Bump.  I didn't see anything come out of the discussion that would
> suggest a respin.  Please apply.  Thanks,
> 
> Alex

Applied.
Thanks,

> > > > > +    ram_addr_t phys_offset;
> > > > > +};
> > > > > +
> > > > >  /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
> > > > >   * address.  Each intermediate table provides the next L2_BITs of guest
> > > > >   * physical address space.  The number of levels vary based on host and
> > > > >   * guest configuration, making it efficient to build the final guest
> > > > >   * physical address by seeding the L1 offset and shifting and adding in
> > > > >   * each L2 offset as we recurse through them. */
> > > > > -static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > > > -                                 int level, void **lp, target_phys_addr_t addr)
> > > > > +static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
> > > > > +                                 void **lp, target_phys_addr_t addr,
> > > > > +                                 struct last_map *map)
> > > > >  {
> > > > >      int i;
> > > > >  
> > > > > @@ -1760,15 +1767,29 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > > >          addr <<= L2_BITS + TARGET_PAGE_BITS;
> > > > >          for (i = 0; i < L2_SIZE; ++i) {
> > > > >              if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
> > > > > -                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
> > > > > -                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
> > > > > +                target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
> > > > > +
> > > > > +                if (map->size &&
> > > > > +                    start_addr == map->start_addr + map->size &&
> > > > > +                    pd[i].phys_offset == map->phys_offset + map->size) {
> > > > > +
> > > > > +                    map->size += TARGET_PAGE_SIZE;
> > > > > +                    continue;
> > > > > +                } else if (map->size) {
> > > > > +                    client->set_memory(client, map->start_addr,
> > > > > +                                       map->size, map->phys_offset);
> > > > > +                }
> > > > > +
> > > > > +                map->start_addr = start_addr;
> > > > > +                map->size = TARGET_PAGE_SIZE;
> > > > > +                map->phys_offset = pd[i].phys_offset;
> > > > >              }
> > > > >          }
> > > > >      } else {
> > > > >          void **pp = *lp;
> > > > >          for (i = 0; i < L2_SIZE; ++i) {
> > > > >              phys_page_for_each_1(client, level - 1, pp + i,
> > > > > -                                 (addr << L2_BITS) | i);
> > > > > +                                 (addr << L2_BITS) | i, map);
> > > > >          }
> > > > >      }
> > > > >  }
> > > > > @@ -1776,9 +1797,14 @@ static void phys_page_for_each_1(CPUPhysMemoryClient *client,
> > > > >  static void phys_page_for_each(CPUPhysMemoryClient *client)
> > > > >  {
> > > > >      int i;
> > > > > +    struct last_map map = { 0 };
> > > > > +
> > > > 
> > > > Nit: just {} is enough.
> > > > 
> > > > >      for (i = 0; i < P_L1_SIZE; ++i) {
> > > > >          phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
> > > > > -                             l1_phys_map + i, i);
> > > > > +                             l1_phys_map + i, i, &map);
> > > > > +    }
> > > > > +    if (map.size) {
> > > > > +        client->set_memory(client, map.start_addr, map.size, map.phys_offset);
> > > > >      }
> > > > >  }
> > > > >  
> > > 
> > > 
> 
>

Patch

diff --git a/exec.c b/exec.c
index bbd5c86..a0678a4 100644
--- a/exec.c
+++ b/exec.c
@@ -1741,14 +1741,21 @@  static int cpu_notify_migration_log(int enable)
     return 0;
 }
 
+struct last_map {
+    target_phys_addr_t start_addr;
+    ram_addr_t size;
+    ram_addr_t phys_offset;
+};
+
 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
  * address.  Each intermediate table provides the next L2_BITs of guest
  * physical address space.  The number of levels vary based on host and
  * guest configuration, making it efficient to build the final guest
  * physical address by seeding the L1 offset and shifting and adding in
  * each L2 offset as we recurse through them. */
-static void phys_page_for_each_1(CPUPhysMemoryClient *client,
-                                 int level, void **lp, target_phys_addr_t addr)
+static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
+                                 void **lp, target_phys_addr_t addr,
+                                 struct last_map *map)
 {
     int i;
 
@@ -1760,15 +1767,29 @@  static void phys_page_for_each_1(CPUPhysMemoryClient *client,
         addr <<= L2_BITS + TARGET_PAGE_BITS;
         for (i = 0; i < L2_SIZE; ++i) {
             if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
-                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
-                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
+                target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
+
+                if (map->size &&
+                    start_addr == map->start_addr + map->size &&
+                    pd[i].phys_offset == map->phys_offset + map->size) {
+
+                    map->size += TARGET_PAGE_SIZE;
+                    continue;
+                } else if (map->size) {
+                    client->set_memory(client, map->start_addr,
+                                       map->size, map->phys_offset);
+                }
+
+                map->start_addr = start_addr;
+                map->size = TARGET_PAGE_SIZE;
+                map->phys_offset = pd[i].phys_offset;
             }
         }
     } else {
         void **pp = *lp;
         for (i = 0; i < L2_SIZE; ++i) {
             phys_page_for_each_1(client, level - 1, pp + i,
-                                 (addr << L2_BITS) | i);
+                                 (addr << L2_BITS) | i, map);
         }
     }
 }
@@ -1776,9 +1797,14 @@  static void phys_page_for_each_1(CPUPhysMemoryClient *client,
 static void phys_page_for_each(CPUPhysMemoryClient *client)
 {
     int i;
+    struct last_map map = { 0 };
+
     for (i = 0; i < P_L1_SIZE; ++i) {
         phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
-                             l1_phys_map + i, i);
+                             l1_phys_map + i, i, &map);
+    }
+    if (map.size) {
+        client->set_memory(client, map.start_addr, map.size, map.phys_offset);
     }
 }