
[RFC,v6,01/14] exec.c: Add new exclusive bitmap to ram_list

Message ID 1450082498-27109-2-git-send-email-a.rigo@virtualopensystems.com
State New

Commit Message

Alvise Rigo Dec. 14, 2015, 8:41 a.m. UTC
The purpose of this new bitmap is to flag the memory pages that are in
the middle of LL/SC operations (after an LL, before an SC) on a per-vCPU
basis.
For all these pages, the corresponding TLB entries will be generated
in such a way as to force the slow-path if at least one vCPU has its bit
cleared.
When the system starts, the whole memory is dirty (the entire bitmap is
set). A page that has been marked as exclusively-clean is restored to
dirty after the SC.

For each page we keep 8 bits to be shared among all the vCPUs available
in the system. In general, vCPU n is described by bit n % 8.
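
A minimal worked example of the mapping (the address and vCPU number are
made up; the macros are the ones added to include/exec/ram_addr.h below,
assuming 4 KiB target pages, i.e. TARGET_PAGE_BITS == 12):

    /* Guest-physical address 0x40002000 lies in page number 0x40002,
     * so its cell starts at bit 8 * 0x40002 of the exclusive bitmap.
     * vCPU 10 is tracked by bit EXCL_IDX(10) == 10 % 8 == 2 of that
     * cell, which it shares with vCPUs 2, 18, 26, ... */
    unsigned long bit = EXCL_BITMAP_GET_BIT_OFFSET(0x40002000) + EXCL_IDX(10);
    /* bit == 8 * 0x40002 + 2 */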

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 exec.c                  |  8 ++++--
 include/exec/memory.h   |  3 +-
 include/exec/ram_addr.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 3 deletions(-)

Comments

Alex Bennée Dec. 18, 2015, 1:18 p.m. UTC | #1
Alvise Rigo <a.rigo@virtualopensystems.com> writes:

> The purpose of this new bitmap is to flag the memory pages that are in
> the middle of LL/SC operations (after a LL, before a SC) on a per-vCPU
> basis.
> For all these pages, the corresponding TLB entries will be generated
> in such a way to force the slow-path if at least one vCPU has the bit
> not set.
> When the system starts, the whole memory is dirty (all the bitmap is
> set). A page, after being marked as exclusively-clean, will be
> restored as dirty after the SC.
>
> For each page we keep 8 bits to be shared among all the vCPUs available
> in the system. In general, vCPU n is described by bit n % 8.
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>  exec.c                  |  8 ++++--
>  include/exec/memory.h   |  3 +-
>  include/exec/ram_addr.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 84 insertions(+), 3 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 0bf0a6e..e66d232 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1548,11 +1548,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
>          int i;
>
>          /* ram_list.dirty_memory[] is protected by the iothread lock.  */
> -        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
> +        for (i = 0; i < DIRTY_MEMORY_EXCLUSIVE; i++) {
>              ram_list.dirty_memory[i] =
>                  bitmap_zero_extend(ram_list.dirty_memory[i],
>                                     old_ram_size, new_ram_size);
> -       }
> +        }
> +        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE] = bitmap_zero_extend(
> +                ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
> +                old_ram_size * EXCL_BITMAP_CELL_SZ,
> +                new_ram_size * EXCL_BITMAP_CELL_SZ);
>      }

I'm wondering if old/new_ram_size should be renamed to
old/new_ram_pages?

So as I understand it we have created a bitmap which has
EXCL_BITMAP_CELL_SZ bits per page.

>      cpu_physical_memory_set_dirty_range(new_block->offset,
>                                          new_block->used_length,
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 0f07159..2782c77 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -19,7 +19,8 @@
>  #define DIRTY_MEMORY_VGA       0
>  #define DIRTY_MEMORY_CODE      1
>  #define DIRTY_MEMORY_MIGRATION 2
> -#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
> +#define DIRTY_MEMORY_EXCLUSIVE 3
> +#define DIRTY_MEMORY_NUM       4        /* num of dirty bits */
>
>  #include <stdint.h>
>  #include <stdbool.h>
> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> index 7115154..b48af27 100644
> --- a/include/exec/ram_addr.h
> +++ b/include/exec/ram_addr.h
> @@ -21,6 +21,7 @@
>
>  #ifndef CONFIG_USER_ONLY
>  #include "hw/xen/xen.h"
> +#include "sysemu/sysemu.h"
>
>  struct RAMBlock {
>      struct rcu_head rcu;
> @@ -82,6 +83,13 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp);
>  #define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
>  #define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
>
> +/* Exclusive bitmap support. */
> +#define EXCL_BITMAP_CELL_SZ 8
> +#define EXCL_BITMAP_GET_BIT_OFFSET(addr) \
> +        (EXCL_BITMAP_CELL_SZ * (addr >> TARGET_PAGE_BITS))
> +#define EXCL_BITMAP_GET_BYTE_OFFSET(addr) (addr >> TARGET_PAGE_BITS)
> +#define EXCL_IDX(cpu) (cpu % EXCL_BITMAP_CELL_SZ)
> +

I think some of the explanation of what CELL_SZ means from your commit
message needs to go here.

>  static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
>                                                   ram_addr_t length,
>                                                   unsigned client)
> @@ -173,6 +181,11 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
>      if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
>          bitmap_set_atomic(d[DIRTY_MEMORY_CODE], page, end - page);
>      }
> +    if (unlikely(mask & (1 << DIRTY_MEMORY_EXCLUSIVE))) {
> +        bitmap_set_atomic(d[DIRTY_MEMORY_EXCLUSIVE],
> +                        page * EXCL_BITMAP_CELL_SZ,
> +                        (end - page) * EXCL_BITMAP_CELL_SZ);
> +    }
>      xen_modified_memory(start, length);
>  }
>
> @@ -288,5 +301,68 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
>  }
>
>  void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
> +
> +/* One cell for each page. The n-th bit of a cell describes all the i-th vCPUs
> + * such that (i % EXCL_BITMAP_CELL_SZ) == n.
> + * A bit set to zero ensures that all the vCPUs described by the bit have the
> + * EXCL_BIT set for the page. */
> +static inline void cpu_physical_memory_unset_excl(ram_addr_t addr, uint32_t cpu)
> +{
> +    set_bit_atomic(EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu),
> +            ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
> +}
> +
> +/* Return true if there is at least one cpu with the EXCL bit set for the page
> + * of @addr. */
> +static inline int cpu_physical_memory_atleast_one_excl(ram_addr_t addr)
> +{
> +    uint8_t *bitmap;
> +
> +    bitmap = (uint8_t *)(ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
> +
> +    /* This is safe even if smp_cpus < 8 since the unused bits are always 1. */
> +    return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] != UCHAR_MAX;
> +}

So I'm getting a little confused as to why we need EXCL_BITMAP_CELL_SZ
bits rather than just one bit shared amongst the CPUs. What's so special
about 8 if we could, say, have a 16 CPU system?

Ultimately, if any page has an exclusive operation going on, all vCPUs are
forced down the slow-path for that page's accesses? Where is the benefit in
splitting that bitfield across these vCPUs if the only test is "is at
least one vCPU doing an excl right now?".

> +
> +/* Return true if the @cpu has the bit set (not exclusive) for the page of
> + * @addr.  If @cpu == smp_cpus return true if at least one vCPU has the dirty
> + * bit set for that page. */
> +static inline int cpu_physical_memory_not_excl(ram_addr_t addr,
> +                                               unsigned long cpu)
> +{
> +    uint8_t *bitmap;
> +
> +    bitmap = (uint8_t *)ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE];
> +
> +    if (cpu == smp_cpus) {
> +        if (smp_cpus >= EXCL_BITMAP_CELL_SZ) {
> +            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)];
> +        } else {
> +            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] &
> +                                            ((1 << smp_cpus) - 1);
> +        }
> +    } else {
> +        return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] & (1 << EXCL_IDX(cpu));
> +    }
> +}
> +
> +/* Clean the dirty bit of @cpu (i.e. set the page as exclusive). If @cpu ==
> + * smp_cpus clean the dirty bit for all the vCPUs. */
> +static inline int cpu_physical_memory_set_excl(ram_addr_t addr, uint32_t cpu)
> +{
> +    if (cpu == smp_cpus) {
> +        int nr = (smp_cpus >= EXCL_BITMAP_CELL_SZ) ?
> +                            EXCL_BITMAP_CELL_SZ : smp_cpus;
> +
> +        return bitmap_test_and_clear_atomic(
> +                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
> +                        EXCL_BITMAP_GET_BIT_OFFSET(addr), nr);
> +    } else {
> +        return bitmap_test_and_clear_atomic(
> +                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
> +                        EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu), 1);
> +    }
> +}
> +
>  #endif
>  #endif

Maybe this will get clearer as I read on, but at the moment the bitfield
split confuses me.

--
Alex Bennée
Alvise Rigo Dec. 18, 2015, 1:47 p.m. UTC | #2
On Fri, Dec 18, 2015 at 2:18 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>
>> The purpose of this new bitmap is to flag the memory pages that are in
>> the middle of LL/SC operations (after a LL, before a SC) on a per-vCPU
>> basis.
>> For all these pages, the corresponding TLB entries will be generated
>> in such a way to force the slow-path if at least one vCPU has the bit
>> not set.
>> When the system starts, the whole memory is dirty (all the bitmap is
>> set). A page, after being marked as exclusively-clean, will be
>> restored as dirty after the SC.
>>
>> For each page we keep 8 bits to be shared among all the vCPUs available
>> in the system. In general, vCPU n is described by bit n % 8.
>>
>> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
>> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
>> ---
>>  exec.c                  |  8 ++++--
>>  include/exec/memory.h   |  3 +-
>>  include/exec/ram_addr.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 84 insertions(+), 3 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 0bf0a6e..e66d232 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1548,11 +1548,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
>>          int i;
>>
>>          /* ram_list.dirty_memory[] is protected by the iothread lock.  */
>> -        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
>> +        for (i = 0; i < DIRTY_MEMORY_EXCLUSIVE; i++) {
>>              ram_list.dirty_memory[i] =
>>                  bitmap_zero_extend(ram_list.dirty_memory[i],
>>                                     old_ram_size, new_ram_size);
>> -       }
>> +        }
>> +        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE] = bitmap_zero_extend(
>> +                ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
>> +                old_ram_size * EXCL_BITMAP_CELL_SZ,
>> +                new_ram_size * EXCL_BITMAP_CELL_SZ);
>>      }
>
> I'm wondering if old/new_ram_size should be renamed to
> old/new_ram_pages?

Yes, I think it makes more sense.

>
> So as I understand it we have created a bitmap which has
> EXCL_BITMAP_CELL_SZ bits per page.
>
>>      cpu_physical_memory_set_dirty_range(new_block->offset,
>>                                          new_block->used_length,
>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>> index 0f07159..2782c77 100644
>> --- a/include/exec/memory.h
>> +++ b/include/exec/memory.h
>> @@ -19,7 +19,8 @@
>>  #define DIRTY_MEMORY_VGA       0
>>  #define DIRTY_MEMORY_CODE      1
>>  #define DIRTY_MEMORY_MIGRATION 2
>> -#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
>> +#define DIRTY_MEMORY_EXCLUSIVE 3
>> +#define DIRTY_MEMORY_NUM       4        /* num of dirty bits */
>>
>>  #include <stdint.h>
>>  #include <stdbool.h>
>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
>> index 7115154..b48af27 100644
>> --- a/include/exec/ram_addr.h
>> +++ b/include/exec/ram_addr.h
>> @@ -21,6 +21,7 @@
>>
>>  #ifndef CONFIG_USER_ONLY
>>  #include "hw/xen/xen.h"
>> +#include "sysemu/sysemu.h"
>>
>>  struct RAMBlock {
>>      struct rcu_head rcu;
>> @@ -82,6 +83,13 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp);
>>  #define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
>>  #define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
>>
>> +/* Exclusive bitmap support. */
>> +#define EXCL_BITMAP_CELL_SZ 8
>> +#define EXCL_BITMAP_GET_BIT_OFFSET(addr) \
>> +        (EXCL_BITMAP_CELL_SZ * (addr >> TARGET_PAGE_BITS))
>> +#define EXCL_BITMAP_GET_BYTE_OFFSET(addr) (addr >> TARGET_PAGE_BITS)
>> +#define EXCL_IDX(cpu) (cpu % EXCL_BITMAP_CELL_SZ)
>> +
>
> I think some of the explanation of what CELL_SZ means from your commit
> message needs to go here.

OK.

>
>>  static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
>>                                                   ram_addr_t length,
>>                                                   unsigned client)
>> @@ -173,6 +181,11 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
>>      if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
>>          bitmap_set_atomic(d[DIRTY_MEMORY_CODE], page, end - page);
>>      }
>> +    if (unlikely(mask & (1 << DIRTY_MEMORY_EXCLUSIVE))) {
>> +        bitmap_set_atomic(d[DIRTY_MEMORY_EXCLUSIVE],
>> +                        page * EXCL_BITMAP_CELL_SZ,
>> +                        (end - page) * EXCL_BITMAP_CELL_SZ);
>> +    }
>>      xen_modified_memory(start, length);
>>  }
>>
>> @@ -288,5 +301,68 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
>>  }
>>
>>  void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
>> +
>> +/* One cell for each page. The n-th bit of a cell describes all the i-th vCPUs
>> + * such that (i % EXCL_BITMAP_CELL_SZ) == n.
>> + * A bit set to zero ensures that all the vCPUs described by the bit have the
>> + * EXCL_BIT set for the page. */
>> +static inline void cpu_physical_memory_unset_excl(ram_addr_t addr, uint32_t cpu)
>> +{
>> +    set_bit_atomic(EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu),
>> +            ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
>> +}
>> +
>> +/* Return true if there is at least one cpu with the EXCL bit set for the page
>> + * of @addr. */
>> +static inline int cpu_physical_memory_atleast_one_excl(ram_addr_t addr)
>> +{
>> +    uint8_t *bitmap;
>> +
>> +    bitmap = (uint8_t *)(ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
>> +
>> +    /* This is safe even if smp_cpus < 8 since the unused bits are always 1. */
>> +    return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] != UCHAR_MAX;
>> +}
>
> So I'm getting a little confused as to why we need EXCL_BITMAP_CELL_SZ
> bits rather than just one bit shared amongst the CPUs. What's so special
> about 8 if we could, say, have a 16 CPU system?

Here the ideal solution would be to have one bit per CPU, but since
this is too expensive, we group them in clusters of 8.
Of course the value 8 could be changed, but I would not go much higher
since DIRTY_MEMORY_EXCLUSIVE is already 8 times the size of the other
bitmaps.
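
For scale, assuming 4 KiB target pages:

    standard dirty bitmaps : 1 bit  per page -> 128 KiB per 4 GiB of RAM
    exclusive bitmap       : 8 bits per page ->   1 MiB per 4 GiB of RAM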

>
> Ultimately, if any page has an exclusive operation going on, all vCPUs are
> forced down the slow-path for that page's accesses? Where is the benefit in
> splitting that bitfield across these vCPUs if the only test is "is at
> least one vCPU doing an excl right now?".

Clustering the CPUs in this way, we make the flushing procedure a bit
more efficient, since we perform TLB flushes at a cluster granularity.
In practice, when handling an LL operation we request the flush only
for those clusters whose bit is "dirty".

Regards,
alvise

>
>> +
>> +/* Return true if the @cpu has the bit set (not exclusive) for the page of
>> + * @addr.  If @cpu == smp_cpus return true if at least one vCPU has the dirty
>> + * bit set for that page. */
>> +static inline int cpu_physical_memory_not_excl(ram_addr_t addr,
>> +                                               unsigned long cpu)
>> +{
>> +    uint8_t *bitmap;
>> +
>> +    bitmap = (uint8_t *)ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE];
>> +
>> +    if (cpu == smp_cpus) {
>> +        if (smp_cpus >= EXCL_BITMAP_CELL_SZ) {
>> +            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)];
>> +        } else {
>> +            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] &
>> +                                            ((1 << smp_cpus) - 1);
>> +        }
>> +    } else {
>> +        return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] & (1 << EXCL_IDX(cpu));
>> +    }
>> +}
>> +
>> +/* Clean the dirty bit of @cpu (i.e. set the page as exclusive). If @cpu ==
>> + * smp_cpus clean the dirty bit for all the vCPUs. */
>> +static inline int cpu_physical_memory_set_excl(ram_addr_t addr, uint32_t cpu)
>> +{
>> +    if (cpu == smp_cpus) {
>> +        int nr = (smp_cpus >= EXCL_BITMAP_CELL_SZ) ?
>> +                            EXCL_BITMAP_CELL_SZ : smp_cpus;
>> +
>> +        return bitmap_test_and_clear_atomic(
>> +                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
>> +                        EXCL_BITMAP_GET_BIT_OFFSET(addr), nr);
>> +    } else {
>> +        return bitmap_test_and_clear_atomic(
>> +                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
>> +                        EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu), 1);
>> +    }
>> +}
>> +
>>  #endif
>>  #endif
>
> Maybe this will get clearer as I read on but at the moment the bitfield
> split confuses me.
>
> --
> Alex Bennée

Patch

diff --git a/exec.c b/exec.c
index 0bf0a6e..e66d232 100644
--- a/exec.c
+++ b/exec.c
@@ -1548,11 +1548,15 @@  static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
         int i;
 
         /* ram_list.dirty_memory[] is protected by the iothread lock.  */
-        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+        for (i = 0; i < DIRTY_MEMORY_EXCLUSIVE; i++) {
             ram_list.dirty_memory[i] =
                 bitmap_zero_extend(ram_list.dirty_memory[i],
                                    old_ram_size, new_ram_size);
-       }
+        }
+        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE] = bitmap_zero_extend(
+                ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
+                old_ram_size * EXCL_BITMAP_CELL_SZ,
+                new_ram_size * EXCL_BITMAP_CELL_SZ);
     }
     cpu_physical_memory_set_dirty_range(new_block->offset,
                                         new_block->used_length,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 0f07159..2782c77 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -19,7 +19,8 @@ 
 #define DIRTY_MEMORY_VGA       0
 #define DIRTY_MEMORY_CODE      1
 #define DIRTY_MEMORY_MIGRATION 2
-#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
+#define DIRTY_MEMORY_EXCLUSIVE 3
+#define DIRTY_MEMORY_NUM       4        /* num of dirty bits */
 
 #include <stdint.h>
 #include <stdbool.h>
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 7115154..b48af27 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -21,6 +21,7 @@ 
 
 #ifndef CONFIG_USER_ONLY
 #include "hw/xen/xen.h"
+#include "sysemu/sysemu.h"
 
 struct RAMBlock {
     struct rcu_head rcu;
@@ -82,6 +83,13 @@  int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp);
 #define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
 #define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
 
+/* Exclusive bitmap support. */
+#define EXCL_BITMAP_CELL_SZ 8
+#define EXCL_BITMAP_GET_BIT_OFFSET(addr) \
+        (EXCL_BITMAP_CELL_SZ * (addr >> TARGET_PAGE_BITS))
+#define EXCL_BITMAP_GET_BYTE_OFFSET(addr) (addr >> TARGET_PAGE_BITS)
+#define EXCL_IDX(cpu) (cpu % EXCL_BITMAP_CELL_SZ)
+
 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                  ram_addr_t length,
                                                  unsigned client)
@@ -173,6 +181,11 @@  static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
     if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
         bitmap_set_atomic(d[DIRTY_MEMORY_CODE], page, end - page);
     }
+    if (unlikely(mask & (1 << DIRTY_MEMORY_EXCLUSIVE))) {
+        bitmap_set_atomic(d[DIRTY_MEMORY_EXCLUSIVE],
+                        page * EXCL_BITMAP_CELL_SZ,
+                        (end - page) * EXCL_BITMAP_CELL_SZ);
+    }
     xen_modified_memory(start, length);
 }
 
@@ -288,5 +301,68 @@  uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
 }
 
 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
+
+/* One cell for each page. The n-th bit of a cell describes all the i-th vCPUs
+ * such that (i % EXCL_BITMAP_CELL_SZ) == n.
+ * A bit set to zero ensures that all the vCPUs described by the bit have the
+ * EXCL_BIT set for the page. */
+static inline void cpu_physical_memory_unset_excl(ram_addr_t addr, uint32_t cpu)
+{
+    set_bit_atomic(EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu),
+            ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
+}
+
+/* Return true if there is at least one cpu with the EXCL bit set for the page
+ * of @addr. */
+static inline int cpu_physical_memory_atleast_one_excl(ram_addr_t addr)
+{
+    uint8_t *bitmap;
+
+    bitmap = (uint8_t *)(ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
+
+    /* This is safe even if smp_cpus < 8 since the unused bits are always 1. */
+    return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] != UCHAR_MAX;
+}
+
+/* Return true if the @cpu has the bit set (not exclusive) for the page of
+ * @addr.  If @cpu == smp_cpus return true if at least one vCPU has the dirty
+ * bit set for that page. */
+static inline int cpu_physical_memory_not_excl(ram_addr_t addr,
+                                               unsigned long cpu)
+{
+    uint8_t *bitmap;
+
+    bitmap = (uint8_t *)ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE];
+
+    if (cpu == smp_cpus) {
+        if (smp_cpus >= EXCL_BITMAP_CELL_SZ) {
+            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)];
+        } else {
+            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] &
+                                            ((1 << smp_cpus) - 1);
+        }
+    } else {
+        return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] & (1 << EXCL_IDX(cpu));
+    }
+}
+
+/* Clean the dirty bit of @cpu (i.e. set the page as exclusive). If @cpu ==
+ * smp_cpus clean the dirty bit for all the vCPUs. */
+static inline int cpu_physical_memory_set_excl(ram_addr_t addr, uint32_t cpu)
+{
+    if (cpu == smp_cpus) {
+        int nr = (smp_cpus >= EXCL_BITMAP_CELL_SZ) ?
+                            EXCL_BITMAP_CELL_SZ : smp_cpus;
+
+        return bitmap_test_and_clear_atomic(
+                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
+                        EXCL_BITMAP_GET_BIT_OFFSET(addr), nr);
+    } else {
+        return bitmap_test_and_clear_atomic(
+                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
+                        EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu), 1);
+    }
+}
+
 #endif
 #endif