diff mbox

[v2,3/6] Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap.

Message ID 1270515084-24120-4-git-send-email-tamura.yoshiaki@lab.ntt.co.jp
State New
Headers show

Commit Message

Yoshiaki Tamura April 6, 2010, 12:51 a.m. UTC
Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
Signed-off-by: OHMURA Kei <ohmura.kei@lab.ntt.co.jp>
---
 cpu-all.h |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 67 insertions(+), 14 deletions(-)

Comments

Avi Kivity April 12, 2010, 8:10 a.m. UTC | #1
On 04/06/2010 03:51 AM, Yoshiaki Tamura wrote:
> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
> Signed-off-by: OHMURA Kei<ohmura.kei@lab.ntt.co.jp>
> ---
>
>   static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
>   {
> -    return phys_ram_dirty[addr>>  TARGET_PAGE_BITS];
> +     unsigned long mask;
> +     int index = (addr>>  TARGET_PAGE_BITS) / HOST_LONG_BITS;
> +     int offset = (addr>>  TARGET_PAGE_BITS)&  (HOST_LONG_BITS - 1);
> +     int ret = 0;
> +
> +     mask = 1UL<<  offset;
> +     if (phys_ram_dirty[MASTER_DIRTY_FLAG][index]&  mask)
> +         return 0xff;
> +     if (phys_ram_dirty[VGA_DIRTY_FLAG][index]&  mask)
> +         ret |= VGA_DIRTY_FLAG;
> +     if (phys_ram_dirty[CODE_DIRTY_FLAG][index]&  mask)
> +         ret |=  CODE_DIRTY_FLAG;
> +     if (phys_ram_dirty[MIGRATION_DIRTY_FLAG][index]&  mask)
> +         ret |= MIGRATION_DIRTY_FLAG;
> +
> +     return ret;
>   }
>    

Again, nicer as a loop.

I think if you define both *_DIRTY_FLAG and *_DIRTY_IDX the transition 
patches can be nicer.

Coding style: use braces after if (), even for single statements.

>
>   static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
>                                                   int dirty_flags)
>   {
> -    return phys_ram_dirty[addr>>  TARGET_PAGE_BITS]&  dirty_flags;
> +    unsigned long mask;
> +    int index = (addr>>  TARGET_PAGE_BITS) / HOST_LONG_BITS;
> +    int offset = (addr>>  TARGET_PAGE_BITS)&  (HOST_LONG_BITS - 1);
> +
> +    mask = 1UL<<  offset;
> +    return (phys_ram_dirty[MASTER_DIRTY_FLAG][index]&  mask) ||
> +        (phys_ram_dirty[dirty_flags][index]&  mask);
>   }
>    

A helper that also accepts the DIRTY_IDX index can increase reuse.

>
>   static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
>   {
> -    phys_ram_dirty[addr>>  TARGET_PAGE_BITS] = 0xff;
> +    unsigned long mask;
> +    int index = (addr>>  TARGET_PAGE_BITS) / HOST_LONG_BITS;
> +    int offset = (addr>>  TARGET_PAGE_BITS)&  (HOST_LONG_BITS - 1);
> +
> +    mask = 1UL<<  offset;
> +    phys_ram_dirty[MASTER_DIRTY_FLAG][index] |= mask;
>    

>
> -static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
> -                                                      int dirty_flags)
> +static inline void cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
> +                                                       int dirty_flags)
>   {
> -    return phys_ram_dirty[addr>>  TARGET_PAGE_BITS] |= dirty_flags;
> +    unsigned long mask;
> +    int index = (addr>>  TARGET_PAGE_BITS) / HOST_LONG_BITS;
> +    int offset = (addr>>  TARGET_PAGE_BITS)&  (HOST_LONG_BITS - 1);
> +
> +    mask = 1UL<<  offset;
> +    if (dirty_flags&  VGA_DIRTY_FLAG)
> +        phys_ram_dirty[VGA_DIRTY_FLAG][index] |= mask;
> +    if (dirty_flags&  CODE_DIRTY_FLAG)
> +        phys_ram_dirty[CODE_DIRTY_FLAG][index] |= mask;
> +    if (dirty_flags&  MIGRATION_DIRTY_FLAG)
> +        phys_ram_dirty[MIGRATION_DIRTY_FLAG][index] |= mask;
>   }
>    

Is it necessary to update migration and vga bitmaps?

We can simply update the master bitmap, and update the migration and vga 
bitmaps only when they need it.  That can be done in a different patch.

Note that we should only allocate the migration and vga bitmaps when 
migration or vga is active.
Yoshiaki Tamura April 12, 2010, 10:58 a.m. UTC | #2
Avi Kivity wrote:
> On 04/06/2010 03:51 AM, Yoshiaki Tamura wrote:
>> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
>> Signed-off-by: OHMURA Kei<ohmura.kei@lab.ntt.co.jp>
>> ---
>>
>> static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
>> {
>> - return phys_ram_dirty[addr>> TARGET_PAGE_BITS];
>> + unsigned long mask;
>> + int index = (addr>> TARGET_PAGE_BITS) / HOST_LONG_BITS;
>> + int offset = (addr>> TARGET_PAGE_BITS)& (HOST_LONG_BITS - 1);
>> + int ret = 0;
>> +
>> + mask = 1UL<< offset;
>> + if (phys_ram_dirty[MASTER_DIRTY_FLAG][index]& mask)
>> + return 0xff;
>> + if (phys_ram_dirty[VGA_DIRTY_FLAG][index]& mask)
>> + ret |= VGA_DIRTY_FLAG;
>> + if (phys_ram_dirty[CODE_DIRTY_FLAG][index]& mask)
>> + ret |= CODE_DIRTY_FLAG;
>> + if (phys_ram_dirty[MIGRATION_DIRTY_FLAG][index]& mask)
>> + ret |= MIGRATION_DIRTY_FLAG;
>> +
>> + return ret;
>> }
>
> Again, nicer as a loop.
> I think if you define both *_DIRTY_FLAG and *_DIRTY_IDX the transition
> patches can be nicer.

Got your suggestion from the other message.
I agree.

> Coding style: use braces after if (), even for single statements.

Thanks for pointing out.

>> static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
>> int dirty_flags)
>> {
>> - return phys_ram_dirty[addr>> TARGET_PAGE_BITS]& dirty_flags;
>> + unsigned long mask;
>> + int index = (addr>> TARGET_PAGE_BITS) / HOST_LONG_BITS;
>> + int offset = (addr>> TARGET_PAGE_BITS)& (HOST_LONG_BITS - 1);
>> +
>> + mask = 1UL<< offset;
>> + return (phys_ram_dirty[MASTER_DIRTY_FLAG][index]& mask) ||
>> + (phys_ram_dirty[dirty_flags][index]& mask);
>> }
>
> A helper that also accepts the DIRTY_IDX index can increase reuse.

I would ffsl to map DIRTY_FLAG to DIRTY_IDX.

>> static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
>> {
>> - phys_ram_dirty[addr>> TARGET_PAGE_BITS] = 0xff;
>> + unsigned long mask;
>> + int index = (addr>> TARGET_PAGE_BITS) / HOST_LONG_BITS;
>> + int offset = (addr>> TARGET_PAGE_BITS)& (HOST_LONG_BITS - 1);
>> +
>> + mask = 1UL<< offset;
>> + phys_ram_dirty[MASTER_DIRTY_FLAG][index] |= mask;
>
>>
>> -static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
>> - int dirty_flags)
>> +static inline void cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
>> + int dirty_flags)
>> {
>> - return phys_ram_dirty[addr>> TARGET_PAGE_BITS] |= dirty_flags;
>> + unsigned long mask;
>> + int index = (addr>> TARGET_PAGE_BITS) / HOST_LONG_BITS;
>> + int offset = (addr>> TARGET_PAGE_BITS)& (HOST_LONG_BITS - 1);
>> +
>> + mask = 1UL<< offset;
>> + if (dirty_flags& VGA_DIRTY_FLAG)
>> + phys_ram_dirty[VGA_DIRTY_FLAG][index] |= mask;
>> + if (dirty_flags& CODE_DIRTY_FLAG)
>> + phys_ram_dirty[CODE_DIRTY_FLAG][index] |= mask;
>> + if (dirty_flags& MIGRATION_DIRTY_FLAG)
>> + phys_ram_dirty[MIGRATION_DIRTY_FLAG][index] |= mask;
>> }
>
> Is it necessary to update migration and vga bitmaps?
>
> We can simply update the master bitmap, and update the migration and vga
> bitmaps only when they need it. That can be done in a different patch.

Let me explain the role of the master bitmap here.

In the previous discussion, the master bitmap represented at least one dirty 
type is actually dirty.  While implementing this approach, I though there is one 
downside that upon resetting the bitmap, it needs to check all dirty types 
whether they are already cleared to unset the master bitmap, and this might hurt 
the performance in general.

In this patch, master bitmap represents all types are dirty, similar to existing 
0xff.  With this approach, resetting the master bitmap can be done without 
checking the other types.  set_dirty_flags is actually taking the burden in this 
case though.  Anyway, IIUC somebody would be unhappy depending on the role of 
the master bitmap.

> Note that we should only allocate the migration and vga bitmaps when
> migration or vga is active.

Right.  May I do it in a different patch?
I think this is about optimization.  In this patch, I focused on not breaking 
the existing function.
Avi Kivity April 12, 2010, 11:09 a.m. UTC | #3
On 04/12/2010 01:58 PM, Yoshiaki Tamura wrote:
>> Is it necessary to update migration and vga bitmaps?
>>
>> We can simply update the master bitmap, and update the migration and vga
>> bitmaps only when they need it. That can be done in a different patch.
>
>
> Let me explain the role of the master bitmap here.
>
> In the previous discussion, the master bitmap represented at least one 
> dirty type is actually dirty.  While implementing this approach, I 
> though there is one downside that upon resetting the bitmap, it needs 
> to check all dirty types whether they are already cleared to unset the 
> master bitmap, and this might hurt the performance in general.

The way I see it, the master bitmap is a buffer between the guest and 
all the other client bitmaps (migration and vga).  So, a bit can be set 
in vga and cleared in master.  Let's look at the following scenario:

1. guest touches address 0xa0000 (page 0xa0)
2. qemu updates master bitmap bit 0xa0
3. vga timer fires
4. vga syncs the dirty bitmap for addresses 0xa0000-0xc0000
4.1 vga_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
4.2 migration_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
4.3 master_bitmap[0xa0-0xc0] = 0
5. vga draws based on vga_bitmap

the nice thing is that step 4.2 can be omitted if migration is not active.

so, client bitmaps are always updated when the client requests them, 
master bitmap is only a buffer.

>
> In this patch, master bitmap represents all types are dirty, similar 
> to existing 0xff.  With this approach, resetting the master bitmap can 
> be done without checking the other types.  set_dirty_flags is actually 
> taking the burden in this case though.  Anyway, IIUC somebody would be 
> unhappy depending on the role of the master bitmap.

Yes, the problem is that set_dirty_flags is the common case and also 
uses random access, we'd want to make it touch only a single bit.  
client access is rare, and also sequential, and therefore efficient.

>
>> Note that we should only allocate the migration and vga bitmaps when
>> migration or vga is active.
>
> Right.  May I do it in a different patch?

Sure, more patches is usually better.

> I think this is about optimization.  In this patch, I focused on not 
> breaking the existing function.

Yes, that's the best approach.  But we do need to see how all the 
incremental steps end up.
Yoshiaki Tamura April 13, 2010, 8:01 a.m. UTC | #4
Avi Kivity wrote:
> On 04/12/2010 01:58 PM, Yoshiaki Tamura wrote:
>>> Is it necessary to update migration and vga bitmaps?
>>>
>>> We can simply update the master bitmap, and update the migration and vga
>>> bitmaps only when they need it. That can be done in a different patch.
>>
>>
>> Let me explain the role of the master bitmap here.
>>
>> In the previous discussion, the master bitmap represented at least one
>> dirty type is actually dirty. While implementing this approach, I
>> though there is one downside that upon resetting the bitmap, it needs
>> to check all dirty types whether they are already cleared to unset the
>> master bitmap, and this might hurt the performance in general.
>
> The way I see it, the master bitmap is a buffer between the guest and
> all the other client bitmaps (migration and vga). So, a bit can be set
> in vga and cleared in master. Let's look at the following scenario:
>
> 1. guest touches address 0xa0000 (page 0xa0)
> 2. qemu updates master bitmap bit 0xa0
> 3. vga timer fires
> 4. vga syncs the dirty bitmap for addresses 0xa0000-0xc0000
> 4.1 vga_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
> 4.2 migration_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
> 4.3 master_bitmap[0xa0-0xc0] = 0
> 5. vga draws based on vga_bitmap
>
> the nice thing is that step 4.2 can be omitted if migration is not active.
>
> so, client bitmaps are always updated when the client requests them,
> master bitmap is only a buffer.

Thanks.  I think I'm finally getting your point.
At 4.2 and 4.3, is syncing to client necessary?
Simply doing OR at get_dirty like in this patch not enough?

>> In this patch, master bitmap represents all types are dirty, similar
>> to existing 0xff. With this approach, resetting the master bitmap can
>> be done without checking the other types. set_dirty_flags is actually
>> taking the burden in this case though. Anyway, IIUC somebody would be
>> unhappy depending on the role of the master bitmap.
>
> Yes, the problem is that set_dirty_flags is the common case and also
> uses random access, we'd want to make it touch only a single bit. client
> access is rare, and also sequential, and therefore efficient.

I see.
So set_dirty_flags would be like,

1. set master first regard less of dirty type.
2. if dirty type was CODE, set the client.

>>> Note that we should only allocate the migration and vga bitmaps when
>>> migration or vga is active.
>>
>> Right. May I do it in a different patch?
>
> Sure, more patches is usually better.
>
>> I think this is about optimization. In this patch, I focused on not
>> breaking the existing function.
>
> Yes, that's the best approach. But we do need to see how all the
> incremental steps end up.

I totally agree.  Thanks for helping.

In summary, I'll prepare two patch sets.

1. v3 of this patch set.
- Change FLAGS value to (1,2,4,8), and add IDX (0,1,2,3)
- Use ffsl to convert FLAGS to IDX.
- Add help function which takes IDX.
- Change the behavior of set_dirty_flags as above.
- Change dirty bitmap access to a loop.
- Add brace after if ()
- Move some macros to qemu-common.h.

2. Allocate vga and migration dirty bitmap dynamically.

Please modify or add items if I were missing.
Avi Kivity April 13, 2010, 9:20 a.m. UTC | #5
On 04/13/2010 11:01 AM, Yoshiaki Tamura wrote:
> Avi Kivity wrote:
>> On 04/12/2010 01:58 PM, Yoshiaki Tamura wrote:
>>>> Is it necessary to update migration and vga bitmaps?
>>>>
>>>> We can simply update the master bitmap, and update the migration 
>>>> and vga
>>>> bitmaps only when they need it. That can be done in a different patch.
>>>
>>>
>>> Let me explain the role of the master bitmap here.
>>>
>>> In the previous discussion, the master bitmap represented at least one
>>> dirty type is actually dirty. While implementing this approach, I
>>> though there is one downside that upon resetting the bitmap, it needs
>>> to check all dirty types whether they are already cleared to unset the
>>> master bitmap, and this might hurt the performance in general.
>>
>> The way I see it, the master bitmap is a buffer between the guest and
>> all the other client bitmaps (migration and vga). So, a bit can be set
>> in vga and cleared in master. Let's look at the following scenario:
>>
>> 1. guest touches address 0xa0000 (page 0xa0)
>> 2. qemu updates master bitmap bit 0xa0
>> 3. vga timer fires
>> 4. vga syncs the dirty bitmap for addresses 0xa0000-0xc0000
>> 4.1 vga_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
>> 4.2 migration_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
>> 4.3 master_bitmap[0xa0-0xc0] = 0
>> 5. vga draws based on vga_bitmap
>>
>> the nice thing is that step 4.2 can be omitted if migration is not 
>> active.
>>
>> so, client bitmaps are always updated when the client requests them,
>> master bitmap is only a buffer.
>
> Thanks.  I think I'm finally getting your point.
> At 4.2 and 4.3, is syncing to client necessary?

Yes.  We want to clear the master bitmap, otherwise on the next 
iteration we will get dirty bits that may be spurious.  But if we clear 
the dirty bitmap, we must copy it to all client bitmaps, not just vga, 
otherwise we lose data.

> Simply doing OR at get_dirty like in this patch not enough?

If you don't clear the master bitmap, then you will get the same bits at 
the next iteration.

>
>>> In this patch, master bitmap represents all types are dirty, similar
>>> to existing 0xff. With this approach, resetting the master bitmap can
>>> be done without checking the other types. set_dirty_flags is actually
>>> taking the burden in this case though. Anyway, IIUC somebody would be
>>> unhappy depending on the role of the master bitmap.
>>
>> Yes, the problem is that set_dirty_flags is the common case and also
>> uses random access, we'd want to make it touch only a single bit. client
>> access is rare, and also sequential, and therefore efficient.
>
> I see.
> So set_dirty_flags would be like,
>
> 1. set master first regard less of dirty type.
> 2. if dirty type was CODE, set the client.

There really should be two APIs, one for general dirtyness and one for 
code dirty.  As Anthony said, the code dirty bitmap is completely 
separate from the other uses.

Consider starting by separating the two uses, it may clear up the code 
and make things easier later on.

>
> In summary, I'll prepare two patch sets.
>
> 1. v3 of this patch set.
> - Change FLAGS value to (1,2,4,8), and add IDX (0,1,2,3)
> - Use ffsl to convert FLAGS to IDX.
> - Add help function which takes IDX.
> - Change the behavior of set_dirty_flags as above.
> - Change dirty bitmap access to a loop.
> - Add brace after if ()
> - Move some macros to qemu-common.h.
>
> 2. Allocate vga and migration dirty bitmap dynamically.
>
> Please modify or add items if I were missing.

I would add separation of CODE and the other dirty bitmaps.
Yoshiaki Tamura April 13, 2010, 10:49 a.m. UTC | #6
Avi Kivity wrote:
> On 04/13/2010 11:01 AM, Yoshiaki Tamura wrote:
>> Avi Kivity wrote:
>>> On 04/12/2010 01:58 PM, Yoshiaki Tamura wrote:
>>>>> Is it necessary to update migration and vga bitmaps?
>>>>>
>>>>> We can simply update the master bitmap, and update the migration
>>>>> and vga
>>>>> bitmaps only when they need it. That can be done in a different patch.
>>>>
>>>>
>>>> Let me explain the role of the master bitmap here.
>>>>
>>>> In the previous discussion, the master bitmap represented at least one
>>>> dirty type is actually dirty. While implementing this approach, I
>>>> though there is one downside that upon resetting the bitmap, it needs
>>>> to check all dirty types whether they are already cleared to unset the
>>>> master bitmap, and this might hurt the performance in general.
>>>
>>> The way I see it, the master bitmap is a buffer between the guest and
>>> all the other client bitmaps (migration and vga). So, a bit can be set
>>> in vga and cleared in master. Let's look at the following scenario:
>>>
>>> 1. guest touches address 0xa0000 (page 0xa0)
>>> 2. qemu updates master bitmap bit 0xa0
>>> 3. vga timer fires
>>> 4. vga syncs the dirty bitmap for addresses 0xa0000-0xc0000
>>> 4.1 vga_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
>>> 4.2 migration_bitmap[0xa0-0xc0] |= master_bitmap[0xa0-0xc0]
>>> 4.3 master_bitmap[0xa0-0xc0] = 0
>>> 5. vga draws based on vga_bitmap
>>>
>>> the nice thing is that step 4.2 can be omitted if migration is not
>>> active.
>>>
>>> so, client bitmaps are always updated when the client requests them,
>>> master bitmap is only a buffer.
>>
>> Thanks. I think I'm finally getting your point.
>> At 4.2 and 4.3, is syncing to client necessary?
>
> Yes. We want to clear the master bitmap, otherwise on the next iteration
> we will get dirty bits that may be spurious. But if we clear the dirty
> bitmap, we must copy it to all client bitmaps, not just vga, otherwise
> we lose data.
>
>> Simply doing OR at get_dirty like in this patch not enough?
>
> If you don't clear the master bitmap, then you will get the same bits at
> the next iteration.

OK.
Master is just a cache/buffer, and upon get_dirty, we copy it to client bitmaps 
with for loop, clear the master and finally check actual client bitmap.  If 
we're going to allocate client bitmaps dynamically, we need to check whether it 
isn't null too.

>>>> In this patch, master bitmap represents all types are dirty, similar
>>>> to existing 0xff. With this approach, resetting the master bitmap can
>>>> be done without checking the other types. set_dirty_flags is actually
>>>> taking the burden in this case though. Anyway, IIUC somebody would be
>>>> unhappy depending on the role of the master bitmap.
>>>
>>> Yes, the problem is that set_dirty_flags is the common case and also
>>> uses random access, we'd want to make it touch only a single bit. client
>>> access is rare, and also sequential, and therefore efficient.
>>
>> I see.
>> So set_dirty_flags would be like,
>>
>> 1. set master first regard less of dirty type.
>> 2. if dirty type was CODE, set the client.
>
> There really should be two APIs, one for general dirtyness and one for
> code dirty. As Anthony said, the code dirty bitmap is completely
> separate from the other uses.
>
> Consider starting by separating the two uses, it may clear up the code
> and make things easier later on.

That really make sense.  A little bit hesitant because the code might look a bit 
duplicated.

>> In summary, I'll prepare two patch sets.
>>
>> 1. v3 of this patch set.
>> - Change FLAGS value to (1,2,4,8), and add IDX (0,1,2,3)
>> - Use ffsl to convert FLAGS to IDX.
>> - Add help function which takes IDX.
>> - Change the behavior of set_dirty_flags as above.
>> - Change dirty bitmap access to a loop.
>> - Add brace after if ()
>> - Move some macros to qemu-common.h.
>>
>> 2. Allocate vga and migration dirty bitmap dynamically.
>>
>> Please modify or add items if I were missing.
>
> I would add separation of CODE and the other dirty bitmaps.

I'm not sure I should starting separating first or later at this moment, but 
will consider it.
diff mbox

Patch

diff --git a/cpu-all.h b/cpu-all.h
index c409fad..0f5bfbe 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -891,43 +891,96 @@  extern unsigned long *phys_ram_dirty[NUM_DIRTY_FLAGS];
 /* read dirty bit (return 0 or 1) */
 static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
 {
-    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+    unsigned long mask;
+    int index = (addr >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+    int offset = (addr >> TARGET_PAGE_BITS) & (HOST_LONG_BITS - 1);
+ 
+    mask = 1UL << offset;
+    return (phys_ram_dirty[MASTER_DIRTY_FLAG][index] & mask) == mask;
 }
 
 static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
 {
-    return phys_ram_dirty[addr >> TARGET_PAGE_BITS];
+     unsigned long mask;
+     int index = (addr >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+     int offset = (addr >> TARGET_PAGE_BITS) & (HOST_LONG_BITS - 1);
+     int ret = 0;
+ 
+     mask = 1UL << offset;
+     if (phys_ram_dirty[MASTER_DIRTY_FLAG][index] & mask)
+         return 0xff;
+     if (phys_ram_dirty[VGA_DIRTY_FLAG][index] & mask)
+         ret |= VGA_DIRTY_FLAG;
+     if (phys_ram_dirty[CODE_DIRTY_FLAG][index] & mask)
+         ret |=  CODE_DIRTY_FLAG;
+     if (phys_ram_dirty[MIGRATION_DIRTY_FLAG][index] & mask)
+         ret |= MIGRATION_DIRTY_FLAG;
+ 
+     return ret;
 }
 
 static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
                                                 int dirty_flags)
 {
-    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+    unsigned long mask;
+    int index = (addr >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+    int offset = (addr >> TARGET_PAGE_BITS) & (HOST_LONG_BITS - 1);
+
+    mask = 1UL << offset;
+    return (phys_ram_dirty[MASTER_DIRTY_FLAG][index] & mask) ||
+        (phys_ram_dirty[dirty_flags][index] & mask);
 }
 
 static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
 {
-    phys_ram_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+    unsigned long mask;
+    int index = (addr >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+    int offset = (addr >> TARGET_PAGE_BITS) & (HOST_LONG_BITS - 1);
+
+    mask = 1UL << offset;
+    phys_ram_dirty[MASTER_DIRTY_FLAG][index] |= mask;
+}
+
+static inline void cpu_physical_memory_set_dirty_range(ram_addr_t addr,
+                                                       unsigned long mask)
+{
+    int index = (addr >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+
+    phys_ram_dirty[MASTER_DIRTY_FLAG][index] |= mask;
 }
 
-static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
-                                                      int dirty_flags)
+static inline void cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
+                                                       int dirty_flags)
 {
-    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+    unsigned long mask;
+    int index = (addr >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+    int offset = (addr >> TARGET_PAGE_BITS) & (HOST_LONG_BITS - 1);
+
+    mask = 1UL << offset;
+    if (dirty_flags & VGA_DIRTY_FLAG) 
+        phys_ram_dirty[VGA_DIRTY_FLAG][index] |= mask;
+    if (dirty_flags & CODE_DIRTY_FLAG) 
+        phys_ram_dirty[CODE_DIRTY_FLAG][index] |= mask;
+    if (dirty_flags & MIGRATION_DIRTY_FLAG) 
+        phys_ram_dirty[MIGRATION_DIRTY_FLAG][index] |= mask;
 }
 
 static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
                                                         int length,
                                                         int dirty_flags)
 {
-    int i, mask, len;
-    uint8_t *p;
+    ram_addr_t addr = start;
+    unsigned long mask;
+    int index, offset, i;
+
+    for (i = 0;  i < length; i += TARGET_PAGE_SIZE) {
+        index = ((addr + i) >> TARGET_PAGE_BITS) / HOST_LONG_BITS;
+        offset = ((addr + i) >> TARGET_PAGE_BITS) & (HOST_LONG_BITS - 1);
+        mask = ~(1UL << offset);
 
-    len = length >> TARGET_PAGE_BITS;
-    mask = ~dirty_flags;
-    p = phys_ram_dirty + (start >> TARGET_PAGE_BITS);
-    for (i = 0; i < len; i++)
-        p[i] &= mask;
+        phys_ram_dirty[MASTER_DIRTY_FLAG][index] &= mask;
+        phys_ram_dirty[dirty_flags][index] &= mask;
+     }
 }
 
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,