diff mbox

[RFC,v7,08/16] softmmu: Honor the new exclusive bitmap

Message ID 1454059965-23402-9-git-send-email-a.rigo@virtualopensystems.com
State New
Headers show

Commit Message

Alvise Rigo Jan. 29, 2016, 9:32 a.m. UTC
The pages set as exclusive (clean) in the DIRTY_MEMORY_EXCLUSIVE bitmap
have to have their TLB entries flagged with TLB_EXCL. The accesses to
pages with TLB_EXCL flag set have to be properly handled in that they
can potentially invalidate an open LL/SC transaction.

Modify the TLB entries generation to honor the new bitmap and extend
the softmmu_template to handle the accesses made to guest pages marked
as exclusive.

In the case we remove a TLB entry marked as EXCL, we unset the
corresponding exclusive bit in the bitmap.

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 cputlb.c           | 44 ++++++++++++++++++++++++++++--
 softmmu_template.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 113 insertions(+), 11 deletions(-)

Comments

Alex Bennée Feb. 16, 2016, 5:39 p.m. UTC | #1
Alvise Rigo <a.rigo@virtualopensystems.com> writes:

> The pages set as exclusive (clean) in the DIRTY_MEMORY_EXCLUSIVE bitmap
> have to have their TLB entries flagged with TLB_EXCL. The accesses to
> pages with TLB_EXCL flag set have to be properly handled in that they
> can potentially invalidate an open LL/SC transaction.
>
> Modify the TLB entries generation to honor the new bitmap and extend
> the softmmu_template to handle the accesses made to guest pages marked
> as exclusive.
>
> In the case we remove a TLB entry marked as EXCL, we unset the
> corresponding exclusive bit in the bitmap.
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>  cputlb.c           | 44 ++++++++++++++++++++++++++++--
>  softmmu_template.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 113 insertions(+), 11 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index ce6d720..aa9cc17 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -395,6 +395,16 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>      env->tlb_v_table[mmu_idx][vidx] = *te;
>      env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
>
> +    if (unlikely(!(te->addr_write & TLB_MMIO) && (te->addr_write & TLB_EXCL))) {
> +        /* We are removing an exclusive entry, set the page to dirty. This
> +         * is not be necessary if the vCPU has performed both SC and LL. */
> +        hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) +
> +                                          (te->addr_write & TARGET_PAGE_MASK);
> +        if (!cpu->ll_sc_context) {
> +            cpu_physical_memory_unset_excl(hw_addr);
> +        }
> +    }
> +

I'm confused by the later patches removing this code and its comments
about missing the setting of flags.

>      /* refill the tlb */
>      env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
>      env->iotlb[mmu_idx][index].attrs = attrs;
> @@ -418,9 +428,19 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>          } else if (memory_region_is_ram(section->mr)
>                     && cpu_physical_memory_is_clean(section->mr->ram_addr
>                                                     + xlat)) {
> -            te->addr_write = address | TLB_NOTDIRTY;
> -        } else {
> -            te->addr_write = address;
> +            address |= TLB_NOTDIRTY;
> +        }
> +
> +        /* Since the MMIO accesses follow always the slow path, we do not need
> +         * to set any flag to trap the access */
> +        if (!(address & TLB_MMIO)) {
> +            if (cpu_physical_memory_is_excl(section->mr->ram_addr + xlat)) {
> +                /* There is at least one vCPU that has flagged the address as
> +                 * exclusive. */
> +                te->addr_write = address | TLB_EXCL;
> +            } else {
> +                te->addr_write = address;
> +            }

Again this is confusing when following patches blat over the code.
Perhaps this part of the patch should be:

        /* Since the MMIO accesses follow always the slow path, we do not need
         * to set any flag to trap the access */
        if (!(address & TLB_MMIO)) {
            if (cpu_physical_memory_is_excl(section->mr->ram_addr + xlat)) {
                /* There is at least one vCPU that has flagged the address as
                 * exclusive. */
                address |= TLB_EXCL;
            }
        }
        te->addr_write = address;

So the future patch is clearer about what it does?

>          }
>      } else {
>          te->addr_write = -1;
> @@ -474,6 +494,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
>      return qemu_ram_addr_from_host_nofail(p);
>  }
>
> +/* For every vCPU compare the exclusive address and reset it in case of a
> + * match. Since only one vCPU is running at once, no lock has to be held to
> + * guard this operation. */
> +static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
> +{
> +    CPUState *cpu;
> +
> +    CPU_FOREACH(cpu) {
> +        if (cpu->excl_protected_range.begin != EXCLUSIVE_RESET_ADDR &&
> +            ranges_overlap(cpu->excl_protected_range.begin,
> +                           cpu->excl_protected_range.end -
> +                           cpu->excl_protected_range.begin,
> +                           addr, size)) {
> +            cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +        }
> +    }
> +}
> +
>  #define MMUSUFFIX _mmu
>
>  /* Generates LoadLink/StoreConditional helpers in softmmu_template.h */
> diff --git a/softmmu_template.h b/softmmu_template.h
> index 4332db2..267c52a 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -474,11 +474,43 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>          tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
>      }
>
> -    /* Handle an IO access.  */
> +    /* Handle an IO access or exclusive access.  */
>      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> -        glue(helper_le_st_name, _do_mmio_access)(env, val, addr, oi,
> -                                                 mmu_idx, index, retaddr);
> -        return;
> +        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {

From here:

> +            CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> +            CPUState *cpu = ENV_GET_CPU(env);
> +            CPUClass *cc = CPU_GET_CLASS(cpu);
> +            /* The slow-path has been forced since we are writing to
> +             * exclusive-protected memory. */
> +            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> +
> +            /* The function lookup_and_reset_cpus_ll_addr could have reset the
> +             * exclusive address. Fail the SC in this case.
> +             * N.B.: here excl_succeed == true means that the caller is
> +             * helper_stcond_name in softmmu_llsc_template.
> +             * On the contrary, excl_succeeded == false occurs when a VCPU is
> +             * writing through normal store to a page with TLB_EXCL bit set. */
> +            if (cpu->excl_succeeded) {
> +                if (!cc->cpu_valid_excl_access(cpu, hw_addr, DATA_SIZE)) {
> +                    /* The vCPU is SC-ing to an unprotected address. */
> +                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +                    cpu->excl_succeeded = false;
> +
> +                    return;
> +                }
> +            }
> +

To here is repeated code later on. It would be better to have a common
chunk of logic.

> +            glue(helper_le_st_name, _do_ram_access)(env, val, addr, oi,
> +                                                    mmu_idx, index, retaddr);
> +
> +            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);

In fact, if the endianness is passed to the inline function you could have
a call that was:

        if (tlb_addr & TLB_EXCL) {
           glue(helper_st_name, _do_excl)(true, env, val, addr, oi, mmu_idx,
                                              index, retaddr);
        }

and

        if (tlb_addr & TLB_EXCL) {
           glue(helper_st_name, _do_excl)(false, env, val, addr, oi, mmu_idx,
                                              index, retaddr);
        }

later. Then future patches would just extend the single helper.

> +
> +            return;
> +        } else {
> +            glue(helper_le_st_name, _do_mmio_access)(env, val, addr, oi,
> +                                                     mmu_idx, index, retaddr);
> +            return;
> +        }
>      }
>
>      glue(helper_le_st_name, _do_ram_access)(env, val, addr, oi, mmu_idx, index,
> @@ -586,11 +618,43 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>          tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
>      }
>
> -    /* Handle an IO access.  */
> +    /* Handle an IO access or exclusive access.  */
>      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> -        glue(helper_be_st_name, _do_mmio_access)(env, val, addr, oi,
> -                                                 mmu_idx, index, retaddr);
> -        return;
> +        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
> +            CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> +            CPUState *cpu = ENV_GET_CPU(env);
> +            CPUClass *cc = CPU_GET_CLASS(cpu);
> +            /* The slow-path has been forced since we are writing to
> +             * exclusive-protected memory. */
> +            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> +
> +            /* The function lookup_and_reset_cpus_ll_addr could have reset the
> +             * exclusive address. Fail the SC in this case.
> +             * N.B.: here excl_succeed == true means that the caller is
> +             * helper_stcond_name in softmmu_llsc_template.
> +             * On the contrary, excl_succeeded == false occurs when a VCPU is
> +             * writing through normal store to a page with TLB_EXCL bit set. */
> +            if (cpu->excl_succeeded) {
> +                if (!cc->cpu_valid_excl_access(cpu, hw_addr, DATA_SIZE)) {
> +                    /* The vCPU is SC-ing to an unprotected address. */
> +                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +                    cpu->excl_succeeded = false;
> +
> +                    return;
> +                }
> +            }
> +
> +            glue(helper_be_st_name, _do_ram_access)(env, val, addr, oi,
> +                                                    mmu_idx, index, retaddr);
> +
> +            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
> +
> +            return;
> +        } else {
> +            glue(helper_be_st_name, _do_mmio_access)(env, val, addr, oi,
> +                                                     mmu_idx, index, retaddr);
> +            return;
> +        }
>      }
>
>      glue(helper_be_st_name, _do_ram_access)(env, val, addr, oi, mmu_idx, index,


--
Alex Bennée
Alvise Rigo Feb. 18, 2016, 2:18 p.m. UTC | #2
On Tue, Feb 16, 2016 at 6:39 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
>
> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>
> > The pages set as exclusive (clean) in the DIRTY_MEMORY_EXCLUSIVE bitmap
> > have to have their TLB entries flagged with TLB_EXCL. The accesses to
> > pages with TLB_EXCL flag set have to be properly handled in that they
> > can potentially invalidate an open LL/SC transaction.
> >
> > Modify the TLB entries generation to honor the new bitmap and extend
> > the softmmu_template to handle the accesses made to guest pages marked
> > as exclusive.
> >
> > In the case we remove a TLB entry marked as EXCL, we unset the
> > corresponding exclusive bit in the bitmap.
> >
> > Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> > Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> > Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> > ---
> >  cputlb.c           | 44 ++++++++++++++++++++++++++++--
> >  softmmu_template.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++------
> >  2 files changed, 113 insertions(+), 11 deletions(-)
> >
> > diff --git a/cputlb.c b/cputlb.c
> > index ce6d720..aa9cc17 100644
> > --- a/cputlb.c
> > +++ b/cputlb.c
> > @@ -395,6 +395,16 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> >      env->tlb_v_table[mmu_idx][vidx] = *te;
> >      env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
> >
> > +    if (unlikely(!(te->addr_write & TLB_MMIO) && (te->addr_write & TLB_EXCL))) {
> > +        /* We are removing an exclusive entry, set the page to dirty. This
> > +         * is not be necessary if the vCPU has performed both SC and LL. */
> > +        hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) +
> > +                                          (te->addr_write & TARGET_PAGE_MASK);
> > +        if (!cpu->ll_sc_context) {
> > +            cpu_physical_memory_unset_excl(hw_addr);
> > +        }
> > +    }
> > +
>
> I'm confused by the later patches removing this code and its comments
> about missing the setting of flags.


I hope I answered this question in the other thread.

>
>
> >      /* refill the tlb */
> >      env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
> >      env->iotlb[mmu_idx][index].attrs = attrs;
> > @@ -418,9 +428,19 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> >          } else if (memory_region_is_ram(section->mr)
> >                     && cpu_physical_memory_is_clean(section->mr->ram_addr
> >                                                     + xlat)) {
> > -            te->addr_write = address | TLB_NOTDIRTY;
> > -        } else {
> > -            te->addr_write = address;
> > +            address |= TLB_NOTDIRTY;
> > +        }
> > +
> > +        /* Since the MMIO accesses follow always the slow path, we do not need
> > +         * to set any flag to trap the access */
> > +        if (!(address & TLB_MMIO)) {
> > +            if (cpu_physical_memory_is_excl(section->mr->ram_addr + xlat)) {
> > +                /* There is at least one vCPU that has flagged the address as
> > +                 * exclusive. */
> > +                te->addr_write = address | TLB_EXCL;
> > +            } else {
> > +                te->addr_write = address;
> > +            }
>
> Again this is confusing when following patches blat over the code.
> Perhaps this part of the patch should be:
>
>         /* Since the MMIO accesses follow always the slow path, we do not need
>          * to set any flag to trap the access */
>         if (!(address & TLB_MMIO)) {
>             if (cpu_physical_memory_is_excl(section->mr->ram_addr + xlat)) {
>                 /* There is at least one vCPU that has flagged the address as
>                  * exclusive. */
>                 address |= TLB_EXCL;
>             }
>         }
>         te->addr_write = address;
>
> So the future patch is clearer about what it does?


Yes, this is clearer. I will fix it.

>
>
> >          }
> >      } else {
> >          te->addr_write = -1;
> > @@ -474,6 +494,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
> >      return qemu_ram_addr_from_host_nofail(p);
> >  }
> >
> > +/* For every vCPU compare the exclusive address and reset it in case of a
> > + * match. Since only one vCPU is running at once, no lock has to be held to
> > + * guard this operation. */
> > +static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
> > +{
> > +    CPUState *cpu;
> > +
> > +    CPU_FOREACH(cpu) {
> > +        if (cpu->excl_protected_range.begin != EXCLUSIVE_RESET_ADDR &&
> > +            ranges_overlap(cpu->excl_protected_range.begin,
> > +                           cpu->excl_protected_range.end -
> > +                           cpu->excl_protected_range.begin,
> > +                           addr, size)) {
> > +            cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> > +        }
> > +    }
> > +}
> > +
> >  #define MMUSUFFIX _mmu
> >
> >  /* Generates LoadLink/StoreConditional helpers in softmmu_template.h */
> > diff --git a/softmmu_template.h b/softmmu_template.h
> > index 4332db2..267c52a 100644
> > --- a/softmmu_template.h
> > +++ b/softmmu_template.h
> > @@ -474,11 +474,43 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
> >          tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
> >      }
> >
> > -    /* Handle an IO access.  */
> > +    /* Handle an IO access or exclusive access.  */
> >      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> > -        glue(helper_le_st_name, _do_mmio_access)(env, val, addr, oi,
> > -                                                 mmu_idx, index, retaddr);
> > -        return;
> > +        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
>
> From here:
>
> > +            CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> > +            CPUState *cpu = ENV_GET_CPU(env);
> > +            CPUClass *cc = CPU_GET_CLASS(cpu);
> > +            /* The slow-path has been forced since we are writing to
> > +             * exclusive-protected memory. */
> > +            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> > +
> > +            /* The function lookup_and_reset_cpus_ll_addr could have reset the
> > +             * exclusive address. Fail the SC in this case.
> > +             * N.B.: here excl_succeed == true means that the caller is
> > +             * helper_stcond_name in softmmu_llsc_template.
> > +             * On the contrary, excl_succeeded == false occurs when a VCPU is
> > +             * writing through normal store to a page with TLB_EXCL bit set. */
> > +            if (cpu->excl_succeeded) {
> > +                if (!cc->cpu_valid_excl_access(cpu, hw_addr, DATA_SIZE)) {
> > +                    /* The vCPU is SC-ing to an unprotected address. */
> > +                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> > +                    cpu->excl_succeeded = false;
> > +
> > +                    return;
> > +                }
> > +            }
> > +
>
> To here is repeated code later on. It would be better to have a common
> chunk of logic.
>
> > +            glue(helper_le_st_name, _do_ram_access)(env, val, addr, oi,
> > +                                                    mmu_idx, index, retaddr);
> > +
> > +            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
>
> In fact if the endianess is passed to the inline function you could have
> a call that was:
>
>         if (tlb_addr & TLB_EXCL) {
>            glue(helper_st_name, _do_excl)(true, env, val, addr, oi, mmu_idx,
>                                               index, retaddr);
>         }
>
> and
>
>         if (tlb_addr & TLB_EXCL) {
>            glue(helper_st_name, _do_excl)(false, env, val, addr, oi, mmu_idx,
>                                               index, retaddr);
>         }
>
> later. Then future patches would just extend the single helper.

OK, let's shrink down this file :)

Thank you,
alvise

>
>
> > +
> > +            return;
> > +        } else {
> > +            glue(helper_le_st_name, _do_mmio_access)(env, val, addr, oi,
> > +                                                     mmu_idx, index, retaddr);
> > +            return;
> > +        }
> >      }
> >
> >      glue(helper_le_st_name, _do_ram_access)(env, val, addr, oi, mmu_idx, index,
> > @@ -586,11 +618,43 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
> >          tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
> >      }
> >
> > -    /* Handle an IO access.  */
> > +    /* Handle an IO access or exclusive access.  */
> >      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> > -        glue(helper_be_st_name, _do_mmio_access)(env, val, addr, oi,
> > -                                                 mmu_idx, index, retaddr);
> > -        return;
> > +        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
> > +            CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> > +            CPUState *cpu = ENV_GET_CPU(env);
> > +            CPUClass *cc = CPU_GET_CLASS(cpu);
> > +            /* The slow-path has been forced since we are writing to
> > +             * exclusive-protected memory. */
> > +            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> > +
> > +            /* The function lookup_and_reset_cpus_ll_addr could have reset the
> > +             * exclusive address. Fail the SC in this case.
> > +             * N.B.: here excl_succeed == true means that the caller is
> > +             * helper_stcond_name in softmmu_llsc_template.
> > +             * On the contrary, excl_succeeded == false occurs when a VCPU is
> > +             * writing through normal store to a page with TLB_EXCL bit set. */
> > +            if (cpu->excl_succeeded) {
> > +                if (!cc->cpu_valid_excl_access(cpu, hw_addr, DATA_SIZE)) {
> > +                    /* The vCPU is SC-ing to an unprotected address. */
> > +                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> > +                    cpu->excl_succeeded = false;
> > +
> > +                    return;
> > +                }
> > +            }
> > +
> > +            glue(helper_be_st_name, _do_ram_access)(env, val, addr, oi,
> > +                                                    mmu_idx, index, retaddr);
> > +
> > +            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
> > +
> > +            return;
> > +        } else {
> > +            glue(helper_be_st_name, _do_mmio_access)(env, val, addr, oi,
> > +                                                     mmu_idx, index, retaddr);
> > +            return;
> > +        }
> >      }
> >
> >      glue(helper_be_st_name, _do_ram_access)(env, val, addr, oi, mmu_idx, index,
>
>
> --
> Alex Bennée
diff mbox

Patch

diff --git a/cputlb.c b/cputlb.c
index ce6d720..aa9cc17 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -395,6 +395,16 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     env->tlb_v_table[mmu_idx][vidx] = *te;
     env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
 
+    if (unlikely(!(te->addr_write & TLB_MMIO) && (te->addr_write & TLB_EXCL))) {
+        /* We are removing an exclusive entry, set the page to dirty. This
+         * is not be necessary if the vCPU has performed both SC and LL. */
+        hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) +
+                                          (te->addr_write & TARGET_PAGE_MASK);
+        if (!cpu->ll_sc_context) {
+            cpu_physical_memory_unset_excl(hw_addr);
+        }
+    }
+
     /* refill the tlb */
     env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
     env->iotlb[mmu_idx][index].attrs = attrs;
@@ -418,9 +428,19 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
         } else if (memory_region_is_ram(section->mr)
                    && cpu_physical_memory_is_clean(section->mr->ram_addr
                                                    + xlat)) {
-            te->addr_write = address | TLB_NOTDIRTY;
-        } else {
-            te->addr_write = address;
+            address |= TLB_NOTDIRTY;
+        }
+
+        /* Since the MMIO accesses follow always the slow path, we do not need
+         * to set any flag to trap the access */
+        if (!(address & TLB_MMIO)) {
+            if (cpu_physical_memory_is_excl(section->mr->ram_addr + xlat)) {
+                /* There is at least one vCPU that has flagged the address as
+                 * exclusive. */
+                te->addr_write = address | TLB_EXCL;
+            } else {
+                te->addr_write = address;
+            }
         }
     } else {
         te->addr_write = -1;
@@ -474,6 +494,24 @@  tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
     return qemu_ram_addr_from_host_nofail(p);
 }
 
+/* Invalidate (begin = EXCLUSIVE_RESET_ADDR) the exclusive range of every vCPU
+ * whose range overlaps the given addr/size. Since only one vCPU is running at
+ * once, no lock has to be held to guard this operation. */
+static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->excl_protected_range.begin != EXCLUSIVE_RESET_ADDR &&
+            ranges_overlap(cpu->excl_protected_range.begin,
+                           cpu->excl_protected_range.end -
+                           cpu->excl_protected_range.begin,
+                           addr, size)) {
+            cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+        }
+    }
+}
+
 #define MMUSUFFIX _mmu
 
 /* Generates LoadLink/StoreConditional helpers in softmmu_template.h */
diff --git a/softmmu_template.h b/softmmu_template.h
index 4332db2..267c52a 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -474,11 +474,43 @@  void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
 
-    /* Handle an IO access.  */
+    /* Handle an IO access or exclusive access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        glue(helper_le_st_name, _do_mmio_access)(env, val, addr, oi,
-                                                 mmu_idx, index, retaddr);
-        return;
+        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+            CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+            CPUState *cpu = ENV_GET_CPU(env);
+            CPUClass *cc = CPU_GET_CLASS(cpu);
+            /* The slow-path has been forced since we are writing to
+             * exclusive-protected memory. */
+            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+            /* The function lookup_and_reset_cpus_ll_addr could have reset the
+             * exclusive address. Fail the SC in this case.
+             * N.B.: here excl_succeed == true means that the caller is
+             * helper_stcond_name in softmmu_llsc_template.
+             * On the contrary, excl_succeeded == false occurs when a VCPU is
+             * writing through normal store to a page with TLB_EXCL bit set. */
+            if (cpu->excl_succeeded) {
+                if (!cc->cpu_valid_excl_access(cpu, hw_addr, DATA_SIZE)) {
+                    /* The vCPU is SC-ing to an unprotected address. */
+                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+                    cpu->excl_succeeded = false;
+
+                    return;
+                }
+            }
+
+            glue(helper_le_st_name, _do_ram_access)(env, val, addr, oi,
+                                                    mmu_idx, index, retaddr);
+
+            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
+
+            return;
+        } else {
+            glue(helper_le_st_name, _do_mmio_access)(env, val, addr, oi,
+                                                     mmu_idx, index, retaddr);
+            return;
+        }
     }
 
     glue(helper_le_st_name, _do_ram_access)(env, val, addr, oi, mmu_idx, index,
@@ -586,11 +618,43 @@  void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
 
-    /* Handle an IO access.  */
+    /* Handle an IO access or exclusive access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        glue(helper_be_st_name, _do_mmio_access)(env, val, addr, oi,
-                                                 mmu_idx, index, retaddr);
-        return;
+        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+            CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+            CPUState *cpu = ENV_GET_CPU(env);
+            CPUClass *cc = CPU_GET_CLASS(cpu);
+            /* The slow-path has been forced since we are writing to
+             * exclusive-protected memory. */
+            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+            /* The function lookup_and_reset_cpus_ll_addr could have reset the
+             * exclusive address. Fail the SC in this case.
+             * N.B.: here excl_succeed == true means that the caller is
+             * helper_stcond_name in softmmu_llsc_template.
+             * On the contrary, excl_succeeded == false occurs when a VCPU is
+             * writing through normal store to a page with TLB_EXCL bit set. */
+            if (cpu->excl_succeeded) {
+                if (!cc->cpu_valid_excl_access(cpu, hw_addr, DATA_SIZE)) {
+                    /* The vCPU is SC-ing to an unprotected address. */
+                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+                    cpu->excl_succeeded = false;
+
+                    return;
+                }
+            }
+
+            glue(helper_be_st_name, _do_ram_access)(env, val, addr, oi,
+                                                    mmu_idx, index, retaddr);
+
+            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
+
+            return;
+        } else {
+            glue(helper_be_st_name, _do_mmio_access)(env, val, addr, oi,
+                                                     mmu_idx, index, retaddr);
+            return;
+        }
     }
 
     glue(helper_be_st_name, _do_ram_access)(env, val, addr, oi, mmu_idx, index,