diff mbox

[RFC,v3,13/13] softmmu_template.h: move to multithreading

Message ID 1436516626-8322-14-git-send-email-a.rigo@virtualopensystems.com
State New
Headers show

Commit Message

Alvise Rigo July 10, 2015, 8:23 a.m. UTC
Exploiting the tcg_excl_access_lock, port the helper_{le,be}_st_name to
work in real multithreading.

- The macro lookup_cpus_ll_addr now uses directly the
  env->excl_protected_addr to invalidate others' LL/SC operations

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 89 insertions(+), 21 deletions(-)

Comments

Alex Bennée July 17, 2015, 3:57 p.m. UTC | #1
Alvise Rigo <a.rigo@virtualopensystems.com> writes:

> Exploiting the tcg_excl_access_lock, port the helper_{le,be}_st_name to
> work in real multithreading.
>
> - The macro lookup_cpus_ll_addr now uses directly the
>   env->excl_protected_addr to invalidate others' LL/SC operations
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>  softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 89 insertions(+), 21 deletions(-)
>
> diff --git a/softmmu_template.h b/softmmu_template.h
> index bc767f6..522454f 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -141,21 +141,24 @@
>      vidx >= 0;                                                                \
>  })
>  
> +#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
> +
> +/* This macro requires the caller to have the tcg_excl_access_lock lock since
> + * it modifies the excl_protected_hwaddr of a running vCPU.
> + * The macros scans all the excl_protected_hwaddr of all the vCPUs and compare
> + * them with the address the current vCPU is writing to. If there is a match,
> + * we reset the value, making the SC fail. */

It would have been nice if we had started with a comment when the
function^H^H^H^H^H macro was first introduced and then updated here.

>  #define lookup_cpus_ll_addr(addr)                                             \
>  ({                                                                            \
>      CPUState *cpu;                                                            \
>      CPUArchState *acpu;                                                       \
> -    bool hit = false;                                                         \
>                                                                                \
>      CPU_FOREACH(cpu) {                                                        \
>          acpu = (CPUArchState *)cpu->env_ptr;                                  \
>          if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {      \
> -            hit = true;                                                       \
> -            break;                                                            \
> +            acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;               \
>          }                                                                     \
>      }                                                                         \
> -                                                                              \
> -    hit;                                                                      \
>  })

My comment about using an inline function in the earlier patch stands.

>  
>  #ifndef SOFTMMU_CODE_ACCESS
> @@ -439,18 +442,52 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>               * exclusive-protected memory. */
>              hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>  
> -            bool set_to_dirty;
> -
>              /* Two cases of invalidation: the current vCPU is writing to another
>               * vCPU's exclusive address or the vCPU that issued the LoadLink is
>               * writing to it, but not through a StoreCond. */
> -            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
> -            set_to_dirty |= env->ll_sc_context &&
> -                           (env->excl_protected_hwaddr == hw_addr);
> +            qemu_mutex_lock(&tcg_excl_access_lock);
> +
> +            /* The macro lookup_cpus_ll_addr could have reset the exclusive
> +             * address. Fail the SC in this case.
> +             * N.B.: Here excl_succeeded == 0 means that we don't come from a
> +             * store conditional.  */
> +            if (env->excl_succeeded &&
> +                        (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
> +                env->excl_succeeded = 0;
> +                qemu_mutex_unlock(&tcg_excl_access_lock);
> +
> +                return;
> +            }
> +
> +            lookup_cpus_ll_addr(hw_addr);

Add comment for side effect, also confused by the above comment given we
call lookups_cpus_ll_addr() after the comment about it tweaking excl_succedded.

> +
> +            if (!env->excl_succeeded) {
> +                if (env->ll_sc_context &&
> +                            (env->excl_protected_hwaddr == hw_addr)) {
> +                    cpu_physical_memory_set_excl_dirty(hw_addr);
> +                }
> +            } else {
> +                if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
> +                        env->excl_protected_hwaddr != hw_addr) {
> +                    env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +                    qemu_mutex_unlock(&tcg_excl_access_lock);
> +                    env->excl_succeeded = 0;
> +
> +                    return;
> +                }
> +            }

I'm wondering if it can be more naturally written the other way round to
aid comprehension:

if (env->excl_succeeded) {
   if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
       env->excl_protected_hwaddr != hw_addr) {
       ..do stuff..
       return
   }
} else {
   if (env->ll_sc_context &&
      (env->excl_protected_hwaddr == hw_addr)) {
      cpu_physical_memory_set_excl_dirty(hw_addr);
   }
}

Although now I'm confused as to why we push on in 3 of the 4 cases.

> +
> +            haddr = addr + env->tlb_table[mmu_idx][index].addend;
> +        #if DATA_SIZE == 1
> +            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
> +        #else
> +            glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
> +        #endif
> +
> +            env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +            qemu_mutex_unlock(&tcg_excl_access_lock);
>  
> -            if (set_to_dirty) {
> -                cpu_physical_memory_set_excl_dirty(hw_addr);
> -            } /* the vCPU is legitimately writing to the protected address */
> +            return;
>          } else {
>              if ((addr & (DATA_SIZE - 1)) != 0) {
>                  goto do_unaligned_access;
> @@ -537,18 +574,49 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>               * exclusive-protected memory. */
>              hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>  
> -            bool set_to_dirty;
> -
>              /* Two cases of invalidation: the current vCPU is writing to another
>               * vCPU's exclusive address or the vCPU that issued the LoadLink is
>               * writing to it, but not through a StoreCond. */
> -            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
> -            set_to_dirty |= env->ll_sc_context &&
> -                           (env->excl_protected_hwaddr == hw_addr);
> +            qemu_mutex_lock(&tcg_excl_access_lock);
> +
> +            /* The macro lookup_cpus_ll_addr could have reset the exclusive
> +             * address. Fail the SC in this case.
> +             * N.B.: Here excl_succeeded == 0 means that we don't come from a
> +             * store conditional.  */
> +            if (env->excl_succeeded &&
> +                        (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
> +                env->excl_succeeded = 0;
> +                qemu_mutex_unlock(&tcg_excl_access_lock);
> +
> +                return;
> +            }
> +
> +            lookup_cpus_ll_addr(hw_addr);
> +
> +            if (!env->excl_succeeded) {
> +                if (env->ll_sc_context &&
> +                            (env->excl_protected_hwaddr == hw_addr)) {
> +                    cpu_physical_memory_set_excl_dirty(hw_addr);
> +                }
> +            } else {
> +                if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
> +                        env->excl_protected_hwaddr != hw_addr) {
> +                    env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +                    qemu_mutex_unlock(&tcg_excl_access_lock);
> +                    env->excl_succeeded = 0;
> +
> +                    return;
> +                }
> +            }

Given the amount of copy/paste between the two functions I wonder if
there is some commonality to be re-factored out here?

>  
> -            if (set_to_dirty) {
> -                cpu_physical_memory_set_excl_dirty(hw_addr);
> -            } /* the vCPU is legitimately writing to the protected address */
> +            haddr = addr + env->tlb_table[mmu_idx][index].addend;
> +
> +            glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
> +
> +            qemu_mutex_unlock(&tcg_excl_access_lock);
> +            env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +
> +            return;
>          } else {
>              if ((addr & (DATA_SIZE - 1)) != 0) {
>                  goto do_unaligned_access;
Alvise Rigo July 17, 2015, 4:19 p.m. UTC | #2
On Fri, Jul 17, 2015 at 5:57 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>
>> Exploiting the tcg_excl_access_lock, port the helper_{le,be}_st_name to
>> work in real multithreading.
>>
>> - The macro lookup_cpus_ll_addr now uses directly the
>>   env->excl_protected_addr to invalidate others' LL/SC operations
>>
>> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
>> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
>> ---
>>  softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++++++++----------
>>  1 file changed, 89 insertions(+), 21 deletions(-)
>>
>> diff --git a/softmmu_template.h b/softmmu_template.h
>> index bc767f6..522454f 100644
>> --- a/softmmu_template.h
>> +++ b/softmmu_template.h
>> @@ -141,21 +141,24 @@
>>      vidx >= 0;                                                                \
>>  })
>>
>> +#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
>> +
>> +/* This macro requires the caller to have the tcg_excl_access_lock lock since
>> + * it modifies the excl_protected_hwaddr of a running vCPU.
>> + * The macros scans all the excl_protected_hwaddr of all the vCPUs and compare
>> + * them with the address the current vCPU is writing to. If there is a match,
>> + * we reset the value, making the SC fail. */
>
> It would have been nice if we had started with a comment when the
> function^H^H^H^H^H macro was first introduced and then updated here.

OK. Related to this, I think to refactor the patches in such a way to
drop the "move to multithreading" part, since it makes things more
confusing (even if my intent was the opposite).

>
>>  #define lookup_cpus_ll_addr(addr)                                             \
>>  ({                                                                            \
>>      CPUState *cpu;                                                            \
>>      CPUArchState *acpu;                                                       \
>> -    bool hit = false;                                                         \
>>                                                                                \
>>      CPU_FOREACH(cpu) {                                                        \
>>          acpu = (CPUArchState *)cpu->env_ptr;                                  \
>>          if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {      \
>> -            hit = true;                                                       \
>> -            break;                                                            \
>> +            acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;               \
>>          }                                                                     \
>>      }                                                                         \
>> -                                                                              \
>> -    hit;                                                                      \
>>  })
>
> My comment about using an inline function in the earlier patch stands.
>
>>
>>  #ifndef SOFTMMU_CODE_ACCESS
>> @@ -439,18 +442,52 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>>               * exclusive-protected memory. */
>>              hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>>
>> -            bool set_to_dirty;
>> -
>>              /* Two cases of invalidation: the current vCPU is writing to another
>>               * vCPU's exclusive address or the vCPU that issued the LoadLink is
>>               * writing to it, but not through a StoreCond. */
>> -            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
>> -            set_to_dirty |= env->ll_sc_context &&
>> -                           (env->excl_protected_hwaddr == hw_addr);
>> +            qemu_mutex_lock(&tcg_excl_access_lock);
>> +
>> +            /* The macro lookup_cpus_ll_addr could have reset the exclusive
>> +             * address. Fail the SC in this case.
>> +             * N.B.: Here excl_succeeded == 0 means that we don't come from a
>> +             * store conditional.  */
>> +            if (env->excl_succeeded &&
>> +                        (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
>> +                env->excl_succeeded = 0;
>> +                qemu_mutex_unlock(&tcg_excl_access_lock);
>> +
>> +                return;
>> +            }
>> +
>> +            lookup_cpus_ll_addr(hw_addr);
>
> Add comment for side effect, also confused by the above comment given we
> call lookups_cpus_ll_addr() after the comment about it tweaking excl_succedded.

I will improve all the comments to avoid any misinterpretation. I will
also be more verbose in the cover letter about how the whole machinery
works.

>
>> +
>> +            if (!env->excl_succeeded) {
>> +                if (env->ll_sc_context &&
>> +                            (env->excl_protected_hwaddr == hw_addr)) {
>> +                    cpu_physical_memory_set_excl_dirty(hw_addr);
>> +                }
>> +            } else {
>> +                if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
>> +                        env->excl_protected_hwaddr != hw_addr) {
>> +                    env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +                    qemu_mutex_unlock(&tcg_excl_access_lock);
>> +                    env->excl_succeeded = 0;
>> +
>> +                    return;
>> +                }
>> +            }
>
> I'm wondering if it can be more naturally written the other way round to
> aid comprehension:
>
> if (env->excl_succeeded) {
>    if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
>        env->excl_protected_hwaddr != hw_addr) {
>        ..do stuff..
>        return
>    }
> } else {
>    if (env->ll_sc_context &&
>       (env->excl_protected_hwaddr == hw_addr)) {
>       cpu_physical_memory_set_excl_dirty(hw_addr);
>    }
> }
>
> Although now I'm confused as to why we push on in 3 of the 4 cases.

I will try to find another way to write this code, starting from
adding more comments about the corner cases that we are addressing.

>
>> +
>> +            haddr = addr + env->tlb_table[mmu_idx][index].addend;
>> +        #if DATA_SIZE == 1
>> +            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
>> +        #else
>> +            glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
>> +        #endif
>> +
>> +            env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +            qemu_mutex_unlock(&tcg_excl_access_lock);
>>
>> -            if (set_to_dirty) {
>> -                cpu_physical_memory_set_excl_dirty(hw_addr);
>> -            } /* the vCPU is legitimately writing to the protected address */
>> +            return;
>>          } else {
>>              if ((addr & (DATA_SIZE - 1)) != 0) {
>>                  goto do_unaligned_access;
>> @@ -537,18 +574,49 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>>               * exclusive-protected memory. */
>>              hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>>
>> -            bool set_to_dirty;
>> -
>>              /* Two cases of invalidation: the current vCPU is writing to another
>>               * vCPU's exclusive address or the vCPU that issued the LoadLink is
>>               * writing to it, but not through a StoreCond. */
>> -            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
>> -            set_to_dirty |= env->ll_sc_context &&
>> -                           (env->excl_protected_hwaddr == hw_addr);
>> +            qemu_mutex_lock(&tcg_excl_access_lock);
>> +
>> +            /* The macro lookup_cpus_ll_addr could have reset the exclusive
>> +             * address. Fail the SC in this case.
>> +             * N.B.: Here excl_succeeded == 0 means that we don't come from a
>> +             * store conditional.  */
>> +            if (env->excl_succeeded &&
>> +                        (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
>> +                env->excl_succeeded = 0;
>> +                qemu_mutex_unlock(&tcg_excl_access_lock);
>> +
>> +                return;
>> +            }
>> +
>> +            lookup_cpus_ll_addr(hw_addr);
>> +
>> +            if (!env->excl_succeeded) {
>> +                if (env->ll_sc_context &&
>> +                            (env->excl_protected_hwaddr == hw_addr)) {
>> +                    cpu_physical_memory_set_excl_dirty(hw_addr);
>> +                }
>> +            } else {
>> +                if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
>> +                        env->excl_protected_hwaddr != hw_addr) {
>> +                    env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +                    qemu_mutex_unlock(&tcg_excl_access_lock);
>> +                    env->excl_succeeded = 0;
>> +
>> +                    return;
>> +                }
>> +            }
>
> Given the amount of copy/paste between the two functions I wonder if
> there is some commonality to be re-factored out here?

Either we create inline functions outside this file (like in cputlb.c)
or we define new macros.
I don't see other viable ways to re-factor this code.

Thank you,
alvise

>
>>
>> -            if (set_to_dirty) {
>> -                cpu_physical_memory_set_excl_dirty(hw_addr);
>> -            } /* the vCPU is legitimately writing to the protected address */
>> +            haddr = addr + env->tlb_table[mmu_idx][index].addend;
>> +
>> +            glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
>> +
>> +            qemu_mutex_unlock(&tcg_excl_access_lock);
>> +            env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +
>> +            return;
>>          } else {
>>              if ((addr & (DATA_SIZE - 1)) != 0) {
>>                  goto do_unaligned_access;
>
> --
> Alex Bennée
diff mbox

Patch

diff --git a/softmmu_template.h b/softmmu_template.h
index bc767f6..522454f 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -141,21 +141,24 @@ 
     vidx >= 0;                                                                \
 })
 
+#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
+
+/* This macro requires the caller to have the tcg_excl_access_lock lock since
+ * it modifies the excl_protected_hwaddr of a running vCPU.
+ * The macros scans all the excl_protected_hwaddr of all the vCPUs and compare
+ * them with the address the current vCPU is writing to. If there is a match,
+ * we reset the value, making the SC fail. */
 #define lookup_cpus_ll_addr(addr)                                             \
 ({                                                                            \
     CPUState *cpu;                                                            \
     CPUArchState *acpu;                                                       \
-    bool hit = false;                                                         \
                                                                               \
     CPU_FOREACH(cpu) {                                                        \
         acpu = (CPUArchState *)cpu->env_ptr;                                  \
         if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {      \
-            hit = true;                                                       \
-            break;                                                            \
+            acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;               \
         }                                                                     \
     }                                                                         \
-                                                                              \
-    hit;                                                                      \
 })
 
 #ifndef SOFTMMU_CODE_ACCESS
@@ -439,18 +442,52 @@  void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
              * exclusive-protected memory. */
             hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 
-            bool set_to_dirty;
-
             /* Two cases of invalidation: the current vCPU is writing to another
              * vCPU's exclusive address or the vCPU that issued the LoadLink is
              * writing to it, but not through a StoreCond. */
-            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
-            set_to_dirty |= env->ll_sc_context &&
-                           (env->excl_protected_hwaddr == hw_addr);
+            qemu_mutex_lock(&tcg_excl_access_lock);
+
+            /* The macro lookup_cpus_ll_addr could have reset the exclusive
+             * address. Fail the SC in this case.
+             * N.B.: Here excl_succeeded == 0 means that we don't come from a
+             * store conditional.  */
+            if (env->excl_succeeded &&
+                        (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
+                env->excl_succeeded = 0;
+                qemu_mutex_unlock(&tcg_excl_access_lock);
+
+                return;
+            }
+
+            lookup_cpus_ll_addr(hw_addr);
+
+            if (!env->excl_succeeded) {
+                if (env->ll_sc_context &&
+                            (env->excl_protected_hwaddr == hw_addr)) {
+                    cpu_physical_memory_set_excl_dirty(hw_addr);
+                }
+            } else {
+                if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
+                        env->excl_protected_hwaddr != hw_addr) {
+                    env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+                    qemu_mutex_unlock(&tcg_excl_access_lock);
+                    env->excl_succeeded = 0;
+
+                    return;
+                }
+            }
+
+            haddr = addr + env->tlb_table[mmu_idx][index].addend;
+        #if DATA_SIZE == 1
+            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+        #else
+            glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
+        #endif
+
+            env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+            qemu_mutex_unlock(&tcg_excl_access_lock);
 
-            if (set_to_dirty) {
-                cpu_physical_memory_set_excl_dirty(hw_addr);
-            } /* the vCPU is legitimately writing to the protected address */
+            return;
         } else {
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 goto do_unaligned_access;
@@ -537,18 +574,49 @@  void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
              * exclusive-protected memory. */
             hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 
-            bool set_to_dirty;
-
             /* Two cases of invalidation: the current vCPU is writing to another
              * vCPU's exclusive address or the vCPU that issued the LoadLink is
              * writing to it, but not through a StoreCond. */
-            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
-            set_to_dirty |= env->ll_sc_context &&
-                           (env->excl_protected_hwaddr == hw_addr);
+            qemu_mutex_lock(&tcg_excl_access_lock);
+
+            /* The macro lookup_cpus_ll_addr could have reset the exclusive
+             * address. Fail the SC in this case.
+             * N.B.: Here excl_succeeded == 0 means that we don't come from a
+             * store conditional.  */
+            if (env->excl_succeeded &&
+                        (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
+                env->excl_succeeded = 0;
+                qemu_mutex_unlock(&tcg_excl_access_lock);
+
+                return;
+            }
+
+            lookup_cpus_ll_addr(hw_addr);
+
+            if (!env->excl_succeeded) {
+                if (env->ll_sc_context &&
+                            (env->excl_protected_hwaddr == hw_addr)) {
+                    cpu_physical_memory_set_excl_dirty(hw_addr);
+                }
+            } else {
+                if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
+                        env->excl_protected_hwaddr != hw_addr) {
+                    env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+                    qemu_mutex_unlock(&tcg_excl_access_lock);
+                    env->excl_succeeded = 0;
+
+                    return;
+                }
+            }
 
-            if (set_to_dirty) {
-                cpu_physical_memory_set_excl_dirty(hw_addr);
-            } /* the vCPU is legitimately writing to the protected address */
+            haddr = addr + env->tlb_table[mmu_idx][index].addend;
+
+            glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
+
+            qemu_mutex_unlock(&tcg_excl_access_lock);
+            env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+
+            return;
         } else {
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 goto do_unaligned_access;