[RFC,v5,3/6] softmmu: Add helpers for a new slowpath

Message ID 1443083566-10994-4-git-send-email-a.rigo@virtualopensystems.com
State New

Commit Message

Alvise Rigo Sept. 24, 2015, 8:32 a.m. UTC
The new helpers rely on the legacy ones to perform the actual read/write.

The LoadLink helper (helper_ldlink_name) prepares the way for the
following SC operation. It sets the linked address and the size of the
access.
This helper also updates the TLB entry of the page involved in the
LL/SC for those vCPUs that have the bit set (dirty), so that
subsequent accesses made by any vCPU will follow the slow path.

The StoreConditional helper (helper_stcond_name) returns 1 if the
store has to fail due to a concurrent access to the same page by
another vCPU. A 'concurrent access' can be a store made by *any* vCPU
(although some implementations allow stores made by the CPU that issued
the LoadLink).
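
For illustration, a front end would use a matched pair of these helpers
roughly as follows (hypothetical 32-bit little-endian atomic-increment
sketch; the TCG glue that actually emits the calls is not part of this
patch):

    uint32_t old;
    tcg_target_ulong fail;
    do {
        old  = helper_le_ldlinkul_mmu(env, addr, oi, retaddr);  /* LL */
        fail = helper_le_stcondl_mmu(env, addr, old + 1,
                                     oi, retaddr);              /* SC */
    } while (fail); /* 1 means a concurrent access was detected */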

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 cputlb.c                |   3 ++
 softmmu_llsc_template.h | 124 ++++++++++++++++++++++++++++++++++++++++++++++++
 softmmu_template.h      |  12 +++++
 tcg/tcg.h               |  30 ++++++++++++
 4 files changed, 169 insertions(+)
 create mode 100644 softmmu_llsc_template.h

Comments

Richard Henderson Sept. 30, 2015, 3:58 a.m. UTC | #1
On 09/24/2015 06:32 PM, Alvise Rigo wrote:
> The new helpers rely on the legacy ones to perform the actual read/write.
>
> The LoadLink helper (helper_ldlink_name) prepares the way for the
> following SC operation. It sets the linked address and the size of the
> access.
> This helper also updates the TLB entry of the page involved in the
> LL/SC for those vCPUs that have the bit set (dirty), so that
> subsequent accesses made by any vCPU will follow the slow path.
>
> The StoreConditional helper (helper_stcond_name) returns 1 if the
> store has to fail due to a concurrent access to the same page by
> another vCPU. A 'concurrent access' can be a store made by *any* vCPU
> (although some implementations allow stores made by the CPU that issued
> the LoadLink).
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>   cputlb.c                |   3 ++
>   softmmu_llsc_template.h | 124 ++++++++++++++++++++++++++++++++++++++++++++++++
>   softmmu_template.h      |  12 +++++
>   tcg/tcg.h               |  30 ++++++++++++
>   4 files changed, 169 insertions(+)
>   create mode 100644 softmmu_llsc_template.h
>
> diff --git a/cputlb.c b/cputlb.c
> index 1e25a2a..d5aae7c 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -416,6 +416,8 @@ static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
>
>   #define MMUSUFFIX _mmu
>
> +/* Generates LoadLink/StoreConditional helpers in softmmu_template.h */
> +#define GEN_EXCLUSIVE_HELPERS
>   #define SHIFT 0
>   #include "softmmu_template.h"
>
> @@ -428,6 +430,7 @@ static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
>   #define SHIFT 3
>   #include "softmmu_template.h"
>   #undef MMUSUFFIX
> +#undef GEN_EXCLUSIVE_HELPERS
>
>   #define MMUSUFFIX _cmmu
>   #undef GETPC_ADJ
> diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h
> new file mode 100644
> index 0000000..9f22834
> --- /dev/null
> +++ b/softmmu_llsc_template.h
> @@ -0,0 +1,124 @@
> +/*
> + *  Software MMU support (exclusive load/store operations)
> + *
> + * Generate helpers used by TCG for qemu_ldlink/stcond ops.
> + *
> + * Included from softmmu_template.h only.
> + *
> + * Copyright (c) 2015 Virtual Open Systems
> + *
> + * Authors:
> + *  Alvise Rigo <a.rigo@virtualopensystems.com>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/* This template does not generate the LE and BE versions together, but only one
> + * of the two, depending on whether BIGENDIAN_EXCLUSIVE_HELPERS has been set.
> + * The same nomenclature as softmmu_template.h is used for the exclusive
> + * helpers.  */
> +
> +#ifdef BIGENDIAN_EXCLUSIVE_HELPERS
> +
> +#define helper_ldlink_name  glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX)
> +#define helper_stcond_name  glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX)
> +#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
> +#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
> +
> +#else /* LE helpers + 8bit helpers (generated only once for both LE and BE) */
> +
> +#if DATA_SIZE > 1
> +#define helper_ldlink_name  glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX)
> +#define helper_stcond_name  glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX)
> +#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
> +#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
> +#else /* DATA_SIZE <= 1 */
> +#define helper_ldlink_name  glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX)
> +#define helper_stcond_name  glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX)
> +#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
> +#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
> +#endif
> +
> +#endif
> +
> +WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
> +                                TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    WORD_TYPE ret;
> +    int index;
> +    CPUState *cpu;
> +    hwaddr hw_addr;
> +    unsigned mmu_idx = get_mmuidx(oi);
> +
> +    /* Use the proper load helper from cpu_ldst.h */
> +    ret = helper_ld(env, addr, mmu_idx, retaddr);
> +
> +    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> +
> +    /* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat)
> +     * plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */
> +    hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr;
> +
> +    cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
> +    /* If all the vCPUs have the EXCL bit set for this page there is no need
> +     * to request any flush. */
> +    if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
> +        CPU_FOREACH(cpu) {
> +            if (current_cpu != cpu) {
> +                if (cpu_physical_memory_excl_is_dirty(hw_addr,
> +                                                    cpu->cpu_index)) {
> +                    cpu_physical_memory_clear_excl_dirty(hw_addr,
> +                                                         cpu->cpu_index);
> +                    tlb_flush(cpu, 1);
> +                }

Why would you need to indicate that another cpu has started an exclusive 
operation on this page?  That seems definitely wrong.

I think that all you wanted was to flush the other cpu, so that it notices that 
this cpu has started an exclusive operation.

It would be great if most of this were pulled out to a subroutine so that these 
helper functions didn't accrue so much duplicate code.
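
Something along these lines, for instance (untested sketch, name
invented here; only the DATA_SIZE-dependent load would stay in the
template):

  static inline void excl_ll_common(CPUArchState *env, target_ulong addr,
                                    unsigned mmu_idx, int size)
  {
      int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
      hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK)
                       + addr;
      CPUState *cpu;

      cpu_physical_memory_clear_excl_dirty(hw_addr,
                                           ENV_GET_CPU(env)->cpu_index);
      if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
          CPU_FOREACH(cpu) {
              if (current_cpu != cpu &&
                  cpu_physical_memory_excl_is_dirty(hw_addr, cpu->cpu_index)) {
                  cpu_physical_memory_clear_excl_dirty(hw_addr, cpu->cpu_index);
                  tlb_flush(cpu, 1);
              }
          }
      }

      env->excl_protected_range.begin = hw_addr;
      env->excl_protected_range.end = hw_addr + size;
      env->tlb_table[mmu_idx][index].addr_write |= TLB_EXCL;
  }

with helper_ldlink_name reduced to the helper_ld call plus
excl_ll_common(env, addr, mmu_idx, DATA_SIZE).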

It would be fantastic to implement a tlb_flush_phys_page so that we didn't have 
to flush the entire tlb of the other cpus.

> +    /* We preemptively set it to 1 to distinguish the following legacy
> +     * access as one made by the store conditional wrapper. If the store
> +     * conditional does not succeed, the value will be reset to 0. */
> +    env->excl_succeeded = 1;

Ah -- "distinguish" is the word that was missing from the comments in the 
previous patches.

> diff --git a/softmmu_template.h b/softmmu_template.h
> index e4431e8..ad65d20 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -640,6 +640,18 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
>   #endif
>   #endif /* !defined(SOFTMMU_CODE_ACCESS) */
>
> +#ifdef GEN_EXCLUSIVE_HELPERS
> +
> +#if DATA_SIZE > 1 /* The 8-bit helpers are generated along with LE helpers */
> +#define BIGENDIAN_EXCLUSIVE_HELPERS
> +#include "softmmu_llsc_template.h"
> +#undef BIGENDIAN_EXCLUSIVE_HELPERS
> +#endif
> +
> +#include "softmmu_llsc_template.h"
> +
> +#endif /* GEN_EXCLUSIVE_HELPERS */

I'm not especially keen on this.  Not that what we currently have in 
softmmu_template.h is terribly better.

I wonder what can be done to clean all of this up, short of actually 
transitioning to c++ templates...


r~
Alvise Rigo Sept. 30, 2015, 9:46 a.m. UTC | #2
On Wed, Sep 30, 2015 at 5:58 AM, Richard Henderson <rth@twiddle.net> wrote:
> On 09/24/2015 06:32 PM, Alvise Rigo wrote:
>>
>> The new helpers rely on the legacy ones to perform the actual read/write.
>>
>> The LoadLink helper (helper_ldlink_name) prepares the way for the
>> following SC operation. It sets the linked address and the size of the
>> access.
>> This helper also updates the TLB entry of the page involved in the
>> LL/SC for those vCPUs that have the bit set (dirty), so that
>> subsequent accesses made by any vCPU will follow the slow path.
>>
>> The StoreConditional helper (helper_stcond_name) returns 1 if the
>> store has to fail due to a concurrent access to the same page by
>> another vCPU. A 'concurrent access' can be a store made by *any* vCPU
>> (although some implementations allow stores made by the CPU that issued
>> the LoadLink).
>>
>> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
>> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
>> ---
>>   cputlb.c                |   3 ++
>>   softmmu_llsc_template.h | 124 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   softmmu_template.h      |  12 +++++
>>   tcg/tcg.h               |  30 ++++++++++++
>>   4 files changed, 169 insertions(+)
>>   create mode 100644 softmmu_llsc_template.h
>>
>> diff --git a/cputlb.c b/cputlb.c
>> index 1e25a2a..d5aae7c 100644
>> --- a/cputlb.c
>> +++ b/cputlb.c
>> @@ -416,6 +416,8 @@ static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
>>
>>   #define MMUSUFFIX _mmu
>>
>> +/* Generates LoadLink/StoreConditional helpers in softmmu_template.h */
>> +#define GEN_EXCLUSIVE_HELPERS
>>   #define SHIFT 0
>>   #include "softmmu_template.h"
>>
>> @@ -428,6 +430,7 @@ static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
>>   #define SHIFT 3
>>   #include "softmmu_template.h"
>>   #undef MMUSUFFIX
>> +#undef GEN_EXCLUSIVE_HELPERS
>>
>>   #define MMUSUFFIX _cmmu
>>   #undef GETPC_ADJ
>> diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h
>> new file mode 100644
>> index 0000000..9f22834
>> --- /dev/null
>> +++ b/softmmu_llsc_template.h
>> @@ -0,0 +1,124 @@
>> +/*
>> + *  Software MMU support (exclusive load/store operations)
>> + *
>> + * Generate helpers used by TCG for qemu_ldlink/stcond ops.
>> + *
>> + * Included from softmmu_template.h only.
>> + *
>> + * Copyright (c) 2015 Virtual Open Systems
>> + *
>> + * Authors:
>> + *  Alvise Rigo <a.rigo@virtualopensystems.com>
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +/* This template does not generate the LE and BE versions together, but only one
>> + * of the two, depending on whether BIGENDIAN_EXCLUSIVE_HELPERS has been set.
>> + * The same nomenclature as softmmu_template.h is used for the exclusive
>> + * helpers.  */
>> +
>> +#ifdef BIGENDIAN_EXCLUSIVE_HELPERS
>> +
>> +#define helper_ldlink_name  glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX)
>> +#define helper_stcond_name  glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX)
>> +#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
>> +#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
>> +
>> +#else /* LE helpers + 8bit helpers (generated only once for both LE and BE) */
>> +
>> +#if DATA_SIZE > 1
>> +#define helper_ldlink_name  glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX)
>> +#define helper_stcond_name  glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX)
>> +#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
>> +#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
>> +#else /* DATA_SIZE <= 1 */
>> +#define helper_ldlink_name  glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX)
>> +#define helper_stcond_name  glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX)
>> +#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
>> +#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
>> +#endif
>> +
>> +#endif
>> +
>> +WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
>> +                                TCGMemOpIdx oi, uintptr_t retaddr)
>> +{
>> +    WORD_TYPE ret;
>> +    int index;
>> +    CPUState *cpu;
>> +    hwaddr hw_addr;
>> +    unsigned mmu_idx = get_mmuidx(oi);
>> +
>> +    /* Use the proper load helper from cpu_ldst.h */
>> +    ret = helper_ld(env, addr, mmu_idx, retaddr);
>> +
>> +    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
>> +
>> +    /* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat)
>> +     * plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */
>> +    hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr;
>> +
>> +    cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
>> +    /* If all the vCPUs have the EXCL bit set for this page there is no need
>> +     * to request any flush. */
>> +    if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
>> +        CPU_FOREACH(cpu) {
>> +            if (current_cpu != cpu) {
>> +                if (cpu_physical_memory_excl_is_dirty(hw_addr,
>> +                                                    cpu->cpu_index)) {
>> +                    cpu_physical_memory_clear_excl_dirty(hw_addr,
>> +                                                         cpu->cpu_index);
>> +                    tlb_flush(cpu, 1);
>> +                }
>
>
> Why would you need to indicate that another cpu has started an exclusive
> operation on this page?  That seems definitely wrong.

cpu_physical_memory_clear_excl_dirty() clears the dirty bit, which
makes the TLB entry be generated with the EXCL flag set.

>
> I think that all you wanted was to flush the other cpu, so that it notices
> that this cpu has started an exclusive operation.

Indeed, after calling cpu_physical_memory_clear_excl_dirty and
flushing the TLB, the CPU will be forced to create the TLB entries
from scratch, with the TLB_EXCL flag if necessary.

>
> It would be great if most of this were pulled out to a subroutine so that
> these helper functions didn't accrue so much duplicate code.
>
> It would be fantastic to implement a tlb_flush_phys_page so that we didn't
> have to flush the entire tlb of the other cpus.

Yes, this would be great, but it would also require a structure
mapping PHYS_ADDR -> TLB_ENTRIES, which is not provided by the softmmu
at the moment.
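
Lacking that, a tlb_flush_phys_page would have to scan the whole TLB of
the target vCPU, reusing the same phys-address arithmetic as the LL
helper -- roughly (untested sketch; it ignores the victim TLB and the
TB jump cache, which a real version would also have to handle):

    static void tlb_flush_phys_page(CPUState *cpu, hwaddr phys_page)
    {
        CPUArchState *env = cpu->env_ptr;
        int mmu_idx, i;

        phys_page &= TARGET_PAGE_MASK;
        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
            for (i = 0; i < CPU_TLB_SIZE; i++) {
                CPUTLBEntry *te = &env->tlb_table[mmu_idx][i];
                /* Recover the physical page as in the LL helper:
                 * iotlb offset plus the virtual page address. */
                hwaddr page = (env->iotlb[mmu_idx][i].addr & TARGET_PAGE_MASK)
                              + (te->addr_write & TARGET_PAGE_MASK);
                if (page == phys_page) {
                    memset(te, -1, sizeof(*te)); /* invalidate the entry */
                }
            }
        }
    }

That is O(NB_MMU_MODES * CPU_TLB_SIZE) work per flushed vCPU, so it is
not obvious it would beat a plain tlb_flush().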

>
>> +    /* We preemptively set it to 1 to distinguish the following legacy
>> +     * access as one made by the store conditional wrapper. If the store
>> +     * conditional does not succeed, the value will be reset to 0. */
>> +    env->excl_succeeded = 1;
>
>
> Ah -- "distinguish" is the word that was missing from the comments in the
> previous patches.
>
>> diff --git a/softmmu_template.h b/softmmu_template.h
>> index e4431e8..ad65d20 100644
>> --- a/softmmu_template.h
>> +++ b/softmmu_template.h
>> @@ -640,6 +640,18 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
>>   #endif
>>   #endif /* !defined(SOFTMMU_CODE_ACCESS) */
>>
>> +#ifdef GEN_EXCLUSIVE_HELPERS
>> +
>> +#if DATA_SIZE > 1 /* The 8-bit helpers are generated along with LE helpers */
>> +#define BIGENDIAN_EXCLUSIVE_HELPERS
>> +#include "softmmu_llsc_template.h"
>> +#undef BIGENDIAN_EXCLUSIVE_HELPERS
>> +#endif
>> +
>> +#include "softmmu_llsc_template.h"
>> +
>> +#endif /* GEN_EXCLUSIVE_HELPERS */
>
>
> I'm not especially keen on this.  Not that what we currently have in
> softmmu_template.h is terribly better.

What I've proposed in the past was to copy the actual store code from
softmmu_template to softmmu_llsc_template. But since we now need to
support all the TLB_ flags, that would produce a lot of code
duplication.

>
> I wonder what can be done to clean all of this up, short of actually
> transitioning to c++ templates...
>

Is there already an example in QEMU I can look at?
Wouldn't this require a new compiler dependency?

Thanks,
alvise

>
> r~
Richard Henderson Sept. 30, 2015, 8:42 p.m. UTC | #3
On 09/30/2015 07:46 PM, alvise rigo wrote:
> On Wed, Sep 30, 2015 at 5:58 AM, Richard Henderson <rth@twiddle.net> wrote:
>> Why would you need to indicate that another cpu has started an exclusive
>> operation on this page?  That seems definitely wrong.
>
> cpu_physical_memory_clear_excl_dirty() clears the dirty bit, which
> makes the TLB entry be generated with the EXCL flag set.

Yes, but surely the clearing of dirty on current_cpu is enough to cause the 
other cpus to see that they need to set TLB_EXCL when reloading their tlb entries.

Why do you need to manipulate the *other* cpu's dirty bit?

>> I wonder what can be done to clean all of this up, short of actually
>> transitioning to c++ templates...
>>
>
> Is there already an example in QEMU I can look at?

No.


r~
Alvise Rigo Oct. 1, 2015, 8:05 a.m. UTC | #4
On Wed, Sep 30, 2015 at 10:42 PM, Richard Henderson <rth@twiddle.net> wrote:
>
> On 09/30/2015 07:46 PM, alvise rigo wrote:
>>
>> On Wed, Sep 30, 2015 at 5:58 AM, Richard Henderson <rth@twiddle.net> wrote:
>>>
>>> Why would you need to indicate that another cpu has started an exclusive
>>> operation on this page?  That seems definitely wrong.
>>
>>
>> cpu_physical_memory_clear_excl_dirty() clears the dirty bit, which
>> makes the TLB entry be generated with the EXCL flag set.
>
>
> Yes, but surely the clearing of dirty on current_cpu is enough to cause the other cpus to see that they need to set TLB_EXCL when reloading their tlb entries.
>
> Why do you need to manipulate the *other* cpu's dirty bit?

Because then we can assume that a cpu with the bit cleared is
guaranteed to have its TLB entries for that specific page set with the
EXCL flag. Moreover, knowing which cpus already have the EXCL flag set
allows us to reduce the flush requests whenever a new LL is issued on
the same page.

>
>
>>> I wonder what can be done to clean all of this up, short of actually
>>> transitioning to c++ templates...
>>>
>>
>> Is there already an example in QEMU I can look at?
>
>
> No.

Thanks,
alvise

>
>
>
> r~
Richard Henderson Oct. 1, 2015, 7:34 p.m. UTC | #5
On 10/01/2015 06:05 PM, alvise rigo wrote:
> On Wed, Sep 30, 2015 at 10:42 PM, Richard Henderson <rth@twiddle.net> wrote:
>>
>> On 09/30/2015 07:46 PM, alvise rigo wrote:
>>>
>>> On Wed, Sep 30, 2015 at 5:58 AM, Richard Henderson <rth@twiddle.net> wrote:
>>>>
>>>> Why would you need to indicate that another cpu has started an exclusive
>>>> operation on this page?  That seems definitely wrong.
>>>
>>>
>>> cpu_physical_memory_clear_excl_dirty() clears the dirty bit, which
>>> makes the TLB entry be generated with the EXCL flag set.
>>
>>
>> Yes, but surely the clearing of dirty on current_cpu is enough to cause the other cpus to see that they need to set TLB_EXCL when reloading their tlb entries.
>>
>> Why do you need to manipulate the *other* cpu's dirty bit?
>
> Because then we can assume that a cpu with the bit cleared is
> guaranteed to have its TLB entries for that specific page set with
> the EXCL flag. Moreover, knowing which cpus already have the EXCL
> flag set allows us to reduce the flush requests whenever a new LL is
> issued on the same page.

Does it actually help, or is that a guess without numbers?


r~
Patch

diff --git a/cputlb.c b/cputlb.c
index 1e25a2a..d5aae7c 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -416,6 +416,8 @@  static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
 
 #define MMUSUFFIX _mmu
 
+/* Generates LoadLink/StoreConditional helpers in softmmu_template.h */
+#define GEN_EXCLUSIVE_HELPERS
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -428,6 +430,7 @@  static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
 #define SHIFT 3
 #include "softmmu_template.h"
 #undef MMUSUFFIX
+#undef GEN_EXCLUSIVE_HELPERS
 
 #define MMUSUFFIX _cmmu
 #undef GETPC_ADJ
diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h
new file mode 100644
index 0000000..9f22834
--- /dev/null
+++ b/softmmu_llsc_template.h
@@ -0,0 +1,124 @@ 
+/*
+ *  Software MMU support (exclusive load/store operations)
+ *
+ * Generate helpers used by TCG for qemu_ldlink/stcond ops.
+ *
+ * Included from softmmu_template.h only.
+ *
+ * Copyright (c) 2015 Virtual Open Systems
+ *
+ * Authors:
+ *  Alvise Rigo <a.rigo@virtualopensystems.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This template does not generate the LE and BE versions together, but only one
+ * of the two, depending on whether BIGENDIAN_EXCLUSIVE_HELPERS has been set.
+ * The same nomenclature as softmmu_template.h is used for the exclusive
+ * helpers.  */
+
+#ifdef BIGENDIAN_EXCLUSIVE_HELPERS
+
+#define helper_ldlink_name  glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX)
+#define helper_stcond_name  glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+
+#else /* LE helpers + 8bit helpers (generated only once for both LE and BE) */
+
+#if DATA_SIZE > 1
+#define helper_ldlink_name  glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX)
+#define helper_stcond_name  glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+#else /* DATA_SIZE <= 1 */
+#define helper_ldlink_name  glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX)
+#define helper_stcond_name  glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+#endif
+
+#endif
+
+WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    WORD_TYPE ret;
+    int index;
+    CPUState *cpu;
+    hwaddr hw_addr;
+    unsigned mmu_idx = get_mmuidx(oi);
+
+    /* Use the proper load helper from cpu_ldst.h */
+    ret = helper_ld(env, addr, mmu_idx, retaddr);
+
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+
+    /* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat)
+     * plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */
+    hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr;
+
+    cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
+    /* If all the vCPUs have the EXCL bit set for this page there is no need
+     * to request any flush. */
+    if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
+        CPU_FOREACH(cpu) {
+            if (current_cpu != cpu) {
+                if (cpu_physical_memory_excl_is_dirty(hw_addr,
+                                                    cpu->cpu_index)) {
+                    cpu_physical_memory_clear_excl_dirty(hw_addr,
+                                                         cpu->cpu_index);
+                    tlb_flush(cpu, 1);
+                }
+            }
+        }
+    }
+
+    env->excl_protected_range.begin = hw_addr;
+    env->excl_protected_range.end = hw_addr + DATA_SIZE;
+
+    /* For this vCPU, just update the TLB entry, no need to flush. */
+    env->tlb_table[mmu_idx][index].addr_write |= TLB_EXCL;
+
+    return ret;
+}
+
+WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr,
+                             DATA_TYPE val, TCGMemOpIdx oi,
+                             uintptr_t retaddr)
+{
+    WORD_TYPE ret;
+    unsigned mmu_idx = get_mmuidx(oi);
+
+    /* We preemptively set it to 1 to distinguish the following legacy
+     * access as one made by the store conditional wrapper. If the store
+     * conditional does not succeed, the value will be reset to 0. */
+    env->excl_succeeded = 1;
+    helper_st(env, addr, val, mmu_idx, retaddr);
+
+    if (env->excl_succeeded) {
+        env->excl_succeeded = 0;
+        ret = 0;
+    } else {
+        ret = 1;
+    }
+
+    return ret;
+}
+
+#undef helper_ldlink_name
+#undef helper_stcond_name
+#undef helper_ld
+#undef helper_st
diff --git a/softmmu_template.h b/softmmu_template.h
index e4431e8..ad65d20 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -640,6 +640,18 @@  void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
 #endif
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#ifdef GEN_EXCLUSIVE_HELPERS
+
+#if DATA_SIZE > 1 /* The 8-bit helpers are generated along with LE helpers */
+#define BIGENDIAN_EXCLUSIVE_HELPERS
+#include "softmmu_llsc_template.h"
+#undef BIGENDIAN_EXCLUSIVE_HELPERS
+#endif
+
+#include "softmmu_llsc_template.h"
+
+#endif /* GEN_EXCLUSIVE_HELPERS */
+
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 231a781..f8e6e68 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -957,6 +957,21 @@  tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr);
 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr);
+/* Exclusive variants */
+tcg_target_ulong helper_ret_ldlinkub_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldlinkuw_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldlinkul_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_le_ldlinkq_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldlinkuw_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldlinkul_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_be_ldlinkq_mmu(CPUArchState *env, target_ulong addr,
+                                            TCGMemOpIdx oi, uintptr_t retaddr);
 
 /* Value sign-extended to tcg register size.  */
 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
@@ -984,6 +999,21 @@  void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr);
 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr);
+/* Exclusive variants */
+tcg_target_ulong helper_ret_stcondb_mmu(CPUArchState *env, target_ulong addr,
+                            uint8_t val, TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_stcondw_mmu(CPUArchState *env, target_ulong addr,
+                            uint16_t val, TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_stcondl_mmu(CPUArchState *env, target_ulong addr,
+                            uint32_t val, TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_le_stcondq_mmu(CPUArchState *env, target_ulong addr,
+                            uint64_t val, TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_stcondw_mmu(CPUArchState *env, target_ulong addr,
+                            uint16_t val, TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_stcondl_mmu(CPUArchState *env, target_ulong addr,
+                            uint32_t val, TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_be_stcondq_mmu(CPUArchState *env, target_ulong addr,
+                            uint64_t val, TCGMemOpIdx oi, uintptr_t retaddr);
 
 /* Temporary aliases until backends are converted.  */
 #ifdef TARGET_WORDS_BIGENDIAN