@@ -26,6 +26,10 @@
/* Alpha processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
+#define TCG_GUEST_MO_BEF_LD (0)
+#define TCG_GUEST_MO_AFT_LD (0)
+#define TCG_GUEST_MO_BEF_ST (0)
+#define TCG_GUEST_MO_AFT_ST (0)
#define ICACHE_LINE_SIZE 32
#define DCACHE_LINE_SIZE 32
@@ -29,6 +29,10 @@
/* ARM processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
+#define TCG_GUEST_MO_BEF_LD (0)
+#define TCG_GUEST_MO_AFT_LD (0)
+#define TCG_GUEST_MO_BEF_ST (0)
+#define TCG_GUEST_MO_AFT_ST (0)
#ifdef TARGET_AARCH64
#define KVM_HAVE_MCE_INJECTION 1
@@ -33,6 +33,10 @@
#define CPU_RESOLVING_TYPE TYPE_AVR_CPU
#define TCG_GUEST_DEFAULT_MO 0
+#define TCG_GUEST_MO_BEF_LD 0
+#define TCG_GUEST_MO_AFT_LD 0
+#define TCG_GUEST_MO_BEF_ST 0
+#define TCG_GUEST_MO_AFT_ST 0
/*
* AVR has two memory spaces, data & code.
@@ -29,6 +29,10 @@
a weak memory model, but with TLB bits that force ordering on a per-page
basis. It's probably easier to fall back to a strong memory model. */
#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL
+#define TCG_GUEST_MO_BEF_LD (TCG_MO_LD_LD | TCG_MO_ST_LD)
+#define TCG_GUEST_MO_AFT_LD (0)
+#define TCG_GUEST_MO_BEF_ST (TCG_MO_ST_ST | TCG_MO_LD_ST)
+#define TCG_GUEST_MO_AFT_ST (0)
#define MMU_KERNEL_IDX 0
#define MMU_USER_IDX 3
@@ -29,6 +29,10 @@
/* The x86 has a strong memory model with some store-after-load re-ordering */
#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+#define TCG_GUEST_MO_BEF_LD (0)
+#define TCG_GUEST_MO_AFT_LD (TCG_MO_LD_ST | TCG_MO_LD_LD)
+#define TCG_GUEST_MO_BEF_ST (TCG_MO_ST_ST)
+#define TCG_GUEST_MO_AFT_ST (0)
#define KVM_HAVE_MCE_INJECTION 1
@@ -8,6 +8,10 @@
#include "mips-defs.h"
#define TCG_GUEST_DEFAULT_MO (0)
+#define TCG_GUEST_MO_BEF_LD (0)
+#define TCG_GUEST_MO_AFT_LD (0)
+#define TCG_GUEST_MO_BEF_ST (0)
+#define TCG_GUEST_MO_AFT_ST (0)
typedef struct CPUMIPSTLBContext CPUMIPSTLBContext;
@@ -27,6 +27,10 @@
#include "qom/object.h"
#define TCG_GUEST_DEFAULT_MO 0
+#define TCG_GUEST_MO_BEF_LD 0
+#define TCG_GUEST_MO_AFT_LD 0
+#define TCG_GUEST_MO_BEF_ST 0
+#define TCG_GUEST_MO_AFT_ST 0
#define TARGET_PAGE_BITS_64K 16
#define TARGET_PAGE_BITS_16M 24
@@ -29,6 +29,10 @@
#include "cpu_bits.h"
#define TCG_GUEST_DEFAULT_MO 0
+#define TCG_GUEST_MO_BEF_LD 0
+#define TCG_GUEST_MO_AFT_LD 0
+#define TCG_GUEST_MO_BEF_ST 0
+#define TCG_GUEST_MO_AFT_ST 0
#define TYPE_RISCV_CPU "riscv-cpu"
@@ -34,6 +34,10 @@
/* The z/Architecture has a strong memory model with some store-after-load re-ordering */
#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+#define TCG_GUEST_MO_BEF_LD (0)
+#define TCG_GUEST_MO_AFT_LD (TCG_MO_LD_ST | TCG_MO_LD_LD)
+#define TCG_GUEST_MO_BEF_ST (TCG_MO_ST_ST)
+#define TCG_GUEST_MO_AFT_ST (0)
#define TARGET_INSN_START_EXTRA_WORDS 2
@@ -35,6 +35,10 @@
/* Xtensa processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
+#define TCG_GUEST_MO_BEF_LD (0)
+#define TCG_GUEST_MO_AFT_LD (0)
+#define TCG_GUEST_MO_BEF_ST (0)
+#define TCG_GUEST_MO_AFT_ST (0)
enum {
/* Additional instructions */
@@ -2834,9 +2834,6 @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
static void tcg_gen_req_mo(TCGBar type)
{
-#ifdef TCG_GUEST_DEFAULT_MO
- type &= TCG_GUEST_DEFAULT_MO;
-#endif
type &= ~TCG_TARGET_DEFAULT_MO;
if (type) {
tcg_gen_mb(type | TCG_BAR_SC);
@@ -2873,7 +2870,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
MemOp orig_memop;
MemOpIdx oi;
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ tcg_gen_req_mo(TCG_GUEST_MO_BEF_LD);
memop = tcg_canonicalize_memop(memop, 0, 0);
oi = make_memop_idx(memop, idx);
@@ -2904,6 +2901,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
g_assert_not_reached();
}
}
+
+ tcg_gen_req_mo(TCG_GUEST_MO_AFT_LD);
}
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
@@ -2911,7 +2910,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
TCGv_i32 swap = NULL;
MemOpIdx oi;
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ tcg_gen_req_mo(TCG_GUEST_MO_BEF_ST);
memop = tcg_canonicalize_memop(memop, 0, 1);
oi = make_memop_idx(memop, idx);
@@ -2942,6 +2941,8 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
if (swap) {
tcg_temp_free_i32(swap);
}
+
+ tcg_gen_req_mo(TCG_GUEST_MO_AFT_ST);
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
@@ -2959,7 +2960,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
return;
}
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ tcg_gen_req_mo(TCG_GUEST_MO_BEF_LD);
memop = tcg_canonicalize_memop(memop, 1, 0);
oi = make_memop_idx(memop, idx);
@@ -2994,6 +2995,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
g_assert_not_reached();
}
}
+
+ tcg_gen_req_mo(TCG_GUEST_MO_AFT_LD);
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
@@ -3006,7 +3009,7 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
return;
}
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ tcg_gen_req_mo(TCG_GUEST_MO_BEF_ST);
memop = tcg_canonicalize_memop(memop, 1, 1);
oi = make_memop_idx(memop, idx);
@@ -3036,6 +3039,8 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
if (swap) {
tcg_temp_free_i64(swap);
}
+
+ tcg_gen_req_mo(TCG_GUEST_MO_AFT_ST);
}
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
This commit allows memory ordering enforcement to be performed more precisely. The previous scheme, with fences always inserted before the memory access, made it impossible to correctly enforce the x86 model on weakly ordered architectures such as Arm. With this change, the memory models of guests can be defined more precisely, with a fence before and a fence after the access. This allows for a precise mapping of the ordering that relies less on what type of fences the host architecture provides. Signed-off-by: Redha Gouicem <redha.gouicem@gmail.com> --- target/alpha/cpu.h | 4 ++++ target/arm/cpu.h | 4 ++++ target/avr/cpu.h | 4 ++++ target/hppa/cpu.h | 4 ++++ target/i386/cpu.h | 4 ++++ target/mips/cpu.h | 4 ++++ target/ppc/cpu.h | 4 ++++ target/riscv/cpu.h | 4 ++++ target/s390x/cpu.h | 4 ++++ target/xtensa/cpu.h | 4 ++++ tcg/tcg-op.c | 19 ++++++++++++------- 11 files changed, 52 insertions(+), 7 deletions(-)