diff mbox

[v4] target-microblaze: lwx/swx: first implementation

Message ID 1338521008-12740-1-git-send-email-peter.crosthwaite@petalogix.com
State New
Headers show

Commit Message

Peter A. G. Crosthwaite June 1, 2012, 3:23 a.m. UTC
Signed-off-by: Peter A. G. Crosthwaite <peter.crosthwaite@petalogix.com>
---
changed from v3:
simplified tcg local variable usage aqcross branch
changed from v2:
fixed tcg local variable usage across branch
reworked carry logic (with new write_carryi() function)
made LOG_DIS show lwx swx properly
changed from v1:
implemented reservation address checking
created new cpu state variable specifically for reservation address/flag state

 target-microblaze/cpu.c       |    1 +
 target-microblaze/cpu.h       |    4 ++
 target-microblaze/helper.c    |    2 +
 target-microblaze/translate.c |   62 +++++++++++++++++++++++++++++++++++++---
 4 files changed, 64 insertions(+), 5 deletions(-)

Comments

Andreas Färber June 1, 2012, 1:51 p.m. UTC | #1
Am 01.06.2012 05:23, schrieb Peter A. G. Crosthwaite:
> Signed-off-by: Peter A. G. Crosthwaite <peter.crosthwaite@petalogix.com>
> ---
> changed from v3:
> simplified tcg local variable usage aqcross branch
> changed from v2:
> fixed tcg local variable usage across branch
> reworked carry logic (with new write_carryi() function)
> made LOG_DIS show lwx swx properly
> changed from v1:
> implemented reservation address checking
> created new cpu state variable specifically for reservation address/flag state
> 
>  target-microblaze/cpu.c       |    1 +
>  target-microblaze/cpu.h       |    4 ++
>  target-microblaze/helper.c    |    2 +
>  target-microblaze/translate.c |   62 +++++++++++++++++++++++++++++++++++++---
>  4 files changed, 64 insertions(+), 5 deletions(-)
> 
> diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c
> index 9c3b74e..34b3a9b 100644
> --- a/target-microblaze/cpu.c
> +++ b/target-microblaze/cpu.c
> @@ -39,6 +39,7 @@ static void mb_cpu_reset(CPUState *s)
>      mcc->parent_reset(s);
>  
>      memset(env, 0, offsetof(CPUMBState, breakpoints));
> +    env->res_addr = RES_ADDR_NONE;
>      tlb_flush(env, 1);
>  
>      /* Disable stack protector.  */

Note that my idea for structuring mb_cpu_reset() was to group the
"duplicated common" functionality there that would get moved into the
base class' reset function once no longer dependent on "env".
Not a blocker for this patch but if you have to send a v2 anyway.

Andreas
Edgar E. Iglesias June 4, 2012, 8:31 a.m. UTC | #2
Applied this one, thanks Peter
Stefan Weil June 4, 2012, 8:26 p.m. UTC | #3
Hi Peter,

this patch (which was applied to QEMU master) breaks debug builds
(configure --enable-debug). see my comment below.

Regards,

Stefan W.


Am 01.06.2012 05:23, schrieb Peter A. G. Crosthwaite:
> Signed-off-by: Peter A. G. Crosthwaite<peter.crosthwaite@petalogix.com>
> ---
> changed from v3:
> simplified tcg local variable usage aqcross branch
> changed from v2:
> fixed tcg local variable usage across branch
> reworked carry logic (with new write_carryi() function)
> made LOG_DIS show lwx swx properly
> changed from v1:
> implemented reservation address checking
> created new cpu state variable specifically for reservation address/flag state
>
>   target-microblaze/cpu.c       |    1 +
>   target-microblaze/cpu.h       |    4 ++
>   target-microblaze/helper.c    |    2 +
>   target-microblaze/translate.c |   62 +++++++++++++++++++++++++++++++++++++---
>   4 files changed, 64 insertions(+), 5 deletions(-)
>

...

> diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
> index a362938..f0ebd59 100644
> --- a/target-microblaze/translate.c
> +++ b/target-microblaze/translate.c
> @@ -162,6 +162,14 @@ static void write_carry(DisasContext *dc, TCGv v)
>       tcg_temp_free(t0);
>   }
>
> +static void write_carryi(DisasContext *dc, int carry)
> +{
> +    TCGv t0 = tcg_temp_new();
> +    tcg_gen_movi_tl(t0, carry ? 1 : 0);
> +    write_carry(dc, t0);
> +    tcg_temp_free(t0);
> +}
> +
>   /* True if ALU operand b is a small immediate that may deserve
>      faster treatment.  */
>   static inline int dec_alu_op_b_is_small_imm(DisasContext *dc)
> @@ -948,12 +956,13 @@ static inline void dec_byteswap(DisasContext *dc, TCGv dst, TCGv src, int size)
>   static void dec_load(DisasContext *dc)
>   {
>       TCGv t, *addr;
> -    unsigned int size, rev = 0;
> +    unsigned int size, rev = 0, ex = 0;
>
>       size = 1<<  (dc->opcode&  3);
>
>       if (!dc->type_b) {
>           rev = (dc->ir>>  9)&  1;
> +        ex = (dc->ir>>  10)&  1;
>       }
>
>       if (size>  4&&  (dc->tb_flags&  MSR_EE_FLAG)
> @@ -963,7 +972,8 @@ static void dec_load(DisasContext *dc)
>           return;
>       }
>
> -    LOG_DIS("l%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
> +    LOG_DIS("l%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
> +                                                        ex ? "x" : "");
>
>       t_sync_flags(dc);
>       addr = compute_ldst_addr(dc,&t);
> @@ -1019,6 +1029,17 @@ static void dec_load(DisasContext *dc)
>           }
>       }
>
> +    /* lwx does not throw unaligned access errors, so force alignment */
> +    if (ex) {
> +        /* Force addr into the temp.  */
> +        if (addr !=&t) {
> +            t = tcg_temp_new();
> +            tcg_gen_mov_tl(t, *addr);
> +            addr =&t;
> +        }
> +        tcg_gen_andi_tl(t, t, ~3);
> +    }
> +
>       /* If we get a fault on a dslot, the jmpstate better be in sync.  */
>       sync_jmpstate(dc);
>
> @@ -1057,6 +1078,12 @@ static void dec_load(DisasContext *dc)
>           }
>       }
>
> +    if (ex) { /* lwx */
> +        /* no support for for AXI exclusive so always clear C */
> +        write_carryi(dc, 0);
> +        tcg_gen_st_tl(*addr, cpu_env, offsetof(CPUMBState, res_addr));
> +    }
> +
>       if (addr ==&t)
>           tcg_temp_free(t);
>   }
> @@ -1078,12 +1105,14 @@ static void gen_store(DisasContext *dc, TCGv addr, TCGv val,
>
>   static void dec_store(DisasContext *dc)
>   {
> -    TCGv t, *addr;
> -    unsigned int size, rev = 0;
> +    TCGv t, *addr, swx_addr, r_check = 0;

With --enable-debug, TCGv is a struct (not an int),
therefore r_check = 0 does not work.

swx_addr and r_check are used in the same context.
Can their declarations be moved to the block where
they are first used?

If that is not possible, either none or both of these
two variables should be initialised.


> +    int swx_skip = 0;
> +    unsigned int size, rev = 0, ex = 0;
>
>       size = 1<<  (dc->opcode&  3);
>       if (!dc->type_b) {
>           rev = (dc->ir>>  9)&  1;
> +        ex = (dc->ir>>  10)&  1;
>       }
>
>       if (size>  4&&  (dc->tb_flags&  MSR_EE_FLAG)
> @@ -1093,12 +1122,30 @@ static void dec_store(DisasContext *dc)
>           return;
>       }
>
> -    LOG_DIS("s%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
> +    LOG_DIS("s%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
> +                                                        ex ? "x" : "");
>       t_sync_flags(dc);
>       /* If we get a fault on a dslot, the jmpstate better be in sync.  */
>       sync_jmpstate(dc);
>       addr = compute_ldst_addr(dc,&t);
>
> +    if (ex) { /* swx */
> +        r_check = tcg_temp_new();
> +        swx_addr = tcg_temp_local_new();


TCGv r_check = ...;
TCGv swx_addr = ...;

> +
> +        /* Force addr into the swx_addr. */
> +        tcg_gen_mov_tl(swx_addr, *addr);
> +        addr =&swx_addr;
> +        /* swx does not throw unaligned access errors, so force alignment */
> +        tcg_gen_andi_tl(swx_addr, swx_addr, ~3);
> +
> +        tcg_gen_ld_tl(r_check, cpu_env, offsetof(CPUMBState, res_addr));
> +        write_carryi(dc, 1);
> +        swx_skip = gen_new_label();
> +        tcg_gen_brcond_tl(TCG_COND_NE, r_check, swx_addr, swx_skip);
> +        write_carryi(dc, 0);

Add this code from block at end of function?

         gen_set_label(swx_skip);
         tcg_temp_free(swx_addr);
	tcg_temp_free(r_check);

> +    }
> +
>       if (rev&&  size != 4) {
>           /* Endian reverse the address. t is addr.  */
>           switch (size) {
> @@ -1174,6 +1221,11 @@ static void dec_store(DisasContext *dc)
>           gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
>                               tcg_const_tl(1), tcg_const_tl(size - 1));
>       }

Move the following conditional block up?

> +    if (ex) {
> +        gen_set_label(swx_skip);
> +        tcg_temp_free(r_check);
> +        tcg_temp_free(swx_addr);
> +    }
Peter A. G. Crosthwaite June 5, 2012, 2:34 a.m. UTC | #4
Thanks Stefan,

Fix is up on the mailing list. I went for the simple solution of just
always initing the tcg variables with tcg_temp_*new. May be a minor
performance penalty but noting noticeable.

Regards,
Peter

On Tue, Jun 5, 2012 at 6:26 AM, Stefan Weil <sw@weilnetz.de> wrote:
> Hi Peter,
>
> this patch (which was applied to QEMU master) breaks debug builds
> (configure --enable-debug). see my comment below.
>
> Regards,
>
> Stefan W.
>
>
>
> Am 01.06.2012 05:23, schrieb Peter A. G. Crosthwaite:
>>
>> Signed-off-by: Peter A. G. Crosthwaite<peter.crosthwaite@petalogix.com>
>>
>> ---
>> changed from v3:
>> simplified tcg local variable usage aqcross branch
>> changed from v2:
>> fixed tcg local variable usage across branch
>> reworked carry logic (with new write_carryi() function)
>> made LOG_DIS show lwx swx properly
>> changed from v1:
>> implemented reservation address checking
>> created new cpu state variable specifically for reservation address/flag
>> state
>>
>>  target-microblaze/cpu.c       |    1 +
>>  target-microblaze/cpu.h       |    4 ++
>>  target-microblaze/helper.c    |    2 +
>>  target-microblaze/translate.c |   62
>> +++++++++++++++++++++++++++++++++++++---
>>  4 files changed, 64 insertions(+), 5 deletions(-)
>>
>
> ...
>
>> diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
>> index a362938..f0ebd59 100644
>> --- a/target-microblaze/translate.c
>> +++ b/target-microblaze/translate.c
>> @@ -162,6 +162,14 @@ static void write_carry(DisasContext *dc, TCGv v)
>>      tcg_temp_free(t0);
>>  }
>>
>> +static void write_carryi(DisasContext *dc, int carry)
>> +{
>> +    TCGv t0 = tcg_temp_new();
>> +    tcg_gen_movi_tl(t0, carry ? 1 : 0);
>> +    write_carry(dc, t0);
>> +    tcg_temp_free(t0);
>> +}
>> +
>>  /* True if ALU operand b is a small immediate that may deserve
>>     faster treatment.  */
>>  static inline int dec_alu_op_b_is_small_imm(DisasContext *dc)
>> @@ -948,12 +956,13 @@ static inline void dec_byteswap(DisasContext *dc,
>> TCGv dst, TCGv src, int size)
>>  static void dec_load(DisasContext *dc)
>>  {
>>      TCGv t, *addr;
>> -    unsigned int size, rev = 0;
>> +    unsigned int size, rev = 0, ex = 0;
>>
>>      size = 1<<  (dc->opcode&  3);
>>
>>      if (!dc->type_b) {
>>          rev = (dc->ir>>  9)&  1;
>>
>> +        ex = (dc->ir>>  10)&  1;
>>      }
>>
>>      if (size>  4&&  (dc->tb_flags&  MSR_EE_FLAG)
>>
>> @@ -963,7 +972,8 @@ static void dec_load(DisasContext *dc)
>>          return;
>>      }
>>
>> -    LOG_DIS("l%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
>> +    LOG_DIS("l%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
>> +                                                        ex ? "x" : "");
>>
>>      t_sync_flags(dc);
>>      addr = compute_ldst_addr(dc,&t);
>> @@ -1019,6 +1029,17 @@ static void dec_load(DisasContext *dc)
>>          }
>>      }
>>
>> +    /* lwx does not throw unaligned access errors, so force alignment */
>> +    if (ex) {
>> +        /* Force addr into the temp.  */
>> +        if (addr !=&t) {
>> +            t = tcg_temp_new();
>> +            tcg_gen_mov_tl(t, *addr);
>> +            addr =&t;
>> +        }
>> +        tcg_gen_andi_tl(t, t, ~3);
>> +    }
>> +
>>      /* If we get a fault on a dslot, the jmpstate better be in sync.  */
>>      sync_jmpstate(dc);
>>
>> @@ -1057,6 +1078,12 @@ static void dec_load(DisasContext *dc)
>>          }
>>      }
>>
>> +    if (ex) { /* lwx */
>> +        /* no support for for AXI exclusive so always clear C */
>> +        write_carryi(dc, 0);
>> +        tcg_gen_st_tl(*addr, cpu_env, offsetof(CPUMBState, res_addr));
>> +    }
>> +
>>      if (addr ==&t)
>>          tcg_temp_free(t);
>>  }
>> @@ -1078,12 +1105,14 @@ static void gen_store(DisasContext *dc, TCGv addr,
>> TCGv val,
>>
>>  static void dec_store(DisasContext *dc)
>>  {
>> -    TCGv t, *addr;
>> -    unsigned int size, rev = 0;
>> +    TCGv t, *addr, swx_addr, r_check = 0;
>
>
> With --enable-debug, TCGv is a struct (not an int),
> therefore r_check = 0 does not work.
>
> swx_addr and r_check are used in the same context.
> Can their declarations be moved to the block where
> they are first used?
>
> If that is not possible, either none or both of these
> two variables should be initialised.
>
>
>> +    int swx_skip = 0;
>> +    unsigned int size, rev = 0, ex = 0;
>>
>>      size = 1<<  (dc->opcode&  3);
>>      if (!dc->type_b) {
>>          rev = (dc->ir>>  9)&  1;
>>
>> +        ex = (dc->ir>>  10)&  1;
>>      }
>>
>>      if (size>  4&&  (dc->tb_flags&  MSR_EE_FLAG)
>>
>> @@ -1093,12 +1122,30 @@ static void dec_store(DisasContext *dc)
>>          return;
>>      }
>>
>> -    LOG_DIS("s%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
>> +    LOG_DIS("s%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
>> +                                                        ex ? "x" : "");
>>      t_sync_flags(dc);
>>      /* If we get a fault on a dslot, the jmpstate better be in sync.  */
>>      sync_jmpstate(dc);
>>      addr = compute_ldst_addr(dc,&t);
>>
>> +    if (ex) { /* swx */
>> +        r_check = tcg_temp_new();
>> +        swx_addr = tcg_temp_local_new();
>
>
>
> TCGv r_check = ...;
> TCGv swx_addr = ...;
>
>
>> +
>> +        /* Force addr into the swx_addr. */
>> +        tcg_gen_mov_tl(swx_addr, *addr);
>> +        addr =&swx_addr;
>> +        /* swx does not throw unaligned access errors, so force alignment
>> */
>> +        tcg_gen_andi_tl(swx_addr, swx_addr, ~3);
>> +
>> +        tcg_gen_ld_tl(r_check, cpu_env, offsetof(CPUMBState, res_addr));
>> +        write_carryi(dc, 1);
>> +        swx_skip = gen_new_label();
>> +        tcg_gen_brcond_tl(TCG_COND_NE, r_check, swx_addr, swx_skip);
>> +        write_carryi(dc, 0);
>
>
> Add this code from block at end of function?
>
>        gen_set_label(swx_skip);
>        tcg_temp_free(swx_addr);
>        tcg_temp_free(r_check);
>
>> +    }
>> +
>>      if (rev&&  size != 4) {
>>
>>          /* Endian reverse the address. t is addr.  */
>>          switch (size) {
>> @@ -1174,6 +1221,11 @@ static void dec_store(DisasContext *dc)
>>          gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
>>                              tcg_const_tl(1), tcg_const_tl(size - 1));
>>      }
>
>
> Move the following conditional block up?
>
>
>> +    if (ex) {
>> +        gen_set_label(swx_skip);
>> +        tcg_temp_free(r_check);
>> +        tcg_temp_free(swx_addr);
>> +    }
>
>
diff mbox

Patch

diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c
index 9c3b74e..34b3a9b 100644
--- a/target-microblaze/cpu.c
+++ b/target-microblaze/cpu.c
@@ -39,6 +39,7 @@  static void mb_cpu_reset(CPUState *s)
     mcc->parent_reset(s);
 
     memset(env, 0, offsetof(CPUMBState, breakpoints));
+    env->res_addr = RES_ADDR_NONE;
     tlb_flush(env, 1);
 
     /* Disable stack protector.  */
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index 718d5bb..b0ba9fe 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -243,6 +243,10 @@  struct CPUMBState {
     /* Stack protectors. Yes, it's a hw feature.  */
     uint32_t slr, shr;
 
+    /* lwx/swx reserved address */
+#define RES_ADDR_NONE 0xffffffff /* Use 0xffffffff to indicate no reservation */
+    uint32_t res_addr;
+
     /* Internal flags.  */
 #define IMM_FLAG	4
 #define MSR_EE_FLAG     (1 << 8)
diff --git a/target-microblaze/helper.c b/target-microblaze/helper.c
index 2412a58..74fce26 100644
--- a/target-microblaze/helper.c
+++ b/target-microblaze/helper.c
@@ -29,6 +29,7 @@ 
 void do_interrupt (CPUMBState *env)
 {
     env->exception_index = -1;
+    env->res_addr = RES_ADDR_NONE;
     env->regs[14] = env->sregs[SR_PC];
 }
 
@@ -116,6 +117,7 @@  void do_interrupt(CPUMBState *env)
     assert(!((env->iflags & D_FLAG) && (env->iflags & IMM_FLAG)));
     assert(!(env->iflags & (DRTI_FLAG | DRTE_FLAG | DRTB_FLAG)));
 /*    assert(env->sregs[SR_MSR] & (MSR_EE)); Only for HW exceptions.  */
+    env->res_addr = RES_ADDR_NONE;
     switch (env->exception_index) {
         case EXCP_HW_EXCP:
             if (!(env->pvr.regs[0] & PVR0_USE_EXC_MASK)) {
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index a362938..f0ebd59 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -162,6 +162,14 @@  static void write_carry(DisasContext *dc, TCGv v)
     tcg_temp_free(t0);
 }
 
+static void write_carryi(DisasContext *dc, int carry)
+{
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_movi_tl(t0, carry ? 1 : 0);
+    write_carry(dc, t0);
+    tcg_temp_free(t0);
+}
+
 /* True if ALU operand b is a small immediate that may deserve
    faster treatment.  */
 static inline int dec_alu_op_b_is_small_imm(DisasContext *dc)
@@ -948,12 +956,13 @@  static inline void dec_byteswap(DisasContext *dc, TCGv dst, TCGv src, int size)
 static void dec_load(DisasContext *dc)
 {
     TCGv t, *addr;
-    unsigned int size, rev = 0;
+    unsigned int size, rev = 0, ex = 0;
 
     size = 1 << (dc->opcode & 3);
 
     if (!dc->type_b) {
         rev = (dc->ir >> 9) & 1;
+        ex = (dc->ir >> 10) & 1;
     }
 
     if (size > 4 && (dc->tb_flags & MSR_EE_FLAG)
@@ -963,7 +972,8 @@  static void dec_load(DisasContext *dc)
         return;
     }
 
-    LOG_DIS("l%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
+    LOG_DIS("l%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
+                                                        ex ? "x" : "");
 
     t_sync_flags(dc);
     addr = compute_ldst_addr(dc, &t);
@@ -1019,6 +1029,17 @@  static void dec_load(DisasContext *dc)
         }
     }
 
+    /* lwx does not throw unaligned access errors, so force alignment */
+    if (ex) {
+        /* Force addr into the temp.  */
+        if (addr != &t) {
+            t = tcg_temp_new();
+            tcg_gen_mov_tl(t, *addr);
+            addr = &t;
+        }
+        tcg_gen_andi_tl(t, t, ~3);
+    }
+
     /* If we get a fault on a dslot, the jmpstate better be in sync.  */
     sync_jmpstate(dc);
 
@@ -1057,6 +1078,12 @@  static void dec_load(DisasContext *dc)
         }
     }
 
+    if (ex) { /* lwx */
+        /* no support for for AXI exclusive so always clear C */
+        write_carryi(dc, 0);
+        tcg_gen_st_tl(*addr, cpu_env, offsetof(CPUMBState, res_addr));
+    }
+
     if (addr == &t)
         tcg_temp_free(t);
 }
@@ -1078,12 +1105,14 @@  static void gen_store(DisasContext *dc, TCGv addr, TCGv val,
 
 static void dec_store(DisasContext *dc)
 {
-    TCGv t, *addr;
-    unsigned int size, rev = 0;
+    TCGv t, *addr, swx_addr, r_check = 0;
+    int swx_skip = 0;
+    unsigned int size, rev = 0, ex = 0;
 
     size = 1 << (dc->opcode & 3);
     if (!dc->type_b) {
         rev = (dc->ir >> 9) & 1;
+        ex = (dc->ir >> 10) & 1;
     }
 
     if (size > 4 && (dc->tb_flags & MSR_EE_FLAG)
@@ -1093,12 +1122,30 @@  static void dec_store(DisasContext *dc)
         return;
     }
 
-    LOG_DIS("s%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
+    LOG_DIS("s%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
+                                                        ex ? "x" : "");
     t_sync_flags(dc);
     /* If we get a fault on a dslot, the jmpstate better be in sync.  */
     sync_jmpstate(dc);
     addr = compute_ldst_addr(dc, &t);
 
+    if (ex) { /* swx */
+        r_check = tcg_temp_new();
+        swx_addr = tcg_temp_local_new();
+
+        /* Force addr into the swx_addr. */
+        tcg_gen_mov_tl(swx_addr, *addr);
+        addr = &swx_addr;
+        /* swx does not throw unaligned access errors, so force alignment */
+        tcg_gen_andi_tl(swx_addr, swx_addr, ~3);
+
+        tcg_gen_ld_tl(r_check, cpu_env, offsetof(CPUMBState, res_addr));
+        write_carryi(dc, 1);
+        swx_skip = gen_new_label();
+        tcg_gen_brcond_tl(TCG_COND_NE, r_check, swx_addr, swx_skip);
+        write_carryi(dc, 0);
+    }
+
     if (rev && size != 4) {
         /* Endian reverse the address. t is addr.  */
         switch (size) {
@@ -1174,6 +1221,11 @@  static void dec_store(DisasContext *dc)
         gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(1), tcg_const_tl(size - 1));
     }
+    if (ex) {
+        gen_set_label(swx_skip);
+        tcg_temp_free(r_check);
+        tcg_temp_free(swx_addr);
+    }
 
     if (addr == &t)
         tcg_temp_free(t);