diff mbox series

[v2,03/10] tcg: Use one-insn-per-tb accelerator property in curr_cflags()

Message ID 20230403144637.2949366-4-peter.maydell@linaro.org
State New
Headers show
Series Deprecate/rename singlestep command line option, monitor interfaces | expand

Commit Message

Peter Maydell April 3, 2023, 2:46 p.m. UTC
Change curr_cflags() to look at the one-insn-per-tb accelerator
property instead of the old singlestep global variable.

Since this is the final remaining use of the global, we can
delete it entirely.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
This is the "clean" way of doing it. I dunno how much of
a hot path curr_cflags is; if it's really critical we could
have a global that's private to accel/tcg/internals.h I guess.
---
 accel/tcg/internal.h      | 16 ++++++++++++++++
 include/exec/cpu-common.h |  3 ---
 accel/tcg/cpu-exec.c      |  5 +++--
 accel/tcg/tcg-all.c       | 17 -----------------
 bsd-user/main.c           |  1 -
 linux-user/main.c         |  1 -
 softmmu/globals.c         |  1 -
 7 files changed, 19 insertions(+), 25 deletions(-)

Comments

Richard Henderson April 3, 2023, 6:33 p.m. UTC | #1
On 4/3/23 07:46, Peter Maydell wrote:
>   uint32_t curr_cflags(CPUState *cpu)
>   {
>       uint32_t cflags = cpu->tcg_cflags;
> +    TCGState *tcgstate = TCG_STATE(current_accel());

As mentioned against the cover, this is a very hot path.

We should try for something less expensive.  Perhaps as simple as

     return cpu->tcg_cflags | tcg_cflags_global;

where cpu->tcg_cflags is updated with cpu->singlestep_enabled.


r~
Peter Maydell April 13, 2023, 4:24 p.m. UTC | #2
On Mon, 3 Apr 2023 at 19:33, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 4/3/23 07:46, Peter Maydell wrote:
> >   uint32_t curr_cflags(CPUState *cpu)
> >   {
> >       uint32_t cflags = cpu->tcg_cflags;
> > +    TCGState *tcgstate = TCG_STATE(current_accel());
>
> As mentioned against the cover, this is a very hot path.
>
> We should try for something less expensive.  Perhaps as simple as
>
>      return cpu->tcg_cflags | tcg_cflags_global;
>
> where cpu->tcg_cflags is updated with cpu->singlestep_enabled.

I feel like that introduces atomicity issues. If I'm reading
the code right, curr_cflags() is called without any kind
of lock held. At the moment we get away with this because
'singlestep' is an int and is always going to be atomically
updated. If we make tcg_cflags_global a value which might have
multiple bits set or not set I'm not entirely sure what the
right way is to handle the reads and writes of it.

I think we can assume we have the iothread lock at any
point where we want to change either 'singlestep' or
the 'nochain' option, at least.

Any suggestions? I'm not very familiar with the
qemu atomic primitives...

thanks
-- PMM
Richard Henderson April 14, 2023, 6:17 a.m. UTC | #3
On 4/13/23 18:24, Peter Maydell wrote:
> On Mon, 3 Apr 2023 at 19:33, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 4/3/23 07:46, Peter Maydell wrote:
>>>    uint32_t curr_cflags(CPUState *cpu)
>>>    {
>>>        uint32_t cflags = cpu->tcg_cflags;
>>> +    TCGState *tcgstate = TCG_STATE(current_accel());
>>
>> As mentioned against the cover, this is a very hot path.
>>
>> We should try for something less expensive.  Perhaps as simple as
>>
>>       return cpu->tcg_cflags | tcg_cflags_global;
>>
>> where cpu->tcg_cflags is updated with cpu->singlestep_enabled.
> 
> I feel like that introduces atomicity issues. If I'm reading
> the code right, curr_cflags() is called without any kind
> of lock held. At the moment we get away with this because
> 'singlestep' is an int and is always going to be atomically
> updated. If we make tcg_cflags_global a value which might have
> multiple bits set or not set I'm not entirely sure what the
> right way is to handle the reads and writes of it.

qatomic_read() here, will dtrt for no tearing on the read.
(Not that we should have expected one anyway, for uint32_t.)

> I think we can assume we have the iothread lock at any
> point where we want to change either 'singlestep' or
> the 'nochain' option, at least.

Indeed, it can only be changed by the monitor, under user control, so even without a lock 
there's no real race there.

Using qatomic_set(&global, new_value) is sufficient to match the qatomic_read() for no 
tearing.  Concurrent threads will see the old value or the new value, but not garbage, 
which is just fine.

We probably need to kick all cpus, so that they come out of long-running TB chains to see 
the new value and re-translate.


r~
diff mbox series

Patch

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 96f198b28b4..6ea5f7a295f 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -10,6 +10,22 @@ 
 #define ACCEL_TCG_INTERNAL_H
 
 #include "exec/exec-all.h"
+#include "qemu/accel.h"
+
+struct TCGState {
+    AccelState parent_obj;
+
+    bool mttcg_enabled;
+    bool one_insn_per_tb;
+    int splitwx_enabled;
+    unsigned long tb_size;
+};
+typedef struct TCGState TCGState;
+
+#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
+
+DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE,
+                         TYPE_TCG_ACCEL)
 
 /*
  * Access to the various translations structures need to be serialised
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 6feaa40ca7b..609a29a5dc2 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -162,9 +162,6 @@  int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
                         void *ptr, size_t len, bool is_write);
 
-/* vl.c */
-extern int singlestep;
-
 void list_cpus(const char *optarg);
 
 #endif /* CPU_COMMON_H */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index c815f2dbfdf..1ed3878b6b7 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -149,17 +149,18 @@  static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 uint32_t curr_cflags(CPUState *cpu)
 {
     uint32_t cflags = cpu->tcg_cflags;
+    TCGState *tcgstate = TCG_STATE(current_accel());
 
     /*
      * Record gdb single-step.  We should be exiting the TB by raising
      * EXCP_DEBUG, but to simplify other tests, disable chaining too.
      *
-     * For singlestep and -d nochain, suppress goto_tb so that
+     * For one-insn-per-tb and -d nochain, suppress goto_tb so that
      * we can log -d cpu,exec after every TB.
      */
     if (unlikely(cpu->singlestep_enabled)) {
         cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
-    } else if (singlestep) {
+    } else if (tcgstate->one_insn_per_tb) {
         cflags |= CF_NO_GOTO_TB | 1;
     } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
         cflags |= CF_NO_GOTO_TB;
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index fcf361c8db6..7c4f9f34d39 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -38,21 +38,6 @@ 
 #endif
 #include "internal.h"
 
-struct TCGState {
-    AccelState parent_obj;
-
-    bool mttcg_enabled;
-    bool one_insn_per_tb;
-    int splitwx_enabled;
-    unsigned long tb_size;
-};
-typedef struct TCGState TCGState;
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE,
-                         TYPE_TCG_ACCEL)
-
 /*
  * We default to false if we know other options have been enabled
  * which are currently incompatible with MTTCG. Otherwise when each
@@ -219,8 +204,6 @@  static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp)
 {
     TCGState *s = TCG_STATE(obj);
     s->one_insn_per_tb = value;
-    /* For the moment, set the global also: this changes the behaviour */
-    singlestep = value;
 }
 
 static int tcg_gdbstub_supported_sstep_flags(void)
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 09b84da190c..a9e5a127d38 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -49,7 +49,6 @@ 
 #include "host-os.h"
 #include "target_arch_cpu.h"
 
-int singlestep;
 static bool opt_one_insn_per_tb;
 uintptr_t guest_base;
 bool have_guest_base;
diff --git a/linux-user/main.c b/linux-user/main.c
index 489694ad654..c7020b413bc 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -68,7 +68,6 @@ 
 char *exec_path;
 char real_exec_path[PATH_MAX];
 
-int singlestep;
 static bool opt_one_insn_per_tb;
 static const char *argv0;
 static const char *gdbstub;
diff --git a/softmmu/globals.c b/softmmu/globals.c
index 39678aa8c58..e83b5428d12 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -43,7 +43,6 @@  int vga_interface_type = VGA_NONE;
 bool vga_interface_created;
 Chardev *parallel_hds[MAX_PARALLEL_PORTS];
 int win2k_install_hack;
-int singlestep;
 int fd_bootchk = 1;
 int graphic_rotate;
 QEMUOptionRom option_rom[MAX_OPTION_ROMS];