diff mbox series

[v5,01/13] target/arm: Reorganize PMCCNTR accesses

Message ID 1529699547-17044-2-git-send-email-alindsay@codeaurora.org
State New
Headers show
Series More fully implement ARM PMUv3 | expand

Commit Message

Aaron Lindsay June 22, 2018, 8:32 p.m. UTC
pmccntr_read and pmccntr_write contained duplicate code that was already
being handled by pmccntr_sync. Consolidate the duplicated code into two
functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
c15_ccnt in CPUARMState so that we can simultaneously save both the
architectural register value and the last underlying cycle count - this
ensures time isn't lost and will also allow us to access the 'old'
architectural register value in order to detect overflows in later
patches.

Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
---
 target/arm/cpu.h    |  28 ++++++++++-----
 target/arm/helper.c | 100 ++++++++++++++++++++++++++++------------------------
 2 files changed, 73 insertions(+), 55 deletions(-)

Comments

Peter Maydell June 28, 2018, 4:40 p.m. UTC | #1
On 22 June 2018 at 21:32, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> pmccntr_read and pmccntr_write contained duplicate code that was already
> being handled by pmccntr_sync. Consolidate the duplicated code into two
> functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
> c15_ccnt in CPUARMState so that we can simultaneously save both the
> architectural register value and the last underlying cycle count - this
> ensure time isn't lost and will also allow us to access the 'old'
> architectural register value in order to detect overflows in later
> patches.
>
> Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
> ---
>  target/arm/cpu.h    |  28 ++++++++++-----
>  target/arm/helper.c | 100 ++++++++++++++++++++++++++++------------------------
>  2 files changed, 73 insertions(+), 55 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 8488273..ba2c876 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -467,10 +467,20 @@ typedef struct CPUARMState {
>          uint64_t oslsr_el1; /* OS Lock Status */
>          uint64_t mdcr_el2;
>          uint64_t mdcr_el3;
> -        /* If the counter is enabled, this stores the last time the counter
> -         * was reset. Otherwise it stores the counter value
> +        /* Stores the architectural value of the counter *the last time it was
> +         * updated* by pmccntr_op_start. Accesses should always be surrounded
> +         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
> +         * architecturally-corect value is being read/set.

"correct"

>           */
>          uint64_t c15_ccnt;
> +        /* Stores the delta between the architectural value and the underlying
> +         * cycle count during normal operation. It is used to update c15_ccnt
> +         * to be the correct architectural value before accesses. During
> +         * accesses, c15_ccnt_delta contains the underlying count being used
> +         * for the access, after which it reverts to the delta value in
> +         * pmccntr_op_finish.
> +         */
> +        uint64_t c15_ccnt_delta;

I had a look through the patchset, and I still can't see
anything here which indicates how we're handling migration.

If this field is only ever used as a temporary while we're
between op_start and op_finish, then we can just return it from
start and take it as an argument to finish, and we're fine
(migration of the PMCCNTR_EL0 register will be done by calling
its readfn on the source and its writefn on the destination).

If there's a reason for having this be a field in the state struct
(I think you said counter overflow)? then we need to be sure that
migration is going to do the right thing and that we don't for instance
lose overflow indications during a migration.

>          uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
>          uint64_t vpidr_el2; /* Virtualization Processor ID Register */
>          uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
> @@ -924,15 +934,17 @@ int cpu_arm_signal_handler(int host_signum, void *pinfo,
>                             void *puc);
>
>  /**
> - * pmccntr_sync
> + * pmccntr_op_start/finish
>   * @env: CPUARMState
>   *
> - * Synchronises the counter in the PMCCNTR. This must always be called twice,
> - * once before any action that might affect the timer and again afterwards.
> - * The function is used to swap the state of the register if required.
> - * This only happens when not in user mode (!CONFIG_USER_ONLY)
> + * Convert the counter in the PMCCNTR between its delta form (the typical mode
> + * when it's enabled) and the guest-visible value. These two calls must always
> + * surround any action which might affect the counter, and the return value
> + * from pmccntr_op_start must be supplied as the second argument to
> + * pmccntr_op_finish.
>   */
> -void pmccntr_sync(CPUARMState *env);
> +void pmccntr_op_start(CPUARMState *env);
> +void pmccntr_op_finish(CPUARMState *env);

The comment says pmccntr_op_start() returns a value but the
prototype says it doesn't...

thanks
-- PMM
Aaron Lindsay Aug. 28, 2018, 8:03 p.m. UTC | #2
On Jun 28 17:40, Peter Maydell wrote:
> On 22 June 2018 at 21:32, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> > index 8488273..ba2c876 100644
> > --- a/target/arm/cpu.h
> > +++ b/target/arm/cpu.h
> > @@ -467,10 +467,20 @@ typedef struct CPUARMState {
> >          uint64_t oslsr_el1; /* OS Lock Status */
> >          uint64_t mdcr_el2;
> >          uint64_t mdcr_el3;
> > -        /* If the counter is enabled, this stores the last time the counter
> > -         * was reset. Otherwise it stores the counter value
> > +        /* Stores the architectural value of the counter *the last time it was
> > +         * updated* by pmccntr_op_start. Accesses should always be surrounded
> > +         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
> > +         * architecturally-corect value is being read/set.
> 
> "correct"

Thanks - fixed for v6.

> >           */
> >          uint64_t c15_ccnt;
> > +        /* Stores the delta between the architectural value and the underlying
> > +         * cycle count during normal operation. It is used to update c15_ccnt
> > +         * to be the correct architectural value before accesses. During
> > +         * accesses, c15_ccnt_delta contains the underlying count being used
> > +         * for the access, after which it reverts to the delta value in
> > +         * pmccntr_op_finish.
> > +         */
> > +        uint64_t c15_ccnt_delta;
> 
> I had a look through the patchset, and I still can't see
> anything here which indicates how we're handling migration.

I'll admit that I had not previously thought through everything
exhaustively, but I think I've convinced myself that *almost* everything
is already handled by calling .readfn/writefn on the registers for
migration... more below.

> If this field is only ever used as a temporary while we're
> between op_start and op_finish, then we can just return it from
> start and take it as an argument to finish, and we're fine
> (migration of the PMCCNTR_EL0 register will be done by calling
> its readfn on the source and its writefn on the destination).
> 
> If there's a reason for having this be a field in the state struct
> (I think you said counter overflow)? then we need to be sure that
> migration is going to do the right thing and that we don't for instance
> lose overflow indications during a migration.

I think we do need additional state for supporting counter overflow
during normal operation (more than just between op_start and op_finish),
but I don't think sending additional state over the wire is necessary
for migration. Let me lay out my reasoning to see if you think it makes
sense:

I observe that we seem to deal mostly with the following values with
respect to PMU counters, and I think it might be easier to discuss this
in terms of values rather than variables:

	AC = current architectural value of PMCCNTR
	AL = architectural value of PMCCNTR when last observed/updated by
	     the OS
	UC = current underlying counter value
	UL = underlying counter value when PMCCNTR last observed/updated by
	     OS
	D = difference between underlying counter value and architectural
	    value

At any given point in time, the following should then be true during
normal execution (i.e. between OS accesses to the counter):
	UL - AL == D == UC - AC

The approach taken previous to my patchset was to store D in
cp15.c15_ccnt most of the time, updating that variable to hold AC when
read (by doing `c15_ccnt = UC - c15_ccnt`... an implementation using `AC
= UC - D`, which is a different way to write `D = UC - AC` from above).
This operation was reversed after a write completes, so that c15_ccnt
again holds a value corresponding to D.

Only storing D works well and minimizes memory usage if you don't care
about detecting overflows. However, without some frame of reference for
what the last observed architectural value was relative to its current
value, we have no way to know if the counter has overflowed in the
intervening time. In other words, we need to know both AC and AL to know
if the counter has overflowed. Assuming we store D and can obtain UC we
can calculate AC, but we need to store at least one additional piece of
information to obtain AL (either AL itself or UL).

In my patchset so far, I'm storing AL in c15_ccnt and D in
c15_ccnt_delta during normal operation and AL/AC in c15_ccnt, UL/UC in
c15_ccnt_delta between op_start and op_finish (note that AL == AC, UL ==
UC, and D == 0 during this time). Also note that UL/UC are only stored
between op_start and op_finish calls to avoid losing time, and are not
needed to maintain architectural state across migrations.

Now for migration - The read/writefn's already marshal/unmarshal both
the counter value and overflow state to their architectural values in
PMCCNTR and PMOVSCLR. I think this would work on its own if we made
two big (and wrong) assumptions:
	1. PMOVSCLR is read after PMCCNTR on save (ensures all overflows
	   which have occurred are recorded in PMOVSCLR) and written before
	   it on load (ensures any overflow bits in PMOVSCLR cause the
	   interrupt line to be raised)
	2. Time (as measured by `qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)`) does
	   not change during the process of saving or loading CP registers
	   (ensures PMCCNTR can't have overflowed between when it was saved
	   and when PMOVSCLR was later saved).

One possible solution for this is to add calls to op_start in both
cpu_pre_load and cpu_pre_save, add calls to op_finish in cpu_post_load
and cpu_post_save, and ensure that all CP registers which would call
op_start/finish on their own have raw_read/writefn's which assume
they're between calls to op_start and op_finish.

Thoughts? (and I apologize for the wall of text)

-Aaron
Peter Maydell Sept. 25, 2018, 3:18 p.m. UTC | #3
On 28 August 2018 at 21:03, Aaron Lindsay <aclindsa@gmail.com> wrote:
> On Jun 28 17:40, Peter Maydell wrote:
>> I had a look through the patchset, and I still can't see
>> anything here which indicates how we're handling migration.
>
> I'll admit that I had not previously thought through everything
> exhaustively, but I think I've convinced myself that *almost* everything
> is already handled by calling .readfn/writefn on the registers for
> migration... more below.

Thanks for writing up this explanation; it's very helpful. Sorry
it's taken me a while to get to it -- you unfortunately sent it
just as I was going on holiday...

>> If this field is only ever used as a temporary while we're
>> between op_start and op_finish, then we can just return it from
>> start and take it as an argument to finish, and we're fine
>> (migration of the PMCCNTR_EL0 register will be done by calling
>> its readfn on the source and its writefn on the destination).
>>
>> If there's a reason for having this be a field in the state struct
>> (I think you said counter overflow)? then we need to be sure that
>> migration is going to do the right thing and that we don't for instance
>> lose overflow indications during a migration.
>
> I think we do need additional state for supporting counter overflow
> during normal operation (more than just between op_start and op_finish),
> but I don't think sending additional state over the wire is necessary
> for migration. Let me lay out my reasoning to see if you think it makes
> sense:
>
> I observe that we seem to deal mostly with the following values with
> respect to PMU counters, and I think it might be easier to discuss this
> in terms of values rather than variables:
>
>         AC = current architectural value of PMCCNTR
>         AL = architectural value of PMCCNTR when last observed/updated by
>              the OS
>         UC = current underlying counter value
>         UL = underlying counter value when PMCCNTR last observed/updated by
>              OS
>         D = difference between underlying counter value and architectural
>             value
>
> At any given point in time, the following should then be true during
> normal execution (i.e. between OS accesses to the counter):
>         UL - AL == D == UC - AC
>
> The approach taken previous to my patchset was to store D in
> cp15.c15_ccnt most of the time, updating that variable to hold AC when
> read (by doing `c15_ccnt = UC - c15_ccnt`... an implementation using `AC
> = UC - D`, which is a different way to write `D = UC - AC` from above).
> This operation was reversed after a write completes, so that c15_ccnt
> again holds a value corresponding to D.
>
> Only storing D works well and minimizes memory usage if you don't care
> about detecting overflows. However, without some frame of reference for
> what the last observed architectural value was relative to its current
> value, we have no way to know if the counter has overflowed in the
> intervening time. In other words, we need to know both AC and AL to know
> if the counter has overflowed. Assuming we store D and can obtain UC we
> can calculate AC, but we need to store at least one additional piece of
> information to obtain AL (either AL itself or UL).

In this scheme, how do you tell the difference between "AC > AL
because it hasn't overflowed" and "AC > AL because it has overflowed
and run all the way round past AL again" ? That is, I'm not sure that
storing AL helps you to reliably detect overflow.

> In my patchset so far, I'm storing AL in c15_ccnt and D in
> c15_ccnt_delta during normal operation and AL/AC in c15_ccnt, UL/UC in
> c15_ccnt_delta between op_start and op_finish (note that AL == AC, UL ==
> UC, and D == 0 during this time). Also note that UL/UC are only stored
> between op_start and op_finish calls to avoid losing time, and are not
> needed to maintain architectural state across migrations.

> Now for migration - The read/writefn's already marshal/unmarshal both
> the counter value and overflow state to their architectural values in
> PMCCNTR and PMOVSCLR. I think this would work on its own if we made
> two big (and wrong) assumptions:
>         1. PMOVSCLR is read after PMCCNTR on save (ensures all overflows
>            which have occurred are recorded in PMOVSCLR) and written before
>            it on load (ensures any overflow bits in PMOVSCLR cause the
>            interrupt line to be raised)

There is indeed no guaranteed ordering in which registers are saved/loaded.
Also, the load function mustn't raise interrupt lines: it has to transfer
the state just of this device, and trust that the device on the other
end restores its state into one that matches the current level of the
interrupt line.

>         2. Time (as measured by `qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)`) does
>            not change during the process of saving or loading CP registers
>            (ensures PMCCNTR can't have overflowed between when it was saved
>            and when PMOVSCLR was later saved).

I think this assumption is true, isn't it? (VM migration happens while
the VM is stopped, and do_vm_stop() calls cpu_disable_ticks() which
means QEMU_CLOCK_VIRTUAL won't advance.)

thanks
-- PMM
Aaron Lindsay Sept. 26, 2018, 3:22 p.m. UTC | #4
On Sep 25 16:18, Peter Maydell wrote:
> On 28 August 2018 at 21:03, Aaron Lindsay <aclindsa@gmail.com> wrote:
> > On Jun 28 17:40, Peter Maydell wrote:
> >> I had a look through the patchset, and I still can't see
> >> anything here which indicates how we're handling migration.
> >
> > I'll admit that I had not previously thought through everything
> > exhaustively, but I think I've convinced myself that *almost* everything
> > is already handled by calling .readfn/writefn on the registers for
> > migration... more below.
> 
> Thanks for writing up this explanation; it's very helpful. Sorry
> it's taken me a while to get to it -- you unfortunately sent it
> just as I was going on holiday...

No problem - I noticed you went quiet on the list right when I sent it
and figured I scared you away!

I've got a v6 staged - If you don't come up with significant design
changes based on my response below, I'll plan to send it in the next few
days.

> >> If this field is only ever used as a temporary while we're
> >> between op_start and op_finish, then we can just return it from
> >> start and take it as an argument to finish, and we're fine
> >> (migration of the PMCCNTR_EL0 register will be done by calling
> >> its readfn on the source and its writefn on the destination).
> >>
> >> If there's a reason for having this be a field in the state struct
> >> (I think you said counter overflow)? then we need to be sure that
> >> migration is going to do the right thing and that we don't for instance
> >> lose overflow indications during a migration.
> >
> > I think we do need additional state for supporting counter overflow
> > during normal operation (more than just between op_start and op_finish),
> > but I don't think sending additional state over the wire is necessary
> > for migration. Let me lay out my reasoning to see if you think it makes
> > sense:
> >
> > I observe that we seem to deal mostly with the following values with
> > respect to PMU counters, and I think it might be easier to discuss this
> > in terms of values rather than variables:
> >
> >         AC = current architectural value of PMCCNTR
> >         AL = architectural value of PMCCNTR when last observed/updated by
> >              the OS
> >         UC = current underlying counter value
> >         UL = underlying counter value when PMCCNTR last observed/updated by
> >              OS
> >         D = difference between underlying counter value and architectural
> >             value
> >
> > At any given point in time, the following should then be true during
> > normal execution (i.e. between OS accesses to the counter):
> >         UL - AL == D == UC - AC
> >
> > The approach taken previous to my patchset was to store D in
> > cp15.c15_ccnt most of the time, updating that variable to hold AC when
> > read (by doing `c15_ccnt = UC - c15_ccnt`... an implementation using `AC
> > = UC - D`, which is a different way to write `D = UC - AC` from above).
> > This operation was reversed after a write completes, so that c15_ccnt
> > again holds a value corresponding to D.
> >
> > Only storing D works well and minimizes memory usage if you don't care
> > about detecting overflows. However, without some frame of reference for
> > what the last observed architectural value was relative to its current
> > value, we have no way to know if the counter has overflowed in the
> > intervening time. In other words, we need to know both AC and AL to know
> > if the counter has overflowed. Assuming we store D and can obtain UC we
> > can calculate AC, but we need to store at least one additional piece of
> > information to obtain AL (either AL itself or UL).
> 
> In this scheme, how do you tell the difference between "AC > AL
> because it hasn't overflowed" and "AC > AL because it has overflowed
> and run all the way round past AL again" ? That is, I'm not sure that
> storing AL helps you to reliably detect overflow.

I wrestled with how to accomplish generic overflow detection in a
foolproof way that fits with the event-driven design. My current
implementation (see [1]) adds a function called ns_per_count for each
counter type, which is used to setup a timer to overflow at the earliest
time a given counter is expected to overflow. My thinking is that even
if this isn't perfect (i.e. if there's a little 'skid'), it is likely to
be close enough to prevent the type of wrap-around double-overflow you
describe. I'm not wedded to this design - please don't hesitate to let
me know if you think of something better. 

> > In my patchset so far, I'm storing AL in c15_ccnt and D in
> > c15_ccnt_delta during normal operation and AL/AC in c15_ccnt, UL/UC in
> > c15_ccnt_delta between op_start and op_finish (note that AL == AC, UL ==
> > UC, and D == 0 during this time). Also note that UL/UC are only stored
> > between op_start and op_finish calls to avoid losing time, and are not
> > needed to maintain architectural state across migrations.
> 
> > Now for migration - The read/writefn's already marshal/unmarshal both
> > the counter value and overflow state to their architectural values in
> > PMCCNTR and PMOVSCLR. I think this would work on its own if we made
> > two big (and wrong) assumptions:
> >         1. PMOVSCLR is read after PMCCNTR on save (ensures all overflows
> >            which have occurred are recorded in PMOVSCLR) and written before
> >            it on load (ensures any overflow bits in PMOVSCLR cause the
> >            interrupt line to be raised)
> 
> There is indeed no guaranteed ordering in which registers are saved/loaded.
> Also, the load function mustn't raise interrupt lines: it has to transfer
> the state just of this device, and trust that the device on the other
> end restores its state into one that matches the current level of the
> interrupt line.

Okay, got it.

> >         2. Time (as measured by `qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)`) does
> >            not change during the process of saving or loading CP registers
> >            (ensures PMCCNTR can't have overflowed between when it was saved
> >            and when PMOVSCLR was later saved).
> 
> I think this assumption is true, isn't it? (VM migration happens while
> the VM is stopped, and do_vm_stop() calls cpu_disable_ticks() which
> means QEMU_CLOCK_VIRTUAL won't advance.)

Ah, okay. I wasn't aware of this detail.

-Aaron

[1] - http://lists.nongnu.org/archive/html/qemu-devel/2018-06/msg06843.html
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8488273..ba2c876 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -467,10 +467,20 @@  typedef struct CPUARMState {
         uint64_t oslsr_el1; /* OS Lock Status */
         uint64_t mdcr_el2;
         uint64_t mdcr_el3;
-        /* If the counter is enabled, this stores the last time the counter
-         * was reset. Otherwise it stores the counter value
+        /* Stores the architectural value of the counter *the last time it was
+         * updated* by pmccntr_op_start. Accesses should always be surrounded
+         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
+         * architecturally-corect value is being read/set.
          */
         uint64_t c15_ccnt;
+        /* Stores the delta between the architectural value and the underlying
+         * cycle count during normal operation. It is used to update c15_ccnt
+         * to be the correct architectural value before accesses. During
+         * accesses, c15_ccnt_delta contains the underlying count being used
+         * for the access, after which it reverts to the delta value in
+         * pmccntr_op_finish.
+         */
+        uint64_t c15_ccnt_delta;
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
         uint64_t vpidr_el2; /* Virtualization Processor ID Register */
         uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
@@ -924,15 +934,17 @@  int cpu_arm_signal_handler(int host_signum, void *pinfo,
                            void *puc);
 
 /**
- * pmccntr_sync
+ * pmccntr_op_start/finish
  * @env: CPUARMState
  *
- * Synchronises the counter in the PMCCNTR. This must always be called twice,
- * once before any action that might affect the timer and again afterwards.
- * The function is used to swap the state of the register if required.
- * This only happens when not in user mode (!CONFIG_USER_ONLY)
+ * Convert the counter in the PMCCNTR between its delta form (the typical mode
+ * when it's enabled) and the guest-visible value. These two calls must always
+ * surround any action which might affect the counter, and the return value
+ * from pmccntr_op_start must be supplied as the second argument to
+ * pmccntr_op_finish.
  */
-void pmccntr_sync(CPUARMState *env);
+void pmccntr_op_start(CPUARMState *env);
+void pmccntr_op_finish(CPUARMState *env);
 
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 1248d84..23b3a16 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1044,28 +1044,53 @@  static inline bool arm_ccnt_enabled(CPUARMState *env)
 
     return true;
 }
-
-void pmccntr_sync(CPUARMState *env)
+/*
+ * Ensure c15_ccnt is the guest-visible count so that operations such as
+ * enabling/disabling the counter or filtering, modifying the count itself,
+ * etc. can be done logically. This is essentially a no-op if the counter is
+ * not enabled at the time of the call.
+ */
+void pmccntr_op_start(CPUARMState *env)
 {
-    uint64_t temp_ticks;
-
-    temp_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+    uint64_t cycles = 0;
+    cycles = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
                           ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
 
-    if (env->cp15.c9_pmcr & PMCRD) {
-        /* Increment once every 64 processor clock cycles */
-        temp_ticks /= 64;
-    }
-
     if (arm_ccnt_enabled(env)) {
-        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+        uint64_t eff_cycles = cycles;
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            eff_cycles /= 64;
+        }
+
+        env->cp15.c15_ccnt = eff_cycles - env->cp15.c15_ccnt_delta;
+    }
+    env->cp15.c15_ccnt_delta = cycles;
+}
+
+/*
+ * If PMCCNTR is enabled, recalculate the delta between the clock and the
+ * guest-visible count. A call to pmccntr_op_finish should follow every call to
+ * pmccntr_op_start.
+ */
+void pmccntr_op_finish(CPUARMState *env)
+{
+    if (arm_ccnt_enabled(env)) {
+        uint64_t prev_cycles = env->cp15.c15_ccnt_delta;
+
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            prev_cycles /= 64;
+        }
+
+        env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt;
     }
 }
 
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
-    pmccntr_sync(env);
+    pmccntr_op_start(env);
 
     if (value & PMCRC) {
         /* The counter has been reset */
@@ -1076,26 +1101,16 @@  static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
 
-    pmccntr_sync(env);
+    pmccntr_op_finish(env);
 }
 
 static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    uint64_t total_ticks;
-
-    if (!arm_ccnt_enabled(env)) {
-        /* Counter is disabled, do not change value */
-        return env->cp15.c15_ccnt;
-    }
-
-    total_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-                           ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
-
-    if (env->cp15.c9_pmcr & PMCRD) {
-        /* Increment once every 64 processor clock cycles */
-        total_ticks /= 64;
-    }
-    return total_ticks - env->cp15.c15_ccnt;
+    uint64_t ret;
+    pmccntr_op_start(env);
+    ret = env->cp15.c15_ccnt;
+    pmccntr_op_finish(env);
+    return ret;
 }
 
 static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1112,22 +1127,9 @@  static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    uint64_t total_ticks;
-
-    if (!arm_ccnt_enabled(env)) {
-        /* Counter is disabled, set the absolute value */
-        env->cp15.c15_ccnt = value;
-        return;
-    }
-
-    total_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-                           ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
-
-    if (env->cp15.c9_pmcr & PMCRD) {
-        /* Increment once every 64 processor clock cycles */
-        total_ticks /= 64;
-    }
-    env->cp15.c15_ccnt = total_ticks - value;
+    pmccntr_op_start(env);
+    env->cp15.c15_ccnt = value;
+    pmccntr_op_finish(env);
 }
 
 static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1140,7 +1142,11 @@  static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
 
 #else /* CONFIG_USER_ONLY */
 
-void pmccntr_sync(CPUARMState *env)
+void pmccntr_op_start(CPUARMState *env)
+{
+}
+
+void pmccntr_op_finish(CPUARMState *env)
 {
 }
 
@@ -1149,9 +1155,9 @@  void pmccntr_sync(CPUARMState *env)
 static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    pmccntr_sync(env);
+    pmccntr_op_start(env);
     env->cp15.pmccfiltr_el0 = value & 0xfc000000;
-    pmccntr_sync(env);
+    pmccntr_op_finish(env);
 }
 
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,