diff mbox

[v4,3/6] hypertrace: [*-user] Add QEMU-side proxy to "guest_hypertrace" event

Message ID 148278449426.8988.2219094135462471980.stgit@fimbulvetr.bsc.es
State New
Headers show

Commit Message

Lluís Vilanova Dec. 26, 2016, 8:34 p.m. UTC
QEMU detects when the guest uses 'mmap' on hypertrace's control channel
file, and then uses 'mprotect' to detect accesses to it, which are used
to trigger tracing event "guest_hypertrace".

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 Makefile.objs            |    4 +
 bsd-user/main.c          |   17 ++
 bsd-user/mmap.c          |   15 ++
 bsd-user/syscall.c       |   34 +++--
 hypertrace/Makefile.objs |   18 ++
 hypertrace/common.c      |   26 ++++
 hypertrace/common.h      |   23 +++
 hypertrace/user.c        |  332 ++++++++++++++++++++++++++++++++++++++++++++++
 hypertrace/user.h        |   63 +++++++++
 include/qom/cpu.h        |    4 +
 linux-user/main.c        |   19 +++
 linux-user/mmap.c        |   17 ++
 linux-user/qemu.h        |    3 
 linux-user/syscall.c     |   31 +++-
 14 files changed, 581 insertions(+), 25 deletions(-)
 create mode 100644 hypertrace/Makefile.objs
 create mode 100644 hypertrace/common.c
 create mode 100644 hypertrace/common.h
 create mode 100644 hypertrace/user.c
 create mode 100644 hypertrace/user.h

Comments

Stefan Hajnoczi Jan. 9, 2017, 3:44 p.m. UTC | #1
On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
> @@ -847,6 +855,10 @@ int main(int argc, char **argv)
>          } else if (!strcmp(r, "trace")) {
>              g_free(trace_file);
>              trace_file = trace_opt_parse(optarg);
> +        } else if (!strcmp(r, "hypertrace")) {
> +            g_free(hypertrace_file);

This variable hasn't been declared yet.  Perhaps it's in a later patch.
Please reorder things to avoid the compilation error.

Or was this supposed to be hypertrace_base?

> +void hypertrace_init_config(struct hypertrace_config *config,
> +                            unsigned int max_clients)
> +{
> +    config->max_clients = max_clients;
> +    config->client_args = CONFIG_HYPERTRACE_ARGS;
> +    config->client_data_size = config->client_args * sizeof(uint64_t);
> +    config->control_size = QEMU_ALIGN_UP(
> +        config->max_clients * sizeof(uint64_t), TARGET_PAGE_SIZE);

This needs to be host page size aligned, too.  Otherwise mprotect will
affect bytes beyond the end of the control region.

> +static void init_channel(const char *base, const char *suffix, size_t size,
> +                         char **path, int *fd, uint64_t **addr)
> +{
> +    *path = g_malloc(strlen(base) + strlen(suffix) + 1);
> +    sprintf(*path, "%s%s", base, suffix);
> +
> +    *fd = open(*path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
> +    if (*fd == -1) {
> +        error_report("error: open(%s): %s", *path, strerror(errno));
> +        abort();
> +    }

open() can fail for reasons outside QEMU's control.  This isn't an
internal error.  Please exit cleanly instead of using abort(3).

> +void hypertrace_init(const char *base, unsigned int max_clients)
> +{
> +    struct sigaction sigint;
> +    struct hypertrace_config *pconfig;
> +
> +    if (base == NULL) {
> +        return;
> +    }
> +
> +    memset(&sigint, 0, sizeof(sigint));
> +    sigint.sa_sigaction = fini_handler;
> +    sigint.sa_flags = SA_SIGINFO | SA_RESTART;
> +    if (sigaction(SIGINT, &sigint, NULL) != 0) {
> +        error_report("error: sigaction(SIGINT): %s", strerror(errno));
> +        abort();
> +    }
> +    if (sigaction(SIGABRT, &sigint, NULL) != 0) {
> +        error_report("error: sigaction(SIGABRT): %s", strerror(errno));
> +        abort();
> +    }

I don't know whether it's okay to set up signal handlers in user mode.

Will this break guest code SIGINT/SIGABRT handling?

> +bool hypertrace_guest_mmap_check(int fd, unsigned long len,
> +                                 unsigned long offset)
> +{
> +    struct stat s;
> +    if (fstat(fd, &s) < 0) {
> +        return true;

Should this be return false?

> +    }
> +
> +    if (s.st_dev != control_fd_stat.st_dev ||
> +        s.st_ino != control_fd_stat.st_ino) {
> +        return true;

Here too.

> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
> +{
> +    CPUState *vcpu = current_cpu;
> +    void *control_0 = vcpu->hypertrace_control;
> +    void *control_1 = vcpu->hypertrace_control + config.control_size / 2;
> +    void *control_2 = control_1 + config.control_size / 2;
> +
> +    if (control_0 <= siginfo->si_addr && siginfo->si_addr < control_1) {
> +
> +        /* 1st fault (guest will write cmd) */
> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);

Please use uintptr_t instead of unsigned long.  It's more portable
because it doesn't assume sizeof(void*) == sizeof(unsigned long).  On
Windows the 64-bit data model is LLP64, not LP64:
https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models

> +        swap_control(control_0, control_1);
> +
> +    } else if (control_1 <= siginfo->si_addr && siginfo->si_addr < control_2) {
> +        size_t client = (siginfo->si_addr - control_1) / sizeof(uint64_t);
> +        uint64_t vcontrol = ((uint64_t *)control_0)[client];
> +        uint64_t *data_ptr = &qemu_data[client * config.client_data_size];

Is byte swapping required?

> +
> +        /* 2nd fault (invoke) */
> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
> +        hypertrace_emit(current_cpu, vcontrol, data_ptr);
> +        swap_control(control_1, control_0);

I don't understand how this scheme works for multi-threaded programs.
If two threads are both writing at the same time can we miss events due
to swap_control() changing mprotect?

> +
> +    } else {
> +        /* proxy to next handler */
> +        if (segv_next.sa_sigaction != NULL) {
> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
> +        } else if (segv_next.sa_handler != NULL) {
> +            segv_next.sa_handler(signum);
> +        }

Is there a case when no signal handler was installed (i.e. default
action)?
Stefan Hajnoczi Jan. 9, 2017, 4:35 p.m. UTC | #2
On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
> +{
> +    CPUState *vcpu = current_cpu;
> +    void *control_0 = vcpu->hypertrace_control;
> +    void *control_1 = vcpu->hypertrace_control + config.control_size / 2;
> +    void *control_2 = control_1 + config.control_size / 2;
> +
> +    if (control_0 <= siginfo->si_addr && siginfo->si_addr < control_1) {
> +
> +        /* 1st fault (guest will write cmd) */
> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
> +        swap_control(control_0, control_1);
> +
> +    } else if (control_1 <= siginfo->si_addr && siginfo->si_addr < control_2) {
> +        size_t client = (siginfo->si_addr - control_1) / sizeof(uint64_t);
> +        uint64_t vcontrol = ((uint64_t *)control_0)[client];
> +        uint64_t *data_ptr = &qemu_data[client * config.client_data_size];
> +
> +        /* 2nd fault (invoke) */
> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
> +        hypertrace_emit(current_cpu, vcontrol, data_ptr);
> +        swap_control(control_1, control_0);

A simpler and faster approach is to permanently mprotect just one region
and load all arguments from data[] (including the first argument).  Then
swapping isn't necessary.
Lluís Vilanova Jan. 9, 2017, 6:20 p.m. UTC | #3
Stefan Hajnoczi writes:

> On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
>> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
>> +{
>> +    CPUState *vcpu = current_cpu;
>> +    void *control_0 = vcpu->hypertrace_control;
>> +    void *control_1 = vcpu->hypertrace_control + config.control_size / 2;
>> +    void *control_2 = control_1 + config.control_size / 2;
>> +
>> +    if (control_0 <= siginfo->si_addr && siginfo->si_addr < control_1) {
>> +
>> +        /* 1st fault (guest will write cmd) */
>> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
>> +        swap_control(control_0, control_1);
>> +
>> +    } else if (control_1 <= siginfo->si_addr && siginfo->si_addr < control_2) {
>> +        size_t client = (siginfo->si_addr - control_1) / sizeof(uint64_t);
>> +        uint64_t vcontrol = ((uint64_t *)control_0)[client];
>> +        uint64_t *data_ptr = &qemu_data[client * config.client_data_size];
>> +
>> +        /* 2nd fault (invoke) */
>> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
>> +        hypertrace_emit(current_cpu, vcontrol, data_ptr);
>> +        swap_control(control_1, control_0);

> A simpler and faster approach is to permanently mprotect just one region
> and load all arguments from data[] (including the first argument).  Then
> swapping isn't necessary.

I don't understand what you propose.

With a single protected region, you don't know when to restore protection of it
so that later accesses will be detected too. That could be solved if we used
single-stepping (maybe that's what you meant):

* trap access
* unprotect memory region
* single-step guest
* read written data and emit event
* protect memory region again
* resume guest

If the single-stepping can be done without too much complexity, that'd be a
faster option, and that piece of code might be cleaner too.

We could only avoid the protect/unprotect sequence if we added target-specific
code to "skip" the failed instruction (assuming all useful writes go to the data
channel), but I wanted to make all code target-agnostic.


Cheers,
  Lluis
Stefan Hajnoczi Jan. 10, 2017, 2:47 p.m. UTC | #4
On Mon, Jan 09, 2017 at 07:20:07PM +0100, Lluís Vilanova wrote:
> Stefan Hajnoczi writes:
> 
> > On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
> >> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
> >> +{
> >> +    CPUState *vcpu = current_cpu;
> >> +    void *control_0 = vcpu->hypertrace_control;
> >> +    void *control_1 = vcpu->hypertrace_control + config.control_size / 2;
> >> +    void *control_2 = control_1 + config.control_size / 2;
> >> +
> >> +    if (control_0 <= siginfo->si_addr && siginfo->si_addr < control_1) {
> >> +
> >> +        /* 1st fault (guest will write cmd) */
> >> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
> >> +        swap_control(control_0, control_1);
> >> +
> >> +    } else if (control_1 <= siginfo->si_addr && siginfo->si_addr < control_2) {
> >> +        size_t client = (siginfo->si_addr - control_1) / sizeof(uint64_t);
> >> +        uint64_t vcontrol = ((uint64_t *)control_0)[client];
> >> +        uint64_t *data_ptr = &qemu_data[client * config.client_data_size];
> >> +
> >> +        /* 2nd fault (invoke) */
> >> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
> >> +        hypertrace_emit(current_cpu, vcontrol, data_ptr);
> >> +        swap_control(control_1, control_0);
> 
> > A simpler and faster approach is to permanently mprotect just one region
> > and load all arguments from data[] (including the first argument).  Then
> > swapping isn't necessary.
> 
> I'm don't understand what you propose.
> 
> With a single protected region, you don't know when to restore protection of it
> so that later accesses will be detected too. That could be solved if we used
> single-stepping (maybe that's what you meant):
> 
> * trap access
> * unprotect memory region
> * single-step guest
> * read written data and emit event
> * protect memory region again
> * resume guest
> 
> If the single-stepping can be done without too much complexity, that'd be a
> faster option, and that piece of code might be cleaner too.
> 
> We could only avoid the protect/unprotect sequence if we added target-specific
> code to "skip" the failed instruction (assuming all useful writes go to the data
> channel), but I wanted to make all code target-agnostic.

Okay, I didn't realize the instruction would be restarted.  I thought
swapping was solely to allow the guest to write vcontrol =
control_0[client].

Assuming for a second that the instruction isn't restarted, my
suggestion was to use si_addr to identify which client and then load all
args out of the (read/write) data region.  This eliminates vcontrol.

Stefan
Lluís Vilanova Jan. 16, 2017, 5:05 p.m. UTC | #5
Stefan Hajnoczi writes:

> On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
>> @@ -847,6 +855,10 @@ int main(int argc, char **argv)
>> } else if (!strcmp(r, "trace")) {
>> g_free(trace_file);
>> trace_file = trace_opt_parse(optarg);
>> +        } else if (!strcmp(r, "hypertrace")) {
>> +            g_free(hypertrace_file);

> This variable hasn't been declared yet.  Perhaps it's in a later patch.
> Please reorder things to avoid the compilation error.

> Or was this supposed to be hypertrace_base?

It's hypertrace_base, yes.


>> +void hypertrace_init_config(struct hypertrace_config *config,
>> +                            unsigned int max_clients)
>> +{
>> +    config->max_clients = max_clients;
>> +    config->client_args = CONFIG_HYPERTRACE_ARGS;
>> +    config->client_data_size = config->client_args * sizeof(uint64_t);
>> +    config->control_size = QEMU_ALIGN_UP(
>> +        config->max_clients * sizeof(uint64_t), TARGET_PAGE_SIZE);

> This needs to be host page size aligned, too.  Otherwise protect will
> affect bytes beyond the end of the control region.

Ummm, so right. Although I think only host page alignment is required (there's
no soft TLB in user-mode, right?).


>> +static void init_channel(const char *base, const char *suffix, size_t size,
>> +                         char **path, int *fd, uint64_t **addr)
>> +{
>> +    *path = g_malloc(strlen(base) + strlen(suffix) + 1);
>> +    sprintf(*path, "%s%s", base, suffix);
>> +
>> +    *fd = open(*path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
>> +    if (*fd == -1) {
>> +        error_report("error: open(%s): %s", *path, strerror(errno));
>> +        abort();
>> +    }

> open() can fail for reasons outside QEMU's control.  This isn't an
> internal error.  Please exit cleanly instead of using abort(3).

By cleanly you mean exit with a non-zero code, right? It still is an error that
cannot be recovered from.

Also, if this goes with exit() what about the abort()s I have added in other
places? (e.g., on a failed call to sigaction)


>> +void hypertrace_init(const char *base, unsigned int max_clients)
>> +{
>> +    struct sigaction sigint;
>> +    struct hypertrace_config *pconfig;
>> +
>> +    if (base == NULL) {
>> +        return;
>> +    }
>> +
>> +    memset(&sigint, 0, sizeof(sigint));
>> +    sigint.sa_sigaction = fini_handler;
>> +    sigint.sa_flags = SA_SIGINFO | SA_RESTART;
>> +    if (sigaction(SIGINT, &sigint, NULL) != 0) {
>> +        error_report("error: sigaction(SIGINT): %s", strerror(errno));
>> +        abort();
>> +    }
>> +    if (sigaction(SIGABRT, &sigint, NULL) != 0) {
>> +        error_report("error: sigaction(SIGABRT): %s", strerror(errno));
>> +        abort();
>> +    }

> I don't know whether it's okay to set up signal handlers in user mode.

> Will this break guest code SIGINT/SIGABRT handling?

Yes, I should reflect the signal back to the guest.


>> +bool hypertrace_guest_mmap_check(int fd, unsigned long len,
>> +                                 unsigned long offset)
>> +{
>> +    struct stat s;
>> +    if (fstat(fd, &s) < 0) {
>> +        return true;

> Should this be return false?

>> +    }
>> +
>> +    if (s.st_dev != control_fd_stat.st_dev ||
>> +        s.st_ino != control_fd_stat.st_ino) {
>> +        return true;

> Here too.

Yes, that's so embarrassing.


>> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
>> +{
>> +    CPUState *vcpu = current_cpu;
>> +    void *control_0 = vcpu->hypertrace_control;
>> +    void *control_1 = vcpu->hypertrace_control + config.control_size / 2;
>> +    void *control_2 = control_1 + config.control_size / 2;
>> +
>> +    if (control_0 <= siginfo->si_addr && siginfo->si_addr < control_1) {
>> +
>> +        /* 1st fault (guest will write cmd) */
>> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);

> Please use uintptr_t instead of unsigned long.  It's more portable
> because it doesn't assume sizeof(void*) == sizeof(unsigned long).  On
> Windows the 64-bit data model is LLP64, not LP64:
> https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models

Got it.


>> +        swap_control(control_0, control_1);
>> +
>> +    } else if (control_1 <= siginfo->si_addr && siginfo->si_addr < control_2) {
>> +        size_t client = (siginfo->si_addr - control_1) / sizeof(uint64_t);
>> +        uint64_t vcontrol = ((uint64_t *)control_0)[client];
>> +        uint64_t *data_ptr = &qemu_data[client * config.client_data_size];

> Is byte swapping required?

Ummmm, these are values passed to the trace emitter, so either is "correct".

But for the sake of people's sanity when looking at traces, it might be better
to always swap to host endianness.


>> +
>> +        /* 2nd fault (invoke) */
>> +        assert(((unsigned long)siginfo->si_addr % sizeof(uint64_t)) == 0);
>> +        hypertrace_emit(current_cpu, vcontrol, data_ptr);
>> +        swap_control(control_1, control_0);

> I don't understand how this scheme works for multi-threaded programs.
> If two threads are both writing at the same time can we miss events due
> to swap_control() changing mprotect?

Dang, with the version changes I forgot to add the per-client padding to place
each on a separate page.


>> +
>> +    } else {
>> +        /* proxy to next handler */
>> +        if (segv_next.sa_sigaction != NULL) {
>> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
>> +        } else if (segv_next.sa_handler != NULL) {
>> +            segv_next.sa_handler(signum);
>> +        }

> Is there a case when no signal handler was installed (i.e. default
> action)?

Yes, before calling hypertrace_init() or if it is called without a
"hypertrace_base" argument set (i.e., the user has not enabled hypertrace in the
command line).


Thanks,
  Lluis
Lluís Vilanova Jan. 16, 2017, 5:10 p.m. UTC | #6
Stefan Hajnoczi writes:

> On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
[...]
>> +
>> +    } else {
>> +        /* proxy to next handler */
>> +        if (segv_next.sa_sigaction != NULL) {
>> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
>> +        } else if (segv_next.sa_handler != NULL) {
>> +            segv_next.sa_handler(signum);
>> +        }

> Is there a case when no signal handler was installed (i.e. default
> action)?

Sorry, in my previous email I meant no, there is none. If hypertrace is not
initialized, the segv handler will never be installed; otherwise segv_next will
always have one of the two values.


Thanks,
  Lluis
Stefan Hajnoczi Jan. 17, 2017, 9:46 a.m. UTC | #7
On Mon, Jan 16, 2017 at 06:05:26PM +0100, Lluís Vilanova wrote:
> Stefan Hajnoczi writes:
> > On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
> >> +void hypertrace_init_config(struct hypertrace_config *config,
> >> +                            unsigned int max_clients)
> >> +{
> >> +    config->max_clients = max_clients;
> >> +    config->client_args = CONFIG_HYPERTRACE_ARGS;
> >> +    config->client_data_size = config->client_args * sizeof(uint64_t);
> >> +    config->control_size = QEMU_ALIGN_UP(
> >> +        config->max_clients * sizeof(uint64_t), TARGET_PAGE_SIZE);
> 
> > This needs to be host page size aligned, too.  Otherwise protect will
> > affect bytes beyond the end of the control region.
> 
> Ummm, so right. Although I think only host page alignment is required (there's
> no soft TLB in user-mode, right?).

Yes.

> >> +static void init_channel(const char *base, const char *suffix, size_t size,
> >> +                         char **path, int *fd, uint64_t **addr)
> >> +{
> >> +    *path = g_malloc(strlen(base) + strlen(suffix) + 1);
> >> +    sprintf(*path, "%s%s", base, suffix);
> >> +
> >> +    *fd = open(*path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
> >> +    if (*fd == -1) {
> >> +        error_report("error: open(%s): %s", *path, strerror(errno));
> >> +        abort();
> >> +    }
> 
> > open() can fail for reasons outside QEMU's control.  This isn't an
> > internal error.  Please exit cleanly instead of using abort(3).
> 
> By cleanly you mean exit with a non-zero code, right? It still is an error that
> cannot be recovered.

Right, it's an error.

> Also, if this goes with exit() what about the abort()s I have added in other
> places? (e.g., on a failed call to sigaction)

abort(3) is useful for internal errors where a core dump and debugging
are required.

exit(3) is useful for graceful exit (both successful and unsuccessful).
Over the past few years the codebase has been moving towards using Error
**errp and letting the top-level functions handle errors instead of
exiting deep inside QEMU.  This is necessary because lots of things can
be initialized at runtime (like device hotplug) and shouldn't bring down
QEMU.  But it's okay to exit in initialization code that will only be
called once.

> >> +
> >> +    } else {
> >> +        /* proxy to next handler */
> >> +        if (segv_next.sa_sigaction != NULL) {
> >> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
> >> +        } else if (segv_next.sa_handler != NULL) {
> >> +            segv_next.sa_handler(signum);
> >> +        }
> 
> > Is there a case when no signal handler was installed (i.e. default
> > action)?
> 
> Yes, before calling hypertrace_init() or if it is called without a
> "hypertrace_base" argument set (i.e., the user has not enabled hypertrace in the
> command line).

I meant "what happens if !segv_next.sa_sigaction &&
!segv_next.sa_handler?".  The default signal disposition should take
effect.  This code is ignoring that case, turning everything into
SIG_IGN but there is also SIG_DFL.
Lluís Vilanova Jan. 17, 2017, 11:35 p.m. UTC | #8
Stefan Hajnoczi writes:

> On Mon, Jan 16, 2017 at 06:05:26PM +0100, Lluís Vilanova wrote:
>> Stefan Hajnoczi writes:
>> > On Mon, Dec 26, 2016 at 09:34:54PM +0100, Lluís Vilanova wrote:
[...]
>> >> +
>> >> +    } else {
>> >> +        /* proxy to next handler */
>> >> +        if (segv_next.sa_sigaction != NULL) {
>> >> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
>> >> +        } else if (segv_next.sa_handler != NULL) {
>> >> +            segv_next.sa_handler(signum);
>> >> +        }
>> 
>> > Is there a case when no signal handler was installed (i.e. default
>> > action)?
>> 
>> Yes, before calling hypertrace_init() or if it is called without a
>> "hypertrace_base" argument set (i.e., the user has not enabled hypertrace in the
>> command line).

> I meant "what happens if !segv_next.sa_action &&
> !segv_next.sa_handler?".  The default signal disposition should take
> effect.  This code is ignoring that case, turning everything into
> SIG_IGN but there is also SIG_DFL.

I see, I didn't take SIG_DFL and SIG_IGN into account, and if both are null (no
handler was installed by the user), I should clean up my handler and raise() the
signal again to let it go through its default system action.

Sorry for the overall messy patch.


Thanks,
  Lluis
diff mbox

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 5f0be2c3fb..5f3a68ca48 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -105,6 +105,10 @@  util-obj-y +=  trace/
 target-obj-y += trace/
 
 ######################################################################
+# hypertrace
+target-obj-y += hypertrace/
+
+######################################################################
 # guest agent
 
 # FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 714a692e6f..d42ded4fc8 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -31,9 +31,12 @@ 
 #include "tcg.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
+#include "qemu/error-report.h"
 #include "exec/log.h"
 #include "trace/control.h"
 #include "glib-compat.h"
+#include "hypertrace/user.h"
+
 
 int singlestep;
 unsigned long mmap_min_addr;
@@ -676,6 +679,8 @@  static void usage(void)
            "-strace           log system calls\n"
            "-trace            [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
            "                  specify tracing options\n"
+           "-hypertrace       [[base=]<path>][,max-clients=<uint>]\n"
+           "                  specify hypertrace options\n"
            "\n"
            "Environment variables:\n"
            "QEMU_STRACE       Print system calls and arguments similar to the\n"
@@ -736,6 +741,8 @@  int main(int argc, char **argv)
     envlist_t *envlist = NULL;
     char *trace_file = NULL;
     bsd_type = target_openbsd;
+    char *hypertrace_base = NULL;
+    unsigned int hypertrace_max_clients = 0;
 
     if (argc <= 1)
         usage();
@@ -757,6 +764,7 @@  int main(int argc, char **argv)
     cpu_model = NULL;
 
     qemu_add_opts(&qemu_trace_opts);
+    qemu_add_opts(&qemu_hypertrace_opts);
 
     optind = 1;
     for (;;) {
@@ -847,6 +855,10 @@  int main(int argc, char **argv)
         } else if (!strcmp(r, "trace")) {
             g_free(trace_file);
             trace_file = trace_opt_parse(optarg);
+        } else if (!strcmp(r, "hypertrace")) {
+            g_free(hypertrace_file);
+            hypertrace_opt_parse(optarg, &hypertrace_base,
+                                 &hypertrace_max_clients);
         } else {
             usage();
         }
@@ -981,6 +993,11 @@  int main(int argc, char **argv)
     target_set_brk(info->brk);
     syscall_init();
     signal_init();
+    if (atexit(hypertrace_fini) != 0) {
+        fprintf(stderr, "error: atexit: %s\n", strerror(errno));
+        abort();
+    }
+    hypertrace_init(hypertrace_base, hypertrace_size);
 
     /* Now that we've loaded the binary, GUEST_BASE is fixed.  Delay
        generating the prologue until now so that the prologue can take
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index ee5907330f..650b8203fa 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -21,6 +21,7 @@ 
 #include "qemu.h"
 #include "qemu-common.h"
 #include "bsd-mman.h"
+#include "hypertrace/user.h"
 
 //#define DEBUG_MMAP
 
@@ -256,10 +257,17 @@  static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
     return addr;
 }
 
-/* NOTE: all the constants are the HOST ones */
 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                      int flags, int fd, abi_ulong offset)
 {
+    return target_mmap_cpu(start, len, prot, flags, fd, offset, NULL);
+}
+
+/* NOTE: all the constants are the HOST ones */
+abi_long target_mmap_cpu(abi_ulong start, abi_ulong len, int prot,
+                         int flags, int fd, abi_ulong offset,
+                         CPUState *cpu)
+{
     abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
     unsigned long host_start;
 
@@ -301,6 +309,10 @@  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
         goto the_end;
     real_start = start & qemu_host_page_mask;
 
+    if (!hypertrace_guest_mmap_check(fd, len, offset)) {
+        goto fail;
+    }
+
     if (!(flags & MAP_FIXED)) {
         abi_ulong mmap_start;
         void *p;
@@ -412,6 +424,7 @@  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
         }
     }
  the_end1:
+    hypertrace_guest_mmap_apply(fd, g2h(start), cpu);
     page_set_flags(start, start + len, prot | PAGE_VALID);
  the_end:
 #ifdef DEBUG_MMAP
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index 66492aaf5d..f88f21876c 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -26,6 +26,7 @@ 
 
 #include "qemu.h"
 #include "qemu-common.h"
+#include "hypertrace/user.h"
 
 //#define DEBUG
 
@@ -332,6 +333,7 @@  abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -369,10 +371,12 @@  abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
         unlock_user(p, arg1, 0);
         break;
     case TARGET_FREEBSD_NR_mmap:
-        ret = get_errno(target_mmap(arg1, arg2, arg3,
-                                    target_to_host_bitmask(arg4, mmap_flags_tbl),
-                                    arg5,
-                                    arg6));
+        ret = get_errno(target_mmap_cpu(
+                            arg1, arg2, arg3,
+                            target_to_host_bitmask(arg4, mmap_flags_tbl),
+                            arg5,
+                            arg6,
+                            cpu));
         break;
     case TARGET_FREEBSD_NR_mprotect:
         ret = get_errno(target_mprotect(arg1, arg2, arg3));
@@ -430,6 +434,7 @@  abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -455,10 +460,12 @@  abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1,
         unlock_user(p, arg1, 0);
         break;
     case TARGET_NETBSD_NR_mmap:
-        ret = get_errno(target_mmap(arg1, arg2, arg3,
-                                    target_to_host_bitmask(arg4, mmap_flags_tbl),
-                                    arg5,
-                                    arg6));
+        ret = get_errno(target_mmap_cpu(
+                            arg1, arg2, arg3,
+                            target_to_host_bitmask(arg4, mmap_flags_tbl),
+                            arg5,
+                            arg6,
+                            cpu));
         break;
     case TARGET_NETBSD_NR_mprotect:
         ret = get_errno(target_mprotect(arg1, arg2, arg3));
@@ -505,6 +512,7 @@  abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -530,10 +538,12 @@  abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
         unlock_user(p, arg1, 0);
         break;
     case TARGET_OPENBSD_NR_mmap:
-        ret = get_errno(target_mmap(arg1, arg2, arg3,
-                                    target_to_host_bitmask(arg4, mmap_flags_tbl),
-                                    arg5,
-                                    arg6));
+        ret = get_errno(target_mmap_cpu(
+                            arg1, arg2, arg3,
+                            target_to_host_bitmask(arg4, mmap_flags_tbl),
+                            arg5,
+                            arg6,
+                            cpu));
         break;
     case TARGET_OPENBSD_NR_mprotect:
         ret = get_errno(target_mprotect(arg1, arg2, arg3));
diff --git a/hypertrace/Makefile.objs b/hypertrace/Makefile.objs
new file mode 100644
index 0000000000..24e8fb4eea
--- /dev/null
+++ b/hypertrace/Makefile.objs
@@ -0,0 +1,18 @@ 
+# -*- mode: makefile -*-
+
+target-obj-$(CONFIG_USER_ONLY) += user.o
+target-obj-y += common.o
+
+$(obj)/user.o: $(obj)/emit.c
+
+$(obj)/emit.c: $(obj)/emit.c-timestamp $(BUILD_DIR)/config-host.mak
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+$(obj)/emit.c-timestamp: $(BUILD_DIR)/config-host.mak
+	@echo "static void hypertrace_emit(CPUState *cpu, uint64_t arg1, uint64_t *data)" >$@
+	@echo "{" >>$@
+	@echo -n "    trace_guest_hypertrace(cpu, arg1" >>$@
+	@for i in `seq $$(( $(CONFIG_HYPERTRACE_ARGS) - 1 ))`; do \
+	    echo -n ", data[$$i-1]" >>$@; \
+	done
+	@echo ");" >>$@
+	@echo "}" >>$@
diff --git a/hypertrace/common.c b/hypertrace/common.c
new file mode 100644
index 0000000000..baca098aa0
--- /dev/null
+++ b/hypertrace/common.c
@@ -0,0 +1,26 @@ 
+/*
+ * QEMU-side management of hypertrace in user-level emulation.
+ *
+ * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "hypertrace/common.h"
+#include "qemu/osdep.h"
+
+/*
+ * Fill @config with the channel geometry for @max_clients clients.
+ *
+ * Each client gets CONFIG_HYPERTRACE_ARGS 64-bit words in the data channel
+ * and one 64-bit word in the control channel; both channel sizes are rounded
+ * up to a multiple of TARGET_PAGE_SIZE.
+ */
+void hypertrace_init_config(struct hypertrace_config *config,
+                            unsigned int max_clients)
+{
+    const uint64_t clients = max_clients;
+    const uint64_t args = CONFIG_HYPERTRACE_ARGS;
+    const uint64_t bytes_per_client = args * sizeof(uint64_t);
+
+    config->max_clients = clients;
+    config->client_args = args;
+    config->client_data_size = bytes_per_client;
+    config->control_size = QEMU_ALIGN_UP(clients * sizeof(uint64_t),
+                                         TARGET_PAGE_SIZE);
+    config->data_size = QEMU_ALIGN_UP(clients * bytes_per_client,
+                                      TARGET_PAGE_SIZE);
+}
diff --git a/hypertrace/common.h b/hypertrace/common.h
new file mode 100644
index 0000000000..6eabbd6551
--- /dev/null
+++ b/hypertrace/common.h
@@ -0,0 +1,23 @@ 
+/*
+ * QEMU-side management of hypertrace in user-level emulation.
+ *
+ * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#if !defined(__KERNEL__)
+#include <stdint.h>
+#endif
+
+/*
+ * Geometry of the hypertrace channels, as computed by
+ * hypertrace_init_config().
+ */
+struct hypertrace_config {
+    /* Maximum number of concurrent clients */
+    uint64_t max_clients;
+    /* Number of arguments per client (CONFIG_HYPERTRACE_ARGS) */
+    uint64_t client_args;
+    /* Bytes per client in the data channel: client_args * sizeof(uint64_t) */
+    uint64_t client_data_size;
+    /*
+     * Bytes in the control channel: one uint64_t per client, rounded up to
+     * TARGET_PAGE_SIZE
+     */
+    uint64_t control_size;
+    /*
+     * Bytes in the data channel: max_clients * client_data_size, rounded up
+     * to TARGET_PAGE_SIZE
+     */
+    uint64_t data_size;
+};
+
+/*
+ * hypertrace_init_config:
+ * @config: Structure to fill in.
+ * @max_clients: Maximum number of concurrent clients.
+ *
+ * Compute every field of @config from @max_clients and
+ * CONFIG_HYPERTRACE_ARGS.
+ */
+void hypertrace_init_config(struct hypertrace_config *config,
+                            unsigned int max_clients);
diff --git a/hypertrace/user.c b/hypertrace/user.c
new file mode 100644
index 0000000000..5649c0369a
--- /dev/null
+++ b/hypertrace/user.c
@@ -0,0 +1,332 @@ 
+/*
+ * QEMU-side management of hypertrace in user-level emulation.
+ *
+ * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Implementation details
+ * ======================
+ *
+ * There are 3 channels, each a regular file in the host system, and mmap'ed by
+ * the guest application.
+ *
+ * - Configuration channel: Exposes configuration parameters. Mapped once and
+ *   directly readable.
+ *
+ * - Data channel: Lets guests write argument values. Each guest thread should
+ *   use a different offset to avoid concurrency problems. Mapped once and
+ *   directly accessible.
+ *
+ * - Control channel: Triggers the hypertrace event on a write, providing the
+ *   first argument. Offset in the control channel sets the offset in the data
+ *   channel. Mapped once per thread, using two pages to reliably detect
+ *   accesses and their written value through a SEGV handler.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+
+#include "hypertrace/common.h"
+#include "hypertrace/user.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+
+/* Channel geometry; filled in by hypertrace_init() */
+static struct hypertrace_config config;
+
+/* Configuration channel: file path, descriptor and QEMU-side mapping */
+static char *config_path;
+static int config_fd = -1;
+static uint64_t *qemu_config;
+
+/* Data channel: file path, descriptor and QEMU-side mapping */
+static char *data_path;
+static int data_fd = -1;
+static uint64_t *qemu_data;
+
+/* Control channel: file path, descriptor and QEMU-side mapping */
+static char *control_path;
+static int control_fd = -1;
+static uint64_t *qemu_control;
+/* Device/inode of the control file, used to recognise guest mmaps of it */
+static struct stat control_fd_stat;
+
+/*
+ * SIGSEGV disposition that was active before hypertrace_init(); chained to by
+ * segv_handler() and reinstated by hypertrace_fini().
+ * NOTE(review): not declared static although only this file appears to use
+ * it -- confirm no other translation unit needs the symbol.
+ */
+struct sigaction segv_next;
+static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt);
+
+
+/*
+ * Options accepted by "-hypertrace".  A bare value is taken as "path"
+ * (the implied option name).
+ */
+QemuOptsList qemu_hypertrace_opts = {
+    .name = "hypertrace",
+    .implied_opt_name = "path",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_hypertrace_opts.head),
+    .desc = {
+        {
+            /* Base path of the channel files */
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            /* Maximum number of concurrent clients; defaults to 1 */
+            .name = "max-clients",
+            .type = QEMU_OPT_NUMBER,
+            .def_value_str = "1",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse the argument of "-hypertrace".
+ *
+ * @optarg: option string, e.g. "path=/tmp/ht,max-clients=4".
+ * @base: set to a newly allocated copy of the "path" option (caller frees
+ *        with g_free()), or to NULL when no path was given.
+ * @max_clients_: set to the value of "max-clients" (default 1).
+ *
+ * Exits the process on a malformed option string or when "max-clients" is
+ * zero or does not fit in an unsigned int.
+ */
+void hypertrace_opt_parse(const char *optarg, char **base,
+                          unsigned int *max_clients_)
+{
+    uint64_t max_clients;
+    QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("hypertrace"),
+                                             optarg, true);
+    if (!opts) {
+        exit(EXIT_FAILURE);
+    }
+
+    /* g_strdup(NULL) yields NULL, which is what callers expect for "unset" */
+    *base = g_strdup(qemu_opt_get(opts, "path"));
+
+    /*
+     * The option is declared as "max-clients" in qemu_hypertrace_opts; any
+     * other name silently yields the default.  Keep the value in 64 bits
+     * until it has been range-checked.
+     */
+    max_clients = qemu_opt_get_number(opts, "max-clients", 1);
+    if (max_clients == 0 || max_clients > UINT_MAX) {
+        error_report("Parameter 'max-clients' expects a positive number");
+        exit(EXIT_FAILURE);
+    }
+    *max_clients_ = max_clients;
+}
+
+/*
+ * Create the backing file for one channel and optionally map it.
+ *
+ * @base, @suffix: concatenated to form the file name.
+ * @size: size of the file (and of the mapping) in bytes.
+ * @path: receives the newly allocated file name.
+ * @fd: receives the open descriptor; the file must not exist beforehand.
+ * @addr: if not NULL, receives a shared read/write mapping of the file.
+ *
+ * Any failure is reported and the process aborts.
+ */
+static void init_channel(const char *base, const char *suffix, size_t size,
+                         char **path, int *fd, uint64_t **addr)
+{
+    /* Byte stored at offset size-1 so that the file spans @size bytes */
+    const char zero = 0;
+    off_t lres;
+    ssize_t wres;
+
+    *path = g_strdup_printf("%s%s", base, suffix);
+
+    *fd = open(*path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
+    if (*fd == -1) {
+        error_report("error: open(%s): %s", *path, strerror(errno));
+        abort();
+    }
+
+    lres = lseek(*fd, size - 1, SEEK_SET);
+    if (lres == (off_t)-1) {
+        error_report("error: lseek(%s): %s", *path, strerror(errno));
+        abort();
+    }
+
+    wres = write(*fd, &zero, 1);
+    if (wres != 1) {
+        error_report("error: write(%s): %s", *path, strerror(errno));
+        abort();
+    }
+
+    if (addr) {
+        *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
+        if (*addr == MAP_FAILED) {
+            error_report("error: mmap(%s): %s", *path, strerror(errno));
+            abort();
+        }
+    }
+}
+
+/*
+ * Signal handler installed by hypertrace_init() for SIGINT and SIGABRT; it
+ * runs hypertrace_fini() and returns.
+ *
+ * NOTE(review): the signal is not re-raised here, so after cleanup a SIGINT
+ * looks like it is swallowed -- confirm this is the intended behaviour.
+ */
+static void fini_handler(int signum, siginfo_t *siginfo, void *sigctxt)
+{
+    hypertrace_fini();
+}
+
+/*
+ * Set up hypertrace for user-mode emulation.
+ *
+ * Creates the "<base>-config", "<base>-data" and "<base>-control" files,
+ * publishes the configuration (in guest byte order) through the config
+ * channel, and installs handlers for SIGINT/SIGABRT (cleanup) and SIGSEGV
+ * (control channel access detection).
+ *
+ * Does nothing when @base is NULL.  Any failure is reported and the process
+ * aborts.
+ */
+void hypertrace_init(const char *base, unsigned int max_clients)
+{
+    struct sigaction sigint;
+    struct sigaction segv;
+    struct hypertrace_config *pconfig;
+
+    if (base == NULL) {
+        return;
+    }
+
+    /* Remove the channel files if QEMU is interrupted or aborts */
+    memset(&sigint, 0, sizeof(sigint));
+    sigint.sa_sigaction = fini_handler;
+    sigint.sa_flags = SA_SIGINFO | SA_RESTART;
+    sigemptyset(&sigint.sa_mask);
+    if (sigaction(SIGINT, &sigint, NULL) != 0) {
+        error_report("error: sigaction(SIGINT): %s", strerror(errno));
+        abort();
+    }
+    if (sigaction(SIGABRT, &sigint, NULL) != 0) {
+        error_report("error: sigaction(SIGABRT): %s", strerror(errno));
+        abort();
+    }
+
+    hypertrace_init_config(&config, max_clients);
+    /* We need twice the space for the double-fault protocol */
+    config.control_size *= 2;
+
+    init_channel(base, "-config", TARGET_PAGE_SIZE,
+                 &config_path, &config_fd, &qemu_config);
+    pconfig = (struct hypertrace_config *)qemu_config;
+    pconfig->max_clients = tswap64(config.max_clients);
+    pconfig->client_args = tswap64(config.client_args);
+    pconfig->client_data_size = tswap64(config.client_data_size);
+    pconfig->control_size = tswap64(config.control_size);
+    pconfig->data_size = tswap64(config.data_size);
+
+    init_channel(base, "-data", config.data_size,
+                 &data_path, &data_fd, &qemu_data);
+
+    init_channel(base, "-control", config.control_size,
+                 &control_path, &control_fd, &qemu_control);
+
+    /* Remember the control file identity to recognise guest mmaps of it */
+    if (fstat(control_fd, &control_fd_stat) == -1) {
+        error_report("error: fstat(hypertrace_control): %s", strerror(errno));
+        abort();
+    }
+
+    /* Save the previous SIGSEGV disposition so it can be chained/restored */
+    memset(&segv, 0, sizeof(segv));
+    segv.sa_sigaction = segv_handler;
+    segv.sa_flags = SA_SIGINFO | SA_RESTART;
+    sigemptyset(&segv.sa_mask);
+
+    if (sigaction(SIGSEGV, &segv, &segv_next) != 0) {
+        error_report("error: sigaction(SIGSEGV): %s", strerror(errno));
+        abort();
+    }
+}
+
+
+/*
+ * Tear down one channel: close and unlink its file if it is open, then
+ * release the path string.  Both *fd and *path are reset (-1 / NULL) so a
+ * second call is harmless.  A failing close() or unlink() aborts.
+ */
+static void fini_channel(int *fd, char **path)
+{
+    if (*fd != -1) {
+        int rc;
+
+        rc = close(*fd);
+        if (rc == -1) {
+            error_report("error: close: %s", strerror(errno));
+            abort();
+        }
+
+        rc = unlink(*path);
+        if (rc == -1) {
+            error_report("error: unlink(%s): %s", *path, strerror(errno));
+            abort();
+        }
+
+        *fd = -1;
+    }
+
+    /* g_free() accepts NULL, so no guard is needed */
+    g_free(*path);
+    *path = NULL;
+}
+
+/*
+ * Undo hypertrace_init(): reinstate the previous SIGSEGV disposition and
+ * remove the channel files.  Only the first call does any work; it is safe
+ * to call when hypertrace was never initialized.
+ */
+void hypertrace_fini(void)
+{
+    static bool atexit_in;
+    struct sigaction current;
+
+    if (atexit_in) {
+        return;
+    }
+    atexit_in = true;
+
+    /*
+     * Only restore segv_next if our handler is the one installed.  When
+     * hypertrace_init() never ran, segv_next is all zeroes and installing it
+     * would reset SIGSEGV to its default action.
+     */
+    if (sigaction(SIGSEGV, NULL, &current) != 0) {
+        error_report("error: sigaction(SIGSEGV): %s", strerror(errno));
+        abort();
+    }
+    if ((current.sa_flags & SA_SIGINFO) &&
+        current.sa_sigaction == segv_handler) {
+        if (sigaction(SIGSEGV, &segv_next, NULL) != 0) {
+            error_report("error: sigaction(SIGSEGV): %s", strerror(errno));
+            abort();
+        }
+    }
+
+    fini_channel(&config_fd, &config_path);
+    fini_channel(&data_fd, &data_path);
+    fini_channel(&control_fd, &control_path);
+}
+
+
+/*
+ * Validate a guest mmap request.  Requests on anything other than the
+ * control channel file (or on a descriptor that cannot be stat'ed) are always
+ * accepted; the control channel must be mapped whole, from offset zero.
+ */
+bool hypertrace_guest_mmap_check(int fd, unsigned long len,
+                                 unsigned long offset)
+{
+    struct stat st;
+    bool is_control;
+
+    if (fstat(fd, &st) < 0) {
+        return true;
+    }
+
+    is_control = st.st_dev == control_fd_stat.st_dev &&
+                 st.st_ino == control_fd_stat.st_ino;
+
+    return !is_control || (len == (config.control_size) && offset == 0);
+}
+
+/*
+ * Hook run after a guest mmap.  If @fd is the control channel file and a
+ * vCPU is given, remember the mapping in the vCPU and write-protect its first
+ * half so that the guest's next write to it faults.
+ */
+void hypertrace_guest_mmap_apply(int fd, void *qemu_addr, CPUState *vcpu)
+{
+    struct stat st;
+    bool is_control;
+
+    if (vcpu == NULL || fstat(fd, &st) != 0) {
+        return;
+    }
+
+    is_control = st.st_dev == control_fd_stat.st_dev &&
+                 st.st_ino == control_fd_stat.st_ino;
+    if (!is_control) {
+        return;
+    }
+
+    /* it's an mmap of the control channel; split it in two and mprotect it to
+     * detect writes (cmd is written once on each part)
+     */
+    vcpu->hypertrace_control = qemu_addr;
+    if (mprotect(qemu_addr, config.control_size / 2, PROT_READ) != -1) {
+        return;
+    }
+    error_report("error: mprotect(hypertrace_control): %s",
+                 strerror(errno));
+    abort();
+}
+
+/*
+ * Flip the protection of the two control channel halves: @from becomes
+ * writable again and @to becomes read-only, so the next write to @to faults.
+ */
+static void swap_control(void *from, void *to)
+{
+    int rc;
+
+    rc = mprotect(from, config.control_size / 2, PROT_READ | PROT_WRITE);
+    if (rc == -1) {
+        error_report("error: mprotect(from): %s", strerror(errno));
+        abort();
+    }
+
+    rc = mprotect(to, config.control_size / 2, PROT_READ);
+    if (rc == -1) {
+        error_report("error: mprotect(to): %s", strerror(errno));
+        abort();
+    }
+}
+
+#include "hypertrace/emit.c"
+
+/*
+ * SIGSEGV handler implementing the two-fault control channel protocol.
+ *
+ * The first half of the current vCPU's control mapping starts read-only.
+ * A write there (1st fault) makes it writable and protects the second half;
+ * a write to the second half (2nd fault) emits the event, using the value
+ * the guest stored in the first half and the client's slot in the data
+ * channel, then flips the protection back.
+ *
+ * Faults outside the control mapping are passed on to the handler that was
+ * installed before hypertrace_init().
+ */
+static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
+{
+    CPUState *vcpu = current_cpu;
+    uintptr_t fault = (uintptr_t)siginfo->si_addr;
+    size_t half = config.control_size / 2;
+
+    /*
+     * Without a vCPU, or before this vCPU has mapped the control channel,
+     * there is no control range: treating NULL as its base would turn guest
+     * NULL dereferences into bogus hypertrace faults.
+     */
+    if (vcpu != NULL && vcpu->hypertrace_control != NULL) {
+        uintptr_t control_0 = (uintptr_t)vcpu->hypertrace_control;
+        uintptr_t control_1 = control_0 + half;
+        uintptr_t control_2 = control_1 + half;
+
+        if (control_0 <= fault && fault < control_1) {
+            /* 1st fault (guest will write cmd) */
+            assert((fault % sizeof(uint64_t)) == 0);
+            swap_control((void *)control_0, (void *)control_1);
+            return;
+        }
+
+        if (control_1 <= fault && fault < control_2) {
+            size_t client = (fault - control_1) / sizeof(uint64_t);
+
+            /* 2nd fault (invoke) */
+            assert((fault % sizeof(uint64_t)) == 0);
+            /*
+             * The control channel is padded to a page, so it can hold more
+             * slots than there are clients; ignore slots with no data area.
+             */
+            if (client < config.max_clients) {
+                uint64_t vcontrol = ((uint64_t *)control_0)[client];
+                /* qemu_data is indexed in 64-bit words, not bytes */
+                uint64_t *data_ptr = &qemu_data[client * config.client_args];
+
+                hypertrace_emit(vcpu, vcontrol, data_ptr);
+            }
+            swap_control((void *)control_1, (void *)control_0);
+            return;
+        }
+    }
+
+    /* proxy to next handler */
+    if (segv_next.sa_flags & SA_SIGINFO) {
+        segv_next.sa_sigaction(signum, siginfo, sigctxt);
+    } else if (segv_next.sa_handler == SIG_DFL ||
+               segv_next.sa_handler == SIG_IGN) {
+        /*
+         * Nothing to call: reinstate the previous disposition and return, so
+         * the faulting access is retried without us in the way.
+         */
+        sigaction(signum, &segv_next, NULL);
+    } else {
+        segv_next.sa_handler(signum);
+    }
+}
diff --git a/hypertrace/user.h b/hypertrace/user.h
new file mode 100644
index 0000000000..b2bc60d507
--- /dev/null
+++ b/hypertrace/user.h
@@ -0,0 +1,63 @@ 
+/*
+ * QEMU-side management of hypertrace in user-level emulation.
+ *
+ * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+
+
+/**
+ * Definition of QEMU options describing hypertrace subsystem configuration
+ */
+extern QemuOptsList qemu_hypertrace_opts;
+
+/**
+ * hypertrace_opt_parse:
+ * @optarg: Input arguments.
+ * @base: Output base path for the hypertrace channel files.
+ * @max_clients: Output maximum number of concurrent clients.
+ *
+ * Parse the commandline arguments for hypertrace.
+ */
+void hypertrace_opt_parse(const char *optarg, char **base,
+                          unsigned int *max_clients);
+
+/**
+ * hypertrace_init:
+ * @base: Base path for the hypertrace channel files.
+ * @max_clients: Maximum number of concurrent clients.
+ *
+ * Initialize the backing files for the hypertrace channel.
+ */
+void hypertrace_init(const char *base, unsigned int max_clients);
+
+/**
+ * hypertrace_guest_mmap_check:
+ * @fd: File descriptor the guest is mapping.
+ * @len: Requested length of the mapping.
+ * @offset: Requested offset into the file.
+ *
+ * Verify argument validity when mapping the control channel.
+ *
+ * Returns: true if @fd cannot be stat'ed or is not the control channel file;
+ *          otherwise true only when @len equals the control channel size and
+ *          @offset is zero.
+ *
+ * Precondition: defined(CONFIG_USER_ONLY)
+ */
+bool hypertrace_guest_mmap_check(int fd, unsigned long len,
+                                 unsigned long offset);
+
+/**
+ * hypertrace_guest_mmap_apply:
+ * @fd: File descriptor that was mapped.
+ * @qemu_addr: Address of the new mapping as seen by QEMU.
+ * @vcpu: vCPU performing the mapping; nothing is done when NULL.
+ *
+ * Configure initial mprotect if mapping the control channel.
+ *
+ * When @fd is the control channel file, @qemu_addr is recorded in @vcpu and
+ * the first half of the mapping is made read-only.
+ *
+ * Precondition: defined(CONFIG_USER_ONLY)
+ */
+void hypertrace_guest_mmap_apply(int fd, void *qemu_addr, CPUState *vcpu);
+
+/**
+ * hypertrace_fini:
+ *
+ * Remove the backing files for the hypertrace channel.
+ */
+void hypertrace_fini(void);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 3f79a8e955..8e7b579083 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -296,6 +296,7 @@  struct qemu_work_item;
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
  * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
+ * @hypertrace_control: Per-vCPU address of the hypertrace control channel.
  *
  * State of one CPU core or thread.
  */
@@ -372,6 +373,9 @@  struct CPUState {
      */
     unsigned long *trace_dstate;
 
+    /* Only used when defined(CONFIG_USER_ONLY) */
+    void *hypertrace_control;
+
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index; /* used by alpha TCG */
     uint32_t halted; /* used by alpha, cris, ppc TCG */
diff --git a/linux-user/main.c b/linux-user/main.c
index 75b199f274..8006abb102 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -32,10 +32,12 @@ 
 #include "tcg.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
+#include "qemu/error-report.h"
 #include "elf.h"
 #include "exec/log.h"
 #include "trace/control.h"
 #include "glib-compat.h"
+#include "hypertrace/user.h"
 
 char *exec_path;
 
@@ -3748,6 +3750,14 @@  static void handle_arg_trace(const char *arg)
     trace_file = trace_opt_parse(arg);
 }
 
+/* Values parsed from "-hypertrace" / QEMU_HYPERTRACE */
+static char *hypertrace_base;
+static unsigned int hypertrace_max_clients;
+/* Argument hook: drop any previously parsed base path and parse @arg */
+static void handle_arg_hypertrace(const char *arg)
+{
+    g_free(hypertrace_base);
+    hypertrace_opt_parse(arg, &hypertrace_base, &hypertrace_max_clients);
+}
+
 struct qemu_argument {
     const char *argv;
     const char *env;
@@ -3797,6 +3807,8 @@  static const struct qemu_argument arg_table[] = {
      "",           "Seed for pseudo-random number generator"},
     {"trace",      "QEMU_TRACE",       true,  handle_arg_trace,
      "",           "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
+    /* "path" is the implied option name declared in qemu_hypertrace_opts */
+    {"hypertrace", "QEMU_HYPERTRACE",  true,  handle_arg_hypertrace,
+     "",           "[[path=]<path>][,max-clients=<uint>]"},
     {"version",    "QEMU_VERSION",     false, handle_arg_version,
      "",           "display version information and exit"},
     {NULL, NULL, false, NULL, NULL, NULL}
@@ -3989,6 +4001,7 @@  int main(int argc, char **argv, char **envp)
     srand(time(NULL));
 
     qemu_add_opts(&qemu_trace_opts);
+    qemu_add_opts(&qemu_hypertrace_opts);
 
     optind = parse_args(argc, argv);
 
@@ -4187,6 +4200,12 @@  int main(int argc, char **argv, char **envp)
     syscall_init();
     signal_init();
 
+    if (atexit(hypertrace_fini)) {
+        fprintf(stderr, "error: atexit: %s\n", strerror(errno));
+        abort();
+    }
+    hypertrace_init(hypertrace_base, hypertrace_max_clients);
+
     /* Now that we've loaded the binary, GUEST_BASE is fixed.  Delay
        generating the prologue until now so that the prologue can take
        the real value of GUEST_BASE into account.  */
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 61685bf79e..52f1a54281 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -21,6 +21,7 @@ 
 #include "qemu.h"
 #include "qemu-common.h"
 #include "translate-all.h"
+#include "hypertrace/user.h"
 
 //#define DEBUG_MMAP
 
@@ -360,10 +361,18 @@  abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
     }
 }
 
-/* NOTE: all the constants are the HOST ones */
+/* Same as target_mmap_cpu() with no vCPU attached (cpu is passed as NULL) */
 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                      int flags, int fd, abi_ulong offset)
 {
+    return target_mmap_cpu(start, len, prot, flags, fd, offset, NULL);
+}
+
+
+/* NOTE: all the constants are the HOST ones */
+abi_long target_mmap_cpu(abi_ulong start, abi_ulong len, int prot,
+                         int flags, int fd, abi_ulong offset,
+                         CPUState *cpu)
+{
     abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
 
     mmap_lock();
@@ -445,6 +454,10 @@  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
        }
     }
 
+    if (!hypertrace_guest_mmap_check(fd, len, offset)) {
+        goto fail;
+    }
+
     if (!(flags & MAP_FIXED)) {
         unsigned long host_start;
         void *p;
@@ -556,6 +569,8 @@  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
         }
     }
  the_end1:
+    printf("mmap "TARGET_ABI_FMT_lx" "TARGET_ABI_FMT_lu"\n", start, len);
+    hypertrace_guest_mmap_apply(fd, g2h(start), cpu);
     page_set_flags(start, start + len, prot | PAGE_VALID);
  the_end:
 #ifdef DEBUG_MMAP
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index da73a01106..da10920907 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -422,6 +422,9 @@  void sparc64_get_context(CPUSPARCState *env);
 int target_mprotect(abi_ulong start, abi_ulong len, int prot);
 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                      int flags, int fd, abi_ulong offset);
+abi_long target_mmap_cpu(abi_ulong start, abi_ulong len, int prot,
+                         int flags, int fd, abi_ulong offset,
+                         CPUState *cpu);
 int target_munmap(abi_ulong start, abi_ulong len);
 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                        abi_ulong new_size, unsigned long flags,
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 7b77503f94..0432001cda 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -113,6 +113,7 @@  int __clone2(int (*fn)(void *), void *child_stack_base,
 #include "uname.h"
 
 #include "qemu.h"
+#include "hypertrace/user.h"
 
 #ifndef CLONE_IO
 #define CLONE_IO                0x80000000      /* Clone io context */
@@ -7603,6 +7604,7 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         _exit(arg1);
         ret = 0; /* avoid warning */
         break;
@@ -9051,15 +9053,19 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             v5 = tswapal(v[4]);
             v6 = tswapal(v[5]);
             unlock_user(v, arg1, 0);
-            ret = get_errno(target_mmap(v1, v2, v3,
-                                        target_to_host_bitmask(v4, mmap_flags_tbl),
-                                        v5, v6));
+            ret = get_errno(target_mmap_cpu(
+                                v1, v2, v3,
+                                target_to_host_bitmask(v4, mmap_flags_tbl),
+                                v5, v6,
+                                cpu));
         }
 #else
-        ret = get_errno(target_mmap(arg1, arg2, arg3,
-                                    target_to_host_bitmask(arg4, mmap_flags_tbl),
-                                    arg5,
-                                    arg6));
+        ret = get_errno(target_mmap_cpu(
+                            arg1, arg2, arg3,
+                            target_to_host_bitmask(arg4, mmap_flags_tbl),
+                            arg5,
+                            arg6,
+                            cpu));
 #endif
         break;
 #endif
@@ -9068,10 +9074,12 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #ifndef MMAP_SHIFT
 #define MMAP_SHIFT 12
 #endif
-        ret = get_errno(target_mmap(arg1, arg2, arg3,
-                                    target_to_host_bitmask(arg4, mmap_flags_tbl),
-                                    arg5,
-                                    arg6 << MMAP_SHIFT));
+        ret = get_errno(target_mmap_cpu(
+                            arg1, arg2, arg3,
+                            target_to_host_bitmask(arg4, mmap_flags_tbl),
+                            arg5,
+                            arg6 << MMAP_SHIFT,
+                            cpu));
         break;
 #endif
     case TARGET_NR_munmap:
@@ -9642,6 +9650,7 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         ret = get_errno(exit_group(arg1));
         break;
 #endif