diff mbox

Initial support for Illumos build and Illumos-kvm

Message ID D757FFAE-B826-4293-BB82-05E8BCF01FB2@nowonline.co.uk
State New
Headers show

Commit Message

Lee Essen March 16, 2012, 9:23 a.m. UTC
This fixes a number of issues with the build process (namely ensuring the use of bash), adds specific support for the Illumos port of KVM and fixes a few general Solaris compatibility issues.

There are still some things outstanding:

- there's a duplicate smp_wmb() definition in qemu-barrier.h and the illumos kvm_x86.h which generates some warnings.
- there's a repeated call to page_size() that should probably be fixed.
- dtrace support needs to be fixed (-m64/32 option, reserved words and linking issues)
- vnics need to be added
- the original illumos code added another timer source (multiticks)
- the issue with Linux needs to be resolved

Other than that, this gets it to the point where it will build and run with illumos kvm, and works fine for Windows.

It's my first patch to qemu, and most of the real kvm stuff has come from the original illumos-kvm-cmd tree, so be gentle with me!


Signed-off-by: Lee Essen <lee.essen@nowonline.co.uk>

--
 Makefile.objs              |    6 +-
 Makefile.target            |    6 +-
 configure                  |    8 +++-
 cpus.c                     |    4 +-
 exec.c                     |   88 ++++++++++++++++++++++++++++++++++++++++++++
 fpu/softfloat-specialize.h |    4 ++
 hw/kvm/clock.c             |    4 ++
 kvm-all.c                  |   81 ++++++++++++++++++++++++++++++++++++++++-
 kvm.h                      |   15 +++++++
 qemu-timer.c               |   10 ++--
 qga/channel-posix.c        |   16 ++++++++
 qga/commands-posix.c       |    9 ++++
 target-i386/hyperv.h       |    4 ++
 target-i386/kvm.c          |   53 +++++++++++++++++++++++++-
 14 files changed, 291 insertions(+), 17 deletions(-)

Comments

Andreas Färber March 16, 2012, 10:15 a.m. UTC | #1
Am 16.03.2012 10:23, schrieb Lee Essen:
> This fixes a number of issues with the build process (namely ensuring
> the use of bash), adds specific support for the Illumos port of KVM
> and fixes a few general Solaris compatibility issues.
>
> There are still some things outstanding:
>
> - there's a duplicate smp_wmb() definition in qemu-barrier.h and the
> illumos kvm_x86.h which generates some warnings.
> - there's a repeated call to page_size() that should probably be fixed.
> - dtrace support needs to be fixed (-m64/32 option, reserved words and
> linking issues)
> - vnics need to be added
> - the original illumos code added another timer source (multiticks)
> - the issue with Linux needs to be resolved
>
> Other than that, this gets it to the point where it will build and run
> with illumos kvm, and works fine for Windows.
>
> It's my first patch to qemu, and most of the real kvm stuff has come
> from the original illumos-kvm-cmd tree, so be gentle with me!
>
>
> Signed-off-by: Lee Essen <lee.essen@nowonline.co.uk>

Your patch is HTML-formatted. Please use git-send-email to avoid that.

It is also doing way too many things at once. Properly using existing
$(SHELL) everywhere in Makefiles could be one patch, for instance,
adding $shell in configure another, same for functional
CONFIG_SOLARIS/__sun__ changes, KVM stuff in yet another. Making the
patches smaller and confined to subsystems or aspects will make it more
reviewable, especially since different maintainers are involved in the
files you touch.

"LEE - todo" doesn't sound too reassuring. Either write it as a proper
"TODO This and that needs to be done." so that someone can address it or
send it as an [RFC] rather than a [PATCH].
If this patch introduces an issue for Linux (you don't say which?) while
adding support for illumos, it won't be acceptable in the first place. A
[PATCH] is expected to be of regression-free quality for qemu.git.

Is there any SystemTap on illumos? If not, we don't need the .stp file
at all.

Please resubmit with those issues addressed. I just cc'ed you on the C99
fix mentioned yesterday and am rebasing my queue on OpenIndiana (oi_151a).

Regards,
Andreas
Paolo Bonzini March 16, 2012, 11:56 a.m. UTC | #2
Il 16/03/2012 10:23, Lee Essen ha scritto:
> diff --git a/Makefile.objs b/Makefile.objs
> index 226b01d..c2a440a 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -373,12 +373,12 @@ else
>  trace.h: trace.h-timestamp
>  endif
>  trace.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
> -       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool
> --$(TRACE_BACKEND) -h < $< > $@,"  GEN   trace.h")
> +       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool
> --$(TRACE_BACKEND) -h < $< > $@,"  GEN   trace.h")
>         @cmp -s $@ trace.h || cp $@ trace.h
>  
>  trace.c: trace.c-timestamp
>  trace.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
> -       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool
> --$(TRACE_BACKEND) -c < $< > $@,"  GEN   trace.c")
> +       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool
> --$(TRACE_BACKEND) -c < $< > $@,"  GEN   trace.c")
>         @cmp -s $@ trace.c || cp $@ trace.c
>  
>  trace.o: trace.c $(GENERATED_HEADERS)
> @@ -391,7 +391,7 @@ trace-dtrace.h: trace-dtrace.dtrace
>  # rule file. So we use '.dtrace' instead
>  trace-dtrace.dtrace: trace-dtrace.dtrace-timestamp
>  trace-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events
> $(BUILD_DIR)/config-host.mak
> -       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool
> --$(TRACE_BACKEND) -d < $< > $@,"  GEN   trace-dtrace.dtrace")
> +       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool
> --$(TRACE_BACKEND) -d < $< > $@,"  GEN   trace-dtrace.dtrace")
>         @cmp -s $@ trace-dtrace.dtrace || cp $@ trace-dtrace.dtrace
>  
>  trace-dtrace.o: trace-dtrace.dtrace $(GENERATED_HEADERS)
> diff --git a/Makefile.target b/Makefile.target
> index eb25941..d32afc9 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -59,7 +59,7 @@ TARGET_TYPE=system
>  endif
>  
>  $(QEMU_PROG).stp:
> -       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool \
> +       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool \
>                 --$(TRACE_BACKEND) \
>                 --binary $(bindir)/$(QEMU_PROG) \
>                 --target-arch $(TARGET_ARCH) \
> @@ -443,10 +443,10 @@ gdbstub-xml.c: $(TARGET_XML_FILES)
> $(SRC_PATH)/scripts/feature_to_c.sh
>         $(call quiet-command,rm -f $@ && $(SHELL)
> $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"  GEN  
> $(TARGET_DIR)$@")
>  
>  hmp-commands.h: $(SRC_PATH)/hmp-commands.hx
> -       $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< >
> $@,"  GEN   $(TARGET_DIR)$@")
> +       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/hxtool -h < $<
>> $@,"  GEN   $(TARGET_DIR)$@")
>  
>  qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx
> -       $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< >
> $@,"  GEN   $(TARGET_DIR)$@")
> +       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/hxtool -h < $<
>> $@,"  GEN   $(TARGET_DIR)$@")
>  
>  clean:
>         rm -f *.o *.a *~ $(PROGS) nwfpe/*.o fpu/*.o
> diff --git a/configure b/configure
> index afe7395..601f77a 100755
> --- a/configure
> +++ b/configure
> @@ -101,6 +101,7 @@ audio_win_int=""
>  cc_i386=i386-pc-linux-gnu-gcc
>  libs_qga=""
>  debug_info="yes"
> +shell="sh"
>  
>  target_list=""
>  
> @@ -442,6 +443,7 @@ SunOS)
>    # have to select again, because `uname -m` returns i86pc
>    # even on an x86_64 box.
>    solariscpu=`isainfo -k`
> +  shell="bash"
>    if test "${solariscpu}" = "amd64" ; then
>      cpu="x86_64"
>    fi
> @@ -471,6 +473,7 @@ SunOS)
>    QEMU_CFLAGS="-D__EXTENSIONS__ $QEMU_CFLAGS"
>    QEMU_CFLAGS="-std=gnu99 $QEMU_CFLAGS"
>    LIBS="-lsocket -lnsl -lresolv $LIBS"
> +  libs_qga="-lsocket -lxnet $lib_qga"
>  ;;
>  AIX)
>    aix="yes"
> @@ -1097,7 +1100,7 @@ echo "  --disable-docs           disable
> documentation build"
>  echo "  --disable-vhost-net      disable vhost-net acceleration support"
>  echo "  --enable-vhost-net       enable vhost-net acceleration support"
>  echo "  --enable-trace-backend=B Set trace backend"
> -echo "                           Available backends:"
> $("$source_path"/scripts/tracetool --list-backends)
> +echo "                           Available backends:" $($shell
> "$source_path"/scripts/tracetool --list-backends)
>  echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
>  echo "                           Default:trace-<pid>"
>  echo "  --disable-spice          disable spice"
> @@ -2654,7 +2657,7 @@ fi
>  ##########################################
>  # check if trace backend exists
>  
> -sh "$source_path/scripts/tracetool" "--$trace_backend" --check-backend
>> /dev/null 2> /dev/null
> +$shell "$source_path/scripts/tracetool" "--$trace_backend"
> --check-backend > /dev/null 2> /dev/null
>  if test "$?" -ne 0 ; then
>    echo
>    echo "Error: invalid trace backend"
> @@ -3358,6 +3361,7 @@ echo "LIBS+=$LIBS" >> $config_host_mak
>  echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
>  echo "EXESUF=$EXESUF" >> $config_host_mak
>  echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
> +echo "SHELL=$shell" >> $config_host_mak
>  
>  # generate list of library paths for linker script
>  
> diff --git a/cpus.c b/cpus.c

Everything up to here should be a separate patch, but it looks sane.

> index 25ba621..7a32ee6 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -455,7 +455,7 @@ static void cpu_signal(int sig)
>      exit_request = 1;
>  }
>  
> -#ifdef CONFIG_LINUX
> +#if defined(CONFIG_LINUX) || defined(CONFIG_SOLARIS)
>  static void sigbus_reraise(void)
>  {
>      sigset_t set;
> @@ -491,7 +491,9 @@ static void qemu_init_sigbus(void)
>      action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
>      sigaction(SIGBUS, &action, NULL);
>  
> +#ifndef __sun__
>      prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
> +#endif
>  }
>  
>  static void qemu_kvm_eat_signals(CPUArchState *env)
> diff --git a/exec.c b/exec.c
> index 8fd50a1..57e2890 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2853,6 +2853,79 @@ static ram_addr_t last_ram_offset(void)
>      return last;
>  }
>  
> +#ifdef CONFIG_SOLARIS
> +static int
> +qemu_mlock(caddr_t base, ram_addr_t size)
> +{
> +  /* LEE - todo */
> +  qemu_real_host_page_size = getpagesize();
> +
> +  ram_addr_t ps = qemu_real_host_page_size, nbytes, locked = 0;
> +  ram_addr_t remaining = size / ps;
> +  ram_addr_t step = remaining;
> +  timespec_t tv;
> +  hrtime_t waiting = 0, threshold;
> +
> +  tv.tv_sec = 0;
> +  tv.tv_nsec = NANOSEC / MILLISEC;
> +  threshold = 10 * (hrtime_t)NANOSEC;
> +
> +  /*
> +   * We cannot lock memory with a single call to mlock() because it
> +   * won't result in sustained memory pressure:  if there is a
> +   * substantial amount of kernel memory in use electively (e.g., for
> +   * the ARC) a single call to mlock() may fail where sustained memory
> +   * pressure would succeed.  We therefore start by trying to lock the
> +   * entire region, adjusting our size down as we fail with EAGAIN; once
> +   * we successfully lock a portion of the region, we advance to the
> +   * unlocked portion of the region (if any remains) and increase the
> +   * size.  Note that this will continue to hoard memory until it locks
> +   * what it needs -- it won't give up.  To help debug situations in
> +   * which one has mistakenly overprovisioned, we emit a message every
> +   * ten seconds with no forward progress.
> +   */
> +  while (remaining) {
> +    if (step > remaining) {
> +        step = remaining;
> +    }
> +
> +    while (mlock(base, (nbytes = step * ps)) == -1) {
> +        if (errno != EAGAIN) {
> +            return -1;
> +        }
> +
> +        if (waiting == 0) {
> +            waiting = gethrtime();

Please use qemu_get_clock_ns(rt_clock) here.

> +        }
> +
> +        if (step > 1) {
> +            step >>= 1;
> +            continue;
> +        }
> +
> +        (void) nanosleep(&tv, NULL);
> +
> +        if (gethrtime() - waiting > threshold) {
> +            (void) fprintf(stderr, "qemu_mlock: have only "
> +              "locked %ld of %ld bytes; still "
> +              "trying...\n", locked, size);
> +            waiting = 0;
> +        }
> +    }
> +
> +    waiting = 0;
> +    base += nbytes;
> +    locked += nbytes;
> +    remaining -= step;
> +
> +    step <<= 1;
> +  }
> +
> +  return 0;
> +}
> +#endif
> +
> +
>  void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState
> *dev)
>  {
>      RAMBlock *new_block, *block;
> @@ -2931,6 +3004,21 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t
> size, void *host,
>                  new_block->host = qemu_vmalloc(size);
>              }
>  #endif
> +
> +#ifdef CONFIG_SOLARIS
> +            /*
> +              * XXX For right now, we'll lock down the memory.  This needs
> +              * to be revisited if we implement mmu notifiers in the
> kernel.
> +              * Note also that pages are touched in
> kvm_set_user_memory_region.
> +              */
> +            if (qemu_mlock((caddr_t)new_block->host, size) != 0) {
> +                fprintf(stderr, "qemu_ram_alloc: Could not lock %ld
> memory,"
> +                    " errno = %d\n",
> +                    size, errno);
> +                exit(1);
> +            }
> +#endif /*CONFIG_SOLARIS*/
> +
>              qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>          }
>      }
> diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
> index c5e2dab..3889041 100644
> --- a/fpu/softfloat-specialize.h
> +++ b/fpu/softfloat-specialize.h
> @@ -89,8 +89,10 @@ const float64 float64_default_nan =
> const_float64(LIT64( 0xFFF8000000000000 ));
>  #define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
>  #endif
>  
> +#ifndef __sun__
>  const floatx80 floatx80_default_nan =
> make_floatx80(floatx80_default_nan_high,
>                                                    
>  floatx80_default_nan_low);
> +#endif

Why is this needed?

>  
>  /*----------------------------------------------------------------------------
>  | The pattern for a default generated quadruple-precision NaN.  The
> `high' and
> @@ -104,8 +106,10 @@ const floatx80 floatx80_default_nan =
> make_floatx80(floatx80_default_nan_high,
>  #define float128_default_nan_low  LIT64( 0x0000000000000000 )
>  #endif
>  
> +#ifndef __sun__
>  const float128 float128_default_nan =
> make_float128(float128_default_nan_high,
>                                                    
>  float128_default_nan_low);
> +#endif
>  
>  /*----------------------------------------------------------------------------
>  | Raises the exceptions specified by `flags'.  Floating-point traps can be
> diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c
> index 446bd62..3fd5e1e 100644
> --- a/hw/kvm/clock.c
> +++ b/hw/kvm/clock.c
> @@ -19,8 +19,12 @@
>  #include "hw/sysbus.h"
>  #include "hw/kvm/clock.h"
>  
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +#else
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#endif

Perhaps you can put this in kvm.h instead, and just include that file:

#ifdef __sun__
#include <sys/kvm.h>
#else
#include <linux/kvm.h>
#endif
#include <linux/kvm_para.h>

kvm_para.h should be independent of the host OS, hoping there's no
conflict between Solaris and Linux headers.

>  
>  typedef struct KVMClockState {
>      SysBusDevice busdev;
> diff --git a/kvm-all.c b/kvm-all.c
> index 42e5e23..27f3177 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -18,7 +18,11 @@
>  #include <sys/mman.h>
>  #include <stdarg.h>
>  
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +#else
>  #include <linux/kvm.h>
> +#endif
>  
>  #include "qemu-common.h"
>  #include "qemu-barrier.h"
> @@ -176,12 +180,23 @@ int kvm_physical_memory_addr_from_host(KVMState
> *s, void *ram,
>  static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
>  {
>      struct kvm_userspace_memory_region mem;
> +#ifdef CONFIG_SOLARIS
> +    caddr_t p;
> +    char c;
> +#endif
>  
>      mem.slot = slot->slot;
>      mem.guest_phys_addr = slot->start_addr;
>      mem.memory_size = slot->memory_size;
>      mem.userspace_addr = (unsigned long)slot->ram;
>      mem.flags = slot->flags;
> +#ifdef CONFIG_SOLARIS
> +    for (p = (caddr_t)mem.userspace_addr;
> +      p < (caddr_t)mem.userspace_addr + mem.memory_size;
> +      p += PAGE_SIZE)
> +        c = *p;
> +#endif /* CONFIG_SOLARIS */

What is this needed for?

> +
>      if (s->migration_log) {
>          mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
>      }
> @@ -200,6 +215,31 @@ int kvm_pit_in_kernel(void)
>      return kvm_state->pit_in_kernel;
>  }
>  
> +#ifdef CONFIG_SOLARIS
> +static int kvm_vm_clone(KVMState *s)
> +{
> +    struct stat stat;
> +    int fd;
> +
> +    if (fstat(s->fd, &stat) != 0) {
> +        return -errno;
> +    }
> +
> +    fd = qemu_open("/dev/kvm", O_RDWR);
> +
> +    if (fd == -1) {
> +        return -errno;
> +    }
> +
> +    if (ioctl(fd, KVM_CLONE, stat.st_rdev) == -1) {
> +        close(fd);
> +        return -errno;
> +    }
> +
> +    return fd;
> +}
> +#endif
> +
>  int kvm_init_vcpu(CPUArchState *env)
>  {
>      KVMState *s = kvm_state;
> @@ -208,14 +248,29 @@ int kvm_init_vcpu(CPUArchState *env)
>  
>      DPRINTF("kvm_init_vcpu\n");
>  
> +#ifdef CONFIG_SOLARIS
> +    ret = kvm_vm_clone(kvm_state);
> +
> +    if (ret < 0) {
> +        fprintf(stderr, "kvm_init_vcpu could not clone fd: %m\n");
> +        goto err;
> +    }
> +    env->kvm_fd = ret;
> +    env->kvm_state = kvm_state;
> +
> +    ret = ioctl(env->kvm_fd, KVM_CREATE_VCPU, env->cpu_index);
> +#else
>      ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
> +#endif
>      if (ret < 0) {
>          DPRINTF("kvm_create_vcpu failed\n");
>          goto err;
>      }
>  
> +#ifndef CONFIG_SOLARIS
>      env->kvm_fd = ret;
>      env->kvm_state = s;
> +#endif

env->kvm_state assignment need not be split, right?

>      env->kvm_vcpu_dirty = 1;
>  
>      mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
> @@ -1021,6 +1076,9 @@ int kvm_init(void)
>          ret = s->vmfd;
>          goto err;
>      }
> +#ifdef CONFIG_SOLARIS
> +    s->vmfd = s->fd;
> +#endif
>  
>      missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
>      if (!missing_cap) {
> @@ -1287,6 +1345,19 @@ int kvm_cpu_exec(CPUArchState *env)
>              DPRINTF("irq_window_open\n");
>              ret = EXCP_INTERRUPT;
>              break;
> +#ifdef CONFIG_SOLARIS
> +        /*
> +         * In the case of an external interrupt we can get a zero
> +         * return from the ioctl, with a KVM_EXIT_INTR. This doesn't
> +         * happen on linux
> +         *
> +         * Not entirely sure what to do here.
> +         */
> +        case KVM_EXIT_INTR:
> +            DPRINTF("exit_intr (run_ret is %d)\n", run_ret);
> +            ret = EXCP_INTERRUPT;
> +            break;
> +#endif
>          case KVM_EXIT_SHUTDOWN:
>              DPRINTF("shutdown\n");
>              qemu_system_reset_request();
> @@ -1631,7 +1702,7 @@ int kvm_set_signal_mask(CPUArchState *env, const
> sigset_t *sigset)
>  
>      sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
>  
> -    sigmask->len = 8;
> +    sigmask->len = sizeof(sigset_t);
>      memcpy(sigmask->sigset, sigset, sizeof(*sigset));
>      r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
>      g_free(sigmask);
> @@ -1641,6 +1712,7 @@ int kvm_set_signal_mask(CPUArchState *env, const
> sigset_t *sigset)
>  
>  int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val,
> bool assign)
>  {
> +#ifdef CONFIG_EVENTFD
>      int ret;
>      struct kvm_ioeventfd iofd;
>  
> @@ -1665,10 +1737,14 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t
> addr, uint32_t val, bool assign
>      }
>  
>      return 0;
> +#else
> +    return -ENOSYS;
> +#endif

The guard probably needs to be added also around ioeventfd definitions
in virtio-pci.c.

>  }
>  
>  int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val,
> bool assign)
>  {
> +#ifdef CONFIG_EVENTFD
>      struct kvm_ioeventfd kick = {
>          .datamatch = val,
>          .addr = addr,
> @@ -1688,6 +1764,9 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t
> addr, uint16_t val, bool assign)
>          return r;
>      }
>      return 0;
> +#else
> +    return -ENOSYS;
> +#endif
>  }
>  
>  int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
> diff --git a/kvm.h b/kvm.h
> index 330f17b..8960b4e 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -19,8 +19,23 @@
>  #include "qemu-queue.h"
>  
>  #ifdef CONFIG_KVM
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +/*
> + * it's a bit horrible to include these here, but the kvm_para.h
> include file
> + * isn't public with the illumos kvm implementation
> + */
> +#define KVM_CPUID_SIGNATURE       0x40000000
> +#define KVM_CPUID_FEATURES        0x40000001
> +#define KVM_FEATURE_CLOCKSOURCE   0
> +#define KVM_FEATURE_NOP_IO_DELAY  1
> +#define KVM_FEATURE_MMU_OP        2
> +#define KVM_FEATURE_CLOCKSOURCE2  3
> +#define HYPERV_CPUID_MIN          0x40000005
> +#else
>  #include <linux/kvm.h>
>  #endif
> +#endif
>  
>  extern int kvm_allowed;
>  extern bool kvm_kernel_irqchip;
> diff --git a/qemu-timer.c b/qemu-timer.c
> index d7f56e5..f35ad4e 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -77,7 +77,7 @@ struct qemu_alarm_timer {
>      int (*start)(struct qemu_alarm_timer *t);
>      void (*stop)(struct qemu_alarm_timer *t);
>      void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
> -#if defined(__linux__)
> +#if defined(__linux__) || defined(__sun__)

Starting from here it would also be a separate patch.

Perhaps you can add a configure test for timer_t instead.

>      int fd;
>      timer_t timer;
>  #elif defined(_WIN32)
> @@ -165,7 +165,7 @@ static int unix_start_timer(struct qemu_alarm_timer *t);
>  static void unix_stop_timer(struct qemu_alarm_timer *t);
>  static void unix_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
>  
> -#ifdef __linux__
> +#if defined(__linux__) || defined(__sun__)
>  
>  static int dynticks_start_timer(struct qemu_alarm_timer *t);
>  static void dynticks_stop_timer(struct qemu_alarm_timer *t);
> @@ -177,7 +177,7 @@ static void dynticks_rearm_timer(struct
> qemu_alarm_timer *t, int64_t delta);
>  
>  static struct qemu_alarm_timer alarm_timers[] = {
>  #ifndef _WIN32
> -#ifdef __linux__
> +#if defined(__linux__) || defined(__sun__)
>      {"dynticks", dynticks_start_timer,
>       dynticks_stop_timer, dynticks_rearm_timer},
>  #endif
> @@ -502,7 +502,7 @@ static void host_alarm_handler(int host_signum)
>      }
>  }
>  
> -#if defined(__linux__)
> +#if defined(__linux__) || defined(__sun__)
>  
>  #include "compatfd.h"
>  
> @@ -585,7 +585,7 @@ static void dynticks_rearm_timer(struct
> qemu_alarm_timer *t,
>      }
>  }
>  
> -#endif /* defined(__linux__) */
> +#endif /* defined(__linux__) || defined(__sun__) */
>  
>  #if !defined(_WIN32)
>  
> diff --git a/qga/channel-posix.c b/qga/channel-posix.c
> index 40f7658..86245c1 100644
> --- a/qga/channel-posix.c
> +++ b/qga/channel-posix.c
> @@ -3,6 +3,10 @@
>  #include "qemu_socket.h"
>  #include "qga/channel.h"
>  
> +#ifdef CONFIG_SOLARIS
> +#include <sys/stropts.h>
> +#endif
> +
>  #define GA_CHANNEL_BAUDRATE_DEFAULT B38400 /* for isa-serial channels */
>  
>  struct GAChannel {

Starting from here it would also be a separate patch.

> @@ -123,7 +127,19 @@ static gboolean ga_channel_open(GAChannel *c, const
> gchar *path, GAChannelMethod
>  
>      switch (c->method) {
>      case GA_CHANNEL_VIRTIO_SERIAL: {
> +#ifdef CONFIG_SOLARIS
> +        int fd = qemu_open(path, O_RDWR | O_NONBLOCK);
> +        if (fd == -1) {
> +            g_critical("error opening channel: %s", strerror(errno));
> +            exit(EXIT_FAILURE);
> +        }
> +        if (ioctl(fd, I_SETSIG, S_OUTPUT | S_INPUT | S_HIPRI) < 0) {
> +            g_critical("error with setsig on channel: %s",
> strerror(errno));
> +            exit(EXIT_FAILURE);
> +        }
> +#else
>          int fd = qemu_open(path, O_RDWR | O_NONBLOCK | O_ASYNC);
> +#endif
>          if (fd == -1) {
>              g_critical("error opening channel: %s", strerror(errno));
>              exit(EXIT_FAILURE);
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index 7b2be2f..67531aa 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -35,6 +35,11 @@
>  #include "qemu-queue.h"
>  #include "host-utils.h"
>  
> +#if defined(__sun__)
> +#include <sys/sockio.h>
> +extern char **environ;
> +#endif
> +
>  static void reopen_fd_to_null(int fd)
>  {
>      int nullfd;
> @@ -807,7 +812,11 @@ GuestNetworkInterfaceList
> *qmp_guest_network_get_interfaces(Error **errp)
>                  goto error;
>              }
>  
> +#if defined(__sun__)
> +            mac_addr = (unsigned char *) &ifr.ifr_enaddr;
> +#else
>              mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
> +#endif
>  
>              if (asprintf(&info->value->hardware_address,
>                           "%02x:%02x:%02x:%02x:%02x:%02x",
> diff --git a/target-i386/hyperv.h b/target-i386/hyperv.h
> index bacb1d4..9c08750 100644
> --- a/target-i386/hyperv.h
> +++ b/target-i386/hyperv.h
> @@ -15,8 +15,12 @@
>  
>  #include "qemu-common.h"
>  #ifdef CONFIG_KVM
> +#ifdef __sun__
> +#include <hyperv.h>
> +#else
>  #include <asm/hyperv.h>
>  #endif
> +#endif
>  
>  #ifndef HYPERV_SPINLOCK_NEVER_RETRY
>  #define HYPERV_SPINLOCK_NEVER_RETRY             0xFFFFFFFF
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index e74a9e4..6d007cc 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -17,8 +17,12 @@
>  #include <sys/mman.h>
>  #include <sys/utsname.h>
>  
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +#else
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#endif
>  
>  #include "qemu-common.h"
>  #include "sysemu.h"
> @@ -61,7 +65,9 @@ const KVMCapabilityInfo
> kvm_arch_required_capabilities[] = {
>  static bool has_msr_star;
>  static bool has_msr_hsave_pa;
>  static bool has_msr_tsc_deadline;
> +#ifdef KVM_CAP_ASYNC_PF
>  static bool has_msr_async_pf_en;
> +#endif
>  static bool has_msr_misc_enable;
>  static int lm_capable_kernel;
>  
> @@ -97,7 +103,9 @@ struct kvm_para_features {
>      { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
>      { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
>      { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
> +#ifdef KVM_CAP_ASYNC_PF
>      { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
> +#endif
>      { -1, -1 }
>  };
>  
> @@ -442,7 +450,9 @@ int kvm_arch_init_vcpu(CPUX86State *env)
>          c->edx = signature[2];
>      }
>  
> +#ifdef KVM_CAP_ASYNC_PF
>      has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
> +#endif
>  
>      cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
>  
> @@ -561,6 +571,7 @@ int kvm_arch_init_vcpu(CPUX86State *env)
>          return r;
>      }
>  
> +#ifdef KVM_CAP_TSC_CONTROL
>      r = kvm_check_extension(env->kvm_state, KVM_CAP_TSC_CONTROL);
>      if (r && env->tsc_khz) {
>          r = kvm_vcpu_ioctl(env, KVM_SET_TSC_KHZ, env->tsc_khz);
> @@ -569,10 +580,12 @@ int kvm_arch_init_vcpu(CPUX86State *env)
>              return r;
>          }
>      }
> -
> +#endif
> +#ifdef KVM_CAP_XSAVE
>      if (kvm_has_xsave()) {
>          env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
>      }
> +#endif
>  
>      return 0;
>  }
> @@ -759,7 +772,7 @@ static void get_seg(SegmentCache *lhs, const struct
> kvm_segment *rhs)
>                   (rhs->avl * DESC_AVL_MASK);
>  }
>  
> -static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
> +static void kvm_getput_reg(uint64_t *kvm_reg, target_ulong *qemu_reg,
> int set)
>  {
>      if (set) {
>          *kvm_reg = *qemu_reg;
> @@ -841,6 +854,7 @@ static int kvm_put_fpu(CPUX86State *env)
>  #define XSAVE_XSTATE_BV   128
>  #define XSAVE_YMMH_SPACE  144
>  
> +#ifdef KVM_CAP_XSAVE
>  static int kvm_put_xsave(CPUX86State *env)
>  {
>      struct kvm_xsave* xsave = env->kvm_xsave_buf;
> @@ -874,7 +888,9 @@ static int kvm_put_xsave(CPUX86State *env)
>      r = kvm_vcpu_ioctl(env, KVM_SET_XSAVE, xsave);
>      return r;
>  }
> +#endif
>  
> +#ifdef KVM_CAP_XSCRS
>  static int kvm_put_xcrs(CPUX86State *env)
>  {
>      struct kvm_xcrs xcrs;
> @@ -889,6 +905,7 @@ static int kvm_put_xcrs(CPUX86State *env)
>      xcrs.xcrs[0].value = env->xcr0;
>      return kvm_vcpu_ioctl(env, KVM_SET_XCRS, &xcrs);
>  }
> +#endif
>  
>  static int kvm_put_sregs(CPUX86State *env)
>  {
> @@ -1000,10 +1017,12 @@ static int kvm_put_msrs(CPUX86State *env, int level)
>          kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
>                            env->system_time_msr);
>          kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK,
> env->wall_clock_msr);
> +#ifdef KVM_CAP_ASYNC_PF
>          if (has_msr_async_pf_en) {
>              kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN,
>                                env->async_pf_en_msr);
>          }
> +#endif
>          if (hyperv_hypercall_available()) {
>              kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
>              kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
> @@ -1055,6 +1074,7 @@ static int kvm_get_fpu(CPUX86State *env)
>      return 0;
>  }
>  
> +#ifdef KVM_CAP_XSAVE
>  static int kvm_get_xsave(CPUX86State *env)
>  {
>      struct kvm_xsave* xsave = env->kvm_xsave_buf;
> @@ -1092,7 +1112,9 @@ static int kvm_get_xsave(CPUX86State *env)
>              sizeof env->ymmh_regs);
>      return 0;
>  }
> +#endif
>  
> +#ifdef KVM_CAP_XCRS
>  static int kvm_get_xcrs(CPUX86State *env)
>  {
>      int i, ret;
> @@ -1116,6 +1138,7 @@ static int kvm_get_xcrs(CPUX86State *env)
>      }
>      return 0;
>  }
> +#endif
>  
>  static int kvm_get_sregs(CPUX86State *env)
>  {
> @@ -1243,9 +1266,11 @@ static int kvm_get_msrs(CPUX86State *env)
>  #endif
>      msrs[n++].index = MSR_KVM_SYSTEM_TIME;
>      msrs[n++].index = MSR_KVM_WALL_CLOCK;
> +#ifdef KVM_CAP_ASYNC_PF
>      if (has_msr_async_pf_en) {
>          msrs[n++].index = MSR_KVM_ASYNC_PF_EN;
>      }
> +#endif
>  
>      if (env->mcg_cap) {
>          msrs[n++].index = MSR_MCG_STATUS;
> @@ -1322,9 +1347,11 @@ static int kvm_get_msrs(CPUX86State *env)
>                  env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
>              }
>              break;
> +#ifdef KVM_CAP_ASYNC_PF
>          case MSR_KVM_ASYNC_PF_EN:
>              env->async_pf_en_msr = msrs[i].data;
>              break;
> +#endif
>          }
>      }
>  
> @@ -1482,6 +1509,7 @@ static int kvm_guest_debug_workarounds(CPUX86State
> *env)
>      return ret;
>  }
>  
> +#ifdef KVM_CAP_DEBUGREGS
>  static int kvm_put_debugregs(CPUX86State *env)
>  {
>      struct kvm_debugregs dbgregs;
> @@ -1522,6 +1550,7 @@ static int kvm_get_debugregs(CPUX86State *env)
>  
>      return 0;
>  }
> +#endif
>  
>  int kvm_arch_put_registers(CPUX86State *env, int level)
>  {
> @@ -1533,14 +1562,20 @@ int kvm_arch_put_registers(CPUX86State *env, int
> level)
>      if (ret < 0) {
>          return ret;
>      }
> +#ifdef KVM_CAP_XSAVE
>      ret = kvm_put_xsave(env);
> +#else
> +    ret = kvm_put_fpu(env);
> +#endif
>      if (ret < 0) {
>          return ret;
>      }
> +#ifdef KVM_CAP_XCRS
>      ret = kvm_put_xcrs(env);
>      if (ret < 0) {
>          return ret;
>      }
> +#endif
>      ret = kvm_put_sregs(env);
>      if (ret < 0) {
>          return ret;
> @@ -1568,10 +1603,12 @@ int kvm_arch_put_registers(CPUX86State *env, int
> level)
>      if (ret < 0) {
>          return ret;
>      }
> +#ifdef KVM_CAP_DEBUGREGS
>      ret = kvm_put_debugregs(env);
>      if (ret < 0) {
>          return ret;
>      }
> +#endif
>      /* must be last */
>      ret = kvm_guest_debug_workarounds(env);
>      if (ret < 0) {
> @@ -1590,14 +1627,20 @@ int kvm_arch_get_registers(CPUX86State *env)
>      if (ret < 0) {
>          return ret;
>      }
> +#ifdef KVM_CAP_XSAVE
>      ret = kvm_get_xsave(env);
> +#else
> +    ret = kvm_get_fpu(env);
> +#endif
>      if (ret < 0) {
>          return ret;
>      }
> +#ifdef KVM_CAP_XCRS
>      ret = kvm_get_xcrs(env);
>      if (ret < 0) {
>          return ret;
>      }
> +#endif
>      ret = kvm_get_sregs(env);
>      if (ret < 0) {
>          return ret;
> @@ -1618,10 +1661,12 @@ int kvm_arch_get_registers(CPUX86State *env)
>      if (ret < 0) {
>          return ret;
>      }
> +#ifdef KVM_CAP_DEBUGREGS
>      ret = kvm_get_debugregs(env);
>      if (ret < 0) {
>          return ret;
>      }
> +#endif
>      return 0;
>  }
>  
> @@ -1770,6 +1815,7 @@ static int kvm_handle_tpr_access(CPUX86State *env)
>      return 1;
>  }
>  
> +#ifdef KVM_CAP_SET_GUEST_DEBUG
>  int kvm_arch_insert_sw_breakpoint(CPUX86State *env, struct
> kvm_sw_breakpoint *bp)
>  {
>      static const uint8_t int3 = 0xcc;
> @@ -1950,6 +1996,7 @@ void kvm_arch_update_guest_debug(CPUX86State *env,
> struct kvm_guest_debug *dbg)
>          }
>      }
>  }
> +#endif /* KVM_CAP_SET_GUEST_DEBUG */
>  
>  static bool host_supports_vmx(void)
>  {
> @@ -1999,10 +2046,12 @@ int kvm_arch_handle_exit(CPUX86State *env,
> struct kvm_run *run)
>                  run->ex.exception, run->ex.error_code);
>          ret = -1;
>          break;
> +#ifdef KVM_CAP_SET_GUEST_DEBUG
>      case KVM_EXIT_DEBUG:
>          DPRINTF("kvm_exit_debug\n");
>          ret = kvm_handle_debug(&run->debug.arch);
>          break;
> +#endif
>      default:
>          fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>          ret = -1;
> 

Did you have any problem with IOV_MAX?  IIRC, it's quite low (16
perhaps) in Solaris.

Paolo
Jan Kiszka March 16, 2012, 1:14 p.m. UTC | #3
On 2012-03-16 10:23, Lee Essen wrote:
> This fixes a number of issues with the build process (namely ensuring the use of bash), adds specific support for the Illumos port of KVM and fixes a few general Solaris compatibility issues.
> 
> There are still some things outstanding:
> 
> - there's a duplicate smb_wmb() definition in qemu-barrier.h and the illumos kvm_x86.h which generates some warnings.
> - there's a repeated call to page_size() that should probably be fixed.
> - dtrace support needs to be fixed (-m64/32 option, reserved words and linking issues)
> - vnics need to be added
> - the original illumos code added another timer source (multiticks)
> - the issue with Linux needs to be resolved
> 
> Other than that, this gets it to the point where it will build and run with illumos kvm, and works fine for Windows.
> 
> It's my first patch to qemu, and most of the real kvm stuff has come from the original illumos-kvm-cmd tree, so be gentle with me!
> 
> 
> Signed-off-by: Lee Essen <lee.essen@nowonline.co.uk>
> 
> --
>  Makefile.objs              |    6 +-
>  Makefile.target            |    6 +-
>  configure                  |    8 +++-
>  cpus.c                     |    4 +-
>  exec.c                     |   88 ++++++++++++++++++++++++++++++++++++++++++++
>  fpu/softfloat-specialize.h |    4 ++
>  hw/kvm/clock.c             |    4 ++
>  kvm-all.c                  |   81 ++++++++++++++++++++++++++++++++++++++++-
>  kvm.h                      |   15 +++++++
>  qemu-timer.c               |   10 ++--
>  qga/channel-posix.c        |   16 ++++++++
>  qga/commands-posix.c       |    9 ++++
>  target-i386/hyperv.h       |    4 ++
>  target-i386/kvm.c          |   53 +++++++++++++++++++++++++-
>  14 files changed, 291 insertions(+), 17 deletions(-)
> 

...

> diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c
> index 446bd62..3fd5e1e 100644
> --- a/hw/kvm/clock.c
> +++ b/hw/kvm/clock.c
> @@ -19,8 +19,12 @@
>  #include "hw/sysbus.h"
>  #include "hw/kvm/clock.h"
>  
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +#else
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#endif

As Paolo already said, this should somehow be centralized.

Also, CONFIG_SOLARIS vs. __sun__: please use a consistent pattern.

>  
>  typedef struct KVMClockState {
>      SysBusDevice busdev;
> diff --git a/kvm-all.c b/kvm-all.c
> index 42e5e23..27f3177 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -18,7 +18,11 @@
>  #include <sys/mman.h>
>  #include <stdarg.h>
>  
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +#else
>  #include <linux/kvm.h>
> +#endif
>  
>  #include "qemu-common.h"
>  #include "qemu-barrier.h"
> @@ -176,12 +180,23 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
>  static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
>  {
>      struct kvm_userspace_memory_region mem;
> +#ifdef CONFIG_SOLARIS
> +    caddr_t p;
> +    char c;
> +#endif
>  
>      mem.slot = slot->slot;
>      mem.guest_phys_addr = slot->start_addr;
>      mem.memory_size = slot->memory_size;
>      mem.userspace_addr = (unsigned long)slot->ram;
>      mem.flags = slot->flags;
> +#ifdef CONFIG_SOLARIS
> +    for (p = (caddr_t)mem.userspace_addr;
> +      p < (caddr_t)mem.userspace_addr + mem.memory_size;
> +      p += PAGE_SIZE)
> +        c = *p;
> +#endif /* CONFIG_SOLARIS */
> +

I bet gcc will like this write-only pattern and bark at you.

>      if (s->migration_log) {
>          mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
>      }
> @@ -200,6 +215,31 @@ int kvm_pit_in_kernel(void)
>      return kvm_state->pit_in_kernel;
>  }
>  
> +#ifdef CONFIG_SOLARIS
> +static int kvm_vm_clone(KVMState *s)
> +{
> +    struct stat stat;
> +    int fd;
> +
> +    if (fstat(s->fd, &stat) != 0) {
> +        return -errno;
> +    }
> +
> +    fd = qemu_open("/dev/kvm", O_RDWR);
> +
> +    if (fd == -1) {
> +        return -errno;
> +    }
> +
> +    if (ioctl(fd, KVM_CLONE, stat.st_rdev) == -1) {
> +        close(fd);
> +        return -errno;
> +    }
> +
> +    return fd;
> +}
> +#endif
> +
>  int kvm_init_vcpu(CPUArchState *env)
>  {
>      KVMState *s = kvm_state;
> @@ -208,14 +248,29 @@ int kvm_init_vcpu(CPUArchState *env)
>  
>      DPRINTF("kvm_init_vcpu\n");
>  
> +#ifdef CONFIG_SOLARIS
> +    ret = kvm_vm_clone(kvm_state);
> +
> +    if (ret < 0) {
> +        fprintf(stderr, "kvm_init_vcpu could not clone fd: %m\n");
> +        goto err;
> +    }
> +    env->kvm_fd = ret;
> +    env->kvm_state = kvm_state;
> +
> +    ret = ioctl(env->kvm_fd, KVM_CREATE_VCPU, env->cpu_index);

kvm_vcpu_ioctl

> +#else
>      ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
> +#endif

There is no chance to fix the Solaris KVM to do fd cloning in the kernel
and implement the same KVM_CREATE_VCPU ABI?

>      if (ret < 0) {
>          DPRINTF("kvm_create_vcpu failed\n");
>          goto err;
>      }
>  
> +#ifndef CONFIG_SOLARIS
>      env->kvm_fd = ret;
>      env->kvm_state = s;
> +#endif
>      env->kvm_vcpu_dirty = 1;
>  
>      mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
> @@ -1021,6 +1076,9 @@ int kvm_init(void)
>          ret = s->vmfd;
>          goto err;
>      }
> +#ifdef CONFIG_SOLARIS
> +    s->vmfd = s->fd;
> +#endif
>  
>      missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
>      if (!missing_cap) {
> @@ -1287,6 +1345,19 @@ int kvm_cpu_exec(CPUArchState *env)
>              DPRINTF("irq_window_open\n");
>              ret = EXCP_INTERRUPT;
>              break;
> +#ifdef CONFIG_SOLARIS
> +        /*
> +         * In the case of an external interrupt we can get a zero
> +         * return from the ioctl, with a KVM_EXIT_INTR. This doesn't
> +         * happen on linux
> +         *
> +         * Not entirely sure what to do here.

Fix the kernel?

> +         */
> +        case KVM_EXIT_INTR:
> +            DPRINTF("exit_intr (run_ret is %d)\n", run_ret);
> +            ret = EXCP_INTERRUPT;
> +            break;
> +#endif
>          case KVM_EXIT_SHUTDOWN:
>              DPRINTF("shutdown\n");
>              qemu_system_reset_request();
> @@ -1631,7 +1702,7 @@ int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
>  
>      sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
>  
> -    sigmask->len = 8;
> +    sigmask->len = sizeof(sigset_t);
>      memcpy(sigmask->sigset, sigset, sizeof(*sigset));
>      r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
>      g_free(sigmask);
> @@ -1641,6 +1712,7 @@ int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
>  
>  int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
>  {
> +#ifdef CONFIG_EVENTFD
>      int ret;
>      struct kvm_ioeventfd iofd;
>  
> @@ -1665,10 +1737,14 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign
>      }
>  
>      return 0;
> +#else
> +    return -ENOSYS;
> +#endif
>  }
>  
>  int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
>  {
> +#ifdef CONFIG_EVENTFD
>      struct kvm_ioeventfd kick = {
>          .datamatch = val,
>          .addr = addr,
> @@ -1688,6 +1764,9 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
>          return r;
>      }
>      return 0;
> +#else
> +    return -ENOSYS;
> +#endif
>  }
>  
>  int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
> diff --git a/kvm.h b/kvm.h
> index 330f17b..8960b4e 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -19,8 +19,23 @@
>  #include "qemu-queue.h"
>  
>  #ifdef CONFIG_KVM
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +/*
> + * it's a bit horrible to include these here, but the kvm_para.h include file
> + * isn't public with the illumos kvm implementation

Just provide a package of properly fixed kernel headers and let us carry
them in solaris-headers or so, analogously to linux-headers.

> + */
> +#define KVM_CPUID_SIGNATURE       0x40000000
> +#define KVM_CPUID_FEATURES        0x40000001
> +#define KVM_FEATURE_CLOCKSOURCE   0
> +#define KVM_FEATURE_NOP_IO_DELAY  1
> +#define KVM_FEATURE_MMU_OP        2
> +#define KVM_FEATURE_CLOCKSOURCE2  3
> +#define HYPERV_CPUID_MIN          0x40000005
> +#else
>  #include <linux/kvm.h>
>  #endif
> +#endif
>  
>  extern int kvm_allowed;
>  extern bool kvm_kernel_irqchip;

...

> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index e74a9e4..6d007cc 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -17,8 +17,12 @@
>  #include <sys/mman.h>
>  #include <sys/utsname.h>
>  
> +#ifdef __sun__
> +#include <sys/kvm.h>
> +#else
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#endif
>  
>  #include "qemu-common.h"
>  #include "sysemu.h"
> @@ -61,7 +65,9 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>  static bool has_msr_star;
>  static bool has_msr_hsave_pa;
>  static bool has_msr_tsc_deadline;
> +#ifdef KVM_CAP_ASYNC_PF
>  static bool has_msr_async_pf_en;
> +#endif

NACK. With proper kernel headers, all these KVM_CAP reintroductions
become obsolete again.

Note that KVM changes need to CC the kvm mailing list and will likely
flow via uq/master of qemu-kvm.git.

Jan
Lee Essen March 16, 2012, 1:46 p.m. UTC | #4
On 16 Mar 2012, at 13:14, Jan Kiszka wrote:

> On 2012-03-16 10:23, Lee Essen wrote:
>> +#ifdef __sun__
>> +#include <sys/kvm.h>
>> +#else
>> #include <linux/kvm.h>
>> #include <linux/kvm_para.h>
>> +#endif
> 
> As Paolo already said, this should somehow be centralised.

Yep, fair point. I'll address this one.

> Also, CONFIG_SOLARIS vs. __sun__: please use a consistent pattern.

Hmmm … I was trying to be consistent with the existing style :-) … see __linux__ and CONFIG_LINUX as well. I'll see what I can do to make this a bit tidier.

>> +#ifdef CONFIG_SOLARIS
>> +    for (p = (caddr_t)mem.userspace_addr;
>> +      p < (caddr_t)mem.userspace_addr + mem.memory_size;
>> +      p += PAGE_SIZE)
>> +        c = *p;
>> +#endif /* CONFIG_SOLARIS */
>> +
> 
> I bet gcc will like this write-only pattern and bark at you.
> 

It does indeed … this came from the original Joyent code, I must admit I did wonder whether gcc would optimise it away. I did consider adding something to stop gcc complaining, but I don't fully understand why this is necessary given the mlock() bit, so I thought it best to leave it alone.

Any suggestions?

>> +#else
>>     ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
>> +#endif
> 
> There is no chance to fix the Solaris KVM to do fd cloning in the kernel
> and implement the same KVM_CREATE_VCPU ABI?
> 

I will raise this with the joyent guys, but they are pretty switched on and I suspect there is a reason.

My concern with the "fix the kernel" comments is that it would exclude the use of the newer qemu on existing installations, however I do understand the desire to not fill the code with workarounds that live forever.

How about a "broken_solaris_kvm_abi" option to configure with a suitable set of defines wrapping the code?

>> 
>> #ifdef CONFIG_KVM
>> +#ifdef __sun__
>> +#include <sys/kvm.h>
>> +/*
>> + * it's a bit horrible to include these here, but the kvm_para.h include file
>> + * isn't public with the illumos kvm implementation
> 
> Just provide a package of properly fixed kernel headers and let us carry
> them in solaris-headers or so, analogously to linux-headers.
> 

Interestingly this is what I did originally but then thought it best to use the "supplied" headers, but actually thinking more about it, this does make much more sense.

Regards,

Lee.
Paolo Bonzini March 16, 2012, 4:28 p.m. UTC | #5
Il 16/03/2012 14:46, Lee Essen ha scritto:
> It does indeed … this came from the original Joyent code, I must
> admit I did wonder whether gcc would optimise it away. I did consider
> adding something to stop gcc complaining, but I don't fully
> understand why this is necessary given the mlock() bit, so I thought
> it best to leave it alone.

"(void) *(volatile char *)p" should work, but a comment would be nice.

Paolo
Lee Essen March 16, 2012, 5:15 p.m. UTC | #6
On 16 Mar 2012, at 16:28, Paolo Bonzini wrote:

> Il 16/03/2012 14:46, Lee Essen ha scritto:
>> It does indeed … this came from the original Joyent code, I must
>> admit I did wonder whether gcc would optimise it away. I did consider
>> adding something to stop gcc complaining, but I don't fully
>> understand why this is necessary given the mlock() bit, so I thought
>> it best to leave it alone.
> 
> "(void) *(volatile char *)p" should work, but a comment would be nice.
> 
> Paolo

This is interesting … just did a little testing …

With no optimisation the original code is OK, although it obviously comes with a compiler
warning.

With any optimisation at all the whole thing gets optimised away -- the existing illumos/smartos
build looks like it's built with debug set, so no optimisation.

Using volatile as above seems to do the trick nicely.

Thanks,

Lee.
Lee Essen March 16, 2012, 5:25 p.m. UTC | #7
On 16 Mar 2012, at 11:56, Paolo Bonzini wrote:

> Il 16/03/2012 10:23, Lee Essen ha scritto:
>> +    while (mlock(base, (nbytes = step * ps)) == -1) {
>> +        if (errno != EAGAIN) {
>> +            return -1;
>> +        }
>> +
>> +        if (waiting == 0) {
>> +            waiting = gethrtime();
> 
> Please use qemu_get_clock_ns(rt_clock) here.

ok. done in my upcoming revision.

>> +#ifndef __sun__
>> const floatx80 floatx80_default_nan =
>> make_floatx80(floatx80_default_nan_high,
>> 
>> floatx80_default_nan_low);
>> +#endif
> 
> Why is this needed.

This is actually an issue with -std=gnu99; I believe Andreas has a patch for this, so I will remove it from
my patch.

>> +#ifdef __sun__
>> +#include <sys/kvm.h>
>> +#else
>> #include <linux/kvm.h>
>> #include <linux/kvm_para.h>
>> +#endif
> 
> Perhaps you can put this in kvm.h instead, and just include that file:
> 
> #ifdef __sun__
> #include <sys/kvm.h>
> #else
> #include <linux/kvm.h>
> #endif
> #include <linux/kvm_para.h>
> 
> kvm_para.h should be independent of the host OS, hoping there's no
> conflict between Solaris and Linux headers.
> 

I've created a set of headers which are a mix of the original Illumos ones and the linux ones
and put them in solaris-headers. It all seems to work nicely and results in much cleaner code.

>> +#ifdef CONFIG_SOLARIS
>> +    for (p = (caddr_t)mem.userspace_addr;
>> +      p < (caddr_t)mem.userspace_addr + mem.memory_size;
>> +      p += PAGE_SIZE)
>> +        c = *p;
>> +#endif /* CONFIG_SOLARIS */
> 
> What is this needed for?

[discussed (and hopefully improved) in other email thread]

>> 
>> +    env->kvm_fd = ret;
>> +    env->kvm_state = kvm_state;
>> +
>> +    ret = ioctl(env->kvm_fd, KVM_CREATE_VCPU, env->cpu_index);
>> +#else
>>     ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
>> +#endif
>>     if (ret < 0) {
>>         DPRINTF("kvm_create_vcpu failed\n");
>>         goto err;
>>     }
>> 
>> +#ifndef CONFIG_SOLARIS
>>     env->kvm_fd = ret;
>>     env->kvm_state = s;
>> +#endif
> 
> env->kvm_state assignment need not be split, right?

Correct … but actually this raises another question … why create a separate pointer to kvm_state…

    KVMState *s = kvm_state;

… why not just use kvm_state throughout the function?

This seems to be a common approach in many of the functions in kvm-all.c … is there a reason?

>> 
>> +#ifdef CONFIG_EVENTFD
>>     int ret;
>>     struct kvm_ioeventfd iofd;
>> 
>> @@ -1665,10 +1737,14 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t
>> addr, uint32_t val, bool assign
>>     }
>> 
>>     return 0;
>> +#else
>> +    return -ENOSYS;
>> +#endif
> 
> The guard probably needs to be added also around ioeventfd definitions
> in virtio-pci.c.

With the full set of header files this doesn't seem to be needed anymore anyway.

> 
> Did you have any problem with IOV_MAX?  IIRC, it's quite low (16
> perhaps) in Solaris.
> 

Hmmm, yes IOV_MAX is 16, I've not seen any issues with this yet .. although I haven't
really looked … there do seem to be places not considering IOV_MAX, I'll have a
deeper look when I get a chance.

Cheers,

Lee.
Paolo Bonzini March 16, 2012, 6:15 p.m. UTC | #8
Il 16/03/2012 18:25, Lee Essen ha scritto:
> Correct … but actually this raises another question … why create a separate pointer to kvm_state…
> 
>     KVMState *s = kvm_state;
> 
> … why not just use kvm_state throughout the function?
> 
> This seems to be a common approach in many of the functions in kvm-all.c … is there a reason?

It's historical due to the way things were done in the qemu-kvm fork.
It was done this way in order to let qemu.git and qemu-kvm.git share a
bit more code, IIUC.

Paolo
Jan Kiszka March 17, 2012, 9 a.m. UTC | #9
On 2012-03-16 14:46, Lee Essen wrote:
> 
> On 16 Mar 2012, at 13:14, Jan Kiszka wrote:
> 
>> On 2012-03-16 10:23, Lee Essen wrote:
>>> +#ifdef __sun__
>>> +#include <sys/kvm.h>
>>> +#else
>>> #include <linux/kvm.h>
>>> #include <linux/kvm_para.h>
>>> +#endif
>>
>> As Paolo already said, this should somehow be centralised.
> 
> Yep, fair point. I'll address this one.
> 
>> Also, CONFIG_SOLARIS vs. __sun__: please use a consistent pattern.
> 
> Hmmm … I was trying to be consistent with the existing style :-) … see __linux__ and CONFIG_LINUX as well. I'll see what I can do to make this a bit tidier.

Maybe QEMU isn't consistent as well. :)

> 
>>> +#ifdef CONFIG_SOLARIS
>>> +    for (p = (caddr_t)mem.userspace_addr;
>>> +      p < (caddr_t)mem.userspace_addr + mem.memory_size;
>>> +      p += PAGE_SIZE)
>>> +        c = *p;
>>> +#endif /* CONFIG_SOLARIS */
>>> +
>>
>> I bet gcc will like this write-only pattern and bark at you.
>>
> 
> It does indeed … this came from the original Joyent code, I must admit I did wonder whether gcc would optimise it away. I did consider adding something to stop gcc complaining, but I don't fully understand why this is necessary given the mlock() bit, so I thought it best to leave it alone.
> 
> Any suggestions?

First of all: understand if and why this is needed. Talk to the Joyent
people, check if it works without it, and add a comment explaining why.
But please do not just dump code that may date back to early solaris-kvm
days and was possibly just a hack. This is upstream here and should
ideally carry only the cleaned-up versions (we are trying to achieve this
during the qemu-kvm -> qemu upstreaming as well).

> 
>>> +#else
>>>     ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
>>> +#endif
>>
>> There is no chance to fix the Solaris KVM to do fd cloning in the kernel
>> and implement the same KVM_CREATE_VCPU ABI?
>>
> 
> I will raise this with the joyent guys, but they are pretty switched on and I suspect there is a reason.
> 
> My concern with the "fix the kernel" comments is that it would exclude the use of the newer qemu on existing installations, however I do understand the desire to not fill the code with workarounds that live forever.
> 
> How about a "broken_solaris_kvm_abi" option to configure with a suitable set of defines wrapping the code?

Well, if there are working versions of solaris-kvm out there that are
considered stable and expose this ABI, we probably want to support this
anyway. If the released stuff is experimental only anyway and can be
changed before it becomes stable, then let's go that route instead.

> 
>>>
>>> #ifdef CONFIG_KVM
>>> +#ifdef __sun__
>>> +#include <sys/kvm.h>
>>> +/*
>>> + * it's a bit horrible to include these here, but the kvm_para.h include file
>>> + * isn't public with the illumos kvm implementation
>>
>> Just provide a package of properly fixed kernel headers and let us carry
>> them in solaris-headers or so, analogously to linux-headers.
>>
> 
> Interestingly this is what I did originally but then thought it best to use the "supplied" headers, but actually thinking more about it, this does make much more sense.

Pushing fixed-up headers to qemu should still be only a temporary
solution. Fixing the headers upstream so that future solaris-kvm
versions provide them properly remains a worthwhile goal nevertheless.

Jan
diff mbox

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 226b01d..c2a440a 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -373,12 +373,12 @@  else
 trace.h: trace.h-timestamp
 endif
 trace.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool --$(TRACE_BACKEND) -h < $< > $@,"  GEN   trace.h")
+       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool --$(TRACE_BACKEND) -h < $< > $@,"  GEN   trace.h")
        @cmp -s $@ trace.h || cp $@ trace.h
 
 trace.c: trace.c-timestamp
 trace.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool --$(TRACE_BACKEND) -c < $< > $@,"  GEN   trace.c")
+       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool --$(TRACE_BACKEND) -c < $< > $@,"  GEN   trace.c")
        @cmp -s $@ trace.c || cp $@ trace.c
 
 trace.o: trace.c $(GENERATED_HEADERS)
@@ -391,7 +391,7 @@  trace-dtrace.h: trace-dtrace.dtrace
 # rule file. So we use '.dtrace' instead
 trace-dtrace.dtrace: trace-dtrace.dtrace-timestamp
 trace-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool --$(TRACE_BACKEND) -d < $< > $@,"  GEN   trace-dtrace.dtrace")
+       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool --$(TRACE_BACKEND) -d < $< > $@,"  GEN   trace-dtrace.dtrace")
        @cmp -s $@ trace-dtrace.dtrace || cp $@ trace-dtrace.dtrace
 
 trace-dtrace.o: trace-dtrace.dtrace $(GENERATED_HEADERS)
diff --git a/Makefile.target b/Makefile.target
index eb25941..d32afc9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -59,7 +59,7 @@  TARGET_TYPE=system
 endif
 
 $(QEMU_PROG).stp:
-       $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool \
+       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/tracetool \
                --$(TRACE_BACKEND) \
                --binary $(bindir)/$(QEMU_PROG) \
                --target-arch $(TARGET_ARCH) \
@@ -443,10 +443,10 @@  gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
        $(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"  GEN   $(TARGET_DIR)$@")
 
 hmp-commands.h: $(SRC_PATH)/hmp-commands.hx
-       $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")
+       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")
 
 qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx
-       $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")
+       $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")
 
 clean:
        rm -f *.o *.a *~ $(PROGS) nwfpe/*.o fpu/*.o
diff --git a/configure b/configure
index afe7395..601f77a 100755
--- a/configure
+++ b/configure
@@ -101,6 +101,7 @@  audio_win_int=""
 cc_i386=i386-pc-linux-gnu-gcc
 libs_qga=""
 debug_info="yes"
+shell="sh"
 
 target_list=""
 
@@ -442,6 +443,7 @@  SunOS)
   # have to select again, because `uname -m` returns i86pc
   # even on an x86_64 box.
   solariscpu=`isainfo -k`
+  shell="bash"
   if test "${solariscpu}" = "amd64" ; then
     cpu="x86_64"
   fi
@@ -471,6 +473,7 @@  SunOS)
   QEMU_CFLAGS="-D__EXTENSIONS__ $QEMU_CFLAGS"
   QEMU_CFLAGS="-std=gnu99 $QEMU_CFLAGS"
   LIBS="-lsocket -lnsl -lresolv $LIBS"
+  libs_qga="-lsocket -lxnet $lib_qga"
 ;;
 AIX)
   aix="yes"
@@ -1097,7 +1100,7 @@  echo "  --disable-docs           disable documentation build"
 echo "  --disable-vhost-net      disable vhost-net acceleration support"
 echo "  --enable-vhost-net       enable vhost-net acceleration support"
 echo "  --enable-trace-backend=B Set trace backend"
-echo "                           Available backends:" $("$source_path"/scripts/tracetool --list-backends)
+echo "                           Available backends:" $($shell "$source_path"/scripts/tracetool --list-backends)
 echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
 echo "                           Default:trace-<pid>"
 echo "  --disable-spice          disable spice"
@@ -2654,7 +2657,7 @@  fi
 ##########################################
 # check if trace backend exists
 
-sh "$source_path/scripts/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null
+$shell "$source_path/scripts/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null
 if test "$?" -ne 0 ; then
   echo
   echo "Error: invalid trace backend"
@@ -3358,6 +3361,7 @@  echo "LIBS+=$LIBS" >> $config_host_mak
 echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
 echo "EXESUF=$EXESUF" >> $config_host_mak
 echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
+echo "SHELL=$shell" >> $config_host_mak
 
 # generate list of library paths for linker script
 
diff --git a/cpus.c b/cpus.c
index 25ba621..7a32ee6 100644
--- a/cpus.c
+++ b/cpus.c
@@ -455,7 +455,7 @@  static void cpu_signal(int sig)
     exit_request = 1;
 }
 
-#ifdef CONFIG_LINUX
+#if defined(CONFIG_LINUX) || defined(CONFIG_SOLARIS)
 static void sigbus_reraise(void)
 {
     sigset_t set;
@@ -491,7 +491,9 @@  static void qemu_init_sigbus(void)
     action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
     sigaction(SIGBUS, &action, NULL);
 
+#ifndef __sun__
     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
+#endif
 }
 
 static void qemu_kvm_eat_signals(CPUArchState *env)
diff --git a/exec.c b/exec.c
index 8fd50a1..57e2890 100644
--- a/exec.c
+++ b/exec.c
@@ -2853,6 +2853,79 @@  static ram_addr_t last_ram_offset(void)
     return last;
 }
 
+#ifdef CONFIG_SOLARIS
+static int
+qemu_mlock(caddr_t base, ram_addr_t size)
+{
+  /* LEE - todo */
+  qemu_real_host_page_size = getpagesize();
+
+  ram_addr_t ps = qemu_real_host_page_size, nbytes, locked = 0;
+  ram_addr_t remaining = size / ps;
+  ram_addr_t step = remaining;
+  timespec_t tv;
+  hrtime_t waiting = 0, threshold;
+
+  tv.tv_sec = 0;
+  tv.tv_nsec = NANOSEC / MILLISEC;
+  threshold = 10 * (hrtime_t)NANOSEC;
+
+  /*
+   * We cannot lock memory with a single call to mlock() because it
+   * won't result in sustained memory pressure:  if there is a
+   * substantial amount of kernel memory in use electively (e.g., for
+   * the ARC) a single call to mlock() may fail where sustained memory
+   * pressure would succeed.  We therefore start by trying to lock the
+   * entire region, adjusting our size down as we fail with EAGAIN; once
+   * we successfully lock a portion of the region, we advance to the
+   * unlocked portion of the region (if any remains) and increase the
+   * size.  Note that this will continue to hoard memory until it locks
+   * what it needs -- it won't give up.  To help debug situations in
+   * which one has mistakenly overprovisioned, we emit a message every
+   * ten seconds with no forward progress.
+   */
+  while (remaining) {
+    if (step > remaining) {
+        step = remaining;
+    }
+
+    while (mlock(base, (nbytes = step * ps)) == -1) {
+        if (errno != EAGAIN) {
+            return -1;
+        }
+
+        if (waiting == 0) {
+            waiting = gethrtime();
+        }
+
+        if (step > 1) {
+            step >>= 1;
+            continue;
+        }
+
+        (void) nanosleep(&tv, NULL);
+
+        if (gethrtime() - waiting > threshold) {
+            (void) fprintf(stderr, "qemu_mlock: have only "
+              "locked %ld of %ld bytes; still "
+              "trying...\n", locked, size);
+            waiting = 0;
+        }
+    }
+
+    waiting = 0;
+    base += nbytes;
+    locked += nbytes;
+    remaining -= step;
+
+    step <<= 1;
+  }
+
+  return 0;
+}
+#endif
+
+
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
 {
     RAMBlock *new_block, *block;
@@ -2931,6 +3004,21 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                 new_block->host = qemu_vmalloc(size);
             }
 #endif
+
+#ifdef CONFIG_SOLARIS
+            /*
+              * XXX For right now, we'll lock down the memory.  This needs
+              * to be revisited if we implement mmu notifiers in the kernel.
+              * Note also that pages are touched in kvm_set_user_memory_region.
+              */
+            if (qemu_mlock((caddr_t)new_block->host, size) != 0) {
+                fprintf(stderr, "qemu_ram_alloc: Could not lock %ld memory,"
+                    " errno = %d\n",
+                    size, errno);
+                exit(1);
+            }
+#endif /*CONFIG_SOLARIS*/
+
             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
         }
     }
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index c5e2dab..3889041 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -89,8 +89,10 @@  const float64 float64_default_nan = const_float64(LIT64( 0xFFF8000000000000 ));
 #define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
 #endif
 
+#ifndef __sun__
 const floatx80 floatx80_default_nan = make_floatx80(floatx80_default_nan_high,
                                                     floatx80_default_nan_low);
+#endif
 
 /*----------------------------------------------------------------------------
 | The pattern for a default generated quadruple-precision NaN.  The `high' and
@@ -104,8 +106,10 @@  const floatx80 floatx80_default_nan = make_floatx80(floatx80_default_nan_high,
 #define float128_default_nan_low  LIT64( 0x0000000000000000 )
 #endif
 
+#ifndef __sun__
 const float128 float128_default_nan = make_float128(float128_default_nan_high,
                                                     float128_default_nan_low);
+#endif
 
 /*----------------------------------------------------------------------------
 | Raises the exceptions specified by `flags'.  Floating-point traps can be
diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c
index 446bd62..3fd5e1e 100644
--- a/hw/kvm/clock.c
+++ b/hw/kvm/clock.c
@@ -19,8 +19,12 @@ 
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
+#ifdef __sun__
+#include <sys/kvm.h>
+#else
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
+#endif
 
 typedef struct KVMClockState {
     SysBusDevice busdev;
diff --git a/kvm-all.c b/kvm-all.c
index 42e5e23..27f3177 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -18,7 +18,11 @@ 
 #include <sys/mman.h>
 #include <stdarg.h>
 
+#ifdef __sun__
+#include <sys/kvm.h>
+#else
 #include <linux/kvm.h>
+#endif
 
 #include "qemu-common.h"
 #include "qemu-barrier.h"
@@ -176,12 +180,23 @@  int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
 static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
 {
     struct kvm_userspace_memory_region mem;
+#ifdef CONFIG_SOLARIS
+    caddr_t p;
+    char c;
+#endif
 
     mem.slot = slot->slot;
     mem.guest_phys_addr = slot->start_addr;
     mem.memory_size = slot->memory_size;
     mem.userspace_addr = (unsigned long)slot->ram;
     mem.flags = slot->flags;
+#ifdef CONFIG_SOLARIS
+    for (p = (caddr_t)mem.userspace_addr;
+      p < (caddr_t)mem.userspace_addr + mem.memory_size;
+      p += PAGE_SIZE)
+        c = *p;
+#endif /* CONFIG_SOLARIS */
+
     if (s->migration_log) {
         mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
     }
@@ -200,6 +215,31 @@  int kvm_pit_in_kernel(void)
     return kvm_state->pit_in_kernel;
 }
 
+#ifdef CONFIG_SOLARIS
+/* Open /dev/kvm and KVM_CLONE it with s->fd's st_rdev; returns fd or -errno. */
+static int kvm_vm_clone(KVMState *s)
+{
+    struct stat stat;
+    int fd, saved_errno;
+
+    if (fstat(s->fd, &stat) != 0) {
+        return -errno;
+    }
+    fd = qemu_open("/dev/kvm", O_RDWR);
+    if (fd == -1) {
+        return -errno;
+    }
+    if (ioctl(fd, KVM_CLONE, stat.st_rdev) == -1) {
+        /* close() may overwrite errno, so capture the ioctl error first. */
+        saved_errno = errno;
+        close(fd);
+        return -saved_errno;
+    }
+
+    return fd;
+}
+#endif
+
 int kvm_init_vcpu(CPUArchState *env)
 {
     KVMState *s = kvm_state;
@@ -208,14 +248,29 @@  int kvm_init_vcpu(CPUArchState *env)
 
     DPRINTF("kvm_init_vcpu\n");
 
+#ifdef CONFIG_SOLARIS
+    ret = kvm_vm_clone(kvm_state);
+
+    if (ret < 0) {
+        fprintf(stderr, "kvm_init_vcpu could not clone fd: %m\n");
+        goto err;
+    }
+    env->kvm_fd = ret;
+    env->kvm_state = kvm_state;
+
+    ret = ioctl(env->kvm_fd, KVM_CREATE_VCPU, env->cpu_index);
+#else
     ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
+#endif
     if (ret < 0) {
         DPRINTF("kvm_create_vcpu failed\n");
         goto err;
     }
 
+#ifndef CONFIG_SOLARIS
     env->kvm_fd = ret;
     env->kvm_state = s;
+#endif
     env->kvm_vcpu_dirty = 1;
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
@@ -1021,6 +1076,9 @@  int kvm_init(void)
         ret = s->vmfd;
         goto err;
     }
+#ifdef CONFIG_SOLARIS
+    s->vmfd = s->fd;
+#endif
 
     missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
     if (!missing_cap) {
@@ -1287,6 +1345,19 @@  int kvm_cpu_exec(CPUArchState *env)
             DPRINTF("irq_window_open\n");
             ret = EXCP_INTERRUPT;
             break;
+#ifdef CONFIG_SOLARIS
+        /*
+         * In the case of an external interrupt we can get a zero
+         * return from the ioctl, with a KVM_EXIT_INTR. This doesn't
+         * happen on linux
+         *
+         * Not entirely sure what to do here.
+         */
+        case KVM_EXIT_INTR:
+            DPRINTF("exit_intr (run_ret is %d)\n", run_ret);
+            ret = EXCP_INTERRUPT;
+            break;
+#endif
         case KVM_EXIT_SHUTDOWN:
             DPRINTF("shutdown\n");
             qemu_system_reset_request();
@@ -1631,7 +1702,13 @@  int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
 
     sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
 
+#ifdef CONFIG_SOLARIS
+    /* illumos port: pass the size of the userland sigset_t. */
+    sigmask->len = sizeof(sigset_t);
+#else
+    /* Keep 8 here: Linux KVM checks len against the kernel sigset size. */
     sigmask->len = 8;
+#endif
     memcpy(sigmask->sigset, sigset, sizeof(*sigset));
     r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
     g_free(sigmask);
@@ -1641,6 +1712,7 @@  int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
 
 int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
 {
+#ifdef CONFIG_EVENTFD
     int ret;
     struct kvm_ioeventfd iofd;
 
@@ -1665,10 +1737,14 @@  int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign
     }
 
     return 0;
+#else
+    return -ENOSYS;
+#endif
 }
 
 int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
 {
+#ifdef CONFIG_EVENTFD
     struct kvm_ioeventfd kick = {
         .datamatch = val,
         .addr = addr,
@@ -1688,6 +1764,9 @@  int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
         return r;
     }
     return 0;
+#else
+    return -ENOSYS;
+#endif
 }
 
 int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
diff --git a/kvm.h b/kvm.h
index 330f17b..8960b4e 100644
--- a/kvm.h
+++ b/kvm.h
@@ -19,8 +19,23 @@ 
 #include "qemu-queue.h"
 
 #ifdef CONFIG_KVM
+#ifdef __sun__
+#include <sys/kvm.h>
+/*
+ * it's a bit horrible to include these here, but the kvm_para.h include file
+ * isn't public with the illumos kvm implementation
+ */
+#define KVM_CPUID_SIGNATURE       0x40000000
+#define KVM_CPUID_FEATURES        0x40000001
+#define KVM_FEATURE_CLOCKSOURCE   0
+#define KVM_FEATURE_NOP_IO_DELAY  1
+#define KVM_FEATURE_MMU_OP        2
+#define KVM_FEATURE_CLOCKSOURCE2  3
+#define HYPERV_CPUID_MIN          0x40000005
+#else
 #include <linux/kvm.h>
 #endif
+#endif
 
 extern int kvm_allowed;
 extern bool kvm_kernel_irqchip;
diff --git a/qemu-timer.c b/qemu-timer.c
index d7f56e5..f35ad4e 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -77,7 +77,7 @@  struct qemu_alarm_timer {
     int (*start)(struct qemu_alarm_timer *t);
     void (*stop)(struct qemu_alarm_timer *t);
     void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
-#if defined(__linux__)
+#if defined(__linux__) || defined(__sun__)
     int fd;
     timer_t timer;
 #elif defined(_WIN32)
@@ -165,7 +165,7 @@  static int unix_start_timer(struct qemu_alarm_timer *t);
 static void unix_stop_timer(struct qemu_alarm_timer *t);
 static void unix_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
 
-#ifdef __linux__
+#if defined(__linux__) || defined(__sun__)
 
 static int dynticks_start_timer(struct qemu_alarm_timer *t);
 static void dynticks_stop_timer(struct qemu_alarm_timer *t);
@@ -177,7 +177,7 @@  static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
 
 static struct qemu_alarm_timer alarm_timers[] = {
 #ifndef _WIN32
-#ifdef __linux__
+#if defined(__linux__) || defined(__sun__)
     {"dynticks", dynticks_start_timer,
      dynticks_stop_timer, dynticks_rearm_timer},
 #endif
@@ -502,7 +502,7 @@  static void host_alarm_handler(int host_signum)
     }
 }
 
-#if defined(__linux__)
+#if defined(__linux__) || defined(__sun__)
 
 #include "compatfd.h"
 
@@ -585,7 +585,7 @@  static void dynticks_rearm_timer(struct qemu_alarm_timer *t,
     }
 }
 
-#endif /* defined(__linux__) */
+#endif /* defined(__linux__) || defined(__sun__) */
 
 #if !defined(_WIN32)
 
diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index 40f7658..86245c1 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -3,6 +3,10 @@ 
 #include "qemu_socket.h"
 #include "qga/channel.h"
 
+#ifdef CONFIG_SOLARIS
+#include <sys/stropts.h>
+#endif
+
 #define GA_CHANNEL_BAUDRATE_DEFAULT B38400 /* for isa-serial channels */
 
 struct GAChannel {
@@ -123,7 +127,19 @@  static gboolean ga_channel_open(GAChannel *c, const gchar *path, GAChannelMethod
 
     switch (c->method) {
     case GA_CHANNEL_VIRTIO_SERIAL: {
+#ifdef CONFIG_SOLARIS
+        int fd = qemu_open(path, O_RDWR | O_NONBLOCK);
+        if (fd == -1) {
+            g_critical("error opening channel: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        if (ioctl(fd, I_SETSIG, S_OUTPUT | S_INPUT | S_HIPRI) < 0) {
+            g_critical("error with setsig on channel: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+#else
         int fd = qemu_open(path, O_RDWR | O_NONBLOCK | O_ASYNC);
+#endif
         if (fd == -1) {
             g_critical("error opening channel: %s", strerror(errno));
             exit(EXIT_FAILURE);
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 7b2be2f..67531aa 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -35,6 +35,11 @@ 
 #include "qemu-queue.h"
 #include "host-utils.h"
 
+#if defined(__sun__)
+#include <sys/sockio.h>
+extern char **environ;
+#endif
+
 static void reopen_fd_to_null(int fd)
 {
     int nullfd;
@@ -807,7 +812,11 @@  GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
                 goto error;
             }
 
+#if defined(__sun__)
+            mac_addr = (unsigned char *) &ifr.ifr_enaddr;
+#else
             mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
+#endif
 
             if (asprintf(&info->value->hardware_address,
                          "%02x:%02x:%02x:%02x:%02x:%02x",
diff --git a/target-i386/hyperv.h b/target-i386/hyperv.h
index bacb1d4..9c08750 100644
--- a/target-i386/hyperv.h
+++ b/target-i386/hyperv.h
@@ -15,8 +15,12 @@ 
 
 #include "qemu-common.h"
 #ifdef CONFIG_KVM
+#ifdef __sun__
+#include <hyperv.h>
+#else
 #include <asm/hyperv.h>
 #endif
+#endif
 
 #ifndef HYPERV_SPINLOCK_NEVER_RETRY
 #define HYPERV_SPINLOCK_NEVER_RETRY             0xFFFFFFFF
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index e74a9e4..6d007cc 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -17,8 +17,12 @@ 
 #include <sys/mman.h>
 #include <sys/utsname.h>
 
+#ifdef __sun__
+#include <sys/kvm.h>
+#else
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
+#endif
 
 #include "qemu-common.h"
 #include "sysemu.h"
@@ -61,7 +65,9 @@  const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 static bool has_msr_star;
 static bool has_msr_hsave_pa;
 static bool has_msr_tsc_deadline;
+#ifdef KVM_CAP_ASYNC_PF
 static bool has_msr_async_pf_en;
+#endif
 static bool has_msr_misc_enable;
 static int lm_capable_kernel;
 
@@ -97,7 +103,9 @@  struct kvm_para_features {
     { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
     { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
     { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
+#ifdef KVM_CAP_ASYNC_PF
     { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
+#endif
     { -1, -1 }
 };
 
@@ -442,7 +450,9 @@  int kvm_arch_init_vcpu(CPUX86State *env)
         c->edx = signature[2];
     }
 
+#ifdef KVM_CAP_ASYNC_PF
     has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
+#endif
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
@@ -561,6 +571,7 @@  int kvm_arch_init_vcpu(CPUX86State *env)
         return r;
     }
 
+#ifdef KVM_CAP_TSC_CONTROL
     r = kvm_check_extension(env->kvm_state, KVM_CAP_TSC_CONTROL);
     if (r && env->tsc_khz) {
         r = kvm_vcpu_ioctl(env, KVM_SET_TSC_KHZ, env->tsc_khz);
@@ -569,10 +580,12 @@  int kvm_arch_init_vcpu(CPUX86State *env)
             return r;
         }
     }
-
+#endif
+#ifdef KVM_CAP_XSAVE
     if (kvm_has_xsave()) {
         env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
     }
+#endif
 
     return 0;
 }
@@ -759,7 +772,7 @@  static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
                  (rhs->avl * DESC_AVL_MASK);
 }
 
-static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
+static void kvm_getput_reg(uint64_t *kvm_reg, target_ulong *qemu_reg, int set)
 {
     if (set) {
         *kvm_reg = *qemu_reg;
@@ -841,6 +854,7 @@  static int kvm_put_fpu(CPUX86State *env)
 #define XSAVE_XSTATE_BV   128
 #define XSAVE_YMMH_SPACE  144
 
+#ifdef KVM_CAP_XSAVE
 static int kvm_put_xsave(CPUX86State *env)
 {
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
@@ -874,7 +888,9 @@  static int kvm_put_xsave(CPUX86State *env)
     r = kvm_vcpu_ioctl(env, KVM_SET_XSAVE, xsave);
     return r;
 }
+#endif
 
+#ifdef KVM_CAP_XCRS /* must match the guard on the kvm_put_xcrs() call */
 static int kvm_put_xcrs(CPUX86State *env)
 {
     struct kvm_xcrs xcrs;
@@ -889,6 +905,7 @@  static int kvm_put_xcrs(CPUX86State *env)
     xcrs.xcrs[0].value = env->xcr0;
     return kvm_vcpu_ioctl(env, KVM_SET_XCRS, &xcrs);
 }
+#endif
 
 static int kvm_put_sregs(CPUX86State *env)
 {
@@ -1000,10 +1017,12 @@  static int kvm_put_msrs(CPUX86State *env, int level)
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                           env->system_time_msr);
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
+#ifdef KVM_CAP_ASYNC_PF
         if (has_msr_async_pf_en) {
             kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN,
                               env->async_pf_en_msr);
         }
+#endif
         if (hyperv_hypercall_available()) {
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
@@ -1055,6 +1074,7 @@  static int kvm_get_fpu(CPUX86State *env)
     return 0;
 }
 
+#ifdef KVM_CAP_XSAVE
 static int kvm_get_xsave(CPUX86State *env)
 {
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
@@ -1092,7 +1112,9 @@  static int kvm_get_xsave(CPUX86State *env)
             sizeof env->ymmh_regs);
     return 0;
 }
+#endif
 
+#ifdef KVM_CAP_XCRS
 static int kvm_get_xcrs(CPUX86State *env)
 {
     int i, ret;
@@ -1116,6 +1138,7 @@  static int kvm_get_xcrs(CPUX86State *env)
     }
     return 0;
 }
+#endif
 
 static int kvm_get_sregs(CPUX86State *env)
 {
@@ -1243,9 +1266,11 @@  static int kvm_get_msrs(CPUX86State *env)
 #endif
     msrs[n++].index = MSR_KVM_SYSTEM_TIME;
     msrs[n++].index = MSR_KVM_WALL_CLOCK;
+#ifdef KVM_CAP_ASYNC_PF
     if (has_msr_async_pf_en) {
         msrs[n++].index = MSR_KVM_ASYNC_PF_EN;
     }
+#endif
 
     if (env->mcg_cap) {
         msrs[n++].index = MSR_MCG_STATUS;
@@ -1322,9 +1347,11 @@  static int kvm_get_msrs(CPUX86State *env)
                 env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
             }
             break;
+#ifdef KVM_CAP_ASYNC_PF
         case MSR_KVM_ASYNC_PF_EN:
             env->async_pf_en_msr = msrs[i].data;
             break;
+#endif
         }
     }
 
@@ -1482,6 +1509,7 @@  static int kvm_guest_debug_workarounds(CPUX86State *env)
     return ret;
 }
 
+#ifdef KVM_CAP_DEBUGREGS
 static int kvm_put_debugregs(CPUX86State *env)
 {
     struct kvm_debugregs dbgregs;
@@ -1522,6 +1550,7 @@  static int kvm_get_debugregs(CPUX86State *env)
 
     return 0;
 }
+#endif
 
 int kvm_arch_put_registers(CPUX86State *env, int level)
 {
@@ -1533,14 +1562,20 @@  int kvm_arch_put_registers(CPUX86State *env, int level)
     if (ret < 0) {
         return ret;
     }
+#ifdef KVM_CAP_XSAVE
     ret = kvm_put_xsave(env);
+#else
+    ret = kvm_put_fpu(env);
+#endif
     if (ret < 0) {
         return ret;
     }
+#ifdef KVM_CAP_XCRS
     ret = kvm_put_xcrs(env);
     if (ret < 0) {
         return ret;
     }
+#endif
     ret = kvm_put_sregs(env);
     if (ret < 0) {
         return ret;
@@ -1568,10 +1603,12 @@  int kvm_arch_put_registers(CPUX86State *env, int level)
     if (ret < 0) {
         return ret;
     }
+#ifdef KVM_CAP_DEBUGREGS
     ret = kvm_put_debugregs(env);
     if (ret < 0) {
         return ret;
     }
+#endif
     /* must be last */
     ret = kvm_guest_debug_workarounds(env);
     if (ret < 0) {
@@ -1590,14 +1627,20 @@  int kvm_arch_get_registers(CPUX86State *env)
     if (ret < 0) {
         return ret;
     }
+#ifdef KVM_CAP_XSAVE
     ret = kvm_get_xsave(env);
+#else
+    ret = kvm_get_fpu(env);
+#endif
     if (ret < 0) {
         return ret;
     }
+#ifdef KVM_CAP_XCRS
     ret = kvm_get_xcrs(env);
     if (ret < 0) {
         return ret;
     }
+#endif
     ret = kvm_get_sregs(env);
     if (ret < 0) {
         return ret;
@@ -1618,10 +1661,12 @@  int kvm_arch_get_registers(CPUX86State *env)
     if (ret < 0) {
         return ret;
     }
+#ifdef KVM_CAP_DEBUGREGS
     ret = kvm_get_debugregs(env);
     if (ret < 0) {
         return ret;
     }
+#endif
     return 0;
 }
 
@@ -1770,6 +1815,7 @@  static int kvm_handle_tpr_access(CPUX86State *env)
     return 1;
 }
 
+#ifdef KVM_CAP_SET_GUEST_DEBUG
 int kvm_arch_insert_sw_breakpoint(CPUX86State *env, struct kvm_sw_breakpoint *bp)
 {
     static const uint8_t int3 = 0xcc;
@@ -1950,6 +1996,7 @@  void kvm_arch_update_guest_debug(CPUX86State *env, struct kvm_guest_debug *dbg)
         }
     }
 }
+#endif /* KVM_CAP_SET_GUEST_DEBUG */
 
 static bool host_supports_vmx(void)
 {
@@ -1999,10 +2046,12 @@  int kvm_arch_handle_exit(CPUX86State *env, struct kvm_run *run)
                 run->ex.exception, run->ex.error_code);
         ret = -1;
         break;
+#ifdef KVM_CAP_SET_GUEST_DEBUG
     case KVM_EXIT_DEBUG:
         DPRINTF("kvm_exit_debug\n");
         ret = kvm_handle_debug(&run->debug.arch);
         break;
+#endif
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;