
[04/17] xen: Add essential and required interface headers

Message ID 20200701162959.9814-5-vicooodin@gmail.com
State Superseded
Delegated to: Tom Rini
Series Add new board: Xen guest for ARM64

Commit Message

Nastya Vicodin July 1, 2020, 4:29 p.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Add only the essential and required Xen interface headers, taken from
the stable Linux kernel tree (stable/linux-5.7.y) at commit
66dfe45221605e11f38a0bf5eb2ee808cea7cfe7.

These are better suited for U-Boot than the original headers
from Xen, as they are stripped-down versions of the same.

At the same time, use the public protocol headers from Xen RELEASE-4.13.1,
at commit 6278553325a9f76d37811923221b21db3882e017,
as those have more comments in them.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Signed-off-by: Anastasiia Lukianenko <anastasiia_lukianenko@epam.com>
---
 include/xen/arm/interface.h           |  88 ++++
 include/xen/interface/event_channel.h | 281 ++++++++++
 include/xen/interface/grant_table.h   | 582 +++++++++++++++++++++
 include/xen/interface/hvm/hvm_op.h    |  69 +++
 include/xen/interface/hvm/params.h    | 127 +++++
 include/xen/interface/io/blkif.h      | 726 ++++++++++++++++++++++++++
 include/xen/interface/io/console.h    |  56 ++
 include/xen/interface/io/protocols.h  |  42 ++
 include/xen/interface/io/ring.h       | 479 +++++++++++++++++
 include/xen/interface/io/xenbus.h     |  81 +++
 include/xen/interface/io/xs_wire.h    | 151 ++++++
 include/xen/interface/memory.h        | 332 ++++++++++++
 include/xen/interface/sched.h         | 188 +++++++
 include/xen/interface/xen.h           | 225 ++++++++
 14 files changed, 3427 insertions(+)
 create mode 100644 include/xen/arm/interface.h
 create mode 100644 include/xen/interface/event_channel.h
 create mode 100644 include/xen/interface/grant_table.h
 create mode 100644 include/xen/interface/hvm/hvm_op.h
 create mode 100644 include/xen/interface/hvm/params.h
 create mode 100644 include/xen/interface/io/blkif.h
 create mode 100644 include/xen/interface/io/console.h
 create mode 100644 include/xen/interface/io/protocols.h
 create mode 100644 include/xen/interface/io/ring.h
 create mode 100644 include/xen/interface/io/xenbus.h
 create mode 100644 include/xen/interface/io/xs_wire.h
 create mode 100644 include/xen/interface/memory.h
 create mode 100644 include/xen/interface/sched.h
 create mode 100644 include/xen/interface/xen.h

Comments

Peng Fan July 2, 2020, 1:30 a.m. UTC | #1
> Subject: [PATCH 04/17] xen: Add essential and required interface headers
> 
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> Add only the essential and required Xen interface headers, taken from
> the stable Linux kernel tree (stable/linux-5.7.y) at commit
> 66dfe45221605e11f38a0bf5eb2ee808cea7cfe7.

Please use commit <12+> ("commit header")

> 
> These are better suited for U-Boot than the original headers
> from Xen, as they are stripped-down versions of the same.
> 
> At the same time, use the public protocol headers from Xen RELEASE-4.13.1,
> at commit 6278553325a9f76d37811923221b21db3882e017,

Please use commit <12+> ("commit header")

Then:

Acked-by: Peng Fan <peng.fan@nxp.com>

> as those have more comments in them.
> 
> Signed-off-by: Oleksandr Andrushchenko
> <oleksandr_andrushchenko@epam.com>
> Signed-off-by: Anastasiia Lukianenko <anastasiia_lukianenko@epam.com>
> ---
>  include/xen/arm/interface.h           |  88 ++++
>  include/xen/interface/event_channel.h | 281 ++++++++++
>  include/xen/interface/grant_table.h   | 582 +++++++++++++++++++++
>  include/xen/interface/hvm/hvm_op.h    |  69 +++
>  include/xen/interface/hvm/params.h    | 127 +++++
>  include/xen/interface/io/blkif.h      | 726 ++++++++++++++++++++++++++
>  include/xen/interface/io/console.h    |  56 ++
>  include/xen/interface/io/protocols.h  |  42 ++
>  include/xen/interface/io/ring.h       | 479 +++++++++++++++++
>  include/xen/interface/io/xenbus.h     |  81 +++
>  include/xen/interface/io/xs_wire.h    | 151 ++++++
>  include/xen/interface/memory.h        | 332 ++++++++++++
>  include/xen/interface/sched.h         | 188 +++++++
>  include/xen/interface/xen.h           | 225 ++++++++
>  14 files changed, 3427 insertions(+)
>  create mode 100644 include/xen/arm/interface.h
>  create mode 100644 include/xen/interface/event_channel.h
>  create mode 100644 include/xen/interface/grant_table.h
>  create mode 100644 include/xen/interface/hvm/hvm_op.h
>  create mode 100644 include/xen/interface/hvm/params.h
>  create mode 100644 include/xen/interface/io/blkif.h
>  create mode 100644 include/xen/interface/io/console.h
>  create mode 100644 include/xen/interface/io/protocols.h
>  create mode 100644 include/xen/interface/io/ring.h
>  create mode 100644 include/xen/interface/io/xenbus.h
>  create mode 100644 include/xen/interface/io/xs_wire.h
>  create mode 100644 include/xen/interface/memory.h
>  create mode 100644 include/xen/interface/sched.h
>  create mode 100644 include/xen/interface/xen.h
> 
> diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h
> new file mode 100644
> index 0000000000..79d5ae8563
> --- /dev/null
> +++ b/include/xen/arm/interface.h
> @@ -0,0 +1,88 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/******************************************************************************
> + * Guest OS interface to ARM Xen.
> + *
> + * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
> + */
> +
> +#ifndef _ASM_ARM_XEN_INTERFACE_H
> +#define _ASM_ARM_XEN_INTERFACE_H
> +
> +#ifndef __ASSEMBLY__
> +#include <linux/types.h>
> +#endif
> +
> +#define uint64_aligned_t u64 __attribute__((aligned(8)))
> +
> +#define __DEFINE_GUEST_HANDLE(name, type) \
> +	typedef struct { union { type *p; uint64_aligned_t q; }; }  \
> +		__guest_handle_ ## name
> +
> +#define DEFINE_GUEST_HANDLE_STRUCT(name) \
> +	__DEFINE_GUEST_HANDLE(name, struct name)
> +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
> +#define GUEST_HANDLE(name)        __guest_handle_ ## name
> +
> +#define set_xen_guest_handle(hnd, val)			\
> +	do {						\
> +		if (sizeof(hnd) == 8)			\
> +			*(u64 *)&(hnd) = 0;	\
> +		(hnd).p = val;				\
> +	} while (0)
> +
> +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
> +
> +#ifndef __ASSEMBLY__
> +/* Explicitly size integers that represent pfns in the interface with
> + * Xen so that we can have one ABI that works for 32 and 64 bit guests.
> + * Note that this means that the xen_pfn_t type may be capable of
> + * representing pfn's which the guest cannot represent in its own pfn
> + * type. However since pfn space is controlled by the guest this is
> + * fine since it simply wouldn't be able to create any such pfns in
> + * the first place.
> + */
> +typedef u64 xen_pfn_t;
> +#define PRI_xen_pfn "llx"
> +typedef u64 xen_ulong_t;
> +#define PRI_xen_ulong "llx"
> +typedef s64 xen_long_t;
> +#define PRI_xen_long "llx"
> +/* Guest handles for primitive C types. */
> +__DEFINE_GUEST_HANDLE(uchar, unsigned char);
> +__DEFINE_GUEST_HANDLE(uint,  unsigned int);
> +DEFINE_GUEST_HANDLE(char);
> +DEFINE_GUEST_HANDLE(int);
> +DEFINE_GUEST_HANDLE(void);
> +DEFINE_GUEST_HANDLE(u64);
> +DEFINE_GUEST_HANDLE(u32);
> +DEFINE_GUEST_HANDLE(xen_pfn_t);
> +DEFINE_GUEST_HANDLE(xen_ulong_t);
> +
> +/* Maximum number of virtual CPUs in multi-processor guests. */
> +#define MAX_VIRT_CPUS 1
> +
> +struct arch_vcpu_info { };
> +struct arch_shared_info { };
> +
> +/* TODO: Move pvclock definitions some place arch independent */
> +struct pvclock_vcpu_time_info {
> +	u32   version;
> +	u32   pad0;
> +	u64   tsc_timestamp;
> +	u64   system_time;
> +	u32   tsc_to_system_mul;
> +	s8    tsc_shift;
> +	u8    flags;
> +	u8    pad[2];
> +} __attribute__((__packed__)); /* 32 bytes */
> +
> +/* It is OK to have a 12 bytes struct with no padding because it is packed */
> +struct pvclock_wall_clock {
> +	u32   version;
> +	u32   sec;
> +	u32   nsec;
> +	u32   sec_hi;
> +} __attribute__((__packed__));
> +#endif
> +
> +#endif /* _ASM_ARM_XEN_INTERFACE_H */
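
A usage note on the guest-handle macros above: they wrap a pointer so the
field has the same 64-bit layout for 32- and 64-bit guests. A minimal
sketch of typical use, assuming a HYPERVISOR_memory_op() hypercall wrapper
from elsewhere in this series plus the xen_memory_reservation /
XENMEM_populate_physmap definitions from the memory.h added below:

	#include <xen/arm/interface.h>
	#include <xen/interface/memory.h>

	/* Hand Xen an array of PFNs through a guest handle. */
	static int example_populate(xen_pfn_t *pfns, unsigned int nr)
	{
		struct xen_memory_reservation res = {
			.nr_extents   = nr,
			.extent_order = 0,
			.domid        = DOMID_SELF,
		};

		/*
		 * set_xen_guest_handle() zeroes the 64-bit slot before
		 * storing the pointer, so Xen never sees stale upper
		 * bits on a 32-bit guest.
		 */
		set_xen_guest_handle(res.extent_start, pfns);

		return HYPERVISOR_memory_op(XENMEM_populate_physmap, &res);
	}
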
> diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
> new file mode 100644
> index 0000000000..8174999c2f
> --- /dev/null
> +++ b/include/xen/interface/event_channel.h
> @@ -0,0 +1,281 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/******************************************************************************
> + * event_channel.h
> + *
> + * Event channels between domains.
> + *
> + * Copyright (c) 2003-2004, K A Fraser.
> + */
> +
> +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
> +#define __XEN_PUBLIC_EVENT_CHANNEL_H__
> +
> +#include <xen/interface/xen.h>
> +
> +typedef u32 evtchn_port_t;
> +DEFINE_GUEST_HANDLE(evtchn_port_t);
> +
> +/*
> + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
> + * accepting interdomain bindings from domain <remote_dom>. A fresh port
> + * is allocated in <dom> and returned as <port>.
> + * NOTES:
> + *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
> + *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
> + */
> +#define EVTCHNOP_alloc_unbound	  6
> +struct evtchn_alloc_unbound {
> +	/* IN parameters */
> +	domid_t dom, remote_dom;
> +	/* OUT parameters */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
> + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
> + * a port that is unbound and marked as accepting bindings from the calling
> + * domain. A fresh port is allocated in the calling domain and returned as
> + * <local_port>.
> + * NOTES:
> + *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
> + */
> +#define EVTCHNOP_bind_interdomain 0
> +struct evtchn_bind_interdomain {
> +	/* IN parameters. */
> +	domid_t remote_dom;
> +	evtchn_port_t remote_port;
> +	/* OUT parameters. */
> +	evtchn_port_t local_port;
> +};
> +
> +/*
> + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
> + * vcpu.
> + * NOTES:
> + *  1. A virtual IRQ may be bound to at most one event channel per vcpu.
> + *  2. The allocated event channel is bound to the specified vcpu. The binding
> + *     may not be changed.
> + */
> +#define EVTCHNOP_bind_virq	  1
> +struct evtchn_bind_virq {
> +	/* IN parameters. */
> +	u32 virq;
> +	u32 vcpu;
> +	/* OUT parameters. */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
> + * NOTES:
> + *  1. A physical IRQ may be bound to at most one event channel per domain.
> + *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
> + */
> +#define EVTCHNOP_bind_pirq	  2
> +struct evtchn_bind_pirq {
> +	/* IN parameters. */
> +	u32 pirq;
> +#define BIND_PIRQ__WILL_SHARE 1
> +	u32 flags; /* BIND_PIRQ__* */
> +	/* OUT parameters. */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
> + * NOTES:
> + *  1. The allocated event channel is bound to the specified vcpu. The binding
> + *     may not be changed.
> + */
> +#define EVTCHNOP_bind_ipi	  7
> +struct evtchn_bind_ipi {
> +	u32 vcpu;
> +	/* OUT parameters. */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_close: Close a local event channel <port>. If the channel is
> + * interdomain then the remote end is placed in the unbound state
> + * (EVTCHNSTAT_unbound), awaiting a new connection.
> + */
> +#define EVTCHNOP_close		  3
> +struct evtchn_close {
> +	/* IN parameters. */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_send: Send an event to the remote end of the channel whose local
> + * endpoint is <port>.
> + */
> +#define EVTCHNOP_send		  4
> +struct evtchn_send {
> +	/* IN parameters. */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_status: Get the current status of the communication channel which
> + * has an endpoint at <dom, port>.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may obtain the status of an event
> + *     channel for which <dom> is not DOMID_SELF.
> + */
> +#define EVTCHNOP_status		  5
> +struct evtchn_status {
> +	/* IN parameters */
> +	domid_t  dom;
> +	evtchn_port_t port;
> +	/* OUT parameters */
> +#define EVTCHNSTAT_closed	0  /* Channel is not in use.		     */
> +#define EVTCHNSTAT_unbound	1  /* Channel is waiting interdom connection.*/
> +#define EVTCHNSTAT_interdomain	2  /* Channel is connected to remote domain. */
> +#define EVTCHNSTAT_pirq		3  /* Channel is bound to a phys IRQ line.   */
> +#define EVTCHNSTAT_virq		4  /* Channel is bound to a virtual IRQ line */
> +#define EVTCHNSTAT_ipi		5  /* Channel is bound to a virtual IPI line */
> +	u32 status;
> +	u32 vcpu;		   /* VCPU to which this channel is bound.   */
> +	union {
> +		struct {
> +			domid_t dom;
> +		} unbound; /* EVTCHNSTAT_unbound */
> +		struct {
> +			domid_t dom;
> +			evtchn_port_t port;
> +		} interdomain; /* EVTCHNSTAT_interdomain */
> +		u32 pirq;	    /* EVTCHNSTAT_pirq	      */
> +		u32 virq;	    /* EVTCHNSTAT_virq	      */
> +	} u;
> +};
> +
> +/*
> + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
> + * event is pending.
> + * NOTES:
> + *  1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
> + *     the binding. This binding cannot be changed.
> + *  2. All other channels notify vcpu0 by default. This default is set when
> + *     the channel is allocated (a port that is freed and subsequently reused
> + *     has its binding reset to vcpu0).
> + */
> +#define EVTCHNOP_bind_vcpu	  8
> +struct evtchn_bind_vcpu {
> +	/* IN parameters. */
> +	evtchn_port_t port;
> +	u32 vcpu;
> +};
> +
> +/*
> + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
> + * a notification to the appropriate VCPU if an event is pending.
> + */
> +#define EVTCHNOP_unmask		  9
> +struct evtchn_unmask {
> +	/* IN parameters. */
> +	evtchn_port_t port;
> +};
> +
> +/*
> + * EVTCHNOP_reset: Close all event channels associated with specified domain.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
> + */
> +#define EVTCHNOP_reset		 10
> +struct evtchn_reset {
> +	/* IN parameters. */
> +	domid_t dom;
> +};
> +
> +typedef struct evtchn_reset evtchn_reset_t;
> +
> +/*
> + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
> + */
> +#define EVTCHNOP_init_control    11
> +struct evtchn_init_control {
> +	/* IN parameters. */
> +	u64 control_gfn;
> +	u32 offset;
> +	u32 vcpu;
> +	/* OUT parameters. */
> +	u8 link_bits;
> +	u8 _pad[7];
> +};
> +
> +/*
> + * EVTCHNOP_expand_array: add an additional page to the event array.
> + */
> +#define EVTCHNOP_expand_array    12
> +struct evtchn_expand_array {
> +	/* IN parameters. */
> +	u64 array_gfn;
> +};
> +
> +/*
> + * EVTCHNOP_set_priority: set the priority for an event channel.
> + */
> +#define EVTCHNOP_set_priority    13
> +struct evtchn_set_priority {
> +	/* IN parameters. */
> +	evtchn_port_t port;
> +	u32 priority;
> +};
> +
> +struct evtchn_op {
> +	u32 cmd; /* EVTCHNOP_* */
> +	union {
> +		struct evtchn_alloc_unbound    alloc_unbound;
> +		struct evtchn_bind_interdomain bind_interdomain;
> +		struct evtchn_bind_virq	       bind_virq;
> +		struct evtchn_bind_pirq	       bind_pirq;
> +		struct evtchn_bind_ipi	       bind_ipi;
> +		struct evtchn_close	       close;
> +		struct evtchn_send	       send;
> +		struct evtchn_status	       status;
> +		struct evtchn_bind_vcpu	       bind_vcpu;
> +		struct evtchn_unmask	       unmask;
> +	} u;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
> +
> +/*
> + * 2-level ABI
> + */
> +
> +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
> +
> +/*
> + * FIFO ABI
> + */
> +
> +/* Events may have priorities from 0 (highest) to 15 (lowest). */
> +#define EVTCHN_FIFO_PRIORITY_MAX     0
> +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
> +#define EVTCHN_FIFO_PRIORITY_MIN     15
> +
> +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
> +
> +typedef u32 event_word_t;
> +
> +#define EVTCHN_FIFO_PENDING 31
> +#define EVTCHN_FIFO_MASKED  30
> +#define EVTCHN_FIFO_LINKED  29
> +#define EVTCHN_FIFO_BUSY    28
> +
> +#define EVTCHN_FIFO_LINK_BITS 17
> +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
> +
> +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
> +
> +struct evtchn_fifo_control_block {
> +	u32     ready;
> +	u32     _rsvd;
> +	event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
> +};
> +
> +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
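
A usage note: the structures above are passed to the event-channel
hypercall one sub-operation at a time. A minimal sketch, assuming a
HYPERVISOR_event_channel_op() hypercall wrapper from the hypercall layer
(not part of this header):

	#include <xen/interface/xen.h>
	#include <xen/interface/event_channel.h>

	/* Allocate a loopback port (see the alloc_unbound notes) and notify it. */
	static int example_evtchn(void)
	{
		struct evtchn_alloc_unbound alloc = {
			.dom        = DOMID_SELF,
			.remote_dom = DOMID_SELF,
		};
		struct evtchn_send send;
		int rc;

		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
						 &alloc);
		if (rc)
			return rc;

		send.port = alloc.port;	/* OUT parameter of the alloc */
		return HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
	}
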
> diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
> new file mode 100644
> index 0000000000..197a0d0d58
> --- /dev/null
> +++ b/include/xen/interface/grant_table.h
> @@ -0,0 +1,582 @@
> +/******************************************************************************
> + * grant_table.h
> + *
> + * Interface for granting foreign access to page frames, and receiving
> + * page-ownership transfers.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2004, K A Fraser
> + */
> +
> +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
> +#define __XEN_PUBLIC_GRANT_TABLE_H__
> +
> +#include <xen/interface/xen.h>
> +
> +/***********************************
> + * GRANT TABLE REPRESENTATION
> + */
> +
> +/* Some rough guidelines on accessing and updating grant-table entries
> + * in a concurrency-safe manner. For more information, Linux contains a
> + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
> + *
> + * NB. WMB is a no-op on current-generation x86 processors. However, a
> + *     compiler barrier will still be required.
> + *
> + * Introducing a valid entry into the grant table:
> + *  1. Write ent->domid.
> + *  2. Write ent->frame:
> + *      GTF_permit_access:   Frame to which access is permitted.
> + *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
> + *                           frame, or zero if none.
> + *  3. Write memory barrier (WMB).
> + *  4. Write ent->flags, inc. valid type.
> + *
> + * Invalidating an unused GTF_permit_access entry:
> + *  1. flags = ent->flags.
> + *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
> + *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
> + *  NB. No need for WMB as reuse of entry is control-dependent on success of
> + *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
> + *
> + * Invalidating an in-use GTF_permit_access entry:
> + *  This cannot be done directly. Request assistance from the domain controller
> + *  which can set a timeout on the use of a grant entry and take necessary
> + *  action. (NB. This is not yet implemented!).
> + *
> + * Invalidating an unused GTF_accept_transfer entry:
> + *  1. flags = ent->flags.
> + *  2. Observe that !(flags & GTF_transfer_committed). [*]
> + *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
> + *  NB. No need for WMB as reuse of entry is control-dependent on success of
> + *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
> + *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
> + *      The guest must /not/ modify the grant entry until the address of the
> + *      transferred frame is written. It is safe for the guest to spin waiting
> + *      for this to occur (detect by observing GTF_transfer_completed in
> + *      ent->flags).
> + *
> + * Invalidating a committed GTF_accept_transfer entry:
> + *  1. Wait for (ent->flags & GTF_transfer_completed).
> + *
> + * Changing a GTF_permit_access from writable to read-only:
> + *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
> + *
> + * Changing a GTF_permit_access from read-only to writable:
> + *  Use SMP-safe bit-setting instruction.
> + */
> +
> +/*
> + * Reference to a grant entry in a specified domain's grant table.
> + */
> +typedef u32 grant_ref_t;
> +
> +/*
> + * A grant table comprises a packed array of grant entries in one or more
> + * page frames shared between Xen and a guest.
> + * [XEN]: This field is written by Xen and read by the sharing guest.
> + * [GST]: This field is written by the guest and read by Xen.
> + */
> +
> +/*
> + * Version 1 of the grant table entry structure is maintained purely
> + * for backwards compatibility.  New guests should use version 2.
> + */
> +struct grant_entry_v1 {
> +	/* GTF_xxx: various type and flag information.  [XEN,GST] */
> +	u16 flags;
> +	/* The domain being granted foreign privileges. [GST] */
> +	domid_t  domid;
> +	/*
> +	 * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
> +	 * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
> +	 */
> +	u32 frame;
> +};
> +
> +/*
> + * Type of grant entry.
> + *  GTF_invalid: This grant entry grants no privileges.
> + *  GTF_permit_access: Allow @domid to map/access @frame.
> + *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
> + *                       to this guest. Xen writes the page number to @frame.
> + *  GTF_transitive: Allow @domid to transitively access a subrange of
> + *                  @trans_grant in @trans_domid.  No mappings are allowed.
> + */
> +#define GTF_invalid         (0U << 0)
> +#define GTF_permit_access   (1U << 0)
> +#define GTF_accept_transfer (2U << 0)
> +#define GTF_transitive      (3U << 0)
> +#define GTF_type_mask       (3U << 0)
> +
> +/*
> + * Subflags for GTF_permit_access.
> + *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
> + *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
> + *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
> + *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
> + *                will only be allowed to copy from the grant, and not
> + *                map it. [GST]
> + */
> +#define _GTF_readonly       (2)
> +#define GTF_readonly        (1U << _GTF_readonly)
> +#define _GTF_reading        (3)
> +#define GTF_reading         (1U << _GTF_reading)
> +#define _GTF_writing        (4)
> +#define GTF_writing         (1U << _GTF_writing)
> +#define _GTF_sub_page       (8)
> +#define GTF_sub_page        (1U << _GTF_sub_page)
> +
> +/*
> + * Subflags for GTF_accept_transfer:
> + *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
> + *      to transferring ownership of a page frame. When a guest sees this flag
> + *      it must /not/ modify the grant entry until GTF_transfer_completed is
> + *      set by Xen.
> + *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
> + *      after reading GTF_transfer_committed. Xen will always write the frame
> + *      address, followed by ORing this flag, in a timely manner.
> + */
> +#define _GTF_transfer_committed (2)
> +#define GTF_transfer_committed  (1U << _GTF_transfer_committed)
> +#define _GTF_transfer_completed (3)
> +#define GTF_transfer_completed  (1U << _GTF_transfer_completed)
> +
> +/*
> + * Version 2 grant table entries.  These fulfil the same role as
> + * version 1 entries, but can represent more complicated operations.
> + * Any given domain will have either a version 1 or a version 2 table,
> + * and every entry in the table will be the same version.
> + *
> + * The interface by which domains use grant references does not depend
> + * on the grant table version in use by the other domain.
> + */
> +
> +/*
> + * Version 1 and version 2 grant entries share a common prefix.  The
> + * fields of the prefix are documented as part of struct
> + * grant_entry_v1.
> + */
> +struct grant_entry_header {
> +	u16 flags;
> +	domid_t  domid;
> +};
> +
> +/*
> + * Version 2 of the grant entry structure is a union, because three
> + * different types are supported: full_page, sub_page and transitive.
> + */
> +union grant_entry_v2 {
> +	struct grant_entry_header hdr;
> +
> +	/*
> +	 * This member is used for V1-style full page grants, where either:
> +	 *
> +	 * -- hdr.type is GTF_accept_transfer, or
> +	 * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
> +	 *
> +	 * In that case, the frame field has the same semantics as the
> +	 * field of the same name in the V1 entry structure.
> +	 */
> +	struct {
> +	struct grant_entry_header hdr;
> +	u32 pad0;
> +	u64 frame;
> +	} full_page;
> +
> +	/*
> +	 * If the grant type is GTF_grant_access and GTF_sub_page is set,
> +	 * @domid is allowed to access bytes [@page_off,@page_off+@length)
> +	 * in frame @frame.
> +	 */
> +	struct {
> +	struct grant_entry_header hdr;
> +	u16 page_off;
> +	u16 length;
> +	u64 frame;
> +	} sub_page;
> +
> +	/*
> +	 * If the grant is GTF_transitive, @domid is allowed to use the
> +	 * grant @gref in domain @trans_domid, as if it was the local
> +	 * domain.  Obviously, the transitive access must be compatible
> +	 * with the original grant.
> +	 */
> +	struct {
> +	struct grant_entry_header hdr;
> +	domid_t trans_domid;
> +	u16 pad0;
> +	grant_ref_t gref;
> +	} transitive;
> +
> +	u32 __spacer[4]; /* Pad to a power of two */
> +};
> +
> +typedef u16 grant_status_t;
> +
> +/***********************************
> + * GRANT TABLE QUERIES AND USES
> + */
> +
> +/*
> + * Handle to track a mapping created via a grant reference.
> + */
> +typedef u32 grant_handle_t;
> +
> +/*
> + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
> + * by devices and/or host CPUs. If successful, <handle> is a tracking number
> + * that must be presented later to destroy the mapping(s). On error, <handle>
> + * is a negative status code.
> + * NOTES:
> + *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
> + *     via which I/O devices may access the granted frame.
> + *  2. If GNTMAP_host_map is specified then a mapping will be added at
> + *     either a host virtual address in the current address space, or at
> + *     a PTE at the specified machine address.  The type of mapping to
> + *     perform is selected through the GNTMAP_contains_pte flag, and the
> + *     address is specified in <host_addr>.
> + *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
> + *     host mapping is destroyed by other means then it is *NOT* guaranteed
> + *     to be accounted to the correct grant reference!
> + */
> +#define GNTTABOP_map_grant_ref        0
> +struct gnttab_map_grant_ref {
> +	/* IN parameters. */
> +	u64 host_addr;
> +	u32 flags;               /* GNTMAP_* */
> +	grant_ref_t ref;
> +	domid_t  dom;
> +	/* OUT parameters. */
> +	s16  status;              /* GNTST_* */
> +	grant_handle_t handle;
> +	u64 dev_bus_addr;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
> +
> +/*
> + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
> + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
> + * field is ignored. If non-zero, they must refer to a device/host mapping
> + * that is tracked by <handle>
> + * NOTES:
> + *  1. The call may fail in an undefined manner if either mapping is not
> + *     tracked by <handle>.
> + *  2. After executing a batch of unmaps, it is guaranteed that no stale
> + *     mappings will remain in the device or host TLBs.
> + */
> +#define GNTTABOP_unmap_grant_ref      1
> +struct gnttab_unmap_grant_ref {
> +	/* IN parameters. */
> +	u64 host_addr;
> +	u64 dev_bus_addr;
> +	grant_handle_t handle;
> +	/* OUT parameters. */
> +	s16  status;              /* GNTST_* */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
> +
> +/*
> + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
> + * <nr_frames> pages. The frame addresses are written to the <frame_list>.
> + * Only <nr_frames> addresses are written, even if the table is larger.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> + *  3. Xen may not support more than a single grant-table page per domain.
> + */
> +#define GNTTABOP_setup_table          2
> +struct gnttab_setup_table {
> +	/* IN parameters. */
> +	domid_t  dom;
> +	u32 nr_frames;
> +	/* OUT parameters. */
> +	s16  status;              /* GNTST_* */
> +
> +	GUEST_HANDLE(xen_pfn_t)frame_list;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
> +
> +/*
> + * GNTTABOP_dump_table: Dump the contents of the grant table to the
> + * xen console. Debugging use only.
> + */
> +#define GNTTABOP_dump_table           3
> +struct gnttab_dump_table {
> +	/* IN parameters. */
> +	domid_t dom;
> +	/* OUT parameters. */
> +	s16 status;               /* GNTST_* */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
> +
> +/*
> + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
> + * foreign domain has previously registered its interest in the transfer via
> + * <domid, ref>.
> + *
> + * Note that, even if the transfer fails, the specified page no longer belongs
> + * to the calling domain *unless* the error is GNTST_bad_page.
> + */
> +#define GNTTABOP_transfer                4
> +struct gnttab_transfer {
> +	/* IN parameters. */
> +	xen_pfn_t mfn;
> +	domid_t       domid;
> +	grant_ref_t   ref;
> +	/* OUT parameters. */
> +	s16       status;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
> +
> +/*
> + * GNTTABOP_copy: Hypervisor based copy
> + * source and destinations can be either MFNs or, for foreign domains,
> + * grant references. The foreign domain has to grant read/write access
> + * in its grant table.
> + *
> + * The flags specify what type source and destinations are (either MFN
> + * or grant reference).
> + *
> + * Note that this can also be used to copy data between two domains
> + * via a third party if the source and destination domains had previously
> + * granted appropriate access to their pages to the third party.
> + *
> + * source_offset specifies an offset in the source frame, dest_offset
> + * the offset in the target frame and  len specifies the number of
> + * bytes to be copied.
> + */
> +
> +#define _GNTCOPY_source_gref      (0)
> +#define GNTCOPY_source_gref       (1 << _GNTCOPY_source_gref)
> +#define _GNTCOPY_dest_gref        (1)
> +#define GNTCOPY_dest_gref         (1 << _GNTCOPY_dest_gref)
> +
> +#define GNTTABOP_copy                 5
> +struct gnttab_copy {
> +	/* IN parameters. */
> +	struct {
> +		union {
> +			grant_ref_t ref;
> +			xen_pfn_t   gmfn;
> +		} u;
> +		domid_t  domid;
> +		u16 offset;
> +	} source, dest;
> +	u16      len;
> +	u16      flags;          /* GNTCOPY_* */
> +	/* OUT parameters. */
> +	s16       status;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
> +
> +/*
> + * GNTTABOP_query_size: Query the current and maximum sizes of the
> shared
> + * grant table.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> + */
> +#define GNTTABOP_query_size           6
> +struct gnttab_query_size {
> +	/* IN parameters. */
> +	domid_t  dom;
> +	/* OUT parameters. */
> +	u32 nr_frames;
> +	u32 max_nr_frames;
> +	s16  status;              /* GNTST_* */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
> +
> +/*
> + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
> + * tracked by <handle> but atomically replace the page table entry with one
> + * pointing to the machine address under <new_addr>.  <new_addr> will be
> + * redirected to the null entry.
> + * NOTES:
> + *  1. The call may fail in an undefined manner if either mapping is not
> + *     tracked by <handle>.
> + *  2. After executing a batch of unmaps, it is guaranteed that no stale
> + *     mappings will remain in the device or host TLBs.
> + */
> +#define GNTTABOP_unmap_and_replace    7
> +struct gnttab_unmap_and_replace {
> +	/* IN parameters. */
> +	u64 host_addr;
> +	u64 new_addr;
> +	grant_handle_t handle;
> +	/* OUT parameters. */
> +	s16  status;              /* GNTST_* */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
> +
> +/*
> + * GNTTABOP_set_version: Request a particular version of the grant
> + * table shared table structure.  This operation can only be performed
> + * once in any given domain.  It must be performed before any grants
> + * are activated; otherwise, the domain will be stuck with version 1.
> + * The only defined versions are 1 and 2.
> + */
> +#define GNTTABOP_set_version          8
> +struct gnttab_set_version {
> +	/* IN parameters */
> +	u32 version;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
> +
> +/*
> + * GNTTABOP_get_status_frames: Get the list of frames used to store grant
> + * status for <dom>. In grant format version 2, the status is separated
> + * from the other shared grant fields to allow more efficient synchronization
> + * using barriers instead of atomic cmpexch operations.
> + * <nr_frames> specifies the size of vector <frame_list>.
> + * The frame addresses are returned in the <frame_list>.
> + * Only <nr_frames> addresses are returned, even if the table is larger.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> + */
> +#define GNTTABOP_get_status_frames     9
> +struct gnttab_get_status_frames {
> +	/* IN parameters. */
> +	u32 nr_frames;
> +	domid_t  dom;
> +	/* OUT parameters. */
> +	s16  status;              /* GNTST_* */
> +
> +	GUEST_HANDLE(u64)frame_list;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
> +
> +/*
> + * GNTTABOP_get_version: Get the grant table version which is in
> + * effect for domain <dom>.
> + */
> +#define GNTTABOP_get_version          10
> +struct gnttab_get_version {
> +	/* IN parameters */
> +	domid_t dom;
> +	u16 pad;
> +	/* OUT parameters */
> +	u32 version;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
> +
> +/*
> + * Issue one or more cache maintenance operations on a portion of a
> + * page granted to the calling domain by a foreign domain.
> + */
> +#define GNTTABOP_cache_flush          12
> +struct gnttab_cache_flush {
> +	union {
> +		u64 dev_bus_addr;
> +		grant_ref_t ref;
> +	} a;
> +	u16 offset;   /* offset from start of grant */
> +	u16 length;   /* size within the grant */
> +#define GNTTAB_CACHE_CLEAN          (1 << 0)
> +#define GNTTAB_CACHE_INVAL          (1 << 1)
> +#define GNTTAB_CACHE_SOURCE_GREF    (1 << 31)
> +	u32 op;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
> +
> +/*
> + * Bitfield values for update_pin_status.flags.
> + */
> + /* Map the grant entry for access by I/O devices. */
> +#define _GNTMAP_device_map      (0)
> +#define GNTMAP_device_map       (1 << _GNTMAP_device_map)
> +/* Map the grant entry for access by host CPUs. */
> +#define _GNTMAP_host_map        (1)
> +#define GNTMAP_host_map         (1 << _GNTMAP_host_map)
> +/* Accesses to the granted frame will be restricted to read-only access. */
> +#define _GNTMAP_readonly        (2)
> +#define GNTMAP_readonly         (1 << _GNTMAP_readonly)
> +/*
> + * GNTMAP_host_map subflag:
> + *  0 => The host mapping is usable only by the guest OS.
> + *  1 => The host mapping is usable by guest OS + current application.
> + */
> +#define _GNTMAP_application_map (3)
> +#define GNTMAP_application_map  (1 << _GNTMAP_application_map)
> +
> +/*
> + * GNTMAP_contains_pte subflag:
> + *  0 => This map request contains a host virtual address.
> + *  1 => This map request contains the machine address of the PTE to update.
> + */
> +#define _GNTMAP_contains_pte    (4)
> +#define GNTMAP_contains_pte     (1 << _GNTMAP_contains_pte)
> +
> +/*
> + * Bits to be placed in guest kernel available PTE bits (architecture
> + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
> + */
> +#define _GNTMAP_guest_avail0    (16)
> +#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0)
> +
> +/*
> + * Values for error status returns. All errors are -ve.
> + */
> +#define GNTST_okay             (0)  /* Normal return.                        */
> +#define GNTST_general_error    (-1) /* General undefined error.              */
> +#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
> +#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
> +#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
> +#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
> +#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
> +#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
> +#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
> +#define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
> +#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
> +#define GNTST_address_too_big (-11) /* transfer page address too large.      */
> +#define GNTST_eagain          (-12) /* Operation not done; try again.        */
> +
> +#define GNTTABOP_error_msgs {                   \
> +	"okay",                                     \
> +	"undefined error",                          \
> +	"unrecognised domain id",                   \
> +	"invalid grant reference",                  \
> +	"invalid mapping handle",                   \
> +	"invalid virtual address",                  \
> +	"invalid device address",                   \
> +	"no spare translation slot in the I/O MMU", \
> +	"permission denied",                        \
> +	"bad page",                                 \
> +	"copy arguments cross page boundary",       \
> +	"page address size too large",              \
> +	"operation not done; try again"             \
> +}
> +
> +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
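
The "Introducing a valid entry" ordering documented at the top of this
header is easy to get wrong, so a sketch may help. It assumes gnttab
points at the guest's already-mapped v1 grant table and that wmb() is the
platform write barrier:

	#include <xen/interface/grant_table.h>

	static void example_grant_access(struct grant_entry_v1 *gnttab,
					 grant_ref_t ref, domid_t domid,
					 u32 frame, int readonly)
	{
		gnttab[ref].domid = domid;	/* step 1: ent->domid */
		gnttab[ref].frame = frame;	/* step 2: ent->frame */
		wmb();				/* step 3: write barrier */
		/* step 4: flags last, including the valid type */
		gnttab[ref].flags = GTF_permit_access |
				    (readonly ? GTF_readonly : 0);
	}
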
> diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
> new file mode 100644
> index 0000000000..1c53cad729
> --- /dev/null
> +++ b/include/xen/interface/hvm/hvm_op.h
> @@ -0,0 +1,69 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
> +#define __XEN_PUBLIC_HVM_HVM_OP_H__
> +
> +/* Get/set subcommands: the second argument of the hypercall is a
> + * pointer to a xen_hvm_param struct.
> + */
> +#define HVMOP_set_param           0
> +#define HVMOP_get_param           1
> +struct xen_hvm_param {
> +	domid_t  domid;    /* IN */
> +	u32 index;    /* IN */
> +	u64 value;    /* IN/OUT */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
> +
> +/* Hint from PV drivers for pagetable destruction. */
> +#define HVMOP_pagetable_dying       9
> +struct xen_hvm_pagetable_dying {
> +	/* Domain with a pagetable about to be destroyed. */
> +	domid_t  domid;
> +	/* guest physical address of the toplevel pagetable dying */
> +	aligned_u64 gpa;
> +};
> +
> +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
> +
> +enum hvmmem_type_t {
> +	HVMMEM_ram_rw,             /* Normal read/write guest RAM */
> +	HVMMEM_ram_ro,             /* Read-only; writes are discarded */
> +	HVMMEM_mmio_dm,            /* Reads and writes go to the device model */
> +};
> +
> +#define HVMOP_get_mem_type    15
> +/* Return hvmmem_type_t for the specified pfn. */
> +struct xen_hvm_get_mem_type {
> +	/* Domain to be queried. */
> +	domid_t domid;
> +	/* OUT variable. */
> +	u16 mem_type;
> +	u16 pad[2]; /* align next field on 8-byte boundary */
> +	/* IN variable. */
> +	u64 pfn;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
> +
> +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
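
A minimal sketch of the get/set subcommands described above, assuming a
HYPERVISOR_hvm_op() hypercall wrapper from the hypercall layer (the
parameter indices come from hvm/params.h, next in this patch):

	#include <xen/interface/xen.h>
	#include <xen/interface/hvm/hvm_op.h>

	static int example_get_param(u32 index, u64 *value)
	{
		struct xen_hvm_param p = {
			.domid = DOMID_SELF,
			.index = index,		/* e.g. HVM_PARAM_CONSOLE_PFN */
		};
		int rc = HYPERVISOR_hvm_op(HVMOP_get_param, &p);

		if (!rc)
			*value = p.value;	/* IN/OUT field holds the result */
		return rc;
	}
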
> diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
> new file mode 100644
> index 0000000000..4d61fc58d9
> --- /dev/null
> +++ b/include/xen/interface/hvm/params.h
> @@ -0,0 +1,127 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
> +#define __XEN_PUBLIC_HVM_PARAMS_H__
> +
> +#include <xen/interface/hvm/hvm_op.h>
> +
> +/*
> + * Parameter space for HVMOP_{set,get}_param.
> + */
> +
> +#define HVM_PARAM_CALLBACK_IRQ 0
> +/*
> + * How should CPU0 event-channel notifications be delivered?
> + *
> + * If val == 0 then CPU0 event-channel notifications are not delivered.
> + * If val != 0, val[63:56] encodes the type, as follows:
> + */
> +
> +#define HVM_PARAM_CALLBACK_TYPE_GSI      0
> +/*
> + * val[55:0] is a delivery GSI.  GSI 0 cannot be used, as it aliases val == 0,
> + * and disables all notifications.
> + */
> +
> +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
> +/*
> + * val[55:0] is a delivery PCI INTx line:
> + * Domain = val[47:32], Bus = val[31:16], DevFn = val[15:8], IntX = val[1:0]
> + */
> +
> +#if defined(__i386__) || defined(__x86_64__)
> +#define HVM_PARAM_CALLBACK_TYPE_VECTOR   2
> +/*
> + * val[7:0] is a vector number.  Check for XENFEAT_hvm_callback_vector to know
> + * if this delivery method is available.
> + */
> +#elif defined(__arm__) || defined(__aarch64__)
> +#define HVM_PARAM_CALLBACK_TYPE_PPI      2
> +/*
> + * val[55:16] needs to be zero.
> + * val[15:8] is interrupt flag of the PPI used by event-channel:
> + *  bit 8: the PPI is edge(1) or level(0) triggered
> + *  bit 9: the PPI is active low(1) or high(0)
> + * val[7:0] is a PPI number used by event-channel.
> + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to
> + * the notification is handled by the interrupt controller.
> + */
> +#endif
> +
> +#define HVM_PARAM_STORE_PFN    1
> +#define HVM_PARAM_STORE_EVTCHN 2
> +
> +#define HVM_PARAM_PAE_ENABLED  4
> +
> +#define HVM_PARAM_IOREQ_PFN    5
> +
> +#define HVM_PARAM_BUFIOREQ_PFN 6
> +
> +/*
> + * Set mode for virtual timers (currently x86 only):
> + *  delay_for_missed_ticks (default):
> + *   Do not advance a vcpu's time beyond the correct delivery time for
> + *   interrupts that have been missed due to preemption. Deliver missed
> + *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
> + *   time stepwise for each one.
> + *  no_delay_for_missed_ticks:
> + *   As above, missed interrupts are delivered, but guest time always tracks
> + *   wallclock (i.e., real) time while doing so.
> + *  no_missed_ticks_pending:
> + *   No missed interrupts are held pending. Instead, to ensure ticks are
> + *   delivered at some non-zero rate, if we detect missed ticks then the
> + *   internal tick alarm is not disabled if the VCPU is preempted during the
> + *   next tick period.
> + *  one_missed_tick_pending:
> + *   Missed interrupts are collapsed together and delivered as one 'late
> tick'.
> + *   Guest time always tracks wallclock (i.e., real) time.
> + */
> +#define HVM_PARAM_TIMER_MODE   10
> +#define HVMPTM_delay_for_missed_ticks    0
> +#define HVMPTM_no_delay_for_missed_ticks 1
> +#define HVMPTM_no_missed_ticks_pending   2
> +#define HVMPTM_one_missed_tick_pending   3
> +
> +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
> +#define HVM_PARAM_HPET_ENABLED 11
> +
> +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
> +#define HVM_PARAM_IDENT_PT     12
> +
> +/* Device Model domain, defaults to 0. */
> +#define HVM_PARAM_DM_DOMAIN    13
> +
> +/* ACPI S state: currently support S0 and S3 on x86. */
> +#define HVM_PARAM_ACPI_S_STATE 14
> +
> +/* TSS used on Intel when CR0.PE=0. */
> +#define HVM_PARAM_VM86_TSS     15
> +
> +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
> +#define HVM_PARAM_VPT_ALIGN    16
> +
> +/* Console debug shared memory ring and event channel */
> +#define HVM_PARAM_CONSOLE_PFN    17
> +#define HVM_PARAM_CONSOLE_EVTCHN 18
> +
> +#define HVM_NR_PARAMS          19
> +
> +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
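
As an illustration of the ARM callback encoding documented above (the
type lives in val[63:56], the PPI flags in val[15:8] and the PPI number
in val[7:0]), a hypothetical decoder:

	static void example_decode_callback(u64 val)
	{
		if (((val >> 56) & 0xff) == HVM_PARAM_CALLBACK_TYPE_PPI) {
			unsigned int ppi  = val & 0xff;		/* val[7:0] */
			unsigned int edge = (val >> 8) & 1;	/* bit 8 */
			unsigned int low  = (val >> 9) & 1;	/* bit 9 */

			printf("evtchn PPI %u, %s-triggered, active-%s\n",
			       ppi, edge ? "edge" : "level",
			       low ? "low" : "high");
		}
	}
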
> diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
> new file mode 100644
> index 0000000000..7d74c99226
> --- /dev/null
> +++ b/include/xen/interface/io/blkif.h
> @@ -0,0 +1,726 @@
> +/******************************************************************************
> + * blkif.h
> + *
> + * Unified block-device I/O interface for Xen guest OSes.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2003-2004, Keir Fraser
> + * Copyright (c) 2012, Spectra Logic Corporation
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_BLKIF_H__
> +#define __XEN_PUBLIC_IO_BLKIF_H__
> +
> +#include "ring.h"
> +#include "../grant_table.h"
> +
> +/*
> + * Front->back notifications: When enqueuing a new request, sending a
> + * notification can be made conditional on req_event (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Backends must set
> + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
> + *
> + * Back->front notifications: When enqueuing a new response, sending a
> + * notification can be made conditional on rsp_event (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Frontends must set
> + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
> + */
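
A sketch of the producer-side hold-off this describes, using the ring.h
macros from this patch (notify_remote_via_evtchn() is a helper assumed to
come from the event-channel code, not from this header):

	int notify;

	/* ... queue requests at ring.req_prod_pvt ... */
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
	if (notify)
		notify_remote_via_evtchn(evtchn);
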
> +
> +#ifndef blkif_vdev_t
> +#define blkif_vdev_t   u16
> +#endif
> +#define blkif_sector_t u64
> +
> +/*
> + * Feature and Parameter Negotiation
> + * =================================
> + * The two halves of a Xen block driver utilize nodes within the XenStore to
> + * communicate capabilities and to negotiate operating parameters.  This
> + * section enumerates these nodes which reside in the respective front and
> + * backend portions of the XenStore, following the XenBus convention.
> + *
> + * All data in the XenStore is stored as strings.  Nodes specifying numeric
> + * values are encoded in decimal.  Integer value ranges listed below are
> + * expressed as fixed sized integer types capable of storing the conversion
> + * of a properly formatted node string, without loss of information.
> + *
> + * Any specified default value is in effect if the corresponding XenBus node
> + * is not present in the XenStore.
> + *
> + * XenStore nodes in sections marked "PRIVATE" are solely for use by the
> + * driver side whose XenBus tree contains them.
> + *
> + * XenStore nodes marked "DEPRECATED" in their notes section should only be
> + * used to provide interoperability with legacy implementations.
> + *
> + * See the XenBus state transition diagram below for details on when XenBus
> + * nodes must be published and when they can be queried.
> + *
> + *****************************************************************************
> + *                            Backend XenBus Nodes
> + *****************************************************************************
> + *
> + *------------------ Backend Device Identification (PRIVATE) ------------------
> + *
> + * mode
> + *      Values:         "r" (read only), "w" (writable)
> + *
> + *      The read or write access permissions to the backing store to be
> + *      granted to the frontend.
> + *
> + * params
> + *      Values:         string
> + *
> + *      A free formatted string providing sufficient information for the
> + *      hotplug script to attach the device and provide a suitable
> + *      handler (ie: a block device) for blkback to use.
> + *
> + * physical-device
> + *      Values:         "MAJOR:MINOR"
> + *      Notes: 11
> + *
> + *      MAJOR and MINOR are the major number and minor number of the
> + *      backing device respectively.
> + *
> + * physical-device-path
> + *      Values:         path string
> + *
> + *      A string that contains the absolute path to the disk image. On
> + *      NetBSD and Linux this is always a block device, while on FreeBSD
> + *      it can be either a block device or a regular file.
> + *
> + * type
> + *      Values:         "file", "phy", "tap"
> + *
> + *      The type of the backing device/object.
> + *
> + *
> + * direct-io-safe
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      The underlying storage is not affected by the direct IO memory
> + *      lifetime bug.  See:
> + *          http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
> + *
> + *      Therefore this option gives the backend permission to use
> + *      O_DIRECT, notwithstanding that bug.
> + *
> + *      That is, if this option is enabled, use of O_DIRECT is safe,
> + *      in circumstances where we would normally have avoided it as a
> + *      workaround for that bug.  This option is not relevant for all
> + *      backends, and even not necessarily supported for those for
> + *      which it is relevant.  A backend which knows that it is not
> + *      affected by the bug can ignore this option.
> + *
> + *      This option doesn't require a backend to use O_DIRECT, so it
> + *      should not be used to try to control the caching behaviour.
> + *
> + *--------------------------------- Features ---------------------------------
> + *
> + * feature-barrier
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the backend can process requests
> + *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
> + *      of this type may still be returned at any time with the
> + *      BLKIF_RSP_EOPNOTSUPP result code.
> + *
> + * feature-flush-cache
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the backend can process requests
> + *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
> + *      of this type may still be returned at any time with the
> + *      BLKIF_RSP_EOPNOTSUPP result code.
> + *
> + * feature-discard
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the backend can process requests
> + *      containing the BLKIF_OP_DISCARD request opcode.  Requests
> + *      of this type may still be returned at any time with the
> + *      BLKIF_RSP_EOPNOTSUPP result code.
> + *
> + * feature-persistent
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *      Notes: 7
> + *
> + *      A value of "1" indicates that the backend can keep the grants used
> + *      by the frontend driver mapped, so the same set of grants should be
> + *      used in all transactions. The maximum number of grants the backend
> + *      can map persistently depends on the implementation, but ideally it
> + *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
> + *      feature the backend doesn't need to unmap each grant, preventing
> + *      costly TLB flushes. The backend driver should only map grants
> + *      persistently if the frontend supports it. If a backend driver chooses
> + *      to use the persistent protocol when the frontend doesn't support it,
> + *      it will probably hit the maximum number of persistently mapped grants
> + *      (due to the fact that the frontend won't be reusing the same grants),
> + *      and fall back to non-persistent mode. Backend implementations may
> + *      shrink or expand the number of persistently mapped grants without
> + *      notifying the frontend depending on memory constraints (this might
> + *      cause a performance degradation).
> + *
> + *      If a backend driver wants to limit the maximum number of persistently
> + *      mapped grants to a value less than RING_SIZE *
> + *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
> + *      discard the grants that are less commonly used. Using a LRU in the
> + *      backend driver paired with a LIFO queue in the frontend will
> + *      allow us to have better performance in this scenario.
> + *
> + *----------------------- Request Transport Parameters ------------------------
> + *
> + * max-ring-page-order
> + *      Values:         <uint32_t>
> + *      Default Value:  0
> + *      Notes:          1, 3
> + *
> + *      The maximum supported size of the request ring buffer in units of
> + *      lb(machine pages). (e.g. 0 == 1 page,  1 == 2 pages, 2 == 4 pages,
> + *      etc.).
> + *
> + * max-ring-pages
> + *      Values:         <uint32_t>
> + *      Default Value:  1
> + *      Notes:          DEPRECATED, 2, 3
> + *
> + *      The maximum supported size of the request ring buffer in units of
> + *      machine pages.  The value must be a power of 2.
> + *
> + *------------------------- Backend Device Properties -------------------------
> + *
> + * discard-enable
> + *      Values:         0/1 (boolean)
> + *      Default Value:  1
> + *
> + *      This optional property, set by the toolstack, instructs the backend
> + *      to offer (or not to offer) discard to the frontend. If the property
> + *      is missing the backend should offer discard if the backing storage
> + *      actually supports it.
> + *
> + * discard-alignment
> + *      Values:         <uint32_t>
> + *      Default Value:  0
> + *      Notes:          4, 5
> + *
> + *      The offset, in bytes from the beginning of the virtual block device,
> + *      to the first, addressable, discard extent on the underlying device.
> + *
> + * discard-granularity
> + *      Values:         <uint32_t>
> + *      Default Value:  <"sector-size">
> + *      Notes:          4
> + *
> + *      The size, in bytes, of the individually addressable discard extents
> + *      of the underlying device.
> + *
> + * discard-secure
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *      Notes:          10
> + *
> + *      A value of "1" indicates that the backend can process
> BLKIF_OP_DISCARD
> + *      requests with the BLKIF_DISCARD_SECURE flag set.
> + *
> + * info
> + *      Values:         <uint32_t> (bitmap)
> + *
> + *      A collection of bit flags describing attributes of the backing
> + *      device.  The VDISK_* macros define the meaning of each bit
> + *      location.
> + *
> + * sector-size
> + *      Values:         <uint32_t>
> + *
> + *      The logical block size, in bytes, of the underlying storage. This
> + *      must be a power of two with a minimum value of 512.
> + *
> + *      NOTE: Because of implementation bugs in some frontends this must be
> + *            set to 512, unless the frontend advertises a non-zero value
> + *            in its "feature-large-sector-size" xenbus node. (See below).
> + *
> + * physical-sector-size
> + *      Values:         <uint32_t>
> + *      Default Value:  <"sector-size">
> + *
> + *      The physical block size, in bytes, of the backend storage. This
> + *      must be an integer multiple of "sector-size".
> + *
> + * sectors
> + *      Values:         <u64>
> + *
> + *      The size of the backend device, expressed in units of "sector-size".
> + *      The product of "sector-size" and "sectors" must also be an integer
> + *      multiple of "physical-sector-size", if that node is present.
> + *
> + *****************************************************************************
> + *                            Frontend XenBus Nodes
> + *****************************************************************************
> + *
> + *----------------------- Request Transport Parameters -----------------------
> + *
> + * event-channel
> + *      Values:         <uint32_t>
> + *
> + *      The identifier of the Xen event channel used to signal activity
> + *      in the ring buffer.
> + *
> + * ring-ref
> + *      Values:         <uint32_t>
> + *      Notes:          6
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      the sole page in a single page sized ring buffer.
> + *
> + * ring-ref%u
> + *      Values:         <uint32_t>
> + *      Notes:          6
> + *
> + *      For a frontend providing a multi-page ring, a "number of ring pages"
> + *      sized list of nodes, each containing a Xen grant reference granting
> + *      permission for the backend to map the page of the ring located
> + *      at page index "%u".  Page indexes are zero based.
> + *
> + * protocol
> + *      Values:         string (XEN_IO_PROTO_ABI_*)
> + *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
> + *
> + *      The machine ABI rules governing the format of all ring request and
> + *      response structures.
> + *
> + * ring-page-order
> + *      Values:         <uint32_t>
> + *      Default Value:  0
> + *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
> + *      Notes:          1, 3
> + *
> + *      The size of the frontend allocated request ring buffer in units
> + *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
> + *      etc.).
> + *
> + * num-ring-pages
> + *      Values:         <uint32_t>
> + *      Default Value:  1
> + *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
> + *      Notes:          DEPRECATED, 2, 3
> + *
> + *      The size of the frontend allocated request ring buffer in units of
> + *      machine pages.  The value must be a power of 2.
> + *
> + *--------------------------------- Features ---------------------------------
> + *
> + * feature-persistent
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *      Notes: 7, 8, 9
> + *
> + *      A value of "1" indicates that the frontend will reuse the same grants
> + *      for all transactions, allowing the backend to map them with write
> + *      access (even when it should be read-only). If the frontend hits the
> + *      maximum number of allowed persistently mapped grants, it can fall
> + *      back to non-persistent mode. This will cause a performance degradation,
> + *      since the backend driver will still try to map those grants
> + *      persistently. Since the persistent grants protocol is compatible with
> + *      the previous protocol, a frontend driver can choose to work in
> + *      persistent mode even when the backend doesn't support it.
> + *
> + *      It is recommended that the frontend driver stores the persistently
> + *      mapped grants in a LIFO queue, so a subset of all persistently mapped
> + *      grants gets used commonly. This is done in case the backend driver
> + *      decides to limit the maximum number of persistently mapped grants
> + *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
> + *
> + * feature-large-sector-size
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the frontend will correctly supply and
> + *      interpret all sector-based quantities in terms of the "sector-size"
> + *      value supplied in the backend info, whatever that may be set to.
> + *      If this node is not present or its value is "0" then it is assumed
> + *      that the frontend requires that the logical block size is 512 as it
> + *      is hardcoded (which is the case in some frontend implementations).
> + *
> + *------------------------- Virtual Device Properties -------------------------
> + *
> + * device-type
> + *      Values:         "disk", "cdrom", "floppy", etc.
> + *
> + * virtual-device
> + *      Values:         <uint32_t>
> + *
> + *      A value indicating the physical device to virtualize within the
> + *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
> + *      disk", etc.)
> + *
> + *      See docs/misc/vbd-interface.txt for details on the format of this
> + *      value.
> + *
> + * Notes
> + * -----
> + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
> + *     PV drivers.
> + * (2) Multi-page ring buffer scheme first used in some RedHat distributions
> + *     including a distribution deployed on certain nodes of the Amazon
> + *     EC2 cluster.
> + * (3) Support for multi-page ring buffers was implemented independently,
> + *     in slightly different forms, by both Citrix and RedHat/Amazon.
> + *     For full interoperability, block front and backends should publish
> + *     identical ring parameters, adjusted for unit differences, to the
> + *     XenStore nodes used in both schemes.
> + * (4) Devices that support discard functionality may internally allocate space
> + *     (discardable extents) in units that are larger than the exported logical
> + *     block size. If the backing device has such discardable extents the
> + *     backend should provide both discard-granularity and discard-alignment.
> + *     Providing just one of the two may be considered an error by the frontend.
> + *     Backends supporting discard should include discard-granularity and
> + *     discard-alignment even if they support discarding individual sectors.
> + *     Frontends should assume discard-alignment == 0 and discard-granularity
> + *     == sector size if these keys are missing.
> + * (5) The discard-alignment parameter allows a physical device to be
> + *     partitioned into virtual devices that do not necessarily begin or
> + *     end on a discardable extent boundary.
> + * (6) When there is only a single page allocated to the request ring,
> + *     'ring-ref' is used to communicate the grant reference for this
> + *     page to the backend.  When using a multi-page ring, the 'ring-ref'
> + *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
> + * (7) When using persistent grants data has to be copied from/to the page
> + *     where the grant is currently mapped. The overhead of doing this copy,
> + *     however, doesn't outweigh the speed improvement of not having to unmap
> + *     the grants.
> + * (8) The frontend driver has to allow the backend driver to map all grants
> + *     with write access, even when they should be mapped read-only, since
> + *     further requests may reuse these grants and require write permissions.
> + * (9) The Linux implementation doesn't have a limit on the maximum number of
> + *     grants that can be persistently mapped in the frontend driver, but
> + *     due to the frontend driver implementation it should never be bigger
> + *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
> + *(10) The discard-secure property may be present and will be set to 1 if the
> + *     backing device supports secure discard.
> + *(11) Only used by Linux and NetBSD.
> + */
> +
> +/*
> + * Multiple hardware queues/rings:
> + * If supported, the backend will write the key "multi-queue-max-queues" to
> + * the directory for that vbd, and set its value to the maximum supported
> + * number of queues.
> + * Frontends that are aware of this feature and wish to use it can write the
> + * key "multi-queue-num-queues" with the number they wish to use, which
> must be
> + * greater than zero, and no more than the value reported by the backend in
> + * "multi-queue-max-queues".
> + *
> + * For frontends requesting just one queue, the usual event-channel and
> + * ring-ref keys are written as before, simplifying the backend processing
> + * to avoid distinguishing between a frontend that doesn't understand the
> + * multi-queue feature, and one that does, but requested only one queue.
> + *
> + * Frontends requesting two or more queues must not write the toplevel
> + * event-channel and ring-ref keys, instead writing those keys under sub-keys
> + * having the name "queue-N" where N is the integer ID of the queue/ring to
> + * which those keys belong. Queues are indexed from zero.
> + * For example, a frontend with two queues must write the following set of
> + * queue-related keys:
> + *
> + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> + * /local/domain/1/device/vbd/0/queue-0 = ""
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
> + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
> + * /local/domain/1/device/vbd/0/queue-1 = ""
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
> + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
> + *
> + * It is also possible to use multiple queues/rings together with the
> + * multi-page ring buffer feature.
> + * For example, a frontend requesting two queues/rings, where the size of each
> + * ring buffer is two pages, must write the following set of related keys:
> + *
> + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> + * /local/domain/1/device/vbd/0/ring-page-order = "1"
> + * /local/domain/1/device/vbd/0/queue-0 = ""
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
> + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
> + * /local/domain/1/device/vbd/0/queue-1 = ""
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
> + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
> + *
> + */
> +
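Not a blocker, but since the negotiation above is easy to get wrong on the
frontend side, here is a minimal sketch of publishing the per-queue keys;
xenstore_write() is a hypothetical helper, not something this series defines:

	/* Sketch: publish per-queue keys for nr_queues > 1 queues.
	 * Frontends requesting a single queue should write the plain
	 * ring-ref/event-channel keys instead, as described above.
	 */
	static int publish_queues(const char *dir, unsigned int nr_queues,
				  const u32 *ring_refs, const u32 *evtchns)
	{
		char path[96], val[16];
		unsigned int q;

		snprintf(val, sizeof(val), "%u", nr_queues);
		snprintf(path, sizeof(path), "%s/multi-queue-num-queues", dir);
		if (xenstore_write(path, val))	/* hypothetical helper */
			return -1;

		for (q = 0; q < nr_queues; q++) {
			snprintf(path, sizeof(path), "%s/queue-%u/ring-ref", dir, q);
			snprintf(val, sizeof(val), "%u", ring_refs[q]);
			if (xenstore_write(path, val))
				return -1;
			snprintf(path, sizeof(path), "%s/queue-%u/event-channel", dir, q);
			snprintf(val, sizeof(val), "%u", evtchns[q]);
			if (xenstore_write(path, val))
				return -1;
		}
		return 0;
	}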
> +/*
> + * STATE DIAGRAMS
> + *
> + *****************************************************************************
> + *                                   Startup                                *
> + *****************************************************************************
> + *
> + * Tool stack creates front and back nodes with state XenbusStateInitialising.
> + *
> + * Front                                Back
> + * =================================    =====================================
> + * XenbusStateInitialising              XenbusStateInitialising
> + *  o Query virtual device               o Query backend device identification
> + *    properties.                          data.
> + *  o Setup OS device instance.          o Open and validate backend device.
> + *                                       o Publish backend features and
> + *                                         transport parameters.
> + *                                                      |
> + *                                                      |
> + *                                                      V
> + *                                      XenbusStateInitWait
> + *
> + * o Query backend features and
> + *   transport parameters.
> + * o Allocate and initialize the
> + *   request ring.
> + * o Publish transport parameters
> + *   that will be in effect during
> + *   this connection.
> + *              |
> + *              |
> + *              V
> + * XenbusStateInitialised
> + *
> + *                                       o Query frontend transport parameters.
> + *                                       o Connect to the request ring and
> + *                                         event channel.
> + *                                       o Publish backend device properties.
> + *                                                      |
> + *                                                      |
> + *                                                      V
> + *                                      XenbusStateConnected
> + *
> + *  o Query backend device properties.
> + *  o Finalize OS virtual device
> + *    instance.
> + *              |
> + *              |
> + *              V
> + * XenbusStateConnected
> + *
> + * Note: Drivers that do not support any optional features, or the negotiation
> + *       of transport parameters, can skip certain states in the state machine:
> + *
> + *       o A frontend may transition to XenbusStateInitialised without
> + *         waiting for the backend to enter XenbusStateInitWait.  In this
> + *         case, default transport parameters are in effect and any
> + *         transport parameters published by the frontend must contain
> + *         their default values.
> + *
> + *       o A backend may transition to XenbusStateInitialised, bypassing
> + *         XenbusStateInitWait, without waiting for the frontend to first
> + *         enter the XenbusStateInitialised state.  In this case, default
> + *         transport parameters are in effect and any transport
> parameters
> + *         published by the backend must contain their default values.
> + *
> + *       Drivers that support optional features and/or transport parameter
> + *       negotiation must tolerate these additional state transition paths.
> + *       In general this means performing the work of any skipped state
> + *       transition, if it has not already been performed, in addition to the
> + *       work associated with entry into the current state.
> + */
> +
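For what it's worth, the frontend side of the Startup diagram usually
collapses into a watch handler switching on the backend state; a rough
sketch (helper logic is a placeholder, not from this series):

	/* Placeholder sketch of a frontend reacting to backend state. */
	switch (backend_state) {
	case XenbusStateInitWait:
		/* Backend published features/transport parameters:
		 * allocate the ring, publish ring-ref and event-channel,
		 * then move ourselves to XenbusStateInitialised. */
		break;
	case XenbusStateConnected:
		/* Backend device properties are final: finish the OS
		 * device instance and enter XenbusStateConnected. */
		break;
	case XenbusStateClosing:
	case XenbusStateClosed:
		/* Tear down the ring, release grants and event channel. */
		break;
	default:
		break;
	}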
> +/*
> + * REQUEST CODES.
> + */
> +#define BLKIF_OP_READ              0
> +#define BLKIF_OP_WRITE             1
> +/*
> + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
> + * operation code ("barrier request") must be completed prior to the
> + * execution of the barrier request.  All writes issued after the barrier
> + * request must not execute until after the completion of the barrier request.
> + *
> + * Optional.  See "feature-barrier" XenBus node documentation above.
> + */
> +#define BLKIF_OP_WRITE_BARRIER     2
> +/*
> + * Commit any uncommitted contents of the backing device's volatile cache
> + * to stable storage.
> + *
> + * Optional.  See "feature-flush-cache" XenBus node documentation above.
> + */
> +#define BLKIF_OP_FLUSH_DISKCACHE   3
> +/*
> + * Used in SLES sources for device specific command packet
> + * contained within the request. Reserved for that purpose.
> + */
> +#define BLKIF_OP_RESERVED_1        4
> +/*
> + * Indicate to the backend device that a region of storage is no longer in
> + * use, and may be discarded at any time without impact to the client.  If
> + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
> + * discarded region on the device must be rendered unrecoverable before the
> + * command returns.
> + *
> + * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
> + * command on a native device.
> + *
> + * More information about trim/unmap operations can be found at:
> + *     http://t13.org/Documents/UploadedDocuments/docs2008/
> + *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
> + *     http://www.seagate.com/staticfiles/support/disc/manuals/
> + *     Interface%20manuals/100293068c.pdf
> + *
> + * Optional.  See "feature-discard", "discard-alignment",
> + * "discard-granularity", and "discard-secure" in the XenBus node
> + * documentation above.
> + */
> +#define BLKIF_OP_DISCARD           5
> +
> +/*
> + * Recognized if "feature-max-indirect-segments" is present in the backend
> + * xenbus info. The "feature-max-indirect-segments" node contains the maximum
> + * number of segments allowed by the backend per request. If the node is
> + * present, the frontend might use blkif_request_indirect structs in order to
> + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
> + * maximum number of indirect segments is fixed by the backend, but the
> + * frontend can issue requests with any number of indirect segments as long as
> + * it's less than the number provided by the backend. The indirect_grefs field
> + * in blkif_request_indirect should be filled by the frontend with the
> + * grant references of the pages that are holding the indirect segments.
> + * These pages are filled with an array of blkif_request_segment that hold the
> + * information about the segments. The number of indirect pages to use is
> + * determined by the number of segments an indirect request contains. Every
> + * indirect page can contain a maximum of
> + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
> + * calculate the number of indirect pages to use we have to do
> + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
> + *
> + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
> + * create the "feature-max-indirect-segments" node!
> +#define BLKIF_OP_INDIRECT          6
> +
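The ceil() above is plain integer arithmetic in practice; a sketch, assuming
XEN_PAGE_SIZE is available as (1 << XEN_PAGE_SHIFT), i.e. the 4 KiB page the
ring protocol uses:

	#define SEGS_PER_INDIRECT_FRAME \
		(XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))

	/* Number of indirect grant pages needed for nr_segments segments. */
	static inline unsigned int indirect_pages(unsigned int nr_segments)
	{
		return (nr_segments + SEGS_PER_INDIRECT_FRAME - 1) /
		       SEGS_PER_INDIRECT_FRAME;
	}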
> +/*
> + * Maximum scatter/gather segments per request.
> + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
> + * NB. This could be 12 if the ring indexes weren't stored in the same page.
> + */
> +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
> +
> +/*
> + * Maximum number of indirect pages to use per request.
> + */
> +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
> +
> +/*
> + * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
> + * 'sector_number' in blkif_request, blkif_request_discard and
> + * blkif_request_indirect are sector-based quantities. See the description
> + * of the "feature-large-sector-size" frontend xenbus node above for
> + * more information.
> + */
> +struct blkif_request_segment {
> +	grant_ref_t gref;        /* reference to I/O buffer frame        */
> +	/* @first_sect: first sector in frame to transfer (inclusive).   */
> +	/* @last_sect: last sector in frame to transfer (inclusive).     */
> +	u8     first_sect, last_sect;
> +};
> +
> +/*
> + * Starting ring element for any I/O request.
> + */
> +struct blkif_request {
> +	u8        operation;    /* BLKIF_OP_???                         */
> +	u8        nr_segments;  /* number of segments                   */
> +	blkif_vdev_t   handle;       /* only for read/write requests         */
> +	u64       id;           /* private guest value, echoed in resp  */
> +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
> +	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +};
> +
> +typedef struct blkif_request blkif_request_t;
> +
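As an illustration of how these fields fit together, a single-segment
8-sector read could be assembled like this, using the ring.h macros added
later in this patch (sketch only; ring, grant and handle setup are assumed):

	struct blkif_request *req =
		RING_GET_REQUEST(&front_ring, front_ring.req_prod_pvt);

	req->operation     = BLKIF_OP_READ;
	req->nr_segments   = 1;
	req->handle        = handle;        /* blkif_vdev_t from setup */
	req->id            = 0x1;           /* echoed back in the response */
	req->sector_number = start_sector;  /* 512-byte units by default */
	req->seg[0].gref       = gref;      /* grant covering the data page */
	req->seg[0].first_sect = 0;
	req->seg[0].last_sect  = 7;         /* sectors 0..7, inclusive */
	front_ring.req_prod_pvt++;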
> +/*
> + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
> + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
> + */
> +struct blkif_request_discard {
> +	u8        operation;    /* BLKIF_OP_DISCARD                     */
> +	u8        flag;         /* BLKIF_DISCARD_SECURE or zero         */
> +#define BLKIF_DISCARD_SECURE (1 << 0)  /* ignored if discard-secure=0  */
> +	blkif_vdev_t   handle;       /* same as for read/write requests      */
> +	u64       id;           /* private guest value, echoed in resp  */
> +	blkif_sector_t sector_number;/* start sector idx on disk             */
> +	u64       nr_sectors;   /* number of contiguous sectors to discard*/
> +};
> +
> +typedef struct blkif_request_discard blkif_request_discard_t;
> +
> +struct blkif_request_indirect {
> +	u8        operation;    /* BLKIF_OP_INDIRECT                    */
> +	u8        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
> +	u16       nr_segments;  /* number of segments                   */
> +	u64       id;           /* private guest value, echoed in resp  */
> +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
> +	blkif_vdev_t   handle;       /* same as for read/write requests      */
> +	grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
> +#ifdef __i386__
> +	u64       pad;          /* Make it 64 byte aligned on i386      */
> +#endif
> +};
> +
> +typedef struct blkif_request_indirect blkif_request_indirect_t;
> +
> +struct blkif_response {
> +	u64        id;              /* copied from request */
> +	u8         operation;       /* copied from request */
> +	s16        status;          /* BLKIF_RSP_???       */
> +};
> +
> +typedef struct blkif_response blkif_response_t;
> +
> +/*
> + * STATUS RETURN CODES.
> + */
> + /* Operation not supported (only happens on barrier writes). */
> +#define BLKIF_RSP_EOPNOTSUPP  -2
> + /* Operation failed for some unspecified reason (-EIO). */
> +#define BLKIF_RSP_ERROR       -1
> + /* Operation completed successfully. */
> +#define BLKIF_RSP_OKAY         0
> +
> +/*
> + * Generate blkif ring structures and types.
> + */
> +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
> +
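With the types generated here, the frontend-side flow documented in ring.h
looks like this in practice (sketch; shared_page, evtchn and the
notify_remote_via_evtchn() helper are assumptions, not part of this file):

	blkif_sring_t *sring = (blkif_sring_t *)shared_page; /* granted page */
	blkif_front_ring_t ring;
	int notify;

	SHARED_RING_INIT(sring);	/* only the frontend does this */
	FRONT_RING_INIT(&ring, sring, PAGE_SIZE);

	/* ... fill RING_GET_REQUEST(&ring, ring.req_prod_pvt) and bump
	 * ring.req_prod_pvt, as sketched earlier ... */

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
	if (notify)
		notify_remote_via_evtchn(evtchn); /* assumed helper */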
> +#define VDISK_CDROM        0x1
> +#define VDISK_REMOVABLE    0x2
> +#define VDISK_READONLY     0x4
> +
> +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h
> new file mode 100644
> index 0000000000..3489fc7a60
> --- /dev/null
> +++ b/include/xen/interface/io/console.h
> @@ -0,0 +1,56 @@
> +/******************************************************************************
> + * console.h
> + *
> + * Console I/O interface for Xen guest OSes.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2005, Keir Fraser
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
> +#define __XEN_PUBLIC_IO_CONSOLE_H__
> +
> +typedef u32 XENCONS_RING_IDX;
> +
> +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1))
> +
> +struct xencons_interface {
> +	char in[1024];
> +	char out[2048];
> +	XENCONS_RING_IDX in_cons, in_prod;
> +	XENCONS_RING_IDX out_cons, out_prod;
> +};
> +
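For reference, writing to this ring is the usual produce-then-notify
pattern; a minimal sketch, assuming intf points at the shared console page
and the xen_mb()/xen_wmb() barriers exist in the accompanying headers:

	/* Sketch: copy up to len bytes into the console output ring. */
	static int console_write(struct xencons_interface *intf,
				 const char *data, int len)
	{
		XENCONS_RING_IDX cons = intf->out_cons;
		XENCONS_RING_IDX prod = intf->out_prod;
		int sent = 0;

		xen_mb();	/* read indexes before touching the ring */

		while (sent < len && (prod - cons) < sizeof(intf->out))
			intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];

		xen_wmb();	/* data visible before the index moves */
		intf->out_prod = prod;
		/* then kick the backend via the console event channel */
		return sent;
	}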
> +#ifdef XEN_WANT_FLEX_CONSOLE_RING
> +#include "ring.h"
> +DEFINE_XEN_FLEX_RING(xencons);
> +#endif
> +
> +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
> new file mode 100644
> index 0000000000..52b4de0f81
> --- /dev/null
> +++ b/include/xen/interface/io/protocols.h
> @@ -0,0 +1,42 @@
> +/******************************************************************************
> + * protocols.h
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2008, Keir Fraser
> + */
> +
> +#ifndef __XEN_PROTOCOLS_H__
> +#define __XEN_PROTOCOLS_H__
> +
> +#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
> +#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
> +#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
> +
> +#if defined(__i386__)
> +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
> +#elif defined(__x86_64__)
> +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
> +#elif defined(__arm__) || defined(__aarch64__)
> +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
> +#else
> +# error arch fixup needed here
> +#endif
> +
> +#endif
> diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
> new file mode 100644
> index 0000000000..4e02678e3c
> --- /dev/null
> +++ b/include/xen/interface/io/ring.h
> @@ -0,0 +1,479 @@
> +/******************************************************************************
> + * ring.h
> + *
> + * Shared producer-consumer ring macros.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Tim Deegan and Andrew Warfield November 2004.
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_RING_H__
> +#define __XEN_PUBLIC_IO_RING_H__
> +
> +/*
> + * When #include'ing this header, you need to provide the following
> + * declaration upfront:
> + * - standard integer types (u8, u16, etc)
> + * They are provided by stdint.h of the standard headers.
> + *
> + * In addition, if you intend to use the FLEX macros, you also need to
> + * provide the following, before invoking the FLEX macros:
> + * - size_t
> + * - memcpy
> + * - grant_ref_t
> + * These declarations are provided by string.h of the standard headers,
> + * and grant_table.h from the Xen public headers.
> + */
> +
> +#include <xen/interface/grant_table.h>
> +
> +typedef unsigned int RING_IDX;
> +
> +/* Round a 32-bit unsigned constant down to the nearest power of two. */
> +#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
> +#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
> +#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
> +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
> +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
> +
> +/*
> + * Calculate size of a shared ring, given the total available space for the
> + * ring and indexes (_sz), and the name tag of the request/response structure.
> + * A ring contains as many entries as will fit, rounded down to the nearest
> + * power of two (so we can mask with (size-1) to loop around).
> + */
> +#define __CONST_RING_SIZE(_s, _sz) \
> +	(__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
> +		sizeof(((struct _s##_sring *)0)->ring[0])))
> +/*
> + * The same for passing in an actual pointer instead of a name tag.
> + */
> +#define __RING_SIZE(_s, _sz) \
> +	(__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
> +
> +/*
> + * Macros to make the correct C datatypes for a new kind of ring.
> + *
> + * To make a new ring datatype, you need to have two message structures,
> + * let's say request_t, and response_t already defined.
> + *
> + * In a header where you want the ring datatype declared, you then do:
> + *
> + *     DEFINE_RING_TYPES(mytag, request_t, response_t);
> + *
> + * These expand out to give you a set of types, as you can see below.
> + * The most important of these are:
> + *
> + *     mytag_sring_t      - The shared ring.
> + *     mytag_front_ring_t - The 'front' half of the ring.
> + *     mytag_back_ring_t  - The 'back' half of the ring.
> + *
> + * To initialize a ring in your code you need to know the location and size
> + * of the shared memory area (PAGE_SIZE, for instance). To initialise
> + * the front half:
> + *
> + *     mytag_front_ring_t front_ring;
> + *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
> + *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page,
> PAGE_SIZE);
> + *
> + * Initializing the back follows similarly (note that only the front
> + * initializes the shared ring):
> + *
> + *     mytag_back_ring_t back_ring;
> + *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page,
> PAGE_SIZE);
> + */
> +
> +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                       \
> +                                                                          \
> +/* Shared ring entry */                                                   \
> +union __name##_sring_entry {                                              \
> +	__req_t req;                                                      \
> +	__rsp_t rsp;                                                      \
> +};                                                                        \
> +                                                                          \
> +/* Shared ring page */                                                    \
> +struct __name##_sring {                                                   \
> +	RING_IDX req_prod, req_event;                                     \
> +	RING_IDX rsp_prod, rsp_event;                                     \
> +	union {                                                           \
> +		struct {                                                  \
> +			u8 smartpoll_active;                              \
> +		} netif;                                                  \
> +		struct {                                                  \
> +			u8 msg;                                           \
> +		} tapif_user;                                             \
> +		u8 pvt_pad[4];                                            \
> +	} pvt;                                                            \
> +	u8 __pad[44];                                                     \
> +	union __name##_sring_entry ring[1]; /* variable-length */        \
> +};                                                                        \
> +                                                                          \
> +/* "Front" end's private variables */                                     \
> +struct __name##_front_ring {                                              \
> +	RING_IDX req_prod_pvt;                                            \
> +	RING_IDX rsp_cons;                                                \
> +	unsigned int nr_ents;                                             \
> +	struct __name##_sring *sring;                                     \
> +};                                                                        \
> +                                                                          \
> +/* "Back" end's private variables */                                      \
> +struct __name##_back_ring {                                               \
> +	RING_IDX rsp_prod_pvt;                                            \
> +	RING_IDX req_cons;                                                \
> +	unsigned int nr_ents;                                             \
> +	struct __name##_sring *sring;                                     \
> +};                                                                        \
> +                                                                          \
> +/* Syntactic sugar */                                                     \
> +typedef struct __name##_sring __name##_sring_t;                           \
> +typedef struct __name##_front_ring __name##_front_ring_t;                 \
> +typedef struct __name##_back_ring __name##_back_ring_t
> +
> +/*
> + * Macros for manipulating rings.
> + *
> + * FRONT_RING_whatever works on the "front end" of a ring: here
> + * requests are pushed on to the ring and responses taken off it.
> + *
> + * BACK_RING_whatever works on the "back end" of a ring: here
> + * requests are taken off the ring and responses put on.
> + *
> + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
> + * This is OK in 1-for-1 request-response situations where the
> + * requestor (front end) never has more than RING_SIZE()-1
> + * outstanding requests.
> + */
> +
> +/* Initialising empty rings */
> +#define SHARED_RING_INIT(_s) do {                                         \
> +	(_s)->req_prod  = (_s)->rsp_prod  = 0;                            \
> +	(_s)->req_event = (_s)->rsp_event = 1;                            \
> +	(void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad));    \
> +	(void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                \
> +} while (0)
> +
> +#define FRONT_RING_INIT(_r, _s, __size) do {                              \
> +	(_r)->req_prod_pvt = 0;                                           \
> +	(_r)->rsp_cons = 0;                                               \
> +	(_r)->nr_ents = __RING_SIZE(_s, __size);                          \
> +	(_r)->sring = (_s);                                               \
> +} while (0)
> +
> +#define BACK_RING_INIT(_r, _s, __size) do {                               \
> +	(_r)->rsp_prod_pvt = 0;                                           \
> +	(_r)->req_cons = 0;                                               \
> +	(_r)->nr_ents = __RING_SIZE(_s, __size);                          \
> +	(_r)->sring = (_s);                                               \
> +} while (0)
> +
> +/* How big is this ring? */
> +#define RING_SIZE(_r)                                                     \
> +	((_r)->nr_ents)
> +
> +/* Number of free requests (for use on front side only). */
> +#define RING_FREE_REQUESTS(_r)                                            \
> +	(RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
> +
> +/* Test if there is an empty slot available on the front ring.
> + * (This is only meaningful from the front.)
> + */
> +#define RING_FULL(_r)                                                     \
> +	(RING_FREE_REQUESTS(_r) == 0)
> +
> +/* Test if there are outstanding messages to be processed on a ring. */
> +#define RING_HAS_UNCONSUMED_RESPONSES(_r)                                 \
> +	((_r)->sring->rsp_prod - (_r)->rsp_cons)
> +
> +#ifdef __GNUC__
> +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                               \
> +	unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;        \
> +	unsigned int rsp = RING_SIZE(_r) -                                \
> +		((_r)->req_cons - (_r)->rsp_prod_pvt);                    \
> +	req < rsp ? req : rsp;                                            \
> +})
> +#else
> +/* Same as above, but without the nice GCC ({ ... }) syntax. */
> +#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                  \
> +	((((_r)->sring->req_prod - (_r)->req_cons) <                      \
> +	  (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?      \
> +	 ((_r)->sring->req_prod - (_r)->req_cons) :                       \
> +	 (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
> +#endif
> +
> +/* Direct access to individual ring elements, by index. */
> +#define RING_GET_REQUEST(_r, _idx)                                        \
> +	(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
> +
> +/*
> + * Get a local copy of a request.
> + *
> + * Use this in preference to RING_GET_REQUEST() so all processing is
> + * done on a local copy that cannot be modified by the other end.
> + *
> + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
> + * to be ineffective where _req is a struct which consists of only bitfields.
> + */
> +#define RING_COPY_REQUEST(_r, _idx, _req) do {                            \
> +	/* Use volatile to force the copy into _req. */                   \
> +	*(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);     \
> +} while (0)
> +
> +#define RING_GET_RESPONSE(_r, _idx)                                       \
> +	(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
> +
> +/* Loop termination condition: Would the specified index overflow the ring? */
> +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                             \
> +	(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
> +
> +/* Ill-behaved frontend determination: Can there be this many requests? */
> +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                             \
> +	(((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
> +
> +#define RING_PUSH_REQUESTS(_r) do {                                       \
> +	xen_wmb(); /* back sees requests /before/ updated producer index */ \
> +	(_r)->sring->req_prod = (_r)->req_prod_pvt;                       \
> +} while (0)
> +
> +#define RING_PUSH_RESPONSES(_r) do {                                      \
> +	xen_wmb(); /* front sees resps /before/ updated producer index */ \
> +	(_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                       \
> +} while (0)
> +
> +/*
> + * Notification hold-off (req_event and rsp_event):
> + *
> + * When queueing requests or responses on a shared ring, it may not always be
> + * necessary to notify the remote end. For example, if requests are in flight
> + * in a backend, the front may be able to queue further requests without
> + * notifying the back (if the back checks for new requests when it queues
> + * responses).
> + *
> + * When enqueuing requests or responses:
> + *
> + *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
> + *  is a boolean return value. True indicates that the receiver requires an
> + *  asynchronous notification.
> + *
> + * After dequeuing requests or responses (before sleeping the connection):
> + *
> + *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
> + *  The second argument is a boolean return value. True indicates that there
> + *  are pending messages on the ring (i.e., the connection should not be put
> + *  to sleep).
> + *
> + *  These macros will set the req_event/rsp_event field to trigger a
> + *  notification on the very next message that is enqueued. If you want to
> + *  create batches of work (i.e., only receive a notification after several
> + *  messages have been enqueued) then you will need to create a customised
> + *  version of the FINAL_CHECK macro in your own code, which sets the event
> + *  field appropriately.
> + */
> +
> +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {             \
> +	RING_IDX __old = (_r)->sring->req_prod;                           \
> +	RING_IDX __new = (_r)->req_prod_pvt;                              \
> +	xen_wmb(); /* back sees requests /before/ updated producer index */ \
> +	(_r)->sring->req_prod = __new;                                    \
> +	xen_mb(); /* back sees new requests /before/ we check req_event */ \
> +	(_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <         \
> +		     (RING_IDX)(__new - __old));                          \
> +} while (0)
> +
> +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {            \
> +	RING_IDX __old = (_r)->sring->rsp_prod;                           \
> +	RING_IDX __new = (_r)->rsp_prod_pvt;                              \
> +	xen_wmb(); /* front sees resps /before/ updated producer index */ \
> +	(_r)->sring->rsp_prod = __new;                                    \
> +	xen_mb(); /* front sees new resps /before/ we check rsp_event */  \
> +	(_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <         \
> +		     (RING_IDX)(__new - __old));                          \
> +} while (0)
> +
> +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {               \
> +	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                 \
> +	if (_work_to_do)                                                  \
> +		break;                                                    \
> +	(_r)->sring->req_event = (_r)->req_cons + 1;                      \
> +	xen_mb();                                                         \
> +	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                 \
> +} while (0)
> +
> +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {              \
> +	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                \
> +	if (_work_to_do)                                                  \
> +		break;                                                    \
> +	(_r)->sring->rsp_event = (_r)->rsp_cons + 1;                      \
> +	xen_mb();                                                         \
> +	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                \
> +} while (0)
> +
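On the consumer side the FINAL_CHECK macro is what closes the race between
draining the ring and going idle; the canonical loop is roughly as below
(sketch; process_response() is a placeholder, and an xen_rmb() read barrier
is assumed to exist next to xen_mb()/xen_wmb()):

	int more;

	do {
		RING_IDX cons = ring.rsp_cons;
		RING_IDX prod = ring.sring->rsp_prod;

		xen_rmb();	/* see the responses before reading them */
		while (cons != prod)
			process_response(RING_GET_RESPONSE(&ring, cons++));
		ring.rsp_cons = cons;

		RING_FINAL_CHECK_FOR_RESPONSES(&ring, more);
	} while (more);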
> +/*
> + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
> + * functions to check if there is data on the ring, and to read and
> + * write to them.
> + *
> + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
> + * does not define the indexes page. As different protocols can have
> + * extensions to the basic format, this macro allows them to define their
> + * own struct.
> + *
> + * XEN_FLEX_RING_SIZE
> + *   Convenience macro to calculate the size of one of the two rings
> + *   from the overall order.
> + *
> + * $NAME_mask
> + *   Function to apply the size mask to an index, to reduce the index
> + *   within the range [0-size].
> + *
> + * $NAME_read_packet
> + *   Function to read data from the ring. The amount of data to read is
> + *   specified by the "size" argument.
> + *
> + * $NAME_write_packet
> + *   Function to write data to the ring. The amount of data to write is
> + *   specified by the "size" argument.
> + *
> + * $NAME_get_ring_ptr
> + *   Convenience function that returns a pointer to read/write to the
> + *   ring at the right location.
> + *
> + * $NAME_data_intf
> + *   Indexes page, shared between frontend and backend. It also
> + *   contains the array of grant refs.
> + *
> + * $NAME_queued
> + *   Function to calculate how many bytes are currently on the ring,
> + *   ready to be read. It can also be used to calculate how much free
> + *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
> + *   $NAME_queued()).
> + */
> +
> +#ifndef XEN_PAGE_SHIFT
> +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
> + * 4K, regardless of the architecture, and page granularity chosen by
> + * operating systems.
> + */
> +#define XEN_PAGE_SHIFT 12
> +#endif
> +#define XEN_FLEX_RING_SIZE(order)                                         \
> +	(1UL << ((order) + XEN_PAGE_SHIFT - 1))
> +
> +#define DEFINE_XEN_FLEX_RING(name)                                        \
> +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size)      \
> +{                                                                         \
> +	return idx & (ring_size - 1);                                     \
> +}                                                                         \
> +                                                                          \
> +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf,      \
> +						 RING_IDX idx,            \
> +						 RING_IDX ring_size)      \
> +{                                                                         \
> +	return buf + name##_mask(idx, ring_size);                         \
> +}                                                                         \
> +                                                                          \
> +static inline void name##_read_packet(void *opaque,                       \
> +				      const unsigned char *buf,           \
> +				      size_t size,                        \
> +				      RING_IDX masked_prod,               \
> +				      RING_IDX *masked_cons,              \
> +				      RING_IDX ring_size)                 \
> +{                                                                         \
> +	if (*masked_cons < masked_prod ||                                 \
> +	    size <= ring_size - *masked_cons) {                           \
> +		memcpy(opaque, buf + *masked_cons, size);                 \
> +	} else {                                                          \
> +		memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \
> +		memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \
> +		       size - (ring_size - *masked_cons));                \
> +	}                                                                 \
> +	*masked_cons = name##_mask(*masked_cons + size, ring_size);       \
> +}                                                                         \
> +                                                                          \
> +static inline void name##_write_packet(unsigned char *buf,                \
> +				       const void *opaque,                \
> +				       size_t size,                       \
> +				       RING_IDX *masked_prod,             \
> +				       RING_IDX masked_cons,              \
> +				       RING_IDX ring_size)                \
> +{                                                                         \
> +	if (*masked_prod < masked_cons ||                                 \
> +	    size <= ring_size - *masked_prod) {                           \
> +		memcpy(buf + *masked_prod, opaque, size);                 \
> +	} else {                                                          \
> +		memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \
> +		memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \
> +		       size - (ring_size - *masked_prod));                \
> +	}                                                                 \
> +	*masked_prod = name##_mask(*masked_prod + size, ring_size);       \
> +}                                                                         \
> +                                                                          \
> +static inline RING_IDX name##_queued(RING_IDX prod,                       \
> +				     RING_IDX cons,                       \
> +				     RING_IDX ring_size)                  \
> +{                                                                         \
> +	RING_IDX size;                                                    \
> +                                                                          \
> +	if (prod == cons)                                                 \
> +		return 0;                                                 \
> +                                                                          \
> +	prod = name##_mask(prod, ring_size);                              \
> +	cons = name##_mask(cons, ring_size);                              \
> +                                                                          \
> +	if (prod == cons)                                                 \
> +		return ring_size;                                         \
> +                                                                          \
> +	if (prod > cons)                                                  \
> +		size = prod - cons;                                       \
> +	else                                                              \
> +		size = ring_size - (cons - prod);                         \
> +	return size;                                                      \
> +}                                                                         \
> +                                                                          \
> +struct name##_data {                                                      \
> +	unsigned char *in; /* half of the allocation */                   \
> +	unsigned char *out; /* half of the allocation */                  \
> +}
> +
> +#define DEFINE_XEN_FLEX_RING_AND_INTF(name)                               \
> +struct name##_data_intf {                                                 \
> +	RING_IDX in_cons, in_prod;                                        \
> +                                                                          \
> +	u8 pad1[56];                                                      \
> +                                                                          \
> +	RING_IDX out_cons, out_prod;                                      \
> +                                                                          \
> +	u8 pad2[56];                                                      \
> +                                                                          \
> +	RING_IDX ring_order;                                              \
> +	grant_ref_t ref[];                                                \
> +};                                                                        \
> +DEFINE_XEN_FLEX_RING(name)
> +
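Instantiating these helpers then looks like the sketch below; the "example"
tag, the free-space check and the final notify are all illustrative, not
part of this header:

	DEFINE_XEN_FLEX_RING_AND_INTF(example);

	/* Sketch: send len bytes on the "out" half of a flex ring. */
	static void example_send(struct example_data_intf *intf,
				 struct example_data *d,
				 const void *buf, size_t len)
	{
		RING_IDX ring_size = XEN_FLEX_RING_SIZE(intf->ring_order);
		RING_IDX masked_prod = example_mask(intf->out_prod, ring_size);
		RING_IDX masked_cons = example_mask(intf->out_cons, ring_size);

		/* caller must have verified free space via example_queued() */
		example_write_packet(d->out, buf, len, &masked_prod,
				     masked_cons, ring_size);
		xen_wmb();	/* data before the index update */
		intf->out_prod += len;
		/* then notify the peer over the event channel */
	}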
> +#endif /* __XEN_PUBLIC_IO_RING_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 8
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
> new file mode 100644
> index 0000000000..f452748b03
> --- /dev/null
> +++ b/include/xen/interface/io/xenbus.h
> @@ -0,0 +1,81 @@
> +/*****************************************************************************
> + * xenbus.h
> + *
> + * Xenbus protocol details.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (C) 2005 XenSource Ltd.
> + */
> +
> +#ifndef _XEN_PUBLIC_IO_XENBUS_H
> +#define _XEN_PUBLIC_IO_XENBUS_H
> +
> +/*
> + * The state of either end of the Xenbus, i.e. the current communication
> + * status of initialisation across the bus.  States here imply nothing about
> + * the state of the connection between the driver and the kernel's device
> + * layers.
> + */
> +enum xenbus_state {
> +	XenbusStateUnknown       = 0,
> +
> +	XenbusStateInitialising  = 1,
> +
> +	/*
> +	 * InitWait: Finished early initialisation but waiting for information
> +	 * from the peer or hotplug scripts.
> +	 */
> +	XenbusStateInitWait      = 2,
> +
> +	/*
> +	 * Initialised: Waiting for a connection from the peer.
> +	 */
> +	XenbusStateInitialised   = 3,
> +
> +	XenbusStateConnected     = 4,
> +
> +	/*
> +	 * Closing: The device is being closed due to an error or an unplug event.
> +	 */
> +	XenbusStateClosing       = 5,
> +
> +	XenbusStateClosed        = 6,
> +
> +	/*
> +	 * Reconfiguring: The device is being reconfigured.
> +	 */
> +	XenbusStateReconfiguring = 7,
> +
> +	XenbusStateReconfigured  = 8
> +};
> +
> +typedef enum xenbus_state XenbusState;
> +
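A helper callers usually want next to this enum is a wait-for-state loop;
sketched here with a hypothetical read_backend_state() accessor:

	/* Sketch: block until the backend reaches the expected state. */
	static void wait_for_backend(enum xenbus_state expected)
	{
		while (read_backend_state() != expected)  /* assumed helper */
			;  /* poll, or sleep on the xenstore watch event */
	}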
> +#endif /* _XEN_PUBLIC_IO_XENBUS_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
> new file mode 100644
> index 0000000000..87987334bf
> --- /dev/null
> +++ b/include/xen/interface/io/xs_wire.h
> @@ -0,0 +1,151 @@
> +/*
> + * Details of the "wire" protocol between Xen Store Daemon and client
> + * library or guest kernel.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (C) 2005 Rusty Russell IBM Corporation
> + */
> +
> +#ifndef _XS_WIRE_H
> +#define _XS_WIRE_H
> +
> +enum xsd_sockmsg_type {
> +	XS_CONTROL,
> +#define XS_DEBUG XS_CONTROL
> +	XS_DIRECTORY,
> +	XS_READ,
> +	XS_GET_PERMS,
> +	XS_WATCH,
> +	XS_UNWATCH,
> +	XS_TRANSACTION_START,
> +	XS_TRANSACTION_END,
> +	XS_INTRODUCE,
> +	XS_RELEASE,
> +	XS_GET_DOMAIN_PATH,
> +	XS_WRITE,
> +	XS_MKDIR,
> +	XS_RM,
> +	XS_SET_PERMS,
> +	XS_WATCH_EVENT,
> +	XS_ERROR,
> +	XS_IS_DOMAIN_INTRODUCED,
> +	XS_RESUME,
> +	XS_SET_TARGET,
> +	/* XS_RESTRICT has been removed */
> +	XS_RESET_WATCHES = XS_SET_TARGET + 2,
> +	XS_DIRECTORY_PART,
> +
> +	XS_TYPE_COUNT,      /* Number of valid types. */
> +
> +	XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
> +};
> +
> +#define XS_WRITE_NONE "NONE"
> +#define XS_WRITE_CREATE "CREATE"
> +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
> +
> +/* We hand errors as strings, for portability. */
> +struct xsd_errors {
> +	int errnum;
> +	const char *errstring;
> +};
> +
> +#ifdef EINVAL
> +#define XSD_ERROR(x) { x, #x }
> +/* LINTED: static unused */
> +static struct xsd_errors xsd_errors[]
> +#if defined(__GNUC__)
> +__attribute__((unused))
> +#endif
> +	= {
> +	XSD_ERROR(EINVAL),
> +	XSD_ERROR(EACCES),
> +	XSD_ERROR(EEXIST),
> +	XSD_ERROR(EISDIR),
> +	XSD_ERROR(ENOENT),
> +	XSD_ERROR(ENOMEM),
> +	XSD_ERROR(ENOSPC),
> +	XSD_ERROR(EIO),
> +	XSD_ERROR(ENOTEMPTY),
> +	XSD_ERROR(ENOSYS),
> +	XSD_ERROR(EROFS),
> +	XSD_ERROR(EBUSY),
> +	XSD_ERROR(EAGAIN),
> +	XSD_ERROR(EISCONN),
> +	XSD_ERROR(E2BIG)
> +};
> +#endif
> +
> +struct xsd_sockmsg {
> +	u32 type;  /* XS_??? */
> +	u32 req_id;/* Request identifier, echoed in daemon's response.  */
> +	u32 tx_id; /* Transaction id (0 if not related to a transaction). */
> +	u32 len;   /* Length of data following this. */
> +
> +	/* Generally followed by nul-terminated string(s). */
> +};
> +
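On the wire a request is just this header followed by the payload bytes; a
sketch of marshalling an XS_READ (write_to_ring() is a placeholder sink,
and <string.h> provides strlen()):

	/* Sketch: marshal an XS_READ request for one nul-terminated path. */
	static void xs_send_read(const char *path, u32 req_id)
	{
		struct xsd_sockmsg msg;

		msg.type   = XS_READ;
		msg.req_id = req_id;		/* echoed back in the reply */
		msg.tx_id  = 0;			/* not inside a transaction */
		msg.len    = strlen(path) + 1;	/* payload incl. the nul */

		write_to_ring(&msg, sizeof(msg));	/* placeholder sink */
		write_to_ring(path, msg.len);
	}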
> +enum xs_watch_type {
> +	XS_WATCH_PATH = 0,
> +	XS_WATCH_TOKEN
> +};
> +
> +/*
> + * `incontents 150 xenstore_struct XenStore wire protocol.
> + *
> + * Inter-domain shared memory communications.
> + */
> +#define XENSTORE_RING_SIZE 1024
> +typedef u32 XENSTORE_RING_IDX;
> +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1))
> +struct xenstore_domain_interface {
> +	char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
> +	char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
> +	XENSTORE_RING_IDX req_cons, req_prod;
> +	XENSTORE_RING_IDX rsp_cons, rsp_prod;
> +	u32 server_features; /* Bitmap of features supported by the server */
> +	u32 connection;
> +};
> +
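Producing into the request ring is the same mask/copy/barrier pattern as
the other rings; a minimal sketch (free-space checking and the
event-channel kick are left to the caller):

	/* Sketch: copy len bytes into the xenstore request ring. */
	static void xb_write(struct xenstore_domain_interface *intf,
			     const void *data, unsigned int len)
	{
		const char *src = data;
		XENSTORE_RING_IDX prod = intf->req_prod;
		unsigned int i;

		/* assumes len <= XENSTORE_RING_SIZE - (req_prod - req_cons) */
		for (i = 0; i < len; i++)
			intf->req[MASK_XENSTORE_IDX(prod++)] = src[i];

		xen_wmb();	/* bytes visible before the index moves */
		intf->req_prod = prod;
	}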
> +/* Violating this is very bad.  See docs/misc/xenstore.txt. */
> +#define XENSTORE_PAYLOAD_MAX 4096
> +
> +/* Violating these just gets you an error back */
> +#define XENSTORE_ABS_PATH_MAX 3072
> +#define XENSTORE_REL_PATH_MAX 2048
> +
> +/* The ability to reconnect a ring */
> +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
> +
> +/* Valid values for the connection field */
> +#define XENSTORE_CONNECTED 0 /* the steady-state */
> +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
> +
> +#endif /* _XS_WIRE_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 8
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> new file mode 100644
> index 0000000000..19959da8b4
> --- /dev/null
> +++ b/include/xen/interface/memory.h
> @@ -0,0 +1,332 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/******************************************************************************
> + * memory.h
> + *
> + * Memory reservation and information.
> + *
> + * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
> + */
> +
> +#ifndef __XEN_PUBLIC_MEMORY_H__
> +#define __XEN_PUBLIC_MEMORY_H__
> +
> +/*
> + * Increase or decrease the specified domain's memory reservation. Returns
> a
> + * -ve errcode on failure, or the # extents successfully allocated or freed.
> + * arg == addr of struct xen_memory_reservation.
> + */
> +#define XENMEM_increase_reservation 0
> +#define XENMEM_decrease_reservation 1
> +#define XENMEM_populate_physmap     6
> +struct xen_memory_reservation {
> +	/*
> +	 * XENMEM_increase_reservation:
> +	 *   OUT: MFN (*not* GMFN) bases of extents that were allocated
> +	 * XENMEM_decrease_reservation:
> +	 *   IN:  GMFN bases of extents to free
> +	 * XENMEM_populate_physmap:
> +	 *   IN:  GPFN bases of extents to populate with memory
> +	 *   OUT: GMFN bases of extents that were allocated
> +	 *   (NB. This command also updates the mach_to_phys translation table)
> +	 */
> +	GUEST_HANDLE(xen_pfn_t)extent_start;
> +
> +	/* Number of extents, and size/alignment of each (2^extent_order pages). */
> +	xen_ulong_t  nr_extents;
> +	unsigned int   extent_order;
> +
> +	/*
> +	 * Maximum # bits addressable by the user of the allocated region (e.g.,
> +	 * I/O devices often have a 32-bit limitation even in 64-bit systems). If
> +	 * zero then the user has no addressing restriction.
> +	 * This field is not used by XENMEM_decrease_reservation.
> +	 */
> +	unsigned int   address_bits;
> +
> +	/*
> +	 * Domain whose reservation is being changed.
> +	 * Unprivileged domains can specify only DOMID_SELF.
> +	 */
> +	domid_t        domid;
> +
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
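
A minimal sketch of how this structure is used (not part of the patch; the HYPERVISOR_memory_op() wrapper and the helper name are assumptions): back a single guest PFN with memory via XENMEM_populate_physmap.

static int populate_one_pfn(xen_pfn_t gpfn)
{
	struct xen_memory_reservation r = {
		.nr_extents   = 1,
		.extent_order = 0,		/* one 4 KiB page */
		.domid        = DOMID_SELF,
	};

	set_xen_guest_handle(r.extent_start, &gpfn);
	/* Success returns the number of extents allocated, here 1. */
	return HYPERVISOR_memory_op(XENMEM_populate_physmap, &r) == 1 ? 0 : -1;
}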
> +
> +/*
> + * An atomic exchange of memory pages. If return code is zero then
> + * @out.extent_list provides GMFNs of the newly-allocated memory.
> + * Returns zero on complete success, otherwise a negative error code.
> + * On complete success then always @nr_exchanged == @in.nr_extents.
> + * On partial success @nr_exchanged indicates how much work was done.
> + */
> +#define XENMEM_exchange             11
> +struct xen_memory_exchange {
> +	/*
> +	 * [IN] Details of memory extents to be exchanged (GMFN bases).
> +	 * Note that @in.address_bits is ignored and unused.
> +	 */
> +	struct xen_memory_reservation in;
> +
> +	/*
> +	 * [IN/OUT] Details of new memory extents.
> +	 * We require that:
> +	 *  1. @in.domid == @out.domid
> +	 *  2. @in.nr_extents  << @in.extent_order ==
> +	 *     @out.nr_extents << @out.extent_order
> +	 *  3. @in.extent_start and @out.extent_start lists must not overlap
> +	 *  4. @out.extent_start lists GPFN bases to be populated
> +	 *  5. @out.extent_start is overwritten with allocated GMFN bases
> +	 */
> +	struct xen_memory_reservation out;
> +
> +	/*
> +	 * [OUT] Number of input extents that were successfully exchanged:
> +	 *  1. The first @nr_exchanged input extents were successfully
> +	 *     deallocated.
> +	 *  2. The corresponding first entries in the output extent list correctly
> +	 *     indicate the GMFNs that were successfully exchanged.
> +	 *  3. All other input and output extents are untouched.
> +	 *  4. If not all input extents are exchanged then the return code of this
> +	 *     command will be non-zero.
> +	 *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
> +	 */
> +	xen_ulong_t nr_exchanged;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
> +/*
> + * Returns the maximum machine frame number of mapped RAM in this system.
> + * This command always succeeds (it never returns an error code).
> + * arg == NULL.
> + */
> +#define XENMEM_maximum_ram_page     2
> +
> +/*
> + * Returns the current or maximum memory reservation, in pages, of the
> + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
> + * arg == addr of domid_t.
> + */
> +#define XENMEM_current_reservation  3
> +#define XENMEM_maximum_reservation  4
> +
> +/*
> + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
> + * mapping table. Architectures which do not have a m2p table do not
> + * implement this command.
> + * arg == addr of xen_machphys_mfn_list_t.
> + */
> +#define XENMEM_machphys_mfn_list    5
> +struct xen_machphys_mfn_list {
> +	/*
> +	 * Size of the 'extent_start' array. Fewer entries will be filled if the
> +	 * machphys table is smaller than max_extents * 2MB.
> +	 */
> +	unsigned int max_extents;
> +
> +	/*
> +	 * Pointer to buffer to fill with list of extent starts. If there are
> +	 * any large discontiguities in the machine address space, 2MB gaps in
> +	 * the machphys table will be represented by an MFN base of zero.
> +	 */
> +	GUEST_HANDLE(xen_pfn_t)extent_start;
> +
> +	/*
> +	 * Number of extents written to the above array. This will be smaller
> +	 * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
> +	 */
> +	unsigned int nr_extents;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
> +
> +/*
> + * Returns the location in virtual address space of the machine_to_phys
> + * mapping table. Architectures which do not have a m2p table, or which do
> + * not map it by default into guest address space, do not implement this
> + * command.
> + * arg == addr of xen_machphys_mapping_t.
> + */
> +#define XENMEM_machphys_mapping     12
> +struct xen_machphys_mapping {
> +	xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
> +	xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
> +
> +#define XENMAPSPACE_shared_info  0 /* shared info page */
> +#define XENMAPSPACE_grant_table  1 /* grant table page */
> +#define XENMAPSPACE_gmfn         2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
> +				    * XENMEM_add_to_physmap_range only.
> +				    */
> +#define XENMAPSPACE_dev_mmio     5 /* device mmio region */
> +
> +/*
> + * Sets the GPFN at which a particular page appears in the specified guest's
> + * pseudophysical address space.
> + * arg == addr of xen_add_to_physmap_t.
> + */
> +#define XENMEM_add_to_physmap      7
> +struct xen_add_to_physmap {
> +	/* Which domain to change the mapping for. */
> +	domid_t domid;
> +
> +	/* Number of pages to go through for gmfn_range */
> +	u16    size;
> +
> +	/* Source mapping space. */
> +	unsigned int space;
> +
> +	/* Index into source mapping space. */
> +	xen_ulong_t idx;
> +
> +	/* GPFN where the source mapping page should appear. */
> +	xen_pfn_t gpfn;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
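
For illustration only (HYPERVISOR_memory_op() and the helper name are assumptions of this sketch): the classic use of this call is to make the shared info page appear at a guest-chosen PFN.

static int map_shared_info(xen_pfn_t gpfn)
{
	struct xen_add_to_physmap xatp = {
		.domid = DOMID_SELF,
		.space = XENMAPSPACE_shared_info,
		.idx   = 0,
		.gpfn  = gpfn,
	};

	return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
}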
> +
> +/*** REMOVED ***/
> +/*#define XENMEM_translate_gpfn_list  8*/
> +
> +#define XENMEM_add_to_physmap_range 23
> +struct xen_add_to_physmap_range {
> +	/* IN */
> +	/* Which domain to change the mapping for. */
> +	domid_t domid;
> +	u16 space; /* => enum phys_map_space */
> +
> +	/* Number of pages to go through */
> +	u16 size;
> +	domid_t foreign_domid; /* IFF gmfn_foreign */
> +
> +	/* Indexes into space being mapped. */
> +	GUEST_HANDLE(xen_ulong_t)idxs;
> +
> +	/* GPFN in domid where the source mapping page should appear. */
> +	GUEST_HANDLE(xen_pfn_t)gpfns;
> +
> +	/* OUT */
> +
> +	/* Per index error code. */
> +	GUEST_HANDLE(int)errs;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
> +
> +/*
> + * Returns the pseudo-physical memory map as it was when the domain
> + * was started (specified by XENMEM_set_memory_map).
> + * arg == addr of struct xen_memory_map.
> + */
> +#define XENMEM_memory_map           9
> +struct xen_memory_map {
> +	/*
> +	 * On call the number of entries which can be stored in buffer. On
> +	 * return the number of entries which have been stored in
> +	 * buffer.
> +	 */
> +	unsigned int nr_entries;
> +
> +	/*
> +	 * Entries in the buffer are in the same format as returned by the
> +	 * BIOS INT 0x15 EAX=0xE820 call.
> +	 */
> +	GUEST_HANDLE(void)buffer;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
> +
> +/*
> + * Returns the real physical memory map. Passes the same structure as
> + * XENMEM_memory_map.
> + * arg == addr of struct xen_memory_map.
> + */
> +#define XENMEM_machine_memory_map   10
> +
> +/*
> + * Unmaps the page appearing at a particular GPFN from the specified guest's
> + * pseudophysical address space.
> + * arg == addr of xen_remove_from_physmap_t.
> + */
> +#define XENMEM_remove_from_physmap      15
> +struct xen_remove_from_physmap {
> +	/* Which domain to change the mapping for. */
> +	domid_t domid;
> +
> +	/* GPFN of the current mapping of the page. */
> +	xen_pfn_t gpfn;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
> +
> +/*
> + * Get the pages for a particular guest resource, so that they can be
> + * mapped directly by a tools domain.
> + */
> +#define XENMEM_acquire_resource 28
> +struct xen_mem_acquire_resource {
> +	/* IN - The domain whose resource is to be mapped */
> +	domid_t domid;
> +	/* IN - the type of resource */
> +	u16 type;
> +
> +#define XENMEM_resource_ioreq_server 0
> +#define XENMEM_resource_grant_table 1
> +
> +	/*
> +	 * IN - a type-specific resource identifier, which must be zero
> +	 *      unless stated otherwise.
> +	 *
> +	 * type == XENMEM_resource_ioreq_server -> id == ioreq server id
> +	 * type == XENMEM_resource_grant_table -> id defined below
> +	 */
> +	u32 id;
> +
> +#define XENMEM_resource_grant_table_id_shared 0
> +#define XENMEM_resource_grant_table_id_status 1
> +
> +	/* IN/OUT - As an IN parameter number of frames of the resource
> +	 *          to be mapped. However, if the specified value is 0 and
> +	 *          frame_list is NULL then this field will be set to the
> +	 *          maximum value supported by the implementation on return.
> +	 */
> +	u32 nr_frames;
> +	/*
> +	 * OUT - Must be zero on entry. On return this may contain a bitwise
> +	 *       OR of the following values.
> +	 */
> +	u32 flags;
> +
> +	/* The resource pages have been assigned to the calling domain */
> +#define _XENMEM_rsrc_acq_caller_owned 0
> +#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned)
> +
> +	/*
> +	 * IN - the index of the initial frame to be mapped. This parameter
> +	 *      is ignored if nr_frames is 0.
> +	 */
> +	u64 frame;
> +
> +#define XENMEM_resource_ioreq_server_frame_bufioreq 0
> +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
> +
> +	/*
> +	 * IN/OUT - If the tools domain is PV then, upon return, frame_list
> +	 *          will be populated with the MFNs of the resource.
> +	 *          If the tools domain is HVM then it is expected that, on
> +	 *          entry, frame_list will be populated with a list of GFNs
> +	 *          that will be mapped to the MFNs of the resource.
> +	 *          If -EIO is returned then the frame_list has only been
> +	 *          partially mapped and it is up to the caller to unmap all
> +	 *          the GFNs.
> +	 *          This parameter may be NULL if nr_frames is 0.
> +	 */
> +	GUEST_HANDLE(xen_pfn_t)frame_list;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource);
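
A sketch of the "query the maximum" convention described above (illustrative only; the HYPERVISOR_memory_op() wrapper and helper name are assumptions): passing nr_frames == 0 with a NULL frame_list asks how many grant-table frames the implementation can map.

static int grant_table_max_frames(u32 *max)
{
	struct xen_mem_acquire_resource r = {
		.domid = DOMID_SELF,
		.type  = XENMEM_resource_grant_table,
		.id    = XENMEM_resource_grant_table_id_shared,
		/* nr_frames == 0 and frame_list == NULL => return maximum */
	};
	int rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &r);

	if (!rc)
		*max = r.nr_frames;
	return rc;
}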
> +
> +#endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
> new file mode 100644
> index 0000000000..0f12dcf267
> --- /dev/null
> +++ b/include/xen/interface/sched.h
> @@ -0,0 +1,188 @@
> +/******************************************************************************
> + * sched.h
> + *
> + * Scheduler state interactions
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
> + */
> +
> +#ifndef __XEN_PUBLIC_SCHED_H__
> +#define __XEN_PUBLIC_SCHED_H__
> +
> +#include <xen/interface/event_channel.h>
> +
> +/*
> + * Guest Scheduler Operations
> + *
> + * The SCHEDOP interface provides mechanisms for a guest to interact
> + * with the scheduler, including yield, blocking and shutting itself
> + * down.
> + */
> +
> +/*
> + * The prototype for this hypercall is:
> + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
> + *
> + * @cmd == SCHEDOP_??? (scheduler operation).
> + * @arg == Operation-specific extra argument(s), as described below.
> + * ...  == Additional Operation-specific extra arguments, described below.
> + *
> + * Versions of Xen prior to 3.0.2 provided only the following legacy version
> + * of this hypercall, supporting only the commands yield, block and shutdown:
> + *  long sched_op(int cmd, unsigned long arg)
> + * @cmd == SCHEDOP_??? (scheduler operation).
> + * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
> + *      == SHUTDOWN_* code (SCHEDOP_shutdown)
> + *
> + * This legacy version is available to new guests as:
> + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
> + */
> +
> +/*
> + * Voluntarily yield the CPU.
> + * @arg == NULL.
> + */
> +#define SCHEDOP_yield       0
> +
> +/*
> + * Block execution of this VCPU until an event is received for processing.
> + * If called with event upcalls masked, this operation will atomically
> + * reenable event delivery and check for pending events before blocking the
> + * VCPU. This avoids a "wakeup waiting" race.
> + * @arg == NULL.
> + */
> +#define SCHEDOP_block       1
> +
> +/*
> + * Halt execution of this domain (all VCPUs) and notify the system controller.
> + * @arg == pointer to sched_shutdown structure.
> + *
> + * If the sched_shutdown_t reason is SHUTDOWN_suspend then
> + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
> + * of the guest's start info page.  RDX/EDX is the third hypercall
> + * argument.
> + *
> + * In addition, when the reason is SHUTDOWN_suspend, this hypercall
> + * returns 1 if suspend was cancelled or the domain was merely
> + * checkpointed, and 0 if it is resuming in a new domain.
> + */
> +#define SCHEDOP_shutdown    2
> +
> +/*
> + * Poll a set of event-channel ports. Return when one or more are pending. An
> + * optional timeout may be specified.
> + * @arg == pointer to sched_poll structure.
> + */
> +#define SCHEDOP_poll        3
> +
> +/*
> + * Declare a shutdown for another domain. The main use of this function is
> + * in interpreting shutdown requests and reasons for fully-virtualized
> + * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
> + * @arg == pointer to sched_remote_shutdown structure.
> + */
> +#define SCHEDOP_remote_shutdown        4
> +
> +/*
> + * Latch a shutdown code, so that when the domain later shuts down it
> + * reports this code to the control tools.
> + * @arg == sched_shutdown, as for SCHEDOP_shutdown.
> + */
> +#define SCHEDOP_shutdown_code 5
> +
> +/*
> + * Setup, poke and destroy a domain watchdog timer.
> + * @arg == pointer to sched_watchdog structure.
> + * With id == 0, setup a domain watchdog timer to cause domain shutdown
> + *               after timeout, returns watchdog id.
> + * With id != 0 and timeout == 0, destroy domain watchdog timer.
> + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
> + */
> +#define SCHEDOP_watchdog    6
> +
> +/*
> + * Override the current vcpu affinity by pinning it to one physical cpu or
> + * undo this override restoring the previous affinity.
> + * @arg == pointer to sched_pin_override structure.
> + *
> + * A negative pcpu value will undo a previous pin override and restore the
> + * previous cpu affinity.
> + * This call is allowed for the hardware domain only and requires the cpu
> + * to be part of the domain's cpupool.
> + */
> +#define SCHEDOP_pin_override 7
> +
> +struct sched_shutdown {
> +	unsigned int reason; /* SHUTDOWN_* => shutdown reason */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
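
Usage is a one-liner (illustrative sketch, assuming a HYPERVISOR_sched_op() wrapper exists): a clean power-off is a SCHEDOP_shutdown with reason SHUTDOWN_poweroff, defined further down in this file.

static void guest_poweroff(void)
{
	struct sched_shutdown s = { .reason = SHUTDOWN_poweroff };

	HYPERVISOR_sched_op(SCHEDOP_shutdown, &s);
}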
> +
> +struct sched_poll {
> +	GUEST_HANDLE(evtchn_port_t)ports;
> +	unsigned int nr_ports;
> +	u64 timeout;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
> +
> +struct sched_remote_shutdown {
> +	domid_t domain_id;         /* Remote domain ID */
> +	unsigned int reason;       /* SHUTDOWN_* => shutdown reason */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
> +
> +struct sched_watchdog {
> +	u32 id;                /* watchdog ID */
> +	u32 timeout;           /* timeout */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
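
A sketch of the id/timeout rules documented for SCHEDOP_watchdog above (illustrative only; HYPERVISOR_sched_op() and the helper names are assumptions): id == 0 creates a timer and the call returns its id; a non-zero id with a non-zero timeout pokes it.

static int watchdog_arm(u32 seconds)
{
	struct sched_watchdog wd = { .id = 0, .timeout = seconds };

	return HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd); /* new watchdog id */
}

static void watchdog_poke(u32 id, u32 seconds)
{
	struct sched_watchdog wd = { .id = id, .timeout = seconds };

	HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd); /* renews the timeout */
}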
> +
> +struct sched_pin_override {
> +	s32 pcpu;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
> +
> +/*
> + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
> + * software to determine the appropriate action. For the most part, Xen does
> + * not care about the shutdown code.
> + */
> +#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
> +#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
> +#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
> +#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
> +#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
> +
> +/*
> + * Domain asked to perform 'soft reset' for it. The expected behavior is to
> + * reset internal Xen state for the domain returning it to the point where it
> + * was created but leaving the domain's memory contents and vCPU contexts
> + * intact. This will allow the domain to start over and set up all Xen specific
> + * interfaces again.
> + */
> +#define SHUTDOWN_soft_reset 5
> +#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason.             */
> +
> +#endif /* __XEN_PUBLIC_SCHED_H__ */
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> new file mode 100644
> index 0000000000..964daaedfb
> --- /dev/null
> +++ b/include/xen/interface/xen.h
> @@ -0,0 +1,225 @@
> +/******************************************************************************
> + * xen.h
> + *
> + * Guest OS interface to Xen.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2004, K A Fraser
> + */
> +
> +#ifndef __XEN_PUBLIC_XEN_H__
> +#define __XEN_PUBLIC_XEN_H__
> +
> +#include <xen/arm/interface.h>
> +
> +/*
> + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
> + */
> +
> +/*
> + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
> + *         EAX = return value
> + *         (argument registers may be clobbered on return)
> + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
> + *         RAX = return value
> + *         (argument registers not clobbered on return; RCX, R11 are)
> + */
> +#define __HYPERVISOR_set_trap_table        0
> +#define __HYPERVISOR_mmu_update            1
> +#define __HYPERVISOR_set_gdt               2
> +#define __HYPERVISOR_stack_switch          3
> +#define __HYPERVISOR_set_callbacks         4
> +#define __HYPERVISOR_fpu_taskswitch        5
> +#define __HYPERVISOR_sched_op_compat       6
> +#define __HYPERVISOR_platform_op           7
> +#define __HYPERVISOR_set_debugreg          8
> +#define __HYPERVISOR_get_debugreg          9
> +#define __HYPERVISOR_update_descriptor    10
> +#define __HYPERVISOR_memory_op            12
> +#define __HYPERVISOR_multicall            13
> +#define __HYPERVISOR_update_va_mapping    14
> +#define __HYPERVISOR_set_timer_op         15
> +#define __HYPERVISOR_event_channel_op_compat 16
> +#define __HYPERVISOR_xen_version          17
> +#define __HYPERVISOR_console_io           18
> +#define __HYPERVISOR_physdev_op_compat    19
> +#define __HYPERVISOR_grant_table_op       20
> +#define __HYPERVISOR_vm_assist            21
> +#define __HYPERVISOR_update_va_mapping_otherdomain 22
> +#define __HYPERVISOR_iret                 23 /* x86 only */
> +#define __HYPERVISOR_vcpu_op              24
> +#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
> +#define __HYPERVISOR_mmuext_op            26
> +#define __HYPERVISOR_xsm_op               27
> +#define __HYPERVISOR_nmi_op               28
> +#define __HYPERVISOR_sched_op             29
> +#define __HYPERVISOR_callback_op          30
> +#define __HYPERVISOR_xenoprof_op          31
> +#define __HYPERVISOR_event_channel_op     32
> +#define __HYPERVISOR_physdev_op           33
> +#define __HYPERVISOR_hvm_op               34
> +#define __HYPERVISOR_sysctl               35
> +#define __HYPERVISOR_domctl               36
> +#define __HYPERVISOR_kexec_op             37
> +#define __HYPERVISOR_tmem_op              38
> +#define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
> +#define __HYPERVISOR_xenpmu_op            40
> +#define __HYPERVISOR_dm_op                41
> +
> +/* Architecture-specific hypercall definitions. */
> +#define __HYPERVISOR_arch_0               48
> +#define __HYPERVISOR_arch_1               49
> +#define __HYPERVISOR_arch_2               50
> +#define __HYPERVISOR_arch_3               51
> +#define __HYPERVISOR_arch_4               52
> +#define __HYPERVISOR_arch_5               53
> +#define __HYPERVISOR_arch_6               54
> +#define __HYPERVISOR_arch_7               55
> +
> +#ifndef __ASSEMBLY__
> +
> +typedef u16 domid_t;
> +
> +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
> +#define DOMID_FIRST_RESERVED (0x7FF0U)
> +
> +/* DOMID_SELF is used in certain contexts to refer to oneself. */
> +#define DOMID_SELF (0x7FF0U)
> +
> +/*
> + * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
> + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
> + * is useful to ensure that no mappings to the OS's own heap are accidentally
> + * installed. (e.g., in Linux this could cause havoc as reference counts
> + * aren't adjusted on the I/O-mapping code path).
> + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
> + * be specified by any calling domain.
> + */
> +#define DOMID_IO   (0x7FF1U)
> +
> +/*
> + * DOMID_XEN is used to allow privileged domains to map restricted parts of
> + * Xen's heap space (e.g., the machine_to_phys table).
> + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
> + * the caller is privileged.
> + */
> +#define DOMID_XEN  (0x7FF2U)
> +
> +/* DOMID_COW is used as the owner of sharable pages */
> +#define DOMID_COW  (0x7FF3U)
> +
> +/* DOMID_INVALID is used to identify pages with unknown owner. */
> +#define DOMID_INVALID (0x7FF4U)
> +
> +/* Idle domain. */
> +#define DOMID_IDLE (0x7FFFU)
> +
> +struct vcpu_info {
> +	/*
> +	 * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
> +	 * a pending notification for a particular VCPU. It is then cleared
> +	 * by the guest OS /before/ checking for pending work, thus avoiding
> +	 * a set-and-check race. Note that the mask is only accessed by Xen
> +	 * on the CPU that is currently hosting the VCPU. This means that the
> +	 * pending and mask flags can be updated by the guest without special
> +	 * synchronisation (i.e., no need for the x86 LOCK prefix).
> +	 * This may seem suboptimal because if the pending flag is set by
> +	 * a different CPU then an IPI may be scheduled even when the mask
> +	 * is set. However, note:
> +	 *  1. The task of 'interrupt holdoff' is covered by the per-event-
> +	 *     channel mask bits. A 'noisy' event that is continually being
> +	 *     triggered can be masked at source at this very precise
> +	 *     granularity.
> +	 *  2. The main purpose of the per-VCPU mask is therefore to restrict
> +	 *     reentrant execution: whether for concurrency control, or to
> +	 *     prevent unbounded stack usage. Whatever the purpose, we expect
> +	 *     that the mask will be asserted only for short periods at a time,
> +	 *     and so the likelihood of a 'spurious' IPI is suitably small.
> +	 * The mask is read before making an event upcall to the guest: a
> +	 * non-zero mask therefore guarantees that the VCPU will not receive
> +	 * an upcall activation. The mask is cleared when the VCPU requests
> +	 * to block: this avoids wakeup-waiting races.
> +	 */
> +	u8 evtchn_upcall_pending;
> +	u8 evtchn_upcall_mask;
> +	xen_ulong_t evtchn_pending_sel;
> +	struct arch_vcpu_info arch;
> +	struct pvclock_vcpu_time_info time;
> +}; /* 64 bytes (x86) */
> +
> +/*
> + * Xen/kernel shared data -- pointer provided in start_info.
> + * NB. We expect that this struct is smaller than a page.
> + */
> +struct shared_info {
> +	struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
> +
> +	/*
> +	 * A domain can create "event channels" on which it can send and receive
> +	 * asynchronous event notifications. There are three classes of event that
> +	 * are delivered by this mechanism:
> +	 *  1. Bi-directional inter- and intra-domain connections. Domains must
> +	 *     arrange out-of-band to set up a connection (usually by allocating
> +	 *     an unbound 'listener' port and advertising that via a storage
> +	 *     service such as xenstore).
> +	 *  2. Physical interrupts. A domain with suitable hardware-access
> +	 *     privileges can bind an event-channel port to a physical interrupt
> +	 *     source.
> +	 *  3. Virtual interrupts ('events'). A domain can bind an event-channel
> +	 *     port to a virtual interrupt source, such as the virtual-timer
> +	 *     device or the emergency console.
> +	 *
> +	 * Event channels are addressed by a "port index". Each channel is
> +	 * associated with two bits of information:
> +	 *  1. PENDING -- notifies the domain that there is a pending notification
> +	 *     to be processed. This bit is cleared by the guest.
> +	 *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
> +	 *     will cause an asynchronous upcall to be scheduled. This bit is only
> +	 *     updated by the guest. It is read-only within Xen. If a channel
> +	 *     becomes pending while the channel is masked then the 'edge' is lost
> +	 *     (i.e., when the channel is unmasked, the guest must manually handle
> +	 *     pending notifications as no upcall will be scheduled by Xen).
> +	 *
> +	 * To expedite scanning of pending notifications, any 0->1 pending
> +	 * transition on an unmasked channel causes a corresponding bit in a
> +	 * per-vcpu selector word to be set. Each bit in the selector covers a
> +	 * 'C long' in the PENDING bitfield array.
> +	 */
> +	xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
> +	xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
> +
> +	/*
> +	 * Wallclock time: updated only by control software. Guests should base
> +	 * their gettimeofday() syscall on this wallclock-base value.
> +	 */
> +	struct pvclock_wall_clock wc;
> +
> +	struct arch_shared_info arch;
> +
> +};
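
The two-level scan described in the comment looks roughly like this (illustrative sketch for vcpu 0 only; a real handler would clear the selector with an atomic xchg and also honour evtchn_upcall_pending/mask):

static void scan_pending_events(struct shared_info *s)
{
	struct vcpu_info *v = &s->vcpu_info[0];
	xen_ulong_t sel = v->evtchn_pending_sel;
	unsigned int w, word_bits = sizeof(xen_ulong_t) * 8;

	v->evtchn_pending_sel = 0;
	for (w = 0; w < word_bits; w++) {
		xen_ulong_t pending;

		if (!(sel & ((xen_ulong_t)1 << w)))
			continue;
		pending = s->evtchn_pending[w] & ~s->evtchn_mask[w];
		/* each set bit b in 'pending' is port w * word_bits + b */
		(void)pending;
	}
}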
> +
> +#else /* __ASSEMBLY__ */
> +
> +/* In assembly code we cannot use C numeric constant suffixes. */
> +#define mk_unsigned_long(x) x
> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __XEN_PUBLIC_XEN_H__ */
> --
> 2.17.1
Anastasiia Lukianenko July 3, 2020, 12:46 p.m. UTC | #2
Hi Peng,

On Thu, 2020-07-02 at 01:30 +0000, Peng Fan wrote:
> > Subject: [PATCH 04/17] xen: Add essential and required interface
> > headers
> > 
> > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> > 
> > Add essential and required Xen interface headers only taken from
> > the stable Linux kernel stable/linux-5.7.y at commit
> > 66dfe45221605e11f38a0bf5eb2ee808cea7cfe7.
> 
> Please use commit <12+> ("commit header")

Ok, will fix it in the next version.

> 
> > 
> > These are better suited for U-boot than the original headers
> > from Xen as they are the stripped versions of the same.
> > 
> > At the same time use public protocols from Xen RELEASE-4.13.1, at
> > commit 6278553325a9f76d37811923221b21db3882e017
> 
> Please use commit <12+> ("commit header")

Ok, will fix it in the next version.

> 
> Then:
> 
> Acked-by: Peng Fan <peng.fan@nxp.com>

Regards,
Anastasiia

> 
> > as those have more comments in them.
> > 
> > Signed-off-by: Oleksandr Andrushchenko
> > <oleksandr_andrushchenko@epam.com>
> > Signed-off-by: Anastasiia Lukianenko <anastasiia_lukianenko@epam.com>
> > ---
> >  include/xen/arm/interface.h           |  88 ++++
> >  include/xen/interface/event_channel.h | 281 ++++++++++
> >  include/xen/interface/grant_table.h   | 582 +++++++++++++++++++++
> >  include/xen/interface/hvm/hvm_op.h    |  69 +++
> >  include/xen/interface/hvm/params.h    | 127 +++++
> >  include/xen/interface/io/blkif.h      | 726 ++++++++++++++++++++++++++
> >  include/xen/interface/io/console.h    |  56 ++
> >  include/xen/interface/io/protocols.h  |  42 ++
> >  include/xen/interface/io/ring.h       | 479 +++++++++++++++++
> >  include/xen/interface/io/xenbus.h     |  81 +++
> >  include/xen/interface/io/xs_wire.h    | 151 ++++++
> >  include/xen/interface/memory.h        | 332 ++++++++++++
> >  include/xen/interface/sched.h         | 188 +++++++
> >  include/xen/interface/xen.h           | 225 ++++++++
> >  14 files changed, 3427 insertions(+)
> >  create mode 100644 include/xen/arm/interface.h
> >  create mode 100644 include/xen/interface/event_channel.h
> >  create mode 100644 include/xen/interface/grant_table.h
> >  create mode 100644 include/xen/interface/hvm/hvm_op.h
> >  create mode 100644 include/xen/interface/hvm/params.h
> >  create mode 100644 include/xen/interface/io/blkif.h
> >  create mode 100644 include/xen/interface/io/console.h
> >  create mode 100644 include/xen/interface/io/protocols.h
> >  create mode 100644 include/xen/interface/io/ring.h
> >  create mode 100644 include/xen/interface/io/xenbus.h
> >  create mode 100644 include/xen/interface/io/xs_wire.h
> >  create mode 100644 include/xen/interface/memory.h
> >  create mode 100644 include/xen/interface/sched.h
> >  create mode 100644 include/xen/interface/xen.h
> > 
> > diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h
> > new file mode 100644
> > index 0000000000..79d5ae8563
> > --- /dev/null
> > +++ b/include/xen/arm/interface.h
> > @@ -0,0 +1,88 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/******************************************************************************
> > + * Guest OS interface to ARM Xen.
> > + *
> > + * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix,
> > 2012
> > + */
> > +
> > +#ifndef _ASM_ARM_XEN_INTERFACE_H
> > +#define _ASM_ARM_XEN_INTERFACE_H
> > +
> > +#ifndef __ASSEMBLY__
> > +#include <linux/types.h>
> > +#endif
> > +
> > +#define uint64_aligned_t u64 __attribute__((aligned(8)))
> > +
> > +#define __DEFINE_GUEST_HANDLE(name, type) \
> > +	typedef struct { union { type *p; uint64_aligned_t q; }; }  \
> > +		__guest_handle_ ## name
> > +
> > +#define DEFINE_GUEST_HANDLE_STRUCT(name) \
> > +	__DEFINE_GUEST_HANDLE(name, struct name)
> > +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
> > +#define GUEST_HANDLE(name)        __guest_handle_ ## name
> > +
> > +#define set_xen_guest_handle(hnd, val)			\
> > +	do {						\
> > +		if (sizeof(hnd) == 8)			\
> > +			*(u64 *)&(hnd) = 0;	\
> > +		(hnd).p = val;				\
> > +	} while (0)
> > +
> > +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
> > +
> > +#ifndef __ASSEMBLY__
> > +/* Explicitly size integers that represent pfns in the interface with
> > + * Xen so that we can have one ABI that works for 32 and 64 bit guests.
> > + * Note that this means that the xen_pfn_t type may be capable of
> > + * representing pfn's which the guest cannot represent in its own pfn
> > + * type. However since pfn space is controlled by the guest this is
> > + * fine since it simply wouldn't be able to create any such pfns in
> > + * the first place.
> > + */
> > +typedef u64 xen_pfn_t;
> > +#define PRI_xen_pfn "llx"
> > +typedef u64 xen_ulong_t;
> > +#define PRI_xen_ulong "llx"
> > +typedef s64 xen_long_t;
> > +#define PRI_xen_long "llx"
> > +/* Guest handles for primitive C types. */
> > +__DEFINE_GUEST_HANDLE(uchar, unsigned char);
> > +__DEFINE_GUEST_HANDLE(uint,  unsigned int);
> > +DEFINE_GUEST_HANDLE(char);
> > +DEFINE_GUEST_HANDLE(int);
> > +DEFINE_GUEST_HANDLE(void);
> > +DEFINE_GUEST_HANDLE(u64);
> > +DEFINE_GUEST_HANDLE(u32);
> > +DEFINE_GUEST_HANDLE(xen_pfn_t);
> > +DEFINE_GUEST_HANDLE(xen_ulong_t);
> > +
> > +/* Maximum number of virtual CPUs in multi-processor guests. */
> > +#define MAX_VIRT_CPUS 1
> > +
> > +struct arch_vcpu_info { };
> > +struct arch_shared_info { };
> > +
> > +/* TODO: Move pvclock definitions some place arch independent */
> > +struct pvclock_vcpu_time_info {
> > +	u32   version;
> > +	u32   pad0;
> > +	u64   tsc_timestamp;
> > +	u64   system_time;
> > +	u32   tsc_to_system_mul;
> > +	s8    tsc_shift;
> > +	u8    flags;
> > +	u8    pad[2];
> > +} __attribute__((__packed__)); /* 32 bytes */
> > +
> > +/* It is OK to have a 12 bytes struct with no padding because it
> > is packed */
> > +struct pvclock_wall_clock {
> > +	u32   version;
> > +	u32   sec;
> > +	u32   nsec;
> > +	u32   sec_hi;
> > +} __attribute__((__packed__));
> > +#endif
> > +
> > +#endif /* _ASM_ARM_XEN_INTERFACE_H */
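
To illustrate the handle macros above (sketch only, not from the patch; the helper name is hypothetical): a guest handle is zeroed across its full 64 bits before the pointer is stored, so the layout is well defined for 32-bit guests too.

static void handle_example(void)
{
	static xen_pfn_t pfns[4];
	GUEST_HANDLE(xen_pfn_t) h;

	set_xen_guest_handle(h, pfns);	/* zero all 64 bits, then store p */
	(void)h;	/* normally embedded in a hypercall argument struct */
}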
> > diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
> > new file mode 100644
> > index 0000000000..8174999c2f
> > --- /dev/null
> > +++ b/include/xen/interface/event_channel.h
> > @@ -0,0 +1,281 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/******************************************************************************
> > + * event_channel.h
> > + *
> > + * Event channels between domains.
> > + *
> > + * Copyright (c) 2003-2004, K A Fraser.
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
> > +#define __XEN_PUBLIC_EVENT_CHANNEL_H__
> > +
> > +#include <xen/interface/xen.h>
> > +
> > +typedef u32 evtchn_port_t;
> > +DEFINE_GUEST_HANDLE(evtchn_port_t);
> > +
> > +/*
> > + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
> > + * accepting interdomain bindings from domain <remote_dom>. A fresh port
> > + * is allocated in <dom> and returned as <port>.
> > + * NOTES:
> > + *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
> > + *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
> > + */
> > +#define EVTCHNOP_alloc_unbound	  6
> > +struct evtchn_alloc_unbound {
> > +	/* IN parameters */
> > +	domid_t dom, remote_dom;
> > +	/* OUT parameters */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel
> > + * between the calling domain and <remote_dom>. <remote_dom,remote_port>
> > + * must identify a port that is unbound and marked as accepting bindings
> > + * from the calling domain. A fresh port is allocated in the calling
> > + * domain and returned as <local_port>.
> > + * NOTES:
> > + *  1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
> > + */
> > +#define EVTCHNOP_bind_interdomain 0
> > +struct evtchn_bind_interdomain {
> > +	/* IN parameters. */
> > +	domid_t remote_dom;
> > +	evtchn_port_t remote_port;
> > +	/* OUT parameters. */
> > +	evtchn_port_t local_port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on
> > + * specified vcpu.
> > + * NOTES:
> > + *  1. A virtual IRQ may be bound to at most one event channel per vcpu.
> > + *  2. The allocated event channel is bound to the specified vcpu. The
> > + *     binding may not be changed.
> > + */
> > +#define EVTCHNOP_bind_virq	  1
> > +struct evtchn_bind_virq {
> > +	/* IN parameters. */
> > +	u32 virq;
> > +	u32 vcpu;
> > +	/* OUT parameters. */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
> > + * NOTES:
> > + *  1. A physical IRQ may be bound to at most one event channel per
> > + *     domain.
> > + *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
> > + */
> > +#define EVTCHNOP_bind_pirq	  2
> > +struct evtchn_bind_pirq {
> > +	/* IN parameters. */
> > +	u32 pirq;
> > +#define BIND_PIRQ__WILL_SHARE 1
> > +	u32 flags; /* BIND_PIRQ__* */
> > +	/* OUT parameters. */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
> > + * NOTES:
> > + *  1. The allocated event channel is bound to the specified vcpu. The
> > + *     binding may not be changed.
> > + */
> > +#define EVTCHNOP_bind_ipi	  7
> > +struct evtchn_bind_ipi {
> > +	u32 vcpu;
> > +	/* OUT parameters. */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_close: Close a local event channel <port>. If the channel is
> > + * interdomain then the remote end is placed in the unbound state
> > + * (EVTCHNSTAT_unbound), awaiting a new connection.
> > + */
> > +#define EVTCHNOP_close		  3
> > +struct evtchn_close {
> > +	/* IN parameters. */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_send: Send an event to the remote end of the channel whose
> > + * local endpoint is <port>.
> > + */
> > +#define EVTCHNOP_send		  4
> > +struct evtchn_send {
> > +	/* IN parameters. */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_status: Get the current status of the communication channel
> > + * which has an endpoint at <dom, port>.
> > + * NOTES:
> > + *  1. <dom> may be specified as DOMID_SELF.
> > + *  2. Only a sufficiently-privileged domain may obtain the status of an
> > + *     event channel for which <dom> is not DOMID_SELF.
> > + */
> > +#define EVTCHNOP_status		  5
> > +struct evtchn_status {
> > +	/* IN parameters */
> > +	domid_t  dom;
> > +	evtchn_port_t port;
> > +	/* OUT parameters */
> > +#define EVTCHNSTAT_closed	0  /* Channel is not in use.		     */
> > +#define EVTCHNSTAT_unbound	1  /* Channel is waiting interdom connection.*/
> > +#define EVTCHNSTAT_interdomain	2  /* Channel is connected to remote domain. */
> > +#define EVTCHNSTAT_pirq		3  /* Channel is bound to a phys IRQ line.   */
> > +#define EVTCHNSTAT_virq		4  /* Channel is bound to a virtual IRQ line */
> > +#define EVTCHNSTAT_ipi		5  /* Channel is bound to a virtual IPI line */
> > +	u32 status;
> > +	u32 vcpu;		   /* VCPU to which this channel is bound.   */
> > +	union {
> > +		struct {
> > +			domid_t dom;
> > +		} unbound; /* EVTCHNSTAT_unbound */
> > +		struct {
> > +			domid_t dom;
> > +			evtchn_port_t port;
> > +		} interdomain; /* EVTCHNSTAT_interdomain */
> > +		u32 pirq;	    /* EVTCHNSTAT_pirq	      */
> > +		u32 virq;	    /* EVTCHNSTAT_virq	      */
> > +	} u;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when
> > + * an event is pending.
> > + * NOTES:
> > + *  1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
> > + *     the binding. This binding cannot be changed.
> > + *  2. All other channels notify vcpu0 by default. This default is set when
> > + *     the channel is allocated (a port that is freed and subsequently
> > + *     reused has its binding reset to vcpu0).
> > + */
> > +#define EVTCHNOP_bind_vcpu	  8
> > +struct evtchn_bind_vcpu {
> > +	/* IN parameters. */
> > +	evtchn_port_t port;
> > +	u32 vcpu;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_unmask: Unmask the specified local event-channel port and
> > + * deliver a notification to the appropriate VCPU if an event is pending.
> > + */
> > +#define EVTCHNOP_unmask		  9
> > +struct evtchn_unmask {
> > +	/* IN parameters. */
> > +	evtchn_port_t port;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_reset: Close all event channels associated with specified
> > + * domain.
> > + * NOTES:
> > + *  1. <dom> may be specified as DOMID_SELF.
> > + *  2. Only a sufficiently-privileged domain may specify other than
> > + *     DOMID_SELF.
> > + */
> > +#define EVTCHNOP_reset		 10
> > +struct evtchn_reset {
> > +	/* IN parameters. */
> > +	domid_t dom;
> > +};
> > +
> > +typedef struct evtchn_reset evtchn_reset_t;
> > +
> > +/*
> > + * EVTCHNOP_init_control: initialize the control block for the
> > FIFO ABI.
> > + */
> > +#define EVTCHNOP_init_control    11
> > +struct evtchn_init_control {
> > +	/* IN parameters. */
> > +	u64 control_gfn;
> > +	u32 offset;
> > +	u32 vcpu;
> > +	/* OUT parameters. */
> > +	u8 link_bits;
> > +	u8 _pad[7];
> > +};
> > +
> > +/*
> > + * EVTCHNOP_expand_array: add an additional page to the event array.
> > + */
> > +#define EVTCHNOP_expand_array    12
> > +struct evtchn_expand_array {
> > +	/* IN parameters. */
> > +	u64 array_gfn;
> > +};
> > +
> > +/*
> > + * EVTCHNOP_set_priority: set the priority for an event channel.
> > + */
> > +#define EVTCHNOP_set_priority    13
> > +struct evtchn_set_priority {
> > +	/* IN parameters. */
> > +	evtchn_port_t port;
> > +	u32 priority;
> > +};
> > +
> > +struct evtchn_op {
> > +	u32 cmd; /* EVTCHNOP_* */
> > +	union {
> > +		struct evtchn_alloc_unbound    alloc_unbound;
> > +		struct evtchn_bind_interdomain bind_interdomain;
> > +		struct evtchn_bind_virq	       bind_virq;
> > +		struct evtchn_bind_pirq	       bind_pirq;
> > +		struct evtchn_bind_ipi	       bind_ipi;
> > +		struct evtchn_close	       close;
> > +		struct evtchn_send	       send;
> > +		struct evtchn_status	       status;
> > +		struct evtchn_bind_vcpu	       bind_vcpu;
> > +		struct evtchn_unmask	       unmask;
> > +	} u;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
> > +
> > +/*
> > + * 2-level ABI
> > + */
> > +
> > +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
> > +
> > +/*
> > + * FIFO ABI
> > + */
> > +
> > +/* Events may have priorities from 0 (highest) to 15 (lowest). */
> > +#define EVTCHN_FIFO_PRIORITY_MAX     0
> > +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
> > +#define EVTCHN_FIFO_PRIORITY_MIN     15
> > +
> > +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
> > +
> > +typedef u32 event_word_t;
> > +
> > +#define EVTCHN_FIFO_PENDING 31
> > +#define EVTCHN_FIFO_MASKED  30
> > +#define EVTCHN_FIFO_LINKED  29
> > +#define EVTCHN_FIFO_BUSY    28
> > +
> > +#define EVTCHN_FIFO_LINK_BITS 17
> > +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
> > +
> > +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
> > +
> > +struct evtchn_fifo_control_block {
> > +	u32     ready;
> > +	u32     _rsvd;
> > +	event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
> > +};
> > +
> > +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
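
Putting the two most common operations together (illustrative sketch, not from the patch; the HYPERVISOR_event_channel_op() wrapper and helper name are assumptions): allocate an unbound port for a backend domain, then notify it.

static int alloc_and_notify(domid_t backend, evtchn_port_t *port)
{
	struct evtchn_alloc_unbound alloc = {
		.dom        = DOMID_SELF,
		.remote_dom = backend,
	};
	struct evtchn_send send;
	int rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);

	if (rc)
		return rc;
	*port = alloc.port;	/* advertise this port, e.g. via xenstore */
	send.port = *port;
	return HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}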
> > diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
> > new file mode 100644
> > index 0000000000..197a0d0d58
> > --- /dev/null
> > +++ b/include/xen/interface/grant_table.h
> > @@ -0,0 +1,582 @@
> > +/******************************************************************************
> > + * grant_table.h
> > + *
> > + * Interface for granting foreign access to page frames, and receiving
> > + * page-ownership transfers.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2004, K A Fraser
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
> > +#define __XEN_PUBLIC_GRANT_TABLE_H__
> > +
> > +#include <xen/interface/xen.h>
> > +
> > +/***********************************
> > + * GRANT TABLE REPRESENTATION
> > + */
> > +
> > +/* Some rough guidelines on accessing and updating grant-table entries
> > + * in a concurrency-safe manner. For more information, Linux contains a
> > + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
> > + *
> > + * NB. WMB is a no-op on current-generation x86 processors. However, a
> > + *     compiler barrier will still be required.
> > + *
> > + * Introducing a valid entry into the grant table:
> > + *  1. Write ent->domid.
> > + *  2. Write ent->frame:
> > + *      GTF_permit_access:   Frame to which access is permitted.
> > + *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
> > + *                           frame, or zero if none.
> > + *  3. Write memory barrier (WMB).
> > + *  4. Write ent->flags, inc. valid type.
> > + *
> > + * Invalidating an unused GTF_permit_access entry:
> > + *  1. flags = ent->flags.
> > + *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
> > + *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
> > + *  NB. No need for WMB as reuse of entry is control-dependent on success
> > + *      of step 3, and all architectures guarantee ordering of ctrl-dep
> > + *      writes.
> > + *
> > + * Invalidating an in-use GTF_permit_access entry:
> > + *  This cannot be done directly. Request assistance from the domain
> > + *  controller which can set a timeout on the use of a grant entry and
> > + *  take necessary action. (NB. This is not yet implemented!).
> > + *
> > + * Invalidating an unused GTF_accept_transfer entry:
> > + *  1. flags = ent->flags.
> > + *  2. Observe that !(flags & GTF_transfer_committed). [*]
> > + *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
> > + *  NB. No need for WMB as reuse of entry is control-dependent on success
> > + *      of step 3, and all architectures guarantee ordering of ctrl-dep
> > + *      writes.
> > + *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
> > + *      The guest must /not/ modify the grant entry until the address of
> > + *      the transferred frame is written. It is safe for the guest to spin
> > + *      waiting for this to occur (detect by observing
> > + *      GTF_transfer_completed in ent->flags).
> > + *
> > + * Invalidating a committed GTF_accept_transfer entry:
> > + *  1. Wait for (ent->flags & GTF_transfer_completed).
> > + *
> > + * Changing a GTF_permit_access from writable to read-only:
> > + *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
> > + *
> > + * Changing a GTF_permit_access from read-only to writable:
> > + *  Use SMP-safe bit-setting instruction.
> > + */
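
Steps 1-4 of "Introducing a valid entry" above translate almost literally into code (illustrative sketch, not from the patch; wmb() and a mapped v1 table are assumptions):

static void grant_access(struct grant_entry_v1 *ent, domid_t dom,
			 u32 frame, int readonly)
{
	ent->domid = dom;			/* step 1 */
	ent->frame = frame;			/* step 2 */
	wmb();					/* step 3: data before flags */
	ent->flags = GTF_permit_access |	/* step 4: valid type last */
		     (readonly ? GTF_readonly : 0);
}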
> > +
> > +/*
> > + * Reference to a grant entry in a specified domain's grant table.
> > + */
> > +typedef u32 grant_ref_t;
> > +
> > +/*
> > + * A grant table comprises a packed array of grant entries in one or more
> > + * page frames shared between Xen and a guest.
> > + * [XEN]: This field is written by Xen and read by the sharing guest.
> > + */
> > +
> > +/*
> > + * Version 1 of the grant table entry structure is maintained
> > purely
> > + * for backwards compatibility.  New guests should use version 2.
> > + */
> > +struct grant_entry_v1 {
> > +	/* GTF_xxx: various type and flag information.  [XEN,GST] */
> > +	u16 flags;
> > +	/* The domain being granted foreign privileges. [GST] */
> > +	domid_t  domid;
> > +	/*
> > +	 * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
> > +	 * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
> > +	 */
> > +	u32 frame;
> > +};
> > +
> > +/*
> > + * Type of grant entry.
> > + *  GTF_invalid: This grant entry grants no privileges.
> > + *  GTF_permit_access: Allow @domid to map/access @frame.
> > + *  GTF_accept_transfer: Allow @domid to transfer ownership of one page
> > + *                       frame to this guest. Xen writes the page number
> > + *                       to @frame.
> > + *  GTF_transitive: Allow @domid to transitively access a subrange of
> > + *                  @trans_grant in @trans_domid.  No mappings are allowed.
> > + */
> > +#define GTF_invalid         (0U << 0)
> > +#define GTF_permit_access   (1U << 0)
> > +#define GTF_accept_transfer (2U << 0)
> > +#define GTF_transitive      (3U << 0)
> > +#define GTF_type_mask       (3U << 0)
> > +
> > +/*
> > + * Subflags for GTF_permit_access.
> > + *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
> > + *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
> > + *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
> > + *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
> > + *                will only be allowed to copy from the grant, and not
> > + *                map it. [GST]
> > + */
> > +#define _GTF_readonly       (2)
> > +#define GTF_readonly        (1U << _GTF_readonly)
> > +#define _GTF_reading        (3)
> > +#define GTF_reading         (1U << _GTF_reading)
> > +#define _GTF_writing        (4)
> > +#define GTF_writing         (1U << _GTF_writing)
> > +#define _GTF_sub_page       (8)
> > +#define GTF_sub_page        (1U << _GTF_sub_page)
> > +
> > +/*
> > + * Subflags for GTF_accept_transfer:
> > + *  GTF_transfer_committed: Xen sets this flag to indicate that it is
> > + *      committed to transferring ownership of a page frame. When a guest
> > + *      sees this flag it must /not/ modify the grant entry until
> > + *      GTF_transfer_completed is set by Xen.
> > + *  GTF_transfer_completed: It is safe for the guest to spin-wait on this
> > + *      flag after reading GTF_transfer_committed. Xen will always write
> > + *      the frame address, followed by ORing this flag, in a timely manner.
> > + */
> > +#define _GTF_transfer_committed (2)
> > +#define GTF_transfer_committed  (1U << _GTF_transfer_committed)
> > +#define _GTF_transfer_completed (3)
> > +#define GTF_transfer_completed  (1U << _GTF_transfer_completed)
> > +
> > +/*
> > + * Version 2 grant table entries.  These fulfil the same role as
> > + * version 1 entries, but can represent more complicated operations.
> > + * Any given domain will have either a version 1 or a version 2 table,
> > + * and every entry in the table will be the same version.
> > + *
> > + * The interface by which domains use grant references does not depend
> > + * on the grant table version in use by the other domain.
> > + */
> > +
> > +/*
> > + * Version 1 and version 2 grant entries share a common prefix.  The
> > + * fields of the prefix are documented as part of struct
> > + * grant_entry_v1.
> > + */
> > +struct grant_entry_header {
> > +	u16 flags;
> > +	domid_t  domid;
> > +};
> > +
> > +/*
> > + * Version 2 of the grant entry structure. This is a union because three
> > + * different types are supported: full_page, sub_page and transitive.
> > + */
> > +union grant_entry_v2 {
> > +	struct grant_entry_header hdr;
> > +
> > +	/*
> > +	 * This member is used for V1-style full page grants, where either:
> > +	 *
> > +	 * -- hdr.type is GTF_accept_transfer, or
> > +	 * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
> > +	 *
> > +	 * In that case, the frame field has the same semantics as the
> > +	 * field of the same name in the V1 entry structure.
> > +	 */
> > +	struct {
> > +	struct grant_entry_header hdr;
> > +	u32 pad0;
> > +	u64 frame;
> > +	} full_page;
> > +
> > +	/*
> > +	 * If the grant type is GTF_permit_access and GTF_sub_page is set,
> > +	 * @domid is allowed to access bytes [@page_off,@page_off+@length)
> > +	 * in frame @frame.
> > +	 */
> > +	struct {
> > +	struct grant_entry_header hdr;
> > +	u16 page_off;
> > +	u16 length;
> > +	u64 frame;
> > +	} sub_page;
> > +
> > +	/*
> > +	 * If the grant is GTF_transitive, @domid is allowed to use the
> > +	 * grant @gref in domain @trans_domid, as if it was the local
> > +	 * domain.  Obviously, the transitive access must be compatible
> > +	 * with the original grant.
> > +	 */
> > +	struct {
> > +	struct grant_entry_header hdr;
> > +	domid_t trans_domid;
> > +	u16 pad0;
> > +	grant_ref_t gref;
> > +	} transitive;
> > +
> > +	u32 __spacer[4]; /* Pad to a power of two */
> > +};
> > +
> > +typedef u16 grant_status_t;
> > +
> > +/***********************************
> > + * GRANT TABLE QUERIES AND USES
> > + */
> > +
> > +/*
> > + * Handle to track a mapping created via a grant reference.
> > + */
> > +typedef u32 grant_handle_t;
> > +
> > +/*
> > + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
> > + * by devices and/or host CPUs. If successful, <handle> is a tracking number
> > + * that must be presented later to destroy the mapping(s). On error, <handle>
> > + * is a negative status code.
> > + * NOTES:
> > + *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
> > + *     via which I/O devices may access the granted frame.
> > + *  2. If GNTMAP_host_map is specified then a mapping will be added at
> > + *     either a host virtual address in the current address space, or at
> > + *     a PTE at the specified machine address.  The type of mapping to
> > + *     perform is selected through the GNTMAP_contains_pte flag, and the
> > + *     address is specified in <host_addr>.
> > + *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref.
> > + *     If a host mapping is destroyed by other means then it is *NOT*
> > + *     guaranteed to be accounted to the correct grant reference!
> > + */
> > +#define GNTTABOP_map_grant_ref        0
> > +struct gnttab_map_grant_ref {
> > +	/* IN parameters. */
> > +	u64 host_addr;
> > +	u32 flags;               /* GNTMAP_* */
> > +	grant_ref_t ref;
> > +	domid_t  dom;
> > +	/* OUT parameters. */
> > +	s16  status;              /* GNTST_* */
> > +	grant_handle_t handle;
> > +	u64 dev_bus_addr;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
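
A sketch of a single host mapping (illustrative only, not from the patch; the HYPERVISOR_grant_table_op() wrapper and the GNTMAP_host_map flag come from elsewhere in this series and are assumptions here):

static int map_foreign_page(domid_t dom, grant_ref_t ref, void *vaddr,
			    grant_handle_t *handle)
{
	struct gnttab_map_grant_ref op = {
		.host_addr = (u64)(unsigned long)vaddr,
		.flags     = GNTMAP_host_map,
		.ref       = ref,
		.dom       = dom,
	};

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		return -1;		/* the hypercall itself failed */
	if (op.status)
		return op.status;	/* GNTST_* error from Xen */
	*handle = op.handle;
	return 0;
}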
> > +
> > +/*
> > + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
> > + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
> > + * field is ignored. If non-zero, they must refer to a device/host mapping
> > + * that is tracked by <handle>.
> > + * NOTES:
> > + *  1. The call may fail in an undefined manner if either mapping is not
> > + *     tracked by <handle>.
> > + *  2. After executing a batch of unmaps, it is guaranteed that no stale
> > + *     mappings will remain in the device or host TLBs.
> > + */
> > +#define GNTTABOP_unmap_grant_ref      1
> > +struct gnttab_unmap_grant_ref {
> > +	/* IN parameters. */
> > +	u64 host_addr;
> > +	u64 dev_bus_addr;
> > +	grant_handle_t handle;
> > +	/* OUT parameters. */
> > +	s16  status;              /* GNTST_* */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
> > +
> > +/*
> > + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at
> > + * least <nr_frames> pages. The frame addresses are written to the
> > + * <frame_list>. Only <nr_frames> addresses are written, even if the
> > + * table is larger.
> > + * NOTES:
> > + *  1. <dom> may be specified as DOMID_SELF.
> > + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> > + *  3. Xen may not support more than a single grant-table page per domain.
> > + */
> > +#define GNTTABOP_setup_table          2
> > +struct gnttab_setup_table {
> > +	/* IN parameters. */
> > +	domid_t  dom;
> > +	u32 nr_frames;
> > +	/* OUT parameters. */
> > +	s16  status;              /* GNTST_* */
> > +
> > +	GUEST_HANDLE(xen_pfn_t)frame_list;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
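
A hedged sketch of how a guest discovers its own grant-table frames with
this op (set_xen_guest_handle() as in the Linux headers; gfn[] is a
hypothetical caller-provided array):

	xen_pfn_t gfn[1];
	struct gnttab_setup_table setup = {
		.dom       = DOMID_SELF,
		.nr_frames = 1,
	};

	set_xen_guest_handle(setup.frame_list, gfn);
	HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	/* on success setup.status == GNTST_okay and gfn[0] names frame 0 */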
> > +
> > +/*
> > + * GNTTABOP_dump_table: Dump the contents of the grant table to the
> > + * xen console. Debugging use only.
> > + */
> > +#define GNTTABOP_dump_table           3
> > +struct gnttab_dump_table {
> > +	/* IN parameters. */
> > +	domid_t dom;
> > +	/* OUT parameters. */
> > +	s16 status;               /* GNTST_* */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
> > +
> > +/*
> > + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain.
> > + * The foreign domain has previously registered its interest in the
> > + * transfer via <domid, ref>.
> > + *
> > + * Note that, even if the transfer fails, the specified page no longer
> > + * belongs to the calling domain *unless* the error is GNTST_bad_page.
> > + */
> > +#define GNTTABOP_transfer                4
> > +struct gnttab_transfer {
> > +	/* IN parameters. */
> > +	xen_pfn_t mfn;
> > +	domid_t       domid;
> > +	grant_ref_t   ref;
> > +	/* OUT parameters. */
> > +	s16       status;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
> > +
> > +/*
> > + * GNTTABOP_copy: Hypervisor based copy.
> > + * Source and destinations can be either MFNs or, for foreign domains,
> > + * grant references. The foreign domain has to grant read/write access
> > + * in its grant table.
> > + *
> > + * The flags specify what type source and destinations are (either MFN
> > + * or grant reference).
> > + *
> > + * Note that this can also be used to copy data between two domains
> > + * via a third party if the source and destination domains had
> > + * previously granted appropriate access to their pages to the third
> > + * party.
> > + *
> > + * source_offset specifies an offset in the source frame, dest_offset
> > + * the offset in the target frame and len specifies the number of
> > + * bytes to be copied.
> > + */
> > +
> > +#define _GNTCOPY_source_gref      (0)
> > +#define GNTCOPY_source_gref       (1 << _GNTCOPY_source_gref)
> > +#define _GNTCOPY_dest_gref        (1)
> > +#define GNTCOPY_dest_gref         (1 << _GNTCOPY_dest_gref)
> > +
> > +#define GNTTABOP_copy                 5
> > +struct gnttab_copy {
> > +	/* IN parameters. */
> > +	struct {
> > +		union {
> > +			grant_ref_t ref;
> > +			xen_pfn_t   gmfn;
> > +		} u;
> > +		domid_t  domid;
> > +		u16 offset;
> > +	} source, dest;
> > +	u16      len;
> > +	u16      flags;          /* GNTCOPY_* */
> > +	/* OUT parameters. */
> > +	s16       status;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
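
To illustrate the flags, a sketch of a copy from a foreign grant into a
local frame (src_ref, src_dom, local_gmfn and len are hypothetical):

	struct gnttab_copy cp = {
		.source.u.ref  = src_ref,	/* flags say: source is a gref */
		.source.domid  = src_dom,
		.dest.u.gmfn   = local_gmfn,	/* dest is a plain local MFN */
		.dest.domid    = DOMID_SELF,
		.len           = len,
		.flags         = GNTCOPY_source_gref,
	};

	HYPERVISOR_grant_table_op(GNTTABOP_copy, &cp, 1);
	/* cp.status is GNTST_okay on success */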
> > +
> > +/*
> > + * GNTTABOP_query_size: Query the current and maximum sizes of the
> > + * shared grant table.
> > + * NOTES:
> > + *  1. <dom> may be specified as DOMID_SELF.
> > + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> > + */
> > +#define GNTTABOP_query_size           6
> > +struct gnttab_query_size {
> > +	/* IN parameters. */
> > +	domid_t  dom;
> > +	/* OUT parameters. */
> > +	u32 nr_frames;
> > +	u32 max_nr_frames;
> > +	s16  status;              /* GNTST_* */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
> > +
> > +/*
> > + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference
> > + * mappings tracked by <handle> but atomically replace the page table
> > + * entry with one pointing to the machine address under <new_addr>.
> > + * <new_addr> will be redirected to the null entry.
> > + * NOTES:
> > + *  1. The call may fail in an undefined manner if either mapping is not
> > + *     tracked by <handle>.
> > + *  2. After executing a batch of unmaps, it is guaranteed that no stale
> > + *     mappings will remain in the device or host TLBs.
> > + */
> > +#define GNTTABOP_unmap_and_replace    7
> > +struct gnttab_unmap_and_replace {
> > +	/* IN parameters. */
> > +	u64 host_addr;
> > +	u64 new_addr;
> > +	grant_handle_t handle;
> > +	/* OUT parameters. */
> > +	s16  status;              /* GNTST_* */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
> > +
> > +/*
> > + * GNTTABOP_set_version: Request a particular version of the grant
> > + * table shared table structure.  This operation can only be performed
> > + * once in any given domain.  It must be performed before any grants
> > + * are activated; otherwise, the domain will be stuck with version 1.
> > + * The only defined versions are 1 and 2.
> > + */
> > +#define GNTTABOP_set_version          8
> > +struct gnttab_set_version {
> > +	/* IN parameters */
> > +	u32 version;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
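
E.g., a guest that wants the v2 entry layout above would issue, once,
before activating any grants (sketch):

	struct gnttab_set_version sv = { .version = 2 };

	HYPERVISOR_grant_table_op(GNTTABOP_set_version, &sv, 1);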
> > +
> > +/*
> > + * GNTTABOP_get_status_frames: Get the list of frames used to store
> > + * grant status for <dom>. In grant format version 2, the status is
> > + * separated from the other shared grant fields to allow more efficient
> > + * synchronization using barriers instead of atomic cmpexch operations.
> > + * <nr_frames> specifies the size of vector <frame_list>.
> > + * The frame addresses are returned in the <frame_list>.
> > + * Only <nr_frames> addresses are returned, even if the table is larger.
> > + * NOTES:
> > + *  1. <dom> may be specified as DOMID_SELF.
> > + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> > + */
> > +#define GNTTABOP_get_status_frames     9
> > +struct gnttab_get_status_frames {
> > +	/* IN parameters. */
> > +	u32 nr_frames;
> > +	domid_t  dom;
> > +	/* OUT parameters. */
> > +	s16  status;              /* GNTST_* */
> > +
> > +	GUEST_HANDLE(u64)frame_list;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
> > +
> > +/*
> > + * GNTTABOP_get_version: Get the grant table version which is in
> > + * effect for domain <dom>.
> > + */
> > +#define GNTTABOP_get_version          10
> > +struct gnttab_get_version {
> > +	/* IN parameters */
> > +	domid_t dom;
> > +	u16 pad;
> > +	/* OUT parameters */
> > +	u32 version;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
> > +
> > +/*
> > + * Issue one or more cache maintenance operations on a portion of a
> > + * page granted to the calling domain by a foreign domain.
> > + */
> > +#define GNTTABOP_cache_flush          12
> > +struct gnttab_cache_flush {
> > +	union {
> > +		u64 dev_bus_addr;
> > +		grant_ref_t ref;
> > +	} a;
> > +	u16 offset;   /* offset from start of grant */
> > +	u16 length;   /* size within the grant */
> > +#define GNTTAB_CACHE_CLEAN          (1 << 0)
> > +#define GNTTAB_CACHE_INVAL          (1 << 1)
> > +#define GNTTAB_CACHE_SOURCE_GREF    (1 << 31)
> > +	u32 op;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
> > +
> > +/*
> > + * Bitfield values for update_pin_status.flags.
> > + */
> > + /* Map the grant entry for access by I/O devices. */
> > +#define _GNTMAP_device_map      (0)
> > +#define GNTMAP_device_map       (1 << _GNTMAP_device_map)
> > +/* Map the grant entry for access by host CPUs. */
> > +#define _GNTMAP_host_map        (1)
> > +#define GNTMAP_host_map         (1 << _GNTMAP_host_map)
> > +/* Accesses to the granted frame will be restricted to read-only access. */
> > +#define _GNTMAP_readonly        (2)
> > +#define GNTMAP_readonly         (1 << _GNTMAP_readonly)
> > +/*
> > + * GNTMAP_host_map subflag:
> > + *  0 => The host mapping is usable only by the guest OS.
> > + *  1 => The host mapping is usable by guest OS + current application.
> > + */
> > +#define _GNTMAP_application_map (3)
> > +#define GNTMAP_application_map  (1 << _GNTMAP_application_map)
> > +
> > +/*
> > + * GNTMAP_contains_pte subflag:
> > + *  0 => This map request contains a host virtual address.
> > + *  1 => This map request contains the machine address of the PTE
> > + *       to update.
> > + */
> > +#define _GNTMAP_contains_pte    (4)
> > +#define GNTMAP_contains_pte     (1 << _GNTMAP_contains_pte)
> > +
> > +/*
> > + * Bits to be placed in guest kernel available PTE bits (architecture
> > + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
> > + */
> > +#define _GNTMAP_guest_avail0    (16)
> > +#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0)
> > +
> > +/*
> > + * Values for error status returns. All errors are -ve.
> > + */
> > +#define GNTST_okay             (0)  /* Normal return.                  */
> > +#define GNTST_general_error    (-1) /* General undefined error.        */
> > +#define GNTST_bad_domain       (-2) /* Unrecognised domain id.         */
> > +#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
> > +#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
> > +#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
> > +#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
> > +#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.        */
> > +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
> > +#define GNTST_bad_page         (-9) /* Specified page was invalid for op.  */
> > +#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary. */
> > +#define GNTST_address_too_big (-11) /* transfer page address too large.    */
> > +#define GNTST_eagain          (-12) /* Operation not done; try again.  */
> > +
> > +#define GNTTABOP_error_msgs {                   \
> > +	"okay",                                     \
> > +	"undefined error",                          \
> > +	"unrecognised domain id",                   \
> > +	"invalid grant reference",                  \
> > +	"invalid mapping handle",                   \
> > +	"invalid virtual address",                  \
> > +	"invalid device address",                   \
> > +	"no spare translation slot in the I/O MMU", \
> > +	"permission denied",                        \
> > +	"bad page",                                 \
> > +	"copy arguments cross page boundary",       \
> > +	"page address size too large",              \
> > +	"operation not done; try again"             \
> > +}
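
The table is indexed by negated status code; a small helper in the spirit
of the Linux grant-table code (gnttabop_strerror is a hypothetical name):

	static const char *const gnttab_error_msgs[] = GNTTABOP_error_msgs;

	static const char *gnttabop_strerror(s16 status)
	{
		/* valid codes run from 0 (okay) down to -12 (eagain) */
		if (status > 0 || -status >= (s16)ARRAY_SIZE(gnttab_error_msgs))
			return "unknown GNTST status";
		return gnttab_error_msgs[-status];
	}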
> > +
> > +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
> > diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
> > new file mode 100644
> > index 0000000000..1c53cad729
> > --- /dev/null
> > +++ b/include/xen/interface/hvm/hvm_op.h
> > @@ -0,0 +1,69 @@
> > +/*
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > copy
> > + * of this software and associated documentation files (the
> > "Software"), to
> > + * deal in the Software without restriction, including without
> > limitation the
> > + * rights to use, copy, modify, merge, publish, distribute,
> > sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the
> > Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be
> > included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
> > EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> > OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
> > +#define __XEN_PUBLIC_HVM_HVM_OP_H__
> > +
> > +/* Get/set subcommands: the second argument of the hypercall is a
> > + * pointer to a xen_hvm_param struct.
> > + */
> > +#define HVMOP_set_param           0
> > +#define HVMOP_get_param           1
> > +struct xen_hvm_param {
> > +	domid_t  domid;    /* IN */
> > +	u32 index;    /* IN */
> > +	u64 value;    /* IN/OUT */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
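
A hedged sketch of reading a parameter through this struct, assuming a
Linux-style HYPERVISOR_hvm_op(cmd, arg) wrapper (HVM_PARAM_CONSOLE_PFN is
defined in params.h further down):

	struct xen_hvm_param p = {
		.domid = DOMID_SELF,
		.index = HVM_PARAM_CONSOLE_PFN,
	};

	if (HYPERVISOR_hvm_op(HVMOP_get_param, &p))
		return -ENOENT;
	/* p.value now holds the console ring PFN */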
> > +
> > +/* Hint from PV drivers for pagetable destruction. */
> > +#define HVMOP_pagetable_dying       9
> > +struct xen_hvm_pagetable_dying {
> > +	/* Domain with a pagetable about to be destroyed. */
> > +	domid_t  domid;
> > +	/* guest physical address of the toplevel pagetable dying */
> > +	aligned_u64 gpa;
> > +};
> > +
> > +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
> > +
> > +enum hvmmem_type_t {
> > +	HVMMEM_ram_rw,             /* Normal read/write guest RAM */
> > +	HVMMEM_ram_ro,             /* Read-only; writes are discarded */
> > +	HVMMEM_mmio_dm,            /* Reads and writes go to the device model */
> > +};
> > +
> > +#define HVMOP_get_mem_type    15
> > +/* Return hvmmem_type_t for the specified pfn. */
> > +struct xen_hvm_get_mem_type {
> > +	/* Domain to be queried. */
> > +	domid_t domid;
> > +	/* OUT variable. */
> > +	u16 mem_type;
> > +	u16 pad[2]; /* align next field on 8-byte boundary */
> > +	/* IN variable. */
> > +	u64 pfn;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
> > +
> > +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
> > diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
> > new file mode 100644
> > index 0000000000..4d61fc58d9
> > --- /dev/null
> > +++ b/include/xen/interface/hvm/params.h
> > @@ -0,0 +1,127 @@
> > +/*
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > copy
> > + * of this software and associated documentation files (the
> > "Software"), to
> > + * deal in the Software without restriction, including without
> > limitation the
> > + * rights to use, copy, modify, merge, publish, distribute,
> > sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the
> > Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be
> > included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
> > EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> > OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
> > +#define __XEN_PUBLIC_HVM_PARAMS_H__
> > +
> > +#include <xen/interface/hvm/hvm_op.h>
> > +
> > +/*
> > + * Parameter space for HVMOP_{set,get}_param.
> > + */
> > +
> > +#define HVM_PARAM_CALLBACK_IRQ 0
> > +/*
> > + * How should CPU0 event-channel notifications be delivered?
> > + *
> > + * If val == 0 then CPU0 event-channel notifications are not delivered.
> > + * If val != 0, val[63:56] encodes the type, as follows:
> > + */
> > +
> > +#define HVM_PARAM_CALLBACK_TYPE_GSI      0
> > +/*
> > + * val[55:0] is a delivery GSI.  GSI 0 cannot be used, as it aliases
> > + * val == 0, and disables all notifications.
> > + */
> > +
> > +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
> > +/*
> > + * val[55:0] is a delivery PCI INTx line:
> > + * Domain = val[47:32], Bus = val[31:16], DevFn = val[15:8], IntX = val[1:0]
> > + */
> > +
> > +#if defined(__i386__) || defined(__x86_64__)
> > +#define HVM_PARAM_CALLBACK_TYPE_VECTOR   2
> > +/*
> > + * val[7:0] is a vector number.  Check for XENFEAT_hvm_callback_vector
> > + * to know if this delivery method is available.
> > + */
> > +#elif defined(__arm__) || defined(__aarch64__)
> > +#define HVM_PARAM_CALLBACK_TYPE_PPI      2
> > +/*
> > + * val[55:16] needs to be zero.
> > + * val[15:8] is interrupt flag of the PPI used by event-channel:
> > + *  bit 8: the PPI is edge(1) or level(0) triggered
> > + *  bit 9: the PPI is active low(1) or high(0)
> > + * val[7:0] is a PPI number used by event-channel.
> > + * This is only used by ARM/ARM64 and masking/eoi the interrupt
> > + * associated to the notification is handled by the interrupt controller.
> > + */
> > +#endif
> > +
> > +#define HVM_PARAM_STORE_PFN    1
> > +#define HVM_PARAM_STORE_EVTCHN 2
> > +
> > +#define HVM_PARAM_PAE_ENABLED  4
> > +
> > +#define HVM_PARAM_IOREQ_PFN    5
> > +
> > +#define HVM_PARAM_BUFIOREQ_PFN 6
> > +
> > +/*
> > + * Set mode for virtual timers (currently x86 only):
> > + *  delay_for_missed_ticks (default):
> > + *   Do not advance a vcpu's time beyond the correct delivery time for
> > + *   interrupts that have been missed due to preemption. Deliver missed
> > + *   interrupts when the vcpu is rescheduled and advance the vcpu's
> > + *   virtual time stepwise for each one.
> > + *  no_delay_for_missed_ticks:
> > + *   As above, missed interrupts are delivered, but guest time always
> > + *   tracks wallclock (i.e., real) time while doing so.
> > + *  no_missed_ticks_pending:
> > + *   No missed interrupts are held pending. Instead, to ensure ticks
> > + *   are delivered at some non-zero rate, if we detect missed ticks
> > + *   then the internal tick alarm is not disabled if the VCPU is
> > + *   preempted during the next tick period.
> > + *  one_missed_tick_pending:
> > + *   Missed interrupts are collapsed together and delivered as one
> > + *   'late tick'. Guest time always tracks wallclock (i.e., real) time.
> > + */
> > +#define HVM_PARAM_TIMER_MODE   10
> > +#define HVMPTM_delay_for_missed_ticks    0
> > +#define HVMPTM_no_delay_for_missed_ticks 1
> > +#define HVMPTM_no_missed_ticks_pending   2
> > +#define HVMPTM_one_missed_tick_pending   3
> > +
> > +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
> > +#define HVM_PARAM_HPET_ENABLED 11
> > +
> > +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
> > +#define HVM_PARAM_IDENT_PT     12
> > +
> > +/* Device Model domain, defaults to 0. */
> > +#define HVM_PARAM_DM_DOMAIN    13
> > +
> > +/* ACPI S state: currently support S0 and S3 on x86. */
> > +#define HVM_PARAM_ACPI_S_STATE 14
> > +
> > +/* TSS used on Intel when CR0.PE=0. */
> > +#define HVM_PARAM_VM86_TSS     15
> > +
> > +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
> > +#define HVM_PARAM_VPT_ALIGN    16
> > +
> > +/* Console debug shared memory ring and event channel */
> > +#define HVM_PARAM_CONSOLE_PFN    17
> > +#define HVM_PARAM_CONSOLE_EVTCHN 18
> > +
> > +#define HVM_NR_PARAMS          19
> > +
> > +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
> > diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
> > new file mode 100644
> > index 0000000000..7d74c99226
> > --- /dev/null
> > +++ b/include/xen/interface/io/blkif.h
> > @@ -0,0 +1,726 @@
> > +/******************************************************************************
> > + * blkif.h
> > + *
> > + * Unified block-device I/O interface for Xen guest OSes.
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > copy
> > + * of this software and associated documentation files (the
> > "Software"), to
> > + * deal in the Software without restriction, including without
> > limitation the
> > + * rights to use, copy, modify, merge, publish, distribute,
> > sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the
> > Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be
> > included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
> > EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> > OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2003-2004, Keir Fraser
> > + * Copyright (c) 2012, Spectra Logic Corporation
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_IO_BLKIF_H__
> > +#define __XEN_PUBLIC_IO_BLKIF_H__
> > +
> > +#include "ring.h"
> > +#include "../grant_table.h"
> > +
> > +/*
> > + * Front->back notifications: When enqueuing a new request, sending a
> > + * notification can be made conditional on req_event (i.e., the generic
> > + * hold-off mechanism provided by the ring macros). Backends must set
> > + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
> > + *
> > + * Back->front notifications: When enqueuing a new response, sending a
> > + * notification can be made conditional on rsp_event (i.e., the generic
> > + * hold-off mechanism provided by the ring macros). Frontends must set
> > + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
> > + */
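
To make the hold-off mechanism concrete, a frontend enqueue path would
roughly look like the sketch below (macro names from the quoted ring.h;
ring, evtchn and notify_remote_via_evtchn() are hypothetical here):

	struct blkif_request *req =
		RING_GET_REQUEST(&ring, ring.req_prod_pvt);
	/* ... fill in *req ... */
	ring.req_prod_pvt++;

	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
	if (notify)
		notify_remote_via_evtchn(evtchn);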
> > +
> > +#ifndef blkif_vdev_t
> > +#define blkif_vdev_t   u16
> > +#endif
> > +#define blkif_sector_t u64
> > +
> > +/*
> > + * Feature and Parameter Negotiation
> > + * =================================
> > + * The two halves of a Xen block driver utilize nodes within the
> > + * XenStore to communicate capabilities and to negotiate operating
> > + * parameters.  This section enumerates these nodes which reside in the
> > + * respective front and backend portions of the XenStore, following the
> > + * XenBus convention.
> > + *
> > + * All data in the XenStore is stored as strings.  Nodes specifying
> > + * numeric values are encoded in decimal.  Integer value ranges listed
> > + * below are expressed as fixed sized integer types capable of storing
> > + * the conversion of a properly formatted node string, without loss of
> > + * information.
> > + *
> > + * Any specified default value is in effect if the corresponding
> > XenBus node
> > + * is not present in the XenStore.
> > + *
> > + * XenStore nodes in sections marked "PRIVATE" are solely for use
> > by the
> > + * driver side whose XenBus tree contains them.
> > + *
> > + * XenStore nodes marked "DEPRECATED" in their notes section
> > should only
> > be
> > + * used to provide interoperability with legacy implementations.
> > + *
> > + * See the XenBus state transition diagram below for details on
> > when XenBus
> > + * nodes must be published and when they can be queried.
> > + *
> > +
> > **************************************************************
> > ***************
> > + *                            Backend XenBus Nodes
> > +
> > **************************************************************
> > ***************
> > + *
> > + *------------------ Backend Device Identification (PRIVATE) ---
> > ---------------
> > + *
> > + * mode
> > + *      Values:         "r" (read only), "w" (writable)
> > + *
> > + *      The read or write access permissions to the backing store
> > to be
> > + *      granted to the frontend.
> > + *
> > + * params
> > + *      Values:         string
> > + *
> > + *      A free formatted string providing sufficient information
> > for the
> > + *      hotplug script to attach the device and provide a suitable
> > + *      handler (ie: a block device) for blkback to use.
> > + *
> > + * physical-device
> > + *      Values:         "MAJOR:MINOR"
> > + *      Notes: 11
> > + *
> > + *      MAJOR and MINOR are the major number and minor number of
> > the
> > + *      backing device respectively.
> > + *
> > + * physical-device-path
> > + *      Values:         path string
> > + *
> > + *      A string that contains the absolute path to the disk
> > image. On
> > + *      NetBSD and Linux this is always a block device, while on
> > FreeBSD
> > + *      it can be either a block device or a regular file.
> > + *
> > + * type
> > + *      Values:         "file", "phy", "tap"
> > + *
> > + *      The type of the backing device/object.
> > + *
> > + *
> > + * direct-io-safe
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *
> > + *      The underlying storage is not affected by the direct IO
> > memory
> > + *      lifetime bug.  See:
> > + *      http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
> > + *
> > + *      Therefore this option gives the backend permission to use
> > + *      O_DIRECT, notwithstanding that bug.
> > + *
> > + *      That is, if this option is enabled, use of O_DIRECT is
> > safe,
> > + *      in circumstances where we would normally have avoided it
> > as a
> > + *      workaround for that bug.  This option is not relevant for
> > all
> > + *      backends, and even not necessarily supported for those for
> > + *      which it is relevant.  A backend which knows that it is
> > not
> > + *      affected by the bug can ignore this option.
> > + *
> > + *      This option doesn't require a backend to use O_DIRECT, so
> > it
> > + *      should not be used to try to control the caching
> > behaviour.
> > + *
> > + *--------------------------------- Features -------------------
> > --------------
> > + *
> > + * feature-barrier
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *
> > + *      A value of "1" indicates that the backend can process
> > requests
> > + *      containing the BLKIF_OP_WRITE_BARRIER request opcode.
> > Requests
> > + *      of this type may still be returned at any time with the
> > + *      BLKIF_RSP_EOPNOTSUPP result code.
> > + *
> > + * feature-flush-cache
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *
> > + *      A value of "1" indicates that the backend can process
> > requests
> > + *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.
> > Requests
> > + *      of this type may still be returned at any time with the
> > + *      BLKIF_RSP_EOPNOTSUPP result code.
> > + *
> > + * feature-discard
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *
> > + *      A value of "1" indicates that the backend can process
> > requests
> > + *      containing the BLKIF_OP_DISCARD request opcode.  Requests
> > + *      of this type may still be returned at any time with the
> > + *      BLKIF_RSP_EOPNOTSUPP result code.
> > + *
> > + * feature-persistent
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *      Notes: 7
> > + *
> > + *      A value of "1" indicates that the backend can keep the
> > grants used
> > + *      by the frontend driver mapped, so the same set of grants
> > should be
> > + *      used in all transactions. The maximum number of grants the
> > backend
> > + *      can map persistently depends on the implementation, but
> > ideally it
> > + *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
> > Using this
> > + *      feature the backend doesn't need to unmap each grant,
> > preventing
> > + *      costly TLB flushes. The backend driver should only map
> > grants
> > + *      persistently if the frontend supports it. If a backend
> > driver chooses
> > + *      to use the persistent protocol when the frontend doesn't
> > support it,
> > + *      it will probably hit the maximum number of persistently
> > mapped
> > grants
> > + *      (due to the fact that the frontend won't be reusing the
> > same
> > grants),
> > + *      and fall back to non-persistent mode. Backend
> > implementations
> > may
> > + *      shrink or expand the number of persistently mapped grants
> > without
> > + *      notifying the frontend depending on memory constraints
> > (this might
> > + *      cause a performance degradation).
> > + *
> > + *      If a backend driver wants to limit the maximum number of
> > persistently
> > + *      mapped grants to a value less than RING_SIZE *
> > + *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be
> > used to
> > + *      discard the grants that are less commonly used. Using a
> > LRU in the
> > + *      backend driver paired with a LIFO queue in the frontend
> > will
> > + *      allow us to have better performance in this scenario.
> > + *
> > + *----------------------- Request Transport Parameters ---------
> > ---------------
> > + *
> > + * max-ring-page-order
> > + *      Values:         <uint32_t>
> > + *      Default Value:  0
> > + *      Notes:          1, 3
> > + *
> > + *      The maximum supported size of the request ring buffer in units
> > + *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4
> > + *      pages, etc.).
> > + *
> > + * max-ring-pages
> > + *      Values:         <uint32_t>
> > + *      Default Value:  1
> > + *      Notes:          DEPRECATED, 2, 3
> > + *
> > + *      The maximum supported size of the request ring buffer in
> > units of
> > + *      machine pages.  The value must be a power of 2.
> > + *
> > + *------------------------- Backend Device Properties ------------
> > -------------
> > + *
> > + * discard-enable
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  1
> > + *
> > + *      This optional property, set by the toolstack, instructs
> > the backend
> > + *      to offer (or not to offer) discard to the frontend. If the
> > property
> > + *      is missing the backend should offer discard if the backing
> > storage
> > + *      actually supports it.
> > + *
> > + * discard-alignment
> > + *      Values:         <uint32_t>
> > + *      Default Value:  0
> > + *      Notes:          4, 5
> > + *
> > + *      The offset, in bytes from the beginning of the virtual
> > block device,
> > + *      to the first, addressable, discard extent on the
> > underlying device.
> > + *
> > + * discard-granularity
> > + *      Values:         <uint32_t>
> > + *      Default Value:  <"sector-size">
> > + *      Notes:          4
> > + *
> > + *      The size, in bytes, of the individually addressable
> > discard extents
> > + *      of the underlying device.
> > + *
> > + * discard-secure
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *      Notes:          10
> > + *
> > + *      A value of "1" indicates that the backend can process
> > BLKIF_OP_DISCARD
> > + *      requests with the BLKIF_DISCARD_SECURE flag set.
> > + *
> > + * info
> > + *      Values:         <uint32_t> (bitmap)
> > + *
> > + *      A collection of bit flags describing attributes of the
> > backing
> > + *      device.  The VDISK_* macros define the meaning of each bit
> > + *      location.
> > + *
> > + * sector-size
> > + *      Values:         <uint32_t>
> > + *
> > + *      The logical block size, in bytes, of the underlying
> > storage. This
> > + *      must be a power of two with a minimum value of 512.
> > + *
> > + *      NOTE: Because of implementation bugs in some frontends this
> > + *            must be set to 512, unless the frontend advertises a
> > + *            non-zero value in its "feature-large-sector-size" xenbus
> > + *            node. (See below).
> > + *
> > + * physical-sector-size
> > + *      Values:         <uint32_t>
> > + *      Default Value:  <"sector-size">
> > + *
> > + *      The physical block size, in bytes, of the backend storage.
> > This
> > + *      must be an integer multiple of "sector-size".
> > + *
> > + * sectors
> > + *      Values:         <u64>
> > + *
> > + *      The size of the backend device, expressed in units of
> > "sector-size".
> > + *      The product of "sector-size" and "sectors" must also be an
> > integer
> > + *      multiple of "physical-sector-size", if that node is
> > present.
> > + *
> > +
> > **************************************************************
> > ***************
> > + *                            Frontend XenBus Nodes
> > +
> > **************************************************************
> > ***************
> > + *
> > + *----------------------- Request Transport Parameters ---------
> > --------------
> > + *
> > + * event-channel
> > + *      Values:         <uint32_t>
> > + *
> > + *      The identifier of the Xen event channel used to signal
> > activity
> > + *      in the ring buffer.
> > + *
> > + * ring-ref
> > + *      Values:         <uint32_t>
> > + *      Notes:          6
> > + *
> > + *      The Xen grant reference granting permission for the
> > backend to
> > map
> > + *      the sole page in a single page sized ring buffer.
> > + *
> > + * ring-ref%u
> > + *      Values:         <uint32_t>
> > + *      Notes:          6
> > + *
> > + *      For a frontend providing a multi-page ring, a "number of
> > ring pages"
> > + *      sized list of nodes, each containing a Xen grant reference
> > granting
> > + *      permission for the backend to map the page of the ring
> > located
> > + *      at page index "%u".  Page indexes are zero based.
> > + *
> > + * protocol
> > + *      Values:         string (XEN_IO_PROTO_ABI_*)
> > + *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
> > + *
> > + *      The machine ABI rules governing the format of all ring
> > request and
> > + *      response structures.
> > + *
> > + * ring-page-order
> > + *      Values:         <uint32_t>
> > + *      Default Value:  0
> > + *      Maximum Value:  MAX(ffs(max-ring-pages) - 1,
> > max-ring-page-order)
> > + *      Notes:          1, 3
> > + *
> > + *      The size of the frontend allocated request ring buffer in units
> > + *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4
> > + *      pages, etc.).
> > + *
> > + * num-ring-pages
> > + *      Values:         <uint32_t>
> > + *      Default Value:  1
> > + *      Maximum Value:  MAX(max-ring-pages,(0x1 <<
> > max-ring-page-order))
> > + *      Notes:          DEPRECATED, 2, 3
> > + *
> > + *      The size of the frontend allocated request ring buffer in
> > units of
> > + *      machine pages.  The value must be a power of 2.
> > + *
> > + *--------------------------------- Features -------------------
> > --------------
> > + *
> > + * feature-persistent
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *      Notes: 7, 8, 9
> > + *
> > + *      A value of "1" indicates that the frontend will reuse the
> > same grants
> > + *      for all transactions, allowing the backend to map them
> > with write
> > + *      access (even when it should be read-only). If the frontend
> > hits the
> > + *      maximum number of allowed persistently mapped grants, it
> > can
> > fallback
> > + *      to non persistent mode. This will cause a performance
> > degradation,
> > + *      since the the backend driver will still try to map those
> > grants
> > + *      persistently. Since the persistent grants protocol is
> > compatible with
> > + *      the previous protocol, a frontend driver can choose to
> > work in
> > + *      persistent mode even when the backend doesn't support it.
> > + *
> > + *      It is recommended that the frontend driver stores the
> > persistently
> > + *      mapped grants in a LIFO queue, so a subset of all
> > persistently
> > mapped
> > + *      grants gets used commonly. This is done in case the
> > backend driver
> > + *      decides to limit the maximum number of persistently mapped
> > grants
> > + *      to a value less than RING_SIZE *
> > BLKIF_MAX_SEGMENTS_PER_REQUEST.
> > + *
> > + * feature-large-sector-size
> > + *      Values:         0/1 (boolean)
> > + *      Default Value:  0
> > + *
> > + *      A value of "1" indicates that the frontend will correctly
> > supply and
> > + *      interpret all sector-based quantities in terms of the
> > "sector-size"
> > + *      value supplied in the backend info, whatever that may be
> > set to.
> > + *      If this node is not present or its value is "0" then it is
> > assumed
> > + *      that the frontend requires that the logical block size is
> > 512 as it
> > + *      is hardcoded (which is the case in some frontend
> > implementations).
> > + *
> > + *------------------------- Virtual Device Properties ------------
> > -------------
> > + *
> > + * device-type
> > + *      Values:         "disk", "cdrom", "floppy", etc.
> > + *
> > + * virtual-device
> > + *      Values:         <uint32_t>
> > + *
> > + *      A value indicating the physical device to virtualize
> > within the
> > + *      frontend's domain.  (e.g. "The first ATA disk", "The third
> > SCSI
> > + *      disk", etc.)
> > + *
> > + *      See docs/misc/vbd-interface.txt for details on the format
> > of this
> > + *      value.
> > + *
> > + * Notes
> > + * -----
> > + * (1) Multi-page ring buffer scheme first developed in the Citrix
> > XenServer
> > + *     PV drivers.
> > + * (2) Multi-page ring buffer scheme first used in some RedHat
> > distributions
> > + *     including a distribution deployed on certain nodes of the
> > Amazon
> > + *     EC2 cluster.
> > + * (3) Support for multi-page ring buffers was implemented
> > independently,
> > + *     in slightly different forms, by both Citrix and
> > RedHat/Amazon.
> > + *     For full interoperability, block front and backends should
> > publish
> > + *     identical ring parameters, adjusted for unit differences,
> > to the
> > + *     XenStore nodes used in both schemes.
> > + * (4) Devices that support discard functionality may internally
> > + *     allocate space (discardable extents) in units that are larger
> > + *     than the exported logical block size. If the backing device has
> > + *     such discardable extents the backend should provide both
> > + *     discard-granularity and discard-alignment. Providing just one of
> > + *     the two may be considered an error by the frontend. Backends
> > + *     supporting discard should include discard-granularity and
> > + *     discard-alignment even if they support discarding individual
> > + *     sectors. Frontends should assume discard-alignment == 0 and
> > + *     discard-granularity == sector size if these keys are missing.
> > + * (5) The discard-alignment parameter allows a physical device to
> > be
> > + *     partitioned into virtual devices that do not necessarily
> > begin or
> > + *     end on a discardable extent boundary.
> > + * (6) When there is only a single page allocated to the request
> > ring,
> > + *     'ring-ref' is used to communicate the grant reference for
> > this
> > + *     page to the backend.  When using a multi-page ring, the
> > 'ring-ref'
> > + *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are
> > used.
> > + * (7) When using persistent grants data has to be copied from/to
> > the page
> > + *     where the grant is currently mapped. The overhead of doing
> > this
> > copy
> > + *     however doesn't suppress the speed improvement of not
> > having to
> > unmap
> > + *     the grants.
> > + * (8) The frontend driver has to allow the backend driver to map
> > all grants
> > + *     with write access, even when they should be mapped read-
> > only,
> > since
> > + *     further requests may reuse these grants and require write
> > permissions.
> > + * (9) The Linux implementation doesn't have a limit on the maximum
> > + *     number of grants that can be persistently mapped in the frontend
> > + *     driver, but due to the frontend driver implementation it should
> > + *     never be bigger than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
> > + *(10) The discard-secure property may be present and will be set
> > to 1 if the
> > + *     backing device supports secure discard.
> > + *(11) Only used by Linux and NetBSD.
> > + */
> > +
> > +/*
> > + * Multiple hardware queues/rings:
> > + * If supported, the backend will write the key "multi-queue-max-
> > queues" to
> > + * the directory for that vbd, and set its value to the maximum
> > supported
> > + * number of queues.
> > + * Frontends that are aware of this feature and wish to use it can
> > write the
> > + * key "multi-queue-num-queues" with the number they wish to use,
> > which
> > must be
> > + * greater than zero, and no more than the value reported by the
> > backend in
> > + * "multi-queue-max-queues".
> > + *
> > + * For frontends requesting just one queue, the usual event-
> > channel and
> > + * ring-ref keys are written as before, simplifying the backend
> > processing
> > + * to avoid distinguishing between a frontend that doesn't
> > understand the
> > + * multi-queue feature, and one that does, but requested only one
> > queue.
> > + *
> > + * Frontends requesting two or more queues must not write the
> > toplevel
> > + * event-channel and ring-ref keys, instead writing those keys
> > under
> > sub-keys
> > + * having the name "queue-N" where N is the integer ID of the
> > queue/ring
> > for
> > + * which those keys belong. Queues are indexed from zero.
> > + * For example, a frontend with two queues must write the
> > following set of
> > + * queue-related keys:
> > + *
> > + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> > + * /local/domain/1/device/vbd/0/queue-0 = ""
> > + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
> > + * /local/domain/1/device/vbd/0/queue-0/event-channel =
> > "<evtchn#0>"
> > + * /local/domain/1/device/vbd/0/queue-1 = ""
> > + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
> > + * /local/domain/1/device/vbd/0/queue-1/event-channel =
> > "<evtchn#1>"
> > + *
> > + * It is also possible to use multiple queues/rings together with
> > + * the multi-page ring buffer feature.
> > + * For example, a frontend that requests two queues/rings, each ring
> > + * buffer being two pages, must write the following set of related keys:
> > + *
> > + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> > + * /local/domain/1/device/vbd/0/ring-page-order = "1"
> > + * /local/domain/1/device/vbd/0/queue-0 = ""
> > + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
> > + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
> > + * /local/domain/1/device/vbd/0/queue-0/event-channel =
> > "<evtchn#0>"
> > + * /local/domain/1/device/vbd/0/queue-1 = ""
> > + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
> > + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
> > + * /local/domain/1/device/vbd/0/queue-1/event-channel =
> > "<evtchn#1>"
> > + *
> > + */
> > +
> > +/*
> > + * STATE DIAGRAMS
> > + *
> > +
> > **************************************************************
> > ***************
> > + *                                   Startup
> > *
> > +
> > **************************************************************
> > ***************
> > + *
> > + * Tool stack creates front and back nodes with state
> > XenbusStateInitialising.
> > + *
> > + * Front                                Back
> > + * =================================
> > =====================================
> > + * XenbusStateInitialising              XenbusStateInitialising
> > + *  o Query virtual device               o Query backend device
> > identification
> > + *    properties.                          data.
> > + *  o Setup OS device instance.          o Open and validate
> > backend
> > device.
> > + *                                       o Publish backend
> > features and
> > + *                                         transport parameters.
> > + *                                                      |
> > + *                                                      |
> > + *                                                      V
> > + *                                      XenbusStateInitWait
> > + *
> > + * o Query backend features and
> > + *   transport parameters.
> > + * o Allocate and initialize the
> > + *   request ring.
> > + * o Publish transport parameters
> > + *   that will be in effect during
> > + *   this connection.
> > + *              |
> > + *              |
> > + *              V
> > + * XenbusStateInitialised
> > + *
> > + *                                       o Query frontend
> > transport parameters.
> > + *                                       o Connect to the request
> > ring and
> > + *                                         event channel.
> > + *                                       o Publish backend device
> > properties.
> > + *                                                      |
> > + *                                                      |
> > + *                                                      V
> > + *                                      XenbusStateConnected
> > + *
> > + *  o Query backend device properties.
> > + *  o Finalize OS virtual device
> > + *    instance.
> > + *              |
> > + *              |
> > + *              V
> > + * XenbusStateConnected
> > + *
> > + * Note: Drivers that do not support any optional features, or the
> > negotiation
> > + *       of transport parameters, can skip certain states in the
> > state
> > machine:
> > + *
> > + *       o A frontend may transition to XenbusStateInitialised
> > without
> > + *         waiting for the backend to enter
> > XenbusStateInitWait.  In this
> > + *         case, default transport parameters are in effect and
> > any
> > + *         transport parameters published by the frontend must
> > contain
> > + *         their default values.
> > + *
> > + *       o A backend may transition to XenbusStateInitialised,
> > bypassing
> > + *         XenbusStateInitWait, without waiting for the frontend
> > to first
> > + *         enter the XenbusStateInitialised state.  In this case,
> > default
> > + *         transport parameters are in effect and any transport
> > parameters
> > + *         published by the backend must contain their default
> > values.
> > + *
> > + *       Drivers that support optional features and/or transport
> > parameter
> > + *       negotiation must tolerate these additional state
> > transition paths.
> > + *       In general this means performing the work of any skipped
> > state
> > + *       transition, if it has not already been performed, in
> > addition to the
> > + *       work associated with entry into the current state.
> > + */
> > +
> > +/*
> > + * REQUEST CODES.
> > + */
> > +#define BLKIF_OP_READ              0
> > +#define BLKIF_OP_WRITE             1
> > +/*
> > + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
> > + * operation code ("barrier request") must be completed prior to the
> > + * execution of the barrier request.  All writes issued after the
> > + * barrier request must not execute until after the completion of the
> > + * barrier request.
> > + *
> > + * Optional.  See "feature-barrier" XenBus node documentation above.
> > + */
> > +#define BLKIF_OP_WRITE_BARRIER     2
> > +/*
> > + * Commit any uncommitted contents of the backing device's volatile
> > + * cache to stable storage.
> > + *
> > + * Optional.  See "feature-flush-cache" XenBus node documentation above.
> > + */
> > +#define BLKIF_OP_FLUSH_DISKCACHE   3
> > +/*
> > + * Used in SLES sources for device specific command packet
> > + * contained within the request. Reserved for that purpose.
> > + */
> > +#define BLKIF_OP_RESERVED_1        4
> > +/*
> > + * Indicate to the backend device that a region of storage is no
> > + * longer in use, and may be discarded at any time without impact to
> > + * the client.  If the BLKIF_DISCARD_SECURE flag is set on the request,
> > + * all copies of the discarded region on the device must be rendered
> > + * unrecoverable before the command returns.
> > + *
> > + * This operation is analogous to performing a trim (ATA) or unmap
> > + * (SCSI) command on a native device.
> > + *
> > + * More information about trim/unmap operations can be found at:
> > + *     http://t13.org/Documents/UploadedDocuments/docs2008/
> > + *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
> > + *     http://www.seagate.com/staticfiles/support/disc/manuals/
> > + *     Interface%20manuals/100293068c.pdf
> > + *
> > + * Optional.  See "feature-discard", "discard-alignment",
> > + * "discard-granularity", and "discard-secure" in the XenBus node
> > + * documentation above.
> > + */
> > +#define BLKIF_OP_DISCARD           5
> > +
> > +/*
> > + * Recognized if "feature-max-indirect-segments" is present in the backend
> > + * xenbus info. The "feature-max-indirect-segments" node contains
> > the
> > maximum
> > + * number of segments allowed by the backend per request. If the
> > node is
> > + * present, the frontend might use blkif_request_indirect structs
> > in order to
> > + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST
> > (11). The
> > + * maximum number of indirect segments is fixed by the backend,
> > but the
> > + * frontend can issue requests with any number of indirect
> > segments as long
> > as
> > + * it's less than the number provided by the backend. The
> > indirect_grefs field
> > + * in blkif_request_indirect should be filled by the frontend with
> > the
> > + * grant references of the pages that are holding the indirect
> > segments.
> > + * These pages are filled with an array of blkif_request_segment
> > that hold
> > the
> > + * information about the segments. The number of indirect pages to
> > use is
> > + * determined by the number of segments an indirect request
> > contains.
> > Every
> > + * indirect page can contain a maximum of
> > + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so
> > to
> > + * calculate the number of indirect pages to use we have to do
> > + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct
> > blkif_request_segment))).
> > + *
> > + * If a backend does not recognize BLKIF_OP_INDIRECT, it should
> > *not*
> > + * create the "feature-max-indirect-segments" node!
> > + */
> > +#define BLKIF_OP_INDIRECT          6
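
The ceil() above translates to the usual integer arithmetic; a sketch with
hypothetical macro names (Linux's blkfront carries an equivalent):

	#define SEGS_PER_INDIRECT_FRAME \
		(PAGE_SIZE / sizeof(struct blkif_request_segment))
	#define INDIRECT_GREFS(nsegs) \
		(((nsegs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)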
> > +
> > +/*
> > + * Maximum scatter/gather segments per request.
> > + * This is carefully chosen so that sizeof(blkif_ring_t) <=
> > PAGE_SIZE.
> > + * NB. This could be 12 if the ring indexes weren't stored in the
> > same page.
> > + */
> > +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
> > +
> > +/*
> > + * Maximum number of indirect pages to use per request.
> > + */
> > +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
> > +
> > +/*
> > + * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as
> > well as
> > + * 'sector_number' in blkif_request, blkif_request_discard and
> > + * blkif_request_indirect are sector-based quantities. See the
> > description
> > + * of the "feature-large-sector-size" frontend xenbus node above
> > for
> > + * more information.
> > + */
> > +struct blkif_request_segment {
> > +	grant_ref_t gref;        /* reference to I/O buffer frame        */
> > +	/* @first_sect: first sector in frame to transfer (inclusive).   */
> > +	/* @last_sect: last sector in frame to transfer (inclusive).     */
> > +	u8     first_sect, last_sect;
> > +};
> > +
> > +/*
> > + * Starting ring element for any I/O request.
> > + */
> > +struct blkif_request {
> > +	u8        operation;    /* BLKIF_OP_???                         */
> > +	u8        nr_segments;  /* number of segments                   */
> > +	blkif_vdev_t   handle;       /* only for read/write requests         */
> > +	u64       id;           /* private guest value, echoed in resp  */
> > +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
> > +	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +};
> > +
> > +typedef struct blkif_request blkif_request_t;
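
For orientation, filling a one-segment 4 KiB read might look like this
sketch (req comes from RING_GET_REQUEST(); gref, vdev and sector are
hypothetical caller values):

	req->operation     = BLKIF_OP_READ;
	req->nr_segments   = 1;
	req->handle        = vdev;
	req->id            = 0;		/* echoed back in the response */
	req->sector_number = sector;
	req->seg[0].gref       = gref;
	req->seg[0].first_sect = 0;
	req->seg[0].last_sect  = 7;	/* 8 x 512-byte sectors == one 4k page */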
> > +
> > +/*
> > + * Cast to this structure when blkif_request.operation ==
> > BLKIF_OP_DISCARD
> > + * sizeof(struct blkif_request_discard) <= sizeof(struct
> > blkif_request)
> > + */
> > +struct blkif_request_discard {
> > +	u8        operation;    /* BLKIF_OP_DISCARD                     */
> > +	u8        flag;         /* BLKIF_DISCARD_SECURE or zero         */
> > +#define BLKIF_DISCARD_SECURE (1 << 0)  /* ignored if discard-secure=0 */
> > +	blkif_vdev_t   handle;       /* same as for read/write requests      */
> > +	u64       id;           /* private guest value, echoed in resp  */
> > +	blkif_sector_t sector_number;/* start sector idx on disk             */
> > +	u64       nr_sectors;   /* number of contiguous sectors to discard*/
> > +};
> > +
> > +typedef struct blkif_request_discard blkif_request_discard_t;
> > +
> > +struct blkif_request_indirect {
> > +	u8        operation;    /* BLKIF_OP_INDIRECT                    */
> > +	u8        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
> > +	u16       nr_segments;  /* number of segments                   */
> > +	u64       id;           /* private guest value, echoed in resp  */
> > +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
> > +	blkif_vdev_t   handle;       /* same as for read/write requests      */
> > +	grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
> > +#ifdef __i386__
> > +	u64       pad;          /* Make it 64 byte aligned on i386      */
> > +#endif
> > +};
> > +
> > +typedef struct blkif_request_indirect blkif_request_indirect_t;
> > +
> > +struct blkif_response {
> > +	u64        id;              /* copied from request */
> > +	u8         operation;       /* copied from request */
> > +	s16         status;          /* BLKIF_RSP_???       */
> > +};
> > +
> > +typedef struct blkif_response blkif_response_t;
> > +
> > +/*
> > + * STATUS RETURN CODES.
> > + */
> > + /* Operation not supported (only happens on barrier writes). */
> > +#define BLKIF_RSP_EOPNOTSUPP  -2
> > + /* Operation failed for some unspecified reason (-EIO). */
> > +#define BLKIF_RSP_ERROR       -1
> > + /* Operation completed successfully. */
> > +#define BLKIF_RSP_OKAY         0
> > +
> > +/*
> > + * Generate blkif ring structures and types.
> > + */
> > +DEFINE_RING_TYPES(blkif, struct blkif_request, struct
> > blkif_response);
> > +
> > +#define VDISK_CDROM        0x1
> > +#define VDISK_REMOVABLE    0x2
> > +#define VDISK_READONLY     0x4
> > +
> > +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 4
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h
> > new file mode 100644
> > index 0000000000..3489fc7a60
> > --- /dev/null
> > +++ b/include/xen/interface/io/console.h
> > @@ -0,0 +1,56 @@
> > +/******************************************************************************
> > + * console.h
> > + *
> > + * Console I/O interface for Xen guest OSes.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2005, Keir Fraser
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
> > +#define __XEN_PUBLIC_IO_CONSOLE_H__
> > +
> > +typedef u32 XENCONS_RING_IDX;
> > +
> > +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1))
> > +
> > +struct xencons_interface {
> > +	char in[1024];
> > +	char out[2048];
> > +	XENCONS_RING_IDX in_cons, in_prod;
> > +	XENCONS_RING_IDX out_cons, out_prod;
> > +};
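[Editor's note: for illustration, a minimal transmit path over this
interface might look like the following. The mb()/wmb() barrier wrappers
and the event-channel notification are assumed to be provided elsewhere;
this is a sketch, not the patch's implementation.

    static int xencons_write(struct xencons_interface *intf,
                             const char *data, unsigned int len)
    {
        XENCONS_RING_IDX cons = intf->out_cons;
        XENCONS_RING_IDX prod = intf->out_prod;
        unsigned int sent = 0;

        mb(); /* read both indexes before touching the ring */
        while (sent < len && (prod - cons) < sizeof(intf->out))
            intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
        wmb(); /* data must be visible before the index update */
        intf->out_prod = prod;
        return sent; /* caller then kicks the backend's event channel */
    }
]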
> > +
> > +#ifdef XEN_WANT_FLEX_CONSOLE_RING
> > +#include "ring.h"
> > +DEFINE_XEN_FLEX_RING(xencons);
> > +#endif
> > +
> > +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 4
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
> > new file mode 100644
> > index 0000000000..52b4de0f81
> > --- /dev/null
> > +++ b/include/xen/interface/io/protocols.h
> > @@ -0,0 +1,42 @@
> > +/******************************************************************************
> > + * protocols.h
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2008, Keir Fraser
> > + */
> > +
> > +#ifndef __XEN_PROTOCOLS_H__
> > +#define __XEN_PROTOCOLS_H__
> > +
> > +#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
> > +#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
> > +#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
> > +
> > +#if defined(__i386__)
> > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
> > +#elif defined(__x86_64__)
> > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
> > +#elif defined(__arm__) || defined(__aarch64__)
> > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
> > +#else
> > +# error arch fixup needed here
> > +#endif
> > +
> > +#endif
> > diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
> > new file mode 100644
> > index 0000000000..4e02678e3c
> > --- /dev/null
> > +++ b/include/xen/interface/io/ring.h
> > @@ -0,0 +1,479 @@
> > +/******************************************************************************
> > + * ring.h
> > + *
> > + * Shared producer-consumer ring macros.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Tim Deegan and Andrew Warfield November 2004.
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_IO_RING_H__
> > +#define __XEN_PUBLIC_IO_RING_H__
> > +
> > +/*
> > + * When #include'ing this header, you need to provide the following
> > + * declaration upfront:
> > + * - standard integer types (u8, u16, etc)
> > + * They are provided by stdint.h of the standard headers.
> > + *
> > + * In addition, if you intend to use the FLEX macros, you also need to
> > + * provide the following, before invoking the FLEX macros:
> > + * - size_t
> > + * - memcpy
> > + * - grant_ref_t
> > + * These declarations are provided by string.h of the standard headers,
> > + * and grant_table.h from the Xen public headers.
> > + */
> > +
> > +#include <xen/interface/grant_table.h>
> > +
> > +typedef unsigned int RING_IDX;
> > +
> > +/* Round a 32-bit unsigned constant down to the nearest power of two. */
> > +#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
> > +#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
> > +#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
> > +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
> > +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
> > +
> > +/*
> > + * Calculate size of a shared ring, given the total available space for the
> > + * ring and indexes (_sz), and the name tag of the request/response structure.
> > + * A ring contains as many entries as will fit, rounded down to the nearest
> > + * power of two (so we can mask with (size-1) to loop around).
> > + */
> > +#define __CONST_RING_SIZE(_s, _sz) \
> > +	(__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
> > +		sizeof(((struct _s##_sring *)0)->ring[0])))
> > +/*
> > + * The same for passing in an actual pointer instead of a name tag.
> > + */
> > +#define __RING_SIZE(_s, _sz) \
> > +	(__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
> > +
> > +/*
> > + * Macros to make the correct C datatypes for a new kind of ring.
> > + *
> > + * To make a new ring datatype, you need to have two message structures,
> > + * let's say request_t, and response_t already defined.
> > + *
> > + * In a header where you want the ring datatype declared, you then do:
> > + *
> > + *     DEFINE_RING_TYPES(mytag, request_t, response_t);
> > + *
> > + * These expand out to give you a set of types, as you can see below.
> > + * The most important of these are:
> > + *
> > + *     mytag_sring_t      - The shared ring.
> > + *     mytag_front_ring_t - The 'front' half of the ring.
> > + *     mytag_back_ring_t  - The 'back' half of the ring.
> > + *
> > + * To initialize a ring in your code you need to know the location and size
> > + * of the shared memory area (PAGE_SIZE, for instance). To initialise
> > + * the front half:
> > + *
> > + *     mytag_front_ring_t front_ring;
> > + *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
> > + *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
> > + *
> > + * Initializing the back follows similarly (note that only the front
> > + * initializes the shared ring):
> > + *
> > + *     mytag_back_ring_t back_ring;
> > + *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
> > + */
> > +
> > +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                       \
> > +                                                                          \
> > +/* Shared ring entry */                                                   \
> > +union __name##_sring_entry {                                              \
> > +        __req_t req;                                                      \
> > +        __rsp_t rsp;                                                      \
> > +};                                                                        \
> > +                                                                          \
> > +/* Shared ring page */                                                    \
> > +struct __name##_sring {                                                   \
> > +        RING_IDX req_prod, req_event;                                     \
> > +        RING_IDX rsp_prod, rsp_event;                                     \
> > +        union {                                                           \
> > +                struct {                                                  \
> > +                        u8 smartpoll_active;                              \
> > +                } netif;                                                  \
> > +                struct {                                                  \
> > +                        u8 msg;                                           \
> > +                } tapif_user;                                             \
> > +                u8 pvt_pad[4];                                            \
> > +        } pvt;                                                            \
> > +        u8 __pad[44];                                                     \
> > +        union __name##_sring_entry ring[1]; /* variable-length */        \
> > +};                                                                        \
> > +                                                                          \
> > +/* "Front" end's private variables */                                     \
> > +struct __name##_front_ring {                                              \
> > +        RING_IDX req_prod_pvt;                                            \
> > +        RING_IDX rsp_cons;                                                \
> > +        unsigned int nr_ents;                                             \
> > +        struct __name##_sring *sring;                                     \
> > +};                                                                        \
> > +                                                                          \
> > +/* "Back" end's private variables */                                      \
> > +struct __name##_back_ring {                                               \
> > +        RING_IDX rsp_prod_pvt;                                            \
> > +        RING_IDX req_cons;                                                \
> > +        unsigned int nr_ents;                                             \
> > +        struct __name##_sring *sring;                                     \
> > +};                                                                        \
> > +                                                                          \
> > +/* Syntactic sugar */                                                     \
> > +typedef struct __name##_sring __name##_sring_t;                           \
> > +typedef struct __name##_front_ring __name##_front_ring_t;                 \
> > +typedef struct __name##_back_ring __name##_back_ring_t
> > +
> > +/*
> > + * Macros for manipulating rings.
> > + *
> > + * FRONT_RING_whatever works on the "front end" of a ring: here
> > + * requests are pushed on to the ring and responses taken off it.
> > + *
> > + * BACK_RING_whatever works on the "back end" of a ring: here
> > + * requests are taken off the ring and responses put on.
> > + *
> > + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
> > + * This is OK in 1-for-1 request-response situations where the
> > + * requestor (front end) never has more than RING_SIZE()-1
> > + * outstanding requests.
> > + */
> > +
> > +/* Initialising empty rings */
> > +#define SHARED_RING_INIT(_s) do {                                         \
> > +        (_s)->req_prod  = (_s)->rsp_prod  = 0;                            \
> > +        (_s)->req_event = (_s)->rsp_event = 1;                            \
> > +        (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad));    \
> > +        (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                \
> > +} while (0)
> > +
> > +#define FRONT_RING_INIT(_r, _s, __size) do {                              \
> > +        (_r)->req_prod_pvt = 0;                                           \
> > +        (_r)->rsp_cons = 0;                                               \
> > +        (_r)->nr_ents = __RING_SIZE(_s, __size);                          \
> > +        (_r)->sring = (_s);                                               \
> > +} while (0)
> > +
> > +#define BACK_RING_INIT(_r, _s, __size) do {                               \
> > +        (_r)->rsp_prod_pvt = 0;                                           \
> > +        (_r)->req_cons = 0;                                               \
> > +        (_r)->nr_ents = __RING_SIZE(_s, __size);                          \
> > +        (_r)->sring = (_s);                                               \
> > +} while (0)
> > +
> > +/* How big is this ring? */
> > +#define RING_SIZE(_r)                                                     \
> > +        ((_r)->nr_ents)
> > +
> > +/* Number of free requests (for use on front side only). */
> > +#define RING_FREE_REQUESTS(_r)                                            \
> > +        (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
> > +
> > +/* Test if there is an empty slot available on the front ring.
> > + * (This is only meaningful from the front. )
> > + */
> > +#define RING_FULL(_r)                                                     \
> > +        (RING_FREE_REQUESTS(_r) == 0)
> > +
> > +/* Test if there are outstanding messages to be processed on a ring. */
> > +#define RING_HAS_UNCONSUMED_RESPONSES(_r)                                 \
> > +        ((_r)->sring->rsp_prod - (_r)->rsp_cons)
> > +
> > +#ifdef __GNUC__
> > +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                               \
> > +        unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;        \
> > +        unsigned int rsp = RING_SIZE(_r) -                                \
> > +                ((_r)->req_cons - (_r)->rsp_prod_pvt);                    \
> > +        req < rsp ? req : rsp;                                            \
> > +})
> > +#else
> > +/* Same as above, but without the nice GCC ({ ... }) syntax. */
> > +#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                  \
> > +        ((((_r)->sring->req_prod - (_r)->req_cons) <                      \
> > +          (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?      \
> > +         ((_r)->sring->req_prod - (_r)->req_cons) :                       \
> > +         (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
> > +#endif
> > +
> > +/* Direct access to individual ring elements, by index. */
> > +#define RING_GET_REQUEST(_r, _idx)                                        \
> > +        (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
> > +
> > +/*
> > + * Get a local copy of a request.
> > + *
> > + * Use this in preference to RING_GET_REQUEST() so all processing is
> > + * done on a local copy that cannot be modified by the other end.
> > + *
> > + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
> > + * to be ineffective where _req is a struct which consists of only bitfields.
> > + */
> > +#define RING_COPY_REQUEST(_r, _idx, _req) do {                            \
> > +        /* Use volatile to force the copy into _req. */                   \
> > +        *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);     \
> > +} while (0)
> > +
> > +#define RING_GET_RESPONSE(_r, _idx)                                       \
> > +        (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
> > +
> > +/* Loop termination condition: Would the specified index overflow the ring? */
> > +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                             \
> > +        (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
> > +
> > +/* Ill-behaved frontend determination: Can there be this many requests? */
> > +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                             \
> > +        (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
> > +
> > +#define RING_PUSH_REQUESTS(_r) do {                                       \
> > +        xen_wmb(); /* back sees requests /before/ updated producer index */ \
> > +        (_r)->sring->req_prod = (_r)->req_prod_pvt;                       \
> > +} while (0)
> > +
> > +#define RING_PUSH_RESPONSES(_r) do {                                      \
> > +        xen_wmb(); /* front sees resps /before/ updated producer index */ \
> > +        (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                       \
> > +} while (0)
> > +
> > +/*
> > + * Notification hold-off (req_event and rsp_event):
> > + *
> > + * When queueing requests or responses on a shared ring, it may not always be
> > + * necessary to notify the remote end. For example, if requests are in flight
> > + * in a backend, the front may be able to queue further requests without
> > + * notifying the back (if the back checks for new requests when it queues
> > + * responses).
> > + *
> > + * When enqueuing requests or responses:
> > + *
> > + *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
> > + *  is a boolean return value. True indicates that the receiver requires an
> > + *  asynchronous notification.
> > + *
> > + * After dequeuing requests or responses (before sleeping the connection):
> > + *
> > + *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
> > + *  The second argument is a boolean return value. True indicates that there
> > + *  are pending messages on the ring (i.e., the connection should not be put
> > + *  to sleep).
> > + *
> > + *  These macros will set the req_event/rsp_event field to trigger a
> > + *  notification on the very next message that is enqueued. If you want to
> > + *  create batches of work (i.e., only receive a notification after several
> > + *  messages have been enqueued) then you will need to create a customised
> > + *  version of the FINAL_CHECK macro in your own code, which sets the event
> > + *  field appropriately.
> > + */
> > +
> > +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {             \
> > +        RING_IDX __old = (_r)->sring->req_prod;                           \
> > +        RING_IDX __new = (_r)->req_prod_pvt;                              \
> > +        xen_wmb(); /* back sees requests /before/ updated producer index */ \
> > +        (_r)->sring->req_prod = __new;                                    \
> > +        xen_mb(); /* back sees new requests /before/ we check req_event */ \
> > +        (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <         \
> > +                     (RING_IDX)(__new - __old));                          \
> > +} while (0)
> > +
> > +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {            \
> > +        RING_IDX __old = (_r)->sring->rsp_prod;                           \
> > +        RING_IDX __new = (_r)->rsp_prod_pvt;                              \
> > +        xen_wmb(); /* front sees resps /before/ updated producer index */ \
> > +        (_r)->sring->rsp_prod = __new;                                    \
> > +        xen_mb(); /* front sees new resps /before/ we check rsp_event */  \
> > +        (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <         \
> > +                     (RING_IDX)(__new - __old));                          \
> > +} while (0)
> > +
> > +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {               \
> > +        (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                 \
> > +        if (_work_to_do)                                                  \
> > +                break;                                                    \
> > +        (_r)->sring->req_event = (_r)->req_cons + 1;                      \
> > +        xen_mb();                                                         \
> > +        (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                 \
> > +} while (0)
> > +
> > +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {              \
> > +        (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                \
> > +        if (_work_to_do)                                                  \
> > +                break;                                                    \
> > +        (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                      \
> > +        xen_mb();                                                         \
> > +        (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                \
> > +} while (0)
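[Editor's note: as a usage sketch of the hold-off protocol above, a
frontend's response handler typically loops like this, assuming a
mytag_front_ring_t "ring" from DEFINE_RING_TYPES(), a process_response()
callback (illustrative), and an xen_rmb() read-barrier wrapper matching
the xen_wmb()/xen_mb() ones used by these macros:

    int more;

    do {
        RING_IDX prod = ring.sring->rsp_prod;

        xen_rmb(); /* see published responses before reading them */
        while (ring.rsp_cons != prod) {
            struct mytag_response *rsp =
                RING_GET_RESPONSE(&ring, ring.rsp_cons);

            process_response(rsp);
            ring.rsp_cons++;
        }
        /* re-arm rsp_event and catch anything that raced in */
        RING_FINAL_CHECK_FOR_RESPONSES(&ring, more);
    } while (more);
]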
> > +
> > +/*
> > + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
> > + * functions to check if there is data on the ring, and to read and
> > + * write to them.
> > + *
> > + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
> > + * does not define the indexes page. As different protocols can have
> > + * extensions to the basic format, this macro allows them to define their
> > + * own struct.
> > + *
> > + * XEN_FLEX_RING_SIZE
> > + *   Convenience macro to calculate the size of one of the two rings
> > + *   from the overall order.
> > + *
> > + * $NAME_mask
> > + *   Function to apply the size mask to an index, to reduce the index
> > + *   within the range [0-size].
> > + *
> > + * $NAME_read_packet
> > + *   Function to read data from the ring. The amount of data to read is
> > + *   specified by the "size" argument.
> > + *
> > + * $NAME_write_packet
> > + *   Function to write data to the ring. The amount of data to write is
> > + *   specified by the "size" argument.
> > + *
> > + * $NAME_get_ring_ptr
> > + *   Convenience function that returns a pointer to read/write to the
> > + *   ring at the right location.
> > + *
> > + * $NAME_data_intf
> > + *   Indexes page, shared between frontend and backend. It also
> > + *   contains the array of grant refs.
> > + *
> > + * $NAME_queued
> > + *   Function to calculate how many bytes are currently on the ring,
> > + *   ready to be read. It can also be used to calculate how much free
> > + *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
> > + *   $NAME_queued()).
> > + */
> > +
> > +#ifndef XEN_PAGE_SHIFT
> > +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
> > + * 4K, regardless of the architecture, and page granularity chosen by
> > + * operating systems.
> > + */
> > +#define XEN_PAGE_SHIFT 12
> > +#endif
> > +#define XEN_FLEX_RING_SIZE(order)                                         \
> > +        (1UL << ((order) + XEN_PAGE_SHIFT - 1))
> > +
> > +#define DEFINE_XEN_FLEX_RING(name)                                        \
> > +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size)      \
> > +{                                                                         \
> > +        return idx & (ring_size - 1);                                     \
> > +}                                                                         \
> > +                                                                          \
> > +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf,      \
> > +                                                 RING_IDX idx,            \
> > +                                                 RING_IDX ring_size)      \
> > +{                                                                         \
> > +        return buf + name##_mask(idx, ring_size);                         \
> > +}                                                                         \
> > +                                                                          \
> > +static inline void name##_read_packet(void *opaque,                       \
> > +                                      const unsigned char *buf,           \
> > +                                      size_t size,                        \
> > +                                      RING_IDX masked_prod,               \
> > +                                      RING_IDX *masked_cons,              \
> > +                                      RING_IDX ring_size)                 \
> > +{                                                                         \
> > +        if (*masked_cons < masked_prod ||                                 \
> > +            size <= ring_size - *masked_cons) {                           \
> > +                memcpy(opaque, buf + *masked_cons, size);                 \
> > +        } else {                                                          \
> > +                memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \
> > +                memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \
> > +                       size - (ring_size - *masked_cons));                \
> > +        }                                                                 \
> > +        *masked_cons = name##_mask(*masked_cons + size, ring_size);       \
> > +}                                                                         \
> > +                                                                          \
> > +static inline void name##_write_packet(unsigned char *buf,                \
> > +                                       const void *opaque,                \
> > +                                       size_t size,                       \
> > +                                       RING_IDX *masked_prod,             \
> > +                                       RING_IDX masked_cons,              \
> > +                                       RING_IDX ring_size)                \
> > +{                                                                         \
> > +        if (*masked_prod < masked_cons ||                                 \
> > +            size <= ring_size - *masked_prod) {                           \
> > +                memcpy(buf + *masked_prod, opaque, size);                 \
> > +        } else {                                                          \
> > +                memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \
> > +                memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \
> > +                       size - (ring_size - *masked_prod));                \
> > +        }                                                                 \
> > +        *masked_prod = name##_mask(*masked_prod + size, ring_size);       \
> > +}                                                                         \
> > +                                                                          \
> > +static inline RING_IDX name##_queued(RING_IDX prod,                       \
> > +                                     RING_IDX cons,                       \
> > +                                     RING_IDX ring_size)                  \
> > +{                                                                         \
> > +        RING_IDX size;                                                    \
> > +                                                                          \
> > +        if (prod == cons)                                                 \
> > +                return 0;                                                 \
> > +                                                                          \
> > +        prod = name##_mask(prod, ring_size);                              \
> > +        cons = name##_mask(cons, ring_size);                              \
> > +                                                                          \
> > +        if (prod == cons)                                                 \
> > +                return ring_size;                                         \
> > +                                                                          \
> > +        if (prod > cons)                                                  \
> > +                size = prod - cons;                                       \
> > +        else                                                              \
> > +                size = ring_size - (cons - prod);                         \
> > +        return size;                                                      \
> > +}                                                                         \
> > +                                                                          \
> > +struct name##_data {                                                      \
> > +        unsigned char *in; /* half of the allocation */                   \
> > +        unsigned char *out; /* half of the allocation */                  \
> > +}
> > +
> > +#define DEFINE_XEN_FLEX_RING_AND_INTF(name)                               \
> > +struct name##_data_intf {                                                 \
> > +        RING_IDX in_cons, in_prod;                                        \
> > +                                                                          \
> > +        u8 pad1[56];                                                      \
> > +                                                                          \
> > +        RING_IDX out_cons, out_prod;                                      \
> > +                                                                          \
> > +        u8 pad2[56];                                                      \
> > +                                                                          \
> > +        RING_IDX ring_order;                                              \
> > +        grant_ref_t ref[];                                                \
> > +};                                                                        \
> > +DEFINE_XEN_FLEX_RING(name)
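[Editor's note: to illustrate the generated helpers, a consumer of a ring
defined with DEFINE_XEN_FLEX_RING(demo) might drain its "in" ring roughly
as below. The demo_* names come from the macro expansion; "intf" (a
demo_data_intf) and "data" (a demo_data) are assumed to have been set up
at connection time, and barriers are omitted for brevity:

    RING_IDX ring_size = XEN_FLEX_RING_SIZE(intf->ring_order);
    RING_IDX cons = intf->in_cons, prod = intf->in_prod;
    RING_IDX masked_cons = demo_mask(cons, ring_size);
    RING_IDX avail = demo_queued(prod, cons, ring_size);
    char buf[64];
    size_t len = avail < sizeof(buf) ? avail : sizeof(buf);

    demo_read_packet(buf, data->in, len,
                     demo_mask(prod, ring_size), &masked_cons, ring_size);
    intf->in_cons = cons + len; /* publish the new consumer index */
]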
> > +
> > +#endif /* __XEN_PUBLIC_IO_RING_H__ */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 8
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
> > new file mode 100644
> > index 0000000000..f452748b03
> > --- /dev/null
> > +++ b/include/xen/interface/io/xenbus.h
> > @@ -0,0 +1,81 @@
> > +/*****************************************************************************
> > + * xenbus.h
> > + *
> > + * Xenbus protocol details.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (C) 2005 XenSource Ltd.
> > + */
> > +
> > +#ifndef _XEN_PUBLIC_IO_XENBUS_H
> > +#define _XEN_PUBLIC_IO_XENBUS_H
> > +
> > +/*
> > + * The state of either end of the Xenbus, i.e. the current communication
> > + * status of initialisation across the bus.  States here imply nothing about
> > + * the state of the connection between the driver and the kernel's device
> > + * layers.
> > + */
> > +enum xenbus_state {
> > +	XenbusStateUnknown       = 0,
> > +
> > +	XenbusStateInitialising  = 1,
> > +
> > +	/*
> > +	 * InitWait: Finished early initialisation but waiting for information
> > +	 * from the peer or hotplug scripts.
> > +	 */
> > +	XenbusStateInitWait      = 2,
> > +
> > +	/*
> > +	 * Initialised: Waiting for a connection from the peer.
> > +	 */
> > +	XenbusStateInitialised   = 3,
> > +
> > +	XenbusStateConnected     = 4,
> > +
> > +	/*
> > +	 * Closing: The device is being closed due to an error or an unplug event.
> > +	 */
> > +	XenbusStateClosing       = 5,
> > +
> > +	XenbusStateClosed        = 6,
> > +
> > +	/*
> > +	 * Reconfiguring: The device is being reconfigured.
> > +	 */
> > +	XenbusStateReconfiguring = 7,
> > +
> > +	XenbusStateReconfigured  = 8
> > +};
> > +
> > +typedef enum xenbus_state XenbusState;
> > +
> > +#endif /* _XEN_PUBLIC_IO_XENBUS_H */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 4
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
> > new file mode 100644
> > index 0000000000..87987334bf
> > --- /dev/null
> > +++ b/include/xen/interface/io/xs_wire.h
> > @@ -0,0 +1,151 @@
> > +/*
> > + * Details of the "wire" protocol between Xen Store Daemon and client
> > + * library or guest kernel.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (C) 2005 Rusty Russell IBM Corporation
> > + */
> > +
> > +#ifndef _XS_WIRE_H
> > +#define _XS_WIRE_H
> > +
> > +enum xsd_sockmsg_type {
> > +	XS_CONTROL,
> > +#define XS_DEBUG XS_CONTROL
> > +	XS_DIRECTORY,
> > +	XS_READ,
> > +	XS_GET_PERMS,
> > +	XS_WATCH,
> > +	XS_UNWATCH,
> > +	XS_TRANSACTION_START,
> > +	XS_TRANSACTION_END,
> > +	XS_INTRODUCE,
> > +	XS_RELEASE,
> > +	XS_GET_DOMAIN_PATH,
> > +	XS_WRITE,
> > +	XS_MKDIR,
> > +	XS_RM,
> > +	XS_SET_PERMS,
> > +	XS_WATCH_EVENT,
> > +	XS_ERROR,
> > +	XS_IS_DOMAIN_INTRODUCED,
> > +	XS_RESUME,
> > +	XS_SET_TARGET,
> > +	/* XS_RESTRICT has been removed */
> > +	XS_RESET_WATCHES = XS_SET_TARGET + 2,
> > +	XS_DIRECTORY_PART,
> > +
> > +	XS_TYPE_COUNT,      /* Number of valid types. */
> > +
> > +	XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
> > +};
> > +
> > +#define XS_WRITE_NONE "NONE"
> > +#define XS_WRITE_CREATE "CREATE"
> > +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
> > +
> > +/* We hand errors as strings, for portability. */
> > +struct xsd_errors {
> > +	int errnum;
> > +	const char *errstring;
> > +};
> > +
> > +#ifdef EINVAL
> > +#define XSD_ERROR(x) { x, #x }
> > +/* LINTED: static unused */
> > +static struct xsd_errors xsd_errors[]
> > +#if defined(__GNUC__)
> > +__attribute__((unused))
> > +#endif
> > +	= {
> > +	XSD_ERROR(EINVAL),
> > +	XSD_ERROR(EACCES),
> > +	XSD_ERROR(EEXIST),
> > +	XSD_ERROR(EISDIR),
> > +	XSD_ERROR(ENOENT),
> > +	XSD_ERROR(ENOMEM),
> > +	XSD_ERROR(ENOSPC),
> > +	XSD_ERROR(EIO),
> > +	XSD_ERROR(ENOTEMPTY),
> > +	XSD_ERROR(ENOSYS),
> > +	XSD_ERROR(EROFS),
> > +	XSD_ERROR(EBUSY),
> > +	XSD_ERROR(EAGAIN),
> > +	XSD_ERROR(EISCONN),
> > +	XSD_ERROR(E2BIG)
> > +};
> > +#endif
> > +
> > +struct xsd_sockmsg {
> > +	u32 type;  /* XS_??? */
> > +	u32 req_id;/* Request identifier, echoed in daemon's response.  */
> > +	u32 tx_id; /* Transaction id (0 if not related to a transaction). */
> > +	u32 len;   /* Length of data following this. */
> > +
> > +	/* Generally followed by nul-terminated string(s). */
> > +};
> > +
> > +enum xs_watch_type {
> > +	XS_WATCH_PATH = 0,
> > +	XS_WATCH_TOKEN
> > +};
> > +
> > +/*
> > + * `incontents 150 xenstore_struct XenStore wire protocol.
> > + *
> > + * Inter-domain shared memory communications.
> > + */
> > +#define XENSTORE_RING_SIZE 1024
> > +typedef u32 XENSTORE_RING_IDX;
> > +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1))
> > +struct xenstore_domain_interface {
> > +	char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon.    */
> > +	char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
> > +	XENSTORE_RING_IDX req_cons, req_prod;
> > +	XENSTORE_RING_IDX rsp_cons, rsp_prod;
> > +	u32 server_features; /* Bitmap of features supported by the server */
> > +	u32 connection;
> > +};
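[Editor's note: a hedged sketch of sending one XS_READ request over this
shared page — the free-space check and write barrier are assumed to be
done by the caller, and "path" is a nul-terminated xenstore path:

    struct xsd_sockmsg msg = {
        .type   = XS_READ,
        .req_id = 1,
        .tx_id  = 0,                 /* not part of a transaction */
        .len    = strlen(path) + 1,  /* payload incl. terminating NUL */
    };
    const char *hdr = (const char *)&msg;
    unsigned int i;

    /* copy header, then payload, byte-wise through the ring mask */
    for (i = 0; i < sizeof(msg); i++)
        intf->req[MASK_XENSTORE_IDX(intf->req_prod + i)] = hdr[i];
    for (i = 0; i < msg.len; i++)
        intf->req[MASK_XENSTORE_IDX(intf->req_prod + sizeof(msg) + i)] =
            path[i];
    /* wmb() assumed here, then publish and kick the event channel: */
    intf->req_prod += sizeof(msg) + msg.len;
]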
> > +
> > +/* Violating this is very bad.  See docs/misc/xenstore.txt. */
> > +#define XENSTORE_PAYLOAD_MAX 4096
> > +
> > +/* Violating these just gets you an error back */
> > +#define XENSTORE_ABS_PATH_MAX 3072
> > +#define XENSTORE_REL_PATH_MAX 2048
> > +
> > +/* The ability to reconnect a ring */
> > +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
> > +
> > +/* Valid values for the connection field */
> > +#define XENSTORE_CONNECTED 0 /* the steady-state */
> > +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
> > +
> > +#endif /* _XS_WIRE_H */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 8
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> > diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> > new file mode 100644
> > index 0000000000..19959da8b4
> > --- /dev/null
> > +++ b/include/xen/interface/memory.h
> > @@ -0,0 +1,332 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/******************************************************************************
> > + * memory.h
> > + *
> > + * Memory reservation and information.
> > + *
> > + * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_MEMORY_H__
> > +#define __XEN_PUBLIC_MEMORY_H__
> > +
> > +/*
> > + * Increase or decrease the specified domain's memory reservation. Returns a
> > + * -ve errcode on failure, or the # extents successfully allocated or freed.
> > + * arg == addr of struct xen_memory_reservation.
> > + */
> > +#define XENMEM_increase_reservation 0
> > +#define XENMEM_decrease_reservation 1
> > +#define XENMEM_populate_physmap     6
> > +struct xen_memory_reservation {
> > +	/*
> > +	 * XENMEM_increase_reservation:
> > +	 *   OUT: MFN (*not* GMFN) bases of extents that were allocated
> > +	 * XENMEM_decrease_reservation:
> > +	 *   IN:  GMFN bases of extents to free
> > +	 * XENMEM_populate_physmap:
> > +	 *   IN:  GPFN bases of extents to populate with memory
> > +	 *   OUT: GMFN bases of extents that were allocated
> > +	 *   (NB. This command also updates the mach_to_phys translation table)
> > +	 */
> > +	GUEST_HANDLE(xen_pfn_t)extent_start;
> > +
> > +	/* Number of extents, and size/alignment of each (2^extent_order pages). */
> > +	xen_ulong_t  nr_extents;
> > +	unsigned int   extent_order;
> > +
> > +	/*
> > +	 * Maximum # bits addressable by the user of the allocated region (e.g.,
> > +	 * I/O devices often have a 32-bit limitation even in 64-bit systems). If
> > +	 * zero then the user has no addressing restriction.
> > +	 * This field is not used by XENMEM_decrease_reservation.
> > +	 */
> > +	unsigned int   address_bits;
> > +
> > +	/*
> > +	 * Domain whose reservation is being changed.
> > +	 * Unprivileged domains can specify only DOMID_SELF.
> > +	 */
> > +	domid_t        domid;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
> > +
> > +/*
> > + * An atomic exchange of memory pages. If return code is zero then
> > + * @out.extent_list provides GMFNs of the newly-allocated memory.
> > + * Returns zero on complete success, otherwise a negative error code.
> > + * On complete success then always @nr_exchanged == @in.nr_extents.
> > + * On partial success @nr_exchanged indicates how much work was done.
> > + */
> > +#define XENMEM_exchange             11
> > +struct xen_memory_exchange {
> > +	/*
> > +	 * [IN] Details of memory extents to be exchanged (GMFN bases).
> > +	 * Note that @in.address_bits is ignored and unused.
> > +	 */
> > +	struct xen_memory_reservation in;
> > +
> > +	/*
> > +	 * [IN/OUT] Details of new memory extents.
> > +	 * We require that:
> > +	 *  1. @in.domid == @out.domid
> > +	 *  2. @in.nr_extents  << @in.extent_order ==
> > +	 *     @out.nr_extents << @out.extent_order
> > +	 *  3. @in.extent_start and @out.extent_start lists must not overlap
> > +	 *  4. @out.extent_start lists GPFN bases to be populated
> > +	 *  5. @out.extent_start is overwritten with allocated GMFN bases
> > +	 */
> > +	struct xen_memory_reservation out;
> > +
> > +	/*
> > +	 * [OUT] Number of input extents that were successfully exchanged:
> > +	 *  1. The first @nr_exchanged input extents were successfully
> > +	 *     deallocated.
> > +	 *  2. The corresponding first entries in the output extent list correctly
> > +	 *     indicate the GMFNs that were successfully exchanged.
> > +	 *  3. All other input and output extents are untouched.
> > +	 *  4. If not all input exents are exchanged then the return code of this
> > +	 *     command will be non-zero.
> > +	 *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
> > +	 */
> > +	xen_ulong_t nr_exchanged;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
> > +/*
> > + * Returns the maximum machine frame number of mapped RAM in this system.
> > + * This command always succeeds (it never returns an error code).
> > + * arg == NULL.
> > + */
> > +#define XENMEM_maximum_ram_page     2
> > +
> > +/*
> > + * Returns the current or maximum memory reservation, in pages, of the
> > + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
> > + * arg == addr of domid_t.
> > + */
> > +#define XENMEM_current_reservation  3
> > +#define XENMEM_maximum_reservation  4
> > +
> > +/*
> > + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
> > + * mapping table. Architectures which do not have a m2p table do not implement
> > + * this command.
> > + * arg == addr of xen_machphys_mfn_list_t.
> > + */
> > +#define XENMEM_machphys_mfn_list    5
> > +struct xen_machphys_mfn_list {
> > +	/*
> > +	 * Size of the 'extent_start' array. Fewer entries will be filled if the
> > +	 * machphys table is smaller than max_extents * 2MB.
> > +	 */
> > +	unsigned int max_extents;
> > +
> > +	/*
> > +	 * Pointer to buffer to fill with list of extent starts. If there are
> > +	 * any large discontiguities in the machine address space, 2MB gaps in
> > +	 * the machphys table will be represented by an MFN base of zero.
> > +	 */
> > +	GUEST_HANDLE(xen_pfn_t)extent_start;
> > +
> > +	/*
> > +	 * Number of extents written to the above array. This will be smaller
> > +	 * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
> > +	 */
> > +	unsigned int nr_extents;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
> > +
> > +/*
> > + * Returns the location in virtual address space of the machine_to_phys
> > + * mapping table. Architectures which do not have a m2p table, or which do not
> > + * map it by default into guest address space, do not implement this command.
> > + * arg == addr of xen_machphys_mapping_t.
> > + */
> > +#define XENMEM_machphys_mapping     12
> > +struct xen_machphys_mapping {
> > +	xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
> > +	xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
> > +
> > +#define XENMAPSPACE_shared_info  0 /* shared info page */
> > +#define XENMAPSPACE_grant_table  1 /* grant table page */
> > +#define XENMAPSPACE_gmfn         2 /* GMFN */
> > +#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
> > +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
> > +				    * XENMEM_add_to_physmap_range only.
> > +				    */
> > +#define XENMAPSPACE_dev_mmio     5 /* device mmio region */
> > +
> > +/*
> > + * Sets the GPFN at which a particular page appears in the specified guest's
> > + * pseudophysical address space.
> > + * arg == addr of xen_add_to_physmap_t.
> > + */
> > +#define XENMEM_add_to_physmap      7
> > +struct xen_add_to_physmap {
> > +	/* Which domain to change the mapping for. */
> > +	domid_t domid;
> > +
> > +	/* Number of pages to go through for gmfn_range */
> > +	u16    size;
> > +
> > +	/* Source mapping space. */
> > +	unsigned int space;
> > +
> > +	/* Index into source mapping space. */
> > +	xen_ulong_t idx;
> > +
> > +	/* GPFN where the source mapping page should appear. */
> > +	xen_pfn_t gpfn;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
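[Editor's note: for example, mapping the shared info page into the guest
physmap — the canonical first use of this call — can look like the
following. The HYPERVISOR_memory_op() wrapper and the target "pfn" are
assumed to be provided by the caller's hypercall layer; error handling
is elided:

    struct xen_add_to_physmap xatp = {
        .domid = DOMID_SELF,
        .space = XENMAPSPACE_shared_info,
        .idx   = 0,
        .gpfn  = pfn,  /* guest frame where shared_info should appear */
    };
    int rc;

    rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
    /* rc != 0 means the mapping failed */
]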
> > +
> > +/*** REMOVED ***/
> > +/*#define XENMEM_translate_gpfn_list  8*/
> > +
> > +#define XENMEM_add_to_physmap_range 23
> > +struct xen_add_to_physmap_range {
> > +	/* IN */
> > +	/* Which domain to change the mapping for. */
> > +	domid_t domid;
> > +	u16 space; /* => enum phys_map_space */
> > +
> > +	/* Number of pages to go through */
> > +	u16 size;
> > +	domid_t foreign_domid; /* IFF gmfn_foreign */
> > +
> > +	/* Indexes into space being mapped. */
> > +	GUEST_HANDLE(xen_ulong_t)idxs;
> > +
> > +	/* GPFN in domid where the source mapping page should appear. */
> > +	GUEST_HANDLE(xen_pfn_t)gpfns;
> > +
> > +	/* OUT */
> > +
> > +	/* Per index error code. */
> > +	GUEST_HANDLE(int)errs;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
> > +
> > +/*
> > + * Returns the pseudo-physical memory map as it was when the domain
> > + * was started (specified by XENMEM_set_memory_map).
> > + * arg == addr of struct xen_memory_map.
> > + */
> > +#define XENMEM_memory_map           9
> > +struct xen_memory_map {
> > +	/*
> > +	 * On call the number of entries which can be stored in buffer. On
> > +	 * return the number of entries which have been stored in
> > +	 * buffer.
> > +	 */
> > +	unsigned int nr_entries;
> > +
> > +	/*
> > +	 * Entries in the buffer are in the same format as returned by the
> > +	 * BIOS INT 0x15 EAX=0xE820 call.
> > +	 */
> > +	GUEST_HANDLE(void)buffer;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
> > +
> > +/*
> > + * Returns the real physical memory map. Passes the same structure as
> > + * XENMEM_memory_map.
> > + * arg == addr of struct xen_memory_map.
> > + */
> > +#define XENMEM_machine_memory_map   10
> > +
> > +/*
> > + * Unmaps the page appearing at a particular GPFN from the specified guest's
> > + * pseudophysical address space.
> > + * arg == addr of xen_remove_from_physmap_t.
> > + */
> > +#define XENMEM_remove_from_physmap      15
> > +struct xen_remove_from_physmap {
> > +	/* Which domain to change the mapping for. */
> > +	domid_t domid;
> > +
> > +	/* GPFN of the current mapping of the page. */
> > +	xen_pfn_t gpfn;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
> > +
> > +/*
> > + * Get the pages for a particular guest resource, so that they can be
> > + * mapped directly by a tools domain.
> > + */
> > +#define XENMEM_acquire_resource 28
> > +struct xen_mem_acquire_resource {
> > +	/* IN - The domain whose resource is to be mapped */
> > +	domid_t domid;
> > +	/* IN - the type of resource */
> > +	u16 type;
> > +
> > +#define XENMEM_resource_ioreq_server 0
> > +#define XENMEM_resource_grant_table 1
> > +
> > +	/*
> > +	 * IN - a type-specific resource identifier, which must be zero
> > +	 *      unless stated otherwise.
> > +	 *
> > +	 * type == XENMEM_resource_ioreq_server -> id == ioreq server id
> > +	 * type == XENMEM_resource_grant_table -> id defined below
> > +	 */
> > +	u32 id;
> > +
> > +#define XENMEM_resource_grant_table_id_shared 0
> > +#define XENMEM_resource_grant_table_id_status 1
> > +
> > +	/* IN/OUT - As an IN parameter number of frames of the resource
> > +	 *          to be mapped. However, if the specified value is 0 and
> > +	 *          frame_list is NULL then this field will be set to the
> > +	 *          maximum value supported by the implementation on return.
> > +	 */
> > +	u32 nr_frames;
> > +	/*
> > +	 * OUT - Must be zero on entry. On return this may contain a bitwise
> > +	 *       OR of the following values.
> > +	 */
> > +	u32 flags;
> > +
> > +	/* The resource pages have been assigned to the calling domain */
> > +#define _XENMEM_rsrc_acq_caller_owned 0
> > +#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned)
> > +
> > +	/*
> > +	 * IN - the index of the initial frame to be mapped. This parameter
> > +	 *      is ignored if nr_frames is 0.
> > +	 */
> > +	u64 frame;
> > +
> > +#define XENMEM_resource_ioreq_server_frame_bufioreq 0
> > +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
> > +
> > +	/*
> > +	 * IN/OUT - If the tools domain is PV then, upon return, frame_list
> > +	 *          will be populated with the MFNs of the resource.
> > +	 *          If the tools domain is HVM then it is expected that, on
> > +	 *          entry, frame_list will be populated with a list of GFNs
> > +	 *          that will be mapped to the MFNs of the resource.
> > +	 *          If -EIO is returned then the frame_list has only been
> > +	 *          partially mapped and it is up to the caller to unmap all
> > +	 *          the GFNs.
> > +	 *          This parameter may be NULL if nr_frames is 0.
> > +	 */
> > +	GUEST_HANDLE(xen_pfn_t)frame_list;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource);
> > +
> > +#endif /* __XEN_PUBLIC_MEMORY_H__ */
> > diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
> > new file mode 100644
> > index 0000000000..0f12dcf267
> > --- /dev/null
> > +++ b/include/xen/interface/sched.h
> > @@ -0,0 +1,188 @@
> > +/******************************************************************************
> > + * sched.h
> > + *
> > + * Scheduler state interactions
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_SCHED_H__
> > +#define __XEN_PUBLIC_SCHED_H__
> > +
> > +#include <xen/interface/event_channel.h>
> > +
> > +/*
> > + * Guest Scheduler Operations
> > + *
> > + * The SCHEDOP interface provides mechanisms for a guest to interact
> > + * with the scheduler, including yield, blocking and shutting itself
> > + * down.
> > + */
> > +
> > +/*
> > + * The prototype for this hypercall is:
> > + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
> > + *
> > + * @cmd == SCHEDOP_??? (scheduler operation).
> > + * @arg == Operation-specific extra argument(s), as described below.
> > + * ...  == Additional Operation-specific extra arguments, described below.
> > + *
> > + * Versions of Xen prior to 3.0.2 provided only the following legacy version
> > + * of this hypercall, supporting only the commands yield, block and shutdown:
> > + *  long sched_op(int cmd, unsigned long arg)
> > + * @cmd == SCHEDOP_??? (scheduler operation).
> > + * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
> > + *      == SHUTDOWN_* code (SCHEDOP_shutdown)
> > + *
> > + * This legacy version is available to new guests as:
> > + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
> > + */
> > +
> > +/*
> > + * Voluntarily yield the CPU.
> > + * @arg == NULL.
> > + */
> > +#define SCHEDOP_yield       0
> > +
> > +/*
> > + * Block execution of this VCPU until an event is received for processing.
> > + * If called with event upcalls masked, this operation will atomically
> > + * reenable event delivery and check for pending events before blocking the
> > + * VCPU. This avoids a "wakeup waiting" race.
> > + * @arg == NULL.
> > + */
> > +#define SCHEDOP_block       1
> > +
> > +/*
> > + * Halt execution of this domain (all VCPUs) and notify the system controller.
> > + * @arg == pointer to sched_shutdown structure.
> > + *
> > + * If the sched_shutdown_t reason is SHUTDOWN_suspend then
> > + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
> > + * of the guest's start info page.  RDX/EDX is the third hypercall
> > + * argument.
> > + *
> > + * In addition, when the reason is SHUTDOWN_suspend this hypercall
> > + * returns 1 if suspend was cancelled or the domain was merely
> > + * checkpointed, and 0 if it is resuming in a new domain.
> > + */
> > +#define SCHEDOP_shutdown    2
> > +
> > +/*
> > + * Poll a set of event-channel ports. Return when one or more are pending. An
> > + * optional timeout may be specified.
> > + * @arg == pointer to sched_poll structure.
> > + */
> > +#define SCHEDOP_poll        3
> > +
> > +/*
> > + * Declare a shutdown for another domain. The main use of this function is
> > + * in interpreting shutdown requests and reasons for fully-virtualized
> > + * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
> > + * @arg == pointer to sched_remote_shutdown structure.
> > + */
> > +#define SCHEDOP_remote_shutdown        4
> > +
> > +/*
> > + * Latch a shutdown code, so that when the domain later shuts down it
> > + * reports this code to the control tools.
> > + * @arg == sched_shutdown, as for SCHEDOP_shutdown.
> > + */
> > +#define SCHEDOP_shutdown_code 5
> > +
> > +/*
> > + * Setup, poke and destroy a domain watchdog timer.
> > + * @arg == pointer to sched_watchdog structure.
> > + * With id == 0, setup a domain watchdog timer to cause domain shutdown
> > + *               after timeout, returns watchdog id.
> > + * With id != 0 and timeout == 0, destroy domain watchdog timer.
> > + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
> > + */
> > +#define SCHEDOP_watchdog    6
> > +
> > +/*
> > + * Override the current vcpu affinity by pinning it to one physical cpu or
> > + * undo this override restoring the previous affinity.
> > + * @arg == pointer to sched_pin_override structure.
> > + *
> > + * A negative pcpu value will undo a previous pin override and restore the
> > + * previous cpu affinity.
> > + * This call is allowed for the hardware domain only and requires the cpu
> > + * to be part of the domain's cpupool.
> > + */
> > +#define SCHEDOP_pin_override 7
> > +
> > +struct sched_shutdown {
> > +	unsigned int reason; /* SHUTDOWN_* => shutdown reason */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
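[Editor's note: tying the above together, a guest poweroff via
SCHEDOP_shutdown can be as small as the following sketch.
HYPERVISOR_sched_op() is the wrapper named in the prototype comment
earlier in this header and is assumed to be supplied by the hypercall
layer:

    struct sched_shutdown shutdown = {
        .reason = SHUTDOWN_poweroff,  /* reason codes defined below */
    };

    HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
]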
> > +
> > +struct sched_poll {
> > +	GUEST_HANDLE(evtchn_port_t)ports;
> > +	unsigned int nr_ports;
> > +	u64 timeout;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
> > +
> > +struct sched_remote_shutdown {
> > +	domid_t domain_id;         /* Remote domain ID */
> > +	unsigned int reason;       /* SHUTDOWN_* => shutdown reason */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
> > +
> > +struct sched_watchdog {
> > +	u32 id;                /* watchdog ID */
> > +	u32 timeout;           /* timeout */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
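[Editor's note: the watchdog flow documented at SCHEDOP_watchdog above,
as a hedged sketch (return-value checking elided; HYPERVISOR_sched_op()
assumed as before):

    struct sched_watchdog wd = { .id = 0, .timeout = 30 };
    u32 wid;

    /* id == 0: create a watchdog; the call returns the new id */
    wid = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);

    /* later, keep the domain alive by poking with the returned id */
    wd.id = wid;
    wd.timeout = 30;
    HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
]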
> > +
> > +struct sched_pin_override {
> > +	s32 pcpu;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
> > +
> > +/*
> > + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
> > + * software to determine the appropriate action. For the most part, Xen does
> > + * not care about the shutdown code.
> > + */
> > +#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
> > +#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
> > +#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
> > +#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
> > +#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
> > +
> > +/*
> > + * Domain asked to perform 'soft reset' for it. The expected behavior is to
> > + * reset internal Xen state for the domain returning it to the point where it
> > + * was created but leaving the domain's memory contents and vCPU contexts
> > + * intact. This will allow the domain to start over and set up all Xen specific
> > + * interfaces again.
> > + */
> > +#define SHUTDOWN_soft_reset 5
> > +#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason. */
> > +
> > +#endif /* __XEN_PUBLIC_SCHED_H__ */
> > diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> > new file mode 100644
> > index 0000000000..964daaedfb
> > --- /dev/null
> > +++ b/include/xen/interface/xen.h
> > @@ -0,0 +1,225 @@
> > +/******************************************************************************
> > + * xen.h
> > + *
> > + * Guest OS interface to Xen.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2004, K A Fraser
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_XEN_H__
> > +#define __XEN_PUBLIC_XEN_H__
> > +
> > +#include <xen/arm/interface.h>
> > +
> > +/*
> > + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
> > + */
> > +
> > +/*
> > + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
> > + *         EAX = return value
> > + *         (argument registers may be clobbered on return)
> > + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
> > + *         RAX = return value
> > + *         (argument registers not clobbered on return; RCX, R11 are)
> > + */
> > +#define __HYPERVISOR_set_trap_table        0
> > +#define __HYPERVISOR_mmu_update            1
> > +#define __HYPERVISOR_set_gdt               2
> > +#define __HYPERVISOR_stack_switch          3
> > +#define __HYPERVISOR_set_callbacks         4
> > +#define __HYPERVISOR_fpu_taskswitch        5
> > +#define __HYPERVISOR_sched_op_compat       6
> > +#define __HYPERVISOR_platform_op           7
> > +#define __HYPERVISOR_set_debugreg          8
> > +#define __HYPERVISOR_get_debugreg          9
> > +#define __HYPERVISOR_update_descriptor    10
> > +#define __HYPERVISOR_memory_op            12
> > +#define __HYPERVISOR_multicall            13
> > +#define __HYPERVISOR_update_va_mapping    14
> > +#define __HYPERVISOR_set_timer_op         15
> > +#define __HYPERVISOR_event_channel_op_compat 16
> > +#define __HYPERVISOR_xen_version          17
> > +#define __HYPERVISOR_console_io           18
> > +#define __HYPERVISOR_physdev_op_compat    19
> > +#define __HYPERVISOR_grant_table_op       20
> > +#define __HYPERVISOR_vm_assist            21
> > +#define __HYPERVISOR_update_va_mapping_otherdomain 22
> > +#define __HYPERVISOR_iret                 23 /* x86 only */
> > +#define __HYPERVISOR_vcpu_op              24
> > +#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
> > +#define __HYPERVISOR_mmuext_op            26
> > +#define __HYPERVISOR_xsm_op               27
> > +#define __HYPERVISOR_nmi_op               28
> > +#define __HYPERVISOR_sched_op             29
> > +#define __HYPERVISOR_callback_op          30
> > +#define __HYPERVISOR_xenoprof_op          31
> > +#define __HYPERVISOR_event_channel_op     32
> > +#define __HYPERVISOR_physdev_op           33
> > +#define __HYPERVISOR_hvm_op               34
> > +#define __HYPERVISOR_sysctl               35
> > +#define __HYPERVISOR_domctl               36
> > +#define __HYPERVISOR_kexec_op             37
> > +#define __HYPERVISOR_tmem_op              38
> > +#define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
> > +#define __HYPERVISOR_xenpmu_op            40
> > +#define __HYPERVISOR_dm_op                41
> > +
> > +/* Architecture-specific hypercall definitions. */
> > +#define __HYPERVISOR_arch_0               48
> > +#define __HYPERVISOR_arch_1               49
> > +#define __HYPERVISOR_arch_2               50
> > +#define __HYPERVISOR_arch_3               51
> > +#define __HYPERVISOR_arch_4               52
> > +#define __HYPERVISOR_arch_5               53
> > +#define __HYPERVISOR_arch_6               54
> > +#define __HYPERVISOR_arch_7               55
> > +
> > +#ifndef __ASSEMBLY__
> > +
> > +typedef u16 domid_t;
> > +
> > +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
> > +#define DOMID_FIRST_RESERVED (0x7FF0U)
> > +
> > +/* DOMID_SELF is used in certain contexts to refer to oneself. */
> > +#define DOMID_SELF (0x7FF0U)
> > +
> > +/*
> > + * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
> > + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
> > + * is useful to ensure that no mappings to the OS's own heap are accidentally
> > + * installed. (e.g., in Linux this could cause havoc as reference counts
> > + * aren't adjusted on the I/O-mapping code path).
> > + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
> > + * be specified by any calling domain.
> > + */
> > +#define DOMID_IO   (0x7FF1U)
> > +
> > +/*
> > + * DOMID_XEN is used to allow privileged domains to map restricted parts of
> > + * Xen's heap space (e.g., the machine_to_phys table).
> > + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
> > + * the caller is privileged.
> > + */
> > +#define DOMID_XEN  (0x7FF2U)
> > +
> > +/* DOMID_COW is used as the owner of sharable pages */
> > +#define DOMID_COW  (0x7FF3U)
> > +
> > +/* DOMID_INVALID is used to identify pages with unknown owner. */
> > +#define DOMID_INVALID (0x7FF4U)
> > +
> > +/* Idle domain. */
> > +#define DOMID_IDLE (0x7FFFU)
> > +
> > +struct vcpu_info {
> > +	/*
> > +	 * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
> > +	 * a pending notification for a particular VCPU. It is then cleared
> > +	 * by the guest OS /before/ checking for pending work, thus avoiding
> > +	 * a set-and-check race. Note that the mask is only accessed by Xen
> > +	 * on the CPU that is currently hosting the VCPU. This means that the
> > +	 * pending and mask flags can be updated by the guest without special
> > +	 * synchronisation (i.e., no need for the x86 LOCK prefix).
> > +	 * This may seem suboptimal because if the pending flag is set by
> > +	 * a different CPU then an IPI may be scheduled even when the mask
> > +	 * is set. However, note:
> > +	 *  1. The task of 'interrupt holdoff' is covered by the per-event-
> > +	 *     channel mask bits. A 'noisy' event that is continually being
> > +	 *     triggered can be masked at source at this very precise
> > +	 *     granularity.
> > +	 *  2. The main purpose of the per-VCPU mask is therefore to restrict
> > +	 *     reentrant execution: whether for concurrency control, or to
> > +	 *     prevent unbounded stack usage. Whatever the purpose, we expect
> > +	 *     that the mask will be asserted only for short periods at a time,
> > +	 *     and so the likelihood of a 'spurious' IPI is suitably small.
> > +	 * The mask is read before making an event upcall to the guest: a
> > +	 * non-zero mask therefore guarantees that the VCPU will not receive
> > +	 * an upcall activation. The mask is cleared when the VCPU requests
> > +	 * to block: this avoids wakeup-waiting races.
> > +	 */
> > +	u8 evtchn_upcall_pending;
> > +	u8 evtchn_upcall_mask;
> > +	xen_ulong_t evtchn_pending_sel;
> > +	struct arch_vcpu_info arch;
> > +	struct pvclock_vcpu_time_info time;
> > +}; /* 64 bytes (x86) */
> > +
> > +/*
> > + * Xen/kernel shared data -- pointer provided in start_info.
> > + * NB. We expect that this struct is smaller than a page.
> > + */
> > +struct shared_info {
> > +	struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
> > +
> > +	/*
> > +	 * A domain can create "event channels" on which it can send and receive
> > +	 * asynchronous event notifications. There are three classes of event that
> > +	 * are delivered by this mechanism:
> > +	 *  1. Bi-directional inter- and intra-domain connections. Domains must
> > +	 *     arrange out-of-band to set up a connection (usually by allocating
> > +	 *     an unbound 'listener' port and advertising that via a storage service
> > +	 *     such as xenstore).
> > +	 *  2. Physical interrupts. A domain with suitable hardware-access
> > +	 *     privileges can bind an event-channel port to a physical interrupt
> > +	 *     source.
> > +	 *  3. Virtual interrupts ('events'). A domain can bind an event-channel
> > +	 *     port to a virtual interrupt source, such as the virtual-timer
> > +	 *     device or the emergency console.
> > +	 *
> > +	 * Event channels are addressed by a "port index". Each channel is
> > +	 * associated with two bits of information:
> > +	 *  1. PENDING -- notifies the domain that there is a pending notification
> > +	 *     to be processed. This bit is cleared by the guest.
> > +	 *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
> > +	 *     will cause an asynchronous upcall to be scheduled. This bit is only
> > +	 *     updated by the guest. It is read-only within Xen. If a channel
> > +	 *     becomes pending while the channel is masked then the 'edge' is lost
> > +	 *     (i.e., when the channel is unmasked, the guest must manually handle
> > +	 *     pending notifications as no upcall will be scheduled by Xen).
> > +	 *
> > +	 * To expedite scanning of pending notifications, any 0->1 pending
> > +	 * transition on an unmasked channel causes a corresponding bit in a
> > +	 * per-vcpu selector word to be set. Each bit in the selector covers a
> > +	 * 'C long' in the PENDING bitfield array.
> > +	 */
> > +	xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
> > +	xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
> > +
> > +	/*
> > +	 * Wallclock time: updated only by control software. Guests should base
> > +	 * their gettimeofday() syscall on this wallclock-base value.
> > +	 */
> > +	struct pvclock_wall_clock wc;
> > +
> > +	struct arch_shared_info arch;
> > +
> > +};
> > +
> > +#else /* __ASSEMBLY__ */
> > +
> > +/* In assembly code we cannot use C numeric constant suffixes. */
> > +#define mk_unsigned_long(x) x
> > +
> > +#endif /* !__ASSEMBLY__ */
> > +
> > +#endif /* __XEN_PUBLIC_XEN_H__ */
> > --
> > 2.17.1
> 
>
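To make the quoted scheduler interface concrete, a minimal shutdown sketch follows. It assumes the HYPERVISOR_sched_op() hypercall wrapper provided elsewhere in this series and the struct sched_shutdown definition from earlier in sched.h; neither appears in the quote above, so treat this as illustrative only.

#include <xen/interface/xen.h>
#include <xen/interface/sched.h>

static void guest_poweroff(void)
{
	/* SHUTDOWN_poweroff asks the control domain to clean up and kill us. */
	struct sched_shutdown shutdown = {
		.reason = SHUTDOWN_poweroff,
	};

	/* Does not return if the hypervisor accepts the request. */
	HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
}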

Patch

diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h
new file mode 100644
index 0000000000..79d5ae8563
--- /dev/null
+++ b/include/xen/arm/interface.h
@@ -0,0 +1,88 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/******************************************************************************
+ * Guest OS interface to ARM Xen.
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ */
+
+#ifndef _ASM_ARM_XEN_INTERFACE_H
+#define _ASM_ARM_XEN_INTERFACE_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+
+#define uint64_aligned_t u64 __attribute__((aligned(8)))
+
+#define __DEFINE_GUEST_HANDLE(name, type) \
+	typedef struct { union { type *p; uint64_aligned_t q; }; }  \
+		__guest_handle_ ## name
+
+#define DEFINE_GUEST_HANDLE_STRUCT(name) \
+	__DEFINE_GUEST_HANDLE(name, struct name)
+#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
+#define GUEST_HANDLE(name)        __guest_handle_ ## name
+
+#define set_xen_guest_handle(hnd, val)			\
+	do {						\
+		if (sizeof(hnd) == 8)			\
+			*(u64 *)&(hnd) = 0;	\
+		(hnd).p = val;				\
+	} while (0)
+
+#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
+
+#ifndef __ASSEMBLY__
+/* Explicitly size integers that represent pfns in the interface with
+ * Xen so that we can have one ABI that works for 32 and 64 bit guests.
+ * Note that this means that the xen_pfn_t type may be capable of
+ * representing pfn's which the guest cannot represent in its own pfn
+ * type. However since pfn space is controlled by the guest this is
+ * fine since it simply wouldn't be able to create any such pfns in
+ * the first place.
+ */
+typedef u64 xen_pfn_t;
+#define PRI_xen_pfn "llx"
+typedef u64 xen_ulong_t;
+#define PRI_xen_ulong "llx"
+typedef s64 xen_long_t;
+#define PRI_xen_long "llx"
+/* Guest handles for primitive C types. */
+__DEFINE_GUEST_HANDLE(uchar, unsigned char);
+__DEFINE_GUEST_HANDLE(uint,  unsigned int);
+DEFINE_GUEST_HANDLE(char);
+DEFINE_GUEST_HANDLE(int);
+DEFINE_GUEST_HANDLE(void);
+DEFINE_GUEST_HANDLE(u64);
+DEFINE_GUEST_HANDLE(u32);
+DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 1
+
+struct arch_vcpu_info { };
+struct arch_shared_info { };
+
+/* TODO: Move pvclock definitions some place arch independent */
+struct pvclock_vcpu_time_info {
+	u32   version;
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;
+	u8    flags;
+	u8    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+/* It is OK to have a 16-byte struct with no padding because it is packed */
+struct pvclock_wall_clock {
+	u32   version;
+	u32   sec;
+	u32   nsec;
+	u32   sec_hi;
+} __attribute__((__packed__));
+#endif
+
+#endif /* _ASM_ARM_XEN_INTERFACE_H */
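A minimal usage sketch for the guest-handle macros above (struct example_op is a made-up container): the union stores the guest pointer in a fixed 64-bit slot so the ABI is identical for 32- and 64-bit guests, and set_xen_guest_handle() zeroes that slot before writing the pointer so no stale upper bits leak to Xen.

#include <xen/arm/interface.h>

struct example_op {
	/* Expands to union { xen_pfn_t *p; uint64_aligned_t q; } */
	GUEST_HANDLE(xen_pfn_t) frame_list;
};

static void fill_op(struct example_op *op, xen_pfn_t *frames)
{
	/* Clears all 64 bits first, then stores the (possibly 32-bit) pointer. */
	set_xen_guest_handle(op->frame_list, frames);
}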
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
new file mode 100644
index 0000000000..8174999c2f
--- /dev/null
+++ b/include/xen/interface/event_channel.h
@@ -0,0 +1,281 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/******************************************************************************
+ * event_channel.h
+ *
+ * Event channels between domains.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+#include <xen/interface/xen.h>
+
+typedef u32 evtchn_port_t;
+DEFINE_GUEST_HANDLE(evtchn_port_t);
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_alloc_unbound	  6
+struct evtchn_alloc_unbound {
+	/* IN parameters */
+	domid_t dom, remote_dom;
+	/* OUT parameters */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ * NOTES:
+ *  1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_bind_interdomain 0
+struct evtchn_bind_interdomain {
+	/* IN parameters. */
+	domid_t remote_dom;
+	evtchn_port_t remote_port;
+	/* OUT parameters. */
+	evtchn_port_t local_port;
+};
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
+ * vcpu.
+ * NOTES:
+ *  1. A virtual IRQ may be bound to at most one event channel per vcpu.
+ *  2. The allocated event channel is bound to the specified vcpu. The binding
+ *     may not be changed.
+ */
+#define EVTCHNOP_bind_virq	  1
+struct evtchn_bind_virq {
+	/* IN parameters. */
+	u32 virq;
+	u32 vcpu;
+	/* OUT parameters. */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
+ * NOTES:
+ *  1. A physical IRQ may be bound to at most one event channel per domain.
+ *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+#define EVTCHNOP_bind_pirq	  2
+struct evtchn_bind_pirq {
+	/* IN parameters. */
+	u32 pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+	u32 flags; /* BIND_PIRQ__* */
+	/* OUT parameters. */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ *  1. The allocated event channel is bound to the specified vcpu. The binding
+ *     may not be changed.
+ */
+#define EVTCHNOP_bind_ipi	  7
+struct evtchn_bind_ipi {
+	u32 vcpu;
+	/* OUT parameters. */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+#define EVTCHNOP_close		  3
+struct evtchn_close {
+	/* IN parameters. */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+#define EVTCHNOP_send		  4
+struct evtchn_send {
+	/* IN parameters. */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may obtain the status of an event
+ *     channel for which <dom> is not DOMID_SELF.
+ */
+#define EVTCHNOP_status		  5
+struct evtchn_status {
+	/* IN parameters */
+	domid_t  dom;
+	evtchn_port_t port;
+	/* OUT parameters */
+#define EVTCHNSTAT_closed	0  /* Channel is not in use.		     */
+#define EVTCHNSTAT_unbound	1  /* Channel is waiting interdom connection.*/
+#define EVTCHNSTAT_interdomain	2  /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq		3  /* Channel is bound to a phys IRQ line.   */
+#define EVTCHNSTAT_virq		4  /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi		5  /* Channel is bound to a virtual IPI line */
+	u32 status;
+	u32 vcpu;		   /* VCPU to which this channel is bound.   */
+	union {
+		struct {
+			domid_t dom;
+		} unbound; /* EVTCHNSTAT_unbound */
+		struct {
+			domid_t dom;
+			evtchn_port_t port;
+		} interdomain; /* EVTCHNSTAT_interdomain */
+		u32 pirq;	    /* EVTCHNSTAT_pirq	      */
+		u32 virq;	    /* EVTCHNSTAT_virq	      */
+	} u;
+};
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ *  1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
+ *     the binding. This binding cannot be changed.
+ *  2. All other channels notify vcpu0 by default. This default is set when
+ *     the channel is allocated (a port that is freed and subsequently reused
+ *     has its binding reset to vcpu0).
+ */
+#define EVTCHNOP_bind_vcpu	  8
+struct evtchn_bind_vcpu {
+	/* IN parameters. */
+	evtchn_port_t port;
+	u32 vcpu;
+};
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+#define EVTCHNOP_unmask		  9
+struct evtchn_unmask {
+	/* IN parameters. */
+	evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+#define EVTCHNOP_reset		 10
+struct evtchn_reset {
+	/* IN parameters. */
+	domid_t dom;
+};
+
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
+ */
+#define EVTCHNOP_init_control    11
+struct evtchn_init_control {
+	/* IN parameters. */
+	u64 control_gfn;
+	u32 offset;
+	u32 vcpu;
+	/* OUT parameters. */
+	u8 link_bits;
+	u8 _pad[7];
+};
+
+/*
+ * EVTCHNOP_expand_array: add an additional page to the event array.
+ */
+#define EVTCHNOP_expand_array    12
+struct evtchn_expand_array {
+	/* IN parameters. */
+	u64 array_gfn;
+};
+
+/*
+ * EVTCHNOP_set_priority: set the priority for an event channel.
+ */
+#define EVTCHNOP_set_priority    13
+struct evtchn_set_priority {
+	/* IN parameters. */
+	evtchn_port_t port;
+	u32 priority;
+};
+
+struct evtchn_op {
+	u32 cmd; /* EVTCHNOP_* */
+	union {
+		struct evtchn_alloc_unbound    alloc_unbound;
+		struct evtchn_bind_interdomain bind_interdomain;
+		struct evtchn_bind_virq	       bind_virq;
+		struct evtchn_bind_pirq	       bind_pirq;
+		struct evtchn_bind_ipi	       bind_ipi;
+		struct evtchn_close	       close;
+		struct evtchn_send	       send;
+		struct evtchn_status	       status;
+		struct evtchn_bind_vcpu	       bind_vcpu;
+		struct evtchn_unmask	       unmask;
+	} u;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
+
+/*
+ * 2-level ABI
+ */
+
+#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+
+/*
+ * FIFO ABI
+ */
+
+/* Events may have priorities from 0 (highest) to 15 (lowest). */
+#define EVTCHN_FIFO_PRIORITY_MAX     0
+#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
+#define EVTCHN_FIFO_PRIORITY_MIN     15
+
+#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
+
+typedef u32 event_word_t;
+
+#define EVTCHN_FIFO_PENDING 31
+#define EVTCHN_FIFO_MASKED  30
+#define EVTCHN_FIFO_LINKED  29
+#define EVTCHN_FIFO_BUSY    28
+
+#define EVTCHN_FIFO_LINK_BITS 17
+#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
+
+#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
+
+struct evtchn_fifo_control_block {
+	u32     ready;
+	u32     _rsvd;
+	event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
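A short sketch of the alloc/send flow defined above, assuming the HYPERVISOR_event_channel_op() wrapper that the rest of this series provides: allocate an unbound port that accepts bindings from a remote domain, then notify it once the remote end has bound.

#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>

static int alloc_and_notify(domid_t remote)
{
	struct evtchn_alloc_unbound alloc = {
		.dom = DOMID_SELF,	/* allocate the port in our own domain */
		.remote_dom = remote,	/* accept a binding from <remote> */
	};
	struct evtchn_send send;
	int rc;

	rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
	if (rc)
		return rc;

	/* In a real driver alloc.port is advertised via xenstore first. */
	send.port = alloc.port;
	return HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}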
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
new file mode 100644
index 0000000000..197a0d0d58
--- /dev/null
+++ b/include/xen/interface/grant_table.h
@@ -0,0 +1,582 @@ 
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+#include <xen/interface/xen.h>
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
+ *
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ *     compiler barrier will still be required.
+ *
+ * Introducing a valid entry into the grant table:
+ *  1. Write ent->domid.
+ *  2. Write ent->frame:
+ *      GTF_permit_access:   Frame to which access is permitted.
+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ *                           frame, or zero if none.
+ *  3. Write memory barrier (WMB).
+ *  4. Write ent->flags, inc. valid type.
+ *
+ * Invalidating an unused GTF_permit_access entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ *  This cannot be done directly. Request assistance from the domain controller
+ *  which can set a timeout on the use of a grant entry and take necessary
+ *  action. (NB. This is not yet implemented!).
+ *
+ * Invalidating an unused GTF_accept_transfer entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & GTF_transfer_committed). [*]
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ *      The guest must /not/ modify the grant entry until the address of the
+ *      transferred frame is written. It is safe for the guest to spin waiting
+ *      for this to occur (detect by observing GTF_transfer_completed in
+ *      ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ *  1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ *
+ * Changing a GTF_permit_access from read-only to writable:
+ *  Use SMP-safe bit-setting instruction.
+ */
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef u32 grant_ref_t;
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility.  New guests should use version 2.
+ */
+struct grant_entry_v1 {
+	/* GTF_xxx: various type and flag information.  [XEN,GST] */
+	u16 flags;
+	/* The domain being granted foreign privileges. [GST] */
+	domid_t  domid;
+	/*
+	 * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
+	 * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
+	 */
+	u32 frame;
+};
+
+/*
+ * Type of grant entry.
+ *  GTF_invalid: This grant entry grants no privileges.
+ *  GTF_permit_access: Allow @domid to map/access @frame.
+ *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
+ */
+#define GTF_invalid         (0U << 0)
+#define GTF_permit_access   (1U << 0)
+#define GTF_accept_transfer (2U << 0)
+#define GTF_transitive      (3U << 0)
+#define GTF_type_mask       (3U << 0)
+
+/*
+ * Subflags for GTF_permit_access.
+ *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
+ */
+#define _GTF_readonly       (2)
+#define GTF_readonly        (1U << _GTF_readonly)
+#define _GTF_reading        (3)
+#define GTF_reading         (1U << _GTF_reading)
+#define _GTF_writing        (4)
+#define GTF_writing         (1U << _GTF_writing)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U << _GTF_sub_page)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ *      to transferring ownership of a page frame. When a guest sees this flag
+ *      it must /not/ modify the grant entry until GTF_transfer_completed is
+ *      set by Xen.
+ *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ *      after reading GTF_transfer_committed. Xen will always write the frame
+ *      address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed  (1U << _GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed  (1U << _GTF_transfer_completed)
+
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+	u16 flags;
+	domid_t  domid;
+};
+
+/*
+ * Version 2 of the grant entry structure. This is a union because three
+ * different types are supported: full_page, sub_page and transitive.
+ */
+union grant_entry_v2 {
+	struct grant_entry_header hdr;
+
+	/*
+	 * This member is used for V1-style full page grants, where either:
+	 *
+	 * -- hdr.type is GTF_accept_transfer, or
+	 * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+	 *
+	 * In that case, the frame field has the same semantics as the
+	 * field of the same name in the V1 entry structure.
+	 */
+	struct {
+		struct grant_entry_header hdr;
+		u32 pad0;
+		u64 frame;
+	} full_page;
+
+	/*
+	 * If the grant type is GTF_grant_access and GTF_sub_page is set,
+	 * @domid is allowed to access bytes [@page_off,@page_off+@length)
+	 * in frame @frame.
+	 */
+	struct {
+		struct grant_entry_header hdr;
+		u16 page_off;
+		u16 length;
+		u64 frame;
+	} sub_page;
+
+	/*
+	 * If the grant is GTF_transitive, @domid is allowed to use the
+	 * grant @gref in domain @trans_domid, as if it was the local
+	 * domain.  Obviously, the transitive access must be compatible
+	 * with the original grant.
+	 */
+	struct {
+		struct grant_entry_header hdr;
+		domid_t trans_domid;
+		u16 pad0;
+		grant_ref_t gref;
+	} transitive;
+
+	u32 __spacer[4]; /* Pad to a power of two */
+};
+
+typedef u16 grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef u32 grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <handle>
+ * is a negative status code.
+ * NOTES:
+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ *     via which I/O devices may access the granted frame.
+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address.  The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the
+ *     address is specified in <host_addr>.
+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
+ *     to be accounted to the correct grant reference!
+ */
+#define GNTTABOP_map_grant_ref        0
+struct gnttab_map_grant_ref {
+	/* IN parameters. */
+	u64 host_addr;
+	u32 flags;               /* GNTMAP_* */
+	grant_ref_t ref;
+	domid_t  dom;
+	/* OUT parameters. */
+	s16  status;              /* GNTST_* */
+	grant_handle_t handle;
+	u64 dev_bus_addr;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_grant_ref      1
+struct gnttab_unmap_grant_ref {
+	/* IN parameters. */
+	u64 host_addr;
+	u64 dev_bus_addr;
+	grant_handle_t handle;
+	/* OUT parameters. */
+	s16  status;              /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ *  3. Xen may not support more than a single grant-table page per domain.
+ */
+#define GNTTABOP_setup_table          2
+struct gnttab_setup_table {
+	/* IN parameters. */
+	domid_t  dom;
+	u32 nr_frames;
+	/* OUT parameters. */
+	s16  status;              /* GNTST_* */
+
+	GUEST_HANDLE(xen_pfn_t) frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+#define GNTTABOP_dump_table           3
+struct gnttab_dump_table {
+	/* IN parameters. */
+	domid_t dom;
+	/* OUT parameters. */
+	s16 status;               /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
+
+/*
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+#define GNTTABOP_transfer                4
+struct gnttab_transfer {
+	/* IN parameters. */
+	xen_pfn_t mfn;
+	domid_t       domid;
+	grant_ref_t   ref;
+	/* OUT parameters. */
+	s16       status;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
+
+/*
+ * GNTTABOP_copy: Hypervisor-based copy.
+ * Source and destinations can be either MFNs or, for foreign domains,
+ * grant references. The foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref      (0)
+#define GNTCOPY_source_gref       (1 << _GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref        (1)
+#define GNTCOPY_dest_gref         (1 << _GNTCOPY_dest_gref)
+
+#define GNTTABOP_copy                 5
+struct gnttab_copy {
+	/* IN parameters. */
+	struct {
+		union {
+			grant_ref_t ref;
+			xen_pfn_t   gmfn;
+		} u;
+		domid_t  domid;
+		u16 offset;
+	} source, dest;
+	u16      len;
+	u16      flags;          /* GNTCOPY_* */
+	/* OUT parameters. */
+	s16       status;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_query_size           6
+struct gnttab_query_size {
+	/* IN parameters. */
+	domid_t  dom;
+	/* OUT parameters. */
+	u32 nr_frames;
+	u32 max_nr_frames;
+	s16  status;              /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace    7
+struct gnttab_unmap_and_replace {
+	/* IN parameters. */
+	u64 host_addr;
+	u64 new_addr;
+	grant_handle_t handle;
+	/* OUT parameters. */
+	s16  status;              /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
+
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version          8
+struct gnttab_set_version {
+	/* IN parameters */
+	u32 version;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames     9
+struct gnttab_get_status_frames {
+	/* IN parameters. */
+	u32 nr_frames;
+	domid_t  dom;
+	/* OUT parameters. */
+	s16  status;              /* GNTST_* */
+
+	GUEST_HANDLE(u64) frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+#define GNTTABOP_get_version          10
+struct gnttab_get_version {
+	/* IN parameters */
+	domid_t dom;
+	u16 pad;
+	/* OUT parameters */
+	u32 version;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
+
+/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+#define GNTTABOP_cache_flush          12
+struct gnttab_cache_flush {
+	union {
+		u64 dev_bus_addr;
+		grant_ref_t ref;
+	} a;
+	u16 offset;   /* offset from start of grant */
+	u16 length;   /* size within the grant */
+#define GNTTAB_CACHE_CLEAN          (1 << 0)
+#define GNTTAB_CACHE_INVAL          (1 << 1)
+#define GNTTAB_CACHE_SOURCE_GREF    (1 << 31)
+	u32 op;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
+
+/*
+ * Bitfield values for update_pin_status.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map      (0)
+#define GNTMAP_device_map       (1 << _GNTMAP_device_map)
+/* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map        (1)
+#define GNTMAP_host_map         (1 << _GNTMAP_host_map)
+/* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly        (2)
+#define GNTMAP_readonly         (1 << _GNTMAP_readonly)
+/*
+ * GNTMAP_host_map subflag:
+ *  0 => The host mapping is usable only by the guest OS.
+ *  1 => The host mapping is usable by guest OS + current application.
+ */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map  (1 << _GNTMAP_application_map)
+
+/*
+ * GNTMAP_contains_pte subflag:
+ *  0 => This map request contains a host virtual address.
+ *  1 => This map request contains the machine address of the PTE to update.
+ */
+#define _GNTMAP_contains_pte    (4)
+#define GNTMAP_contains_pte     (1 << _GNTMAP_contains_pte)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0    (16)
+#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+#define GNTST_okay             (0)  /* Normal return.                        */
+#define GNTST_general_error    (-1) /* General undefined error.              */
+#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
+#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
+#define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+#define GNTST_eagain          (-12) /* Operation not done; try again.        */
+
+#define GNTTABOP_error_msgs {                   \
+	"okay",                                     \
+	"undefined error",                          \
+	"unrecognised domain id",                   \
+	"invalid grant reference",                  \
+	"invalid mapping handle",                   \
+	"invalid virtual address",                  \
+	"invalid device address",                   \
+	"no spare translation slot in the I/O MMU", \
+	"permission denied",                        \
+	"bad page",                                 \
+	"copy arguments cross page boundary",       \
+	"page address size too large",              \
+	"operation not done; try again"             \
+}
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
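The concurrency rules at the top of this header translate into very little code. A sketch of introducing a valid v1 entry, assuming a wmb() write barrier from the surrounding environment: write domid and frame first, then the barrier, then flags, so the entry never appears valid while half-written.

#include <xen/interface/grant_table.h>

static void gnttab_grant_access(struct grant_entry_v1 *ent, domid_t domid,
				u32 frame, int readonly)
{
	ent->domid = domid;
	ent->frame = frame;
	wmb();	/* flags must be written last; see 'Introducing a valid entry' */
	ent->flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
}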
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
index 0000000000..1c53cad729
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,69 @@ 
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: the second argument of the hypercall is a
+ * pointer to a xen_hvm_param struct.
+ */
+#define HVMOP_set_param           0
+#define HVMOP_get_param           1
+struct xen_hvm_param {
+	domid_t  domid;    /* IN */
+	u32 index;    /* IN */
+	u64 value;    /* IN/OUT */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying       9
+struct xen_hvm_pagetable_dying {
+	/* Domain with a pagetable about to be destroyed. */
+	domid_t  domid;
+	/* guest physical address of the toplevel pagetable dying */
+	aligned_u64 gpa;
+};
+
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
+
+enum hvmmem_type_t {
+	HVMMEM_ram_rw,             /* Normal read/write guest RAM */
+	HVMMEM_ram_ro,             /* Read-only; writes are discarded */
+	HVMMEM_mmio_dm,            /* Reads and writes go to the device model */
+};
+
+#define HVMOP_get_mem_type    15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+	/* Domain to be queried. */
+	domid_t domid;
+	/* OUT variable. */
+	u16 mem_type;
+	u16 pad[2]; /* align next field on 8-byte boundary */
+	/* IN variable. */
+	u64 pfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
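A sketch of the get side of HVMOP_{set,get}_param, assuming the HYPERVISOR_hvm_op() wrapper added elsewhere in this series; the hypothetical hvm_get_param() helper is reused in the params.h example below.

#include <xen/interface/xen.h>
#include <xen/interface/hvm/hvm_op.h>

static int hvm_get_param(u32 index, u64 *value)
{
	struct xen_hvm_param xhv = {
		.domid = DOMID_SELF,	/* query our own domain */
		.index = index,		/* one of the HVM_PARAM_* indexes */
	};
	int rc = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);

	if (!rc)
		*value = xhv.value;
	return rc;
}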
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
index 0000000000..4d61fc58d9
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
@@ -0,0 +1,127 @@ 
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include <xen/interface/hvm/hvm_op.h>
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+#define HVM_PARAM_CALLBACK_IRQ 0
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ *
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ * If val != 0, val[63:56] encodes the type, as follows:
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_GSI      0
+/*
+ * val[55:0] is a delivery GSI.  GSI 0 cannot be used, as it aliases val == 0,
+ * and disables all notifications.
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
+/*
+ * val[55:0] is a delivery PCI INTx line:
+ * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0]
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#define HVM_PARAM_CALLBACK_TYPE_VECTOR   2
+/*
+ * val[7:0] is a vector number.  Check for XENFEAT_hvm_callback_vector to know
+ * if this delivery method is available.
+ */
+#elif defined(__arm__) || defined(__aarch64__)
+#define HVM_PARAM_CALLBACK_TYPE_PPI      2
+/*
+ * val[55:16] needs to be zero.
+ * val[15:8] is interrupt flag of the PPI used by event-channel:
+ *  bit 8: the PPI is edge(1) or level(0) triggered
+ *  bit 9: the PPI is active low(1) or high(0)
+ * val[7:0] is a PPI number used by event-channel.
+ * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to
+ * the notification is handled by the interrupt controller.
+ */
+#endif
+
+#define HVM_PARAM_STORE_PFN    1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED  4
+
+#define HVM_PARAM_IOREQ_PFN    5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ *  delay_for_missed_ticks (default):
+ *   Do not advance a vcpu's time beyond the correct delivery time for
+ *   interrupts that have been missed due to preemption. Deliver missed
+ *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ *   time stepwise for each one.
+ *  no_delay_for_missed_ticks:
+ *   As above, missed interrupts are delivered, but guest time always tracks
+ *   wallclock (i.e., real) time while doing so.
+ *  no_missed_ticks_pending:
+ *   No missed interrupts are held pending. Instead, to ensure ticks are
+ *   delivered at some non-zero rate, if we detect missed ticks then the
+ *   internal tick alarm is not disabled if the VCPU is preempted during the
+ *   next tick period.
+ *  one_missed_tick_pending:
+ *   Missed interrupts are collapsed together and delivered as one 'late tick'.
+ *   Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE   10
+#define HVMPTM_delay_for_missed_ticks    0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending   2
+#define HVMPTM_one_missed_tick_pending   3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT     12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN    13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS     15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN    16
+
+/* Console debug shared memory ring and event channel */
+#define HVM_PARAM_CONSOLE_PFN    17
+#define HVM_PARAM_CONSOLE_EVTCHN 18
+
+#define HVM_NR_PARAMS          19
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
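Tying the indexes above together: a guest console driver would typically start by fetching the shared ring frame and event channel. A sketch using the hypothetical hvm_get_param() helper from the hvm_op.h example:

#include <xen/interface/hvm/params.h>

static int console_discover(u64 *ring_pfn, u64 *evtchn)
{
	int rc = hvm_get_param(HVM_PARAM_CONSOLE_PFN, ring_pfn);

	if (rc)
		return rc;
	return hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN, evtchn);
}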
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
new file mode 100644
index 0000000000..7d74c99226
--- /dev/null
+++ b/include/xen/interface/io/blkif.h
@@ -0,0 +1,726 @@ 
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t   u16
+#endif
+#define blkif_sector_t u64
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters.  This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings.  Nodes specifying numeric
+ * values are encoded in decimal.  Integer value ranges listed below are
+ * expressed as fixed-size integer types capable of storing the conversion
+ * of a properly formatted node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ *      Values:         "r" (read only), "w" (writable)
+ *
+ *      The read or write access permissions to the backing store to be
+ *      granted to the frontend.
+ *
+ * params
+ *      Values:         string
+ *
+ *      A free formatted string providing sufficient information for the
+ *      hotplug script to attach the device and provide a suitable
+ *      handler (i.e., a block device) for blkback to use.
+ *
+ * physical-device
+ *      Values:         "MAJOR:MINOR"
+ *      Notes: 11
+ *
+ *      MAJOR and MINOR are the major number and minor number of the
+ *      backing device respectively.
+ *
+ * physical-device-path
+ *      Values:         path string
+ *
+ *      A string that contains the absolute path to the disk image. On
+ *      NetBSD and Linux this is always a block device, while on FreeBSD
+ *      it can be either a block device or a regular file.
+ *
+ * type
+ *      Values:         "file", "phy", "tap"
+ *
+ *      The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      The underlying storage is not affected by the direct IO memory
+ *      lifetime bug.  See:
+ *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ *      Therefore this option gives the backend permission to use
+ *      O_DIRECT, notwithstanding that bug.
+ *
+ *      That is, if this option is enabled, use of O_DIRECT is safe,
+ *      in circumstances where we would normally have avoided it as a
+ *      workaround for that bug.  This option is not relevant for all
+ *      backends, and even not necessarily supported for those for
+ *      which it is relevant.  A backend which knows that it is not
+ *      affected by the bug can ignore this option.
+ *
+ *      This option doesn't require a backend to use O_DIRECT, so it
+ *      should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_DISCARD request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST, an LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using an LRU in the
+ *      backend driver paired with a LIFO queue in the frontend will
+ *      allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          1, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      lb(machine pages), i.e. log2 of the page count (e.g. 0 == 1 page,
+ *      1 == 2 pages, 2 == 4 pages, etc.).
+ *
+ * max-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ *      Values:         0/1 (boolean)
+ *      Default Value:  1
+ *
+ *      This optional property, set by the toolstack, instructs the backend
+ *      to offer (or not to offer) discard to the frontend. If the property
+ *      is missing the backend should offer discard if the backing storage
+ *      actually supports it.
+ *
+ * discard-alignment
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          4, 5
+ *
+ *      The offset, in bytes from the beginning of the virtual block device,
+ *      to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *      Notes:          4
+ *
+ *      The size, in bytes, of the individually addressable discard extents
+ *      of the underlying device.
+ *
+ * discard-secure
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes:          10
+ *
+ *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ *      requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ *      Values:         <uint32_t> (bitmap)
+ *
+ *      A collection of bit flags describing attributes of the backing
+ *      device.  The VDISK_* macros define the meaning of each bit
+ *      location.
+ *
+ * sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The logical block size, in bytes, of the underlying storage. This
+ *      must be a power of two with a minimum value of 512.
+ *
+ *      NOTE: Because of implementation bugs in some frontends this must be
+ *            set to 512, unless the frontend advertises a non-zero value
+ *            in its "feature-large-sector-size" xenbus node. (See below).
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *
+ *      The physical block size, in bytes, of the backend storage. This
+ *      must be an integer multiple of "sector-size".
+ *
+ * sectors
+ *      Values:         <u64>
+ *
+ *      The size of the backend device, expressed in units of "sector-size".
+ *      The product of "sector-size" and "sectors" must also be an integer
+ *      multiple of "physical-sector-size", if that node is present.
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint32_t>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * ring-ref
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      For a frontend providing a multi-page ring, a "number of ring pages"
+ *      sized list of nodes, each containing a Xen grant reference granting
+ *      permission for the backend to map the page of the ring located
+ *      at page index "%u".  Page indexes are zero based.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ *      Notes:          1, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units
+ *      of lb(machine pages), i.e. log2 of the page count (e.g. 0 == 1 page,
+ *      1 == 2 pages, 2 == 4 pages, etc.).
+ *
+ * num-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes:          7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fall back
+ *      to non-persistent mode. This will cause a performance degradation,
+ *      since the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver stores the persistently
+ *      mapped grants in a LIFO queue, so a subset of all persistently mapped
+ *      grants gets used commonly. This is done in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ * feature-large-sector-size
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the frontend will correctly supply and
+ *      interpret all sector-based quantities in terms of the "sector-size"
+ *      value supplied in the backend info, whatever that may be set to.
+ *      If this node is not present or its value is "0", it is assumed
+ *      that the frontend has the logical block size hardcoded to 512
+ *      (which is the case in some frontend implementations).
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ *      Values:         "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ *      Values:         <uint32_t>
+ *
+ *      A value indicating the physical device to virtualize within the
+ *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
+ *      disk", etc.)
+ *
+ *      See docs/misc/vbd-interface.txt for details on the format of this
+ *      value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ *     PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ *     including a distribution deployed on certain nodes of the Amazon
+ *     EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ *     in slightly different forms, by both Citrix and RedHat/Amazon.
+ *     For full interoperability, block front and backends should publish
+ *     identical ring parameters, adjusted for unit differences, to the
+ *     XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate space
+ *     (discardable extents) in units that are larger than the exported logical
+ *     block size. If the backing device has such discardable extents the
+ *     backend should provide both discard-granularity and discard-alignment.
+ *     Providing just one of the two may be considered an error by the frontend.
+ *     Backends supporting discard should include discard-granularity and
+ *     discard-alignment even if they support discarding individual sectors.
+ *     Frontends should assume discard-alignment == 0 and discard-granularity
+ *     == sector size if these keys are missing.
+ * (5) The discard-alignment parameter allows a physical device to be
+ *     partitioned into virtual devices that do not necessarily begin or
+ *     end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ *     'ring-ref' is used to communicate the grant reference for this
+ *     page to the backend.  When using a multi-page ring, the 'ring-ref'
+ *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of doing this copy,
+ *     however, doesn't outweigh the speed improvement of not having to unmap
+ *     the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ *     with write access, even when they should be mapped read-only, since
+ *     further requests may reuse these grants and require write permissions.
+ * (9) The Linux implementation doesn't have a limit on the maximum number of
+ *     grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontend driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the
+ *     backing device supports secure discard.
+ *(11) Only used by Linux and NetBSD.
+ */
+
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
 * having the name "queue-N" where N is the integer ID of the queue/ring to
 * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with the
+ * multi-page ring buffer feature.
+ * For example, a frontend requesting two queues/rings, each with a ring
+ * buffer of two pages, must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ *                                   Startup                                 *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front                                Back
+ * =================================    =====================================
+ * XenbusStateInitialising              XenbusStateInitialising
+ *  o Query virtual device               o Query backend device identification
+ *    properties.                          data.
+ *  o Setup OS device instance.          o Open and validate backend device.
+ *                                       o Publish backend features and
+ *                                         transport parameters.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateInitWait
+ *
+ * o Query backend features and
+ *   transport parameters.
+ * o Allocate and initialize the
+ *   request ring.
+ * o Publish transport parameters
+ *   that will be in effect during
+ *   this connection.
+ *              |
+ *              |
+ *              V
+ * XenbusStateInitialised
+ *
+ *                                       o Query frontend transport parameters.
+ *                                       o Connect to the request ring and
+ *                                         event channel.
+ *                                       o Publish backend device properties.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateConnected
+ *
+ *  o Query backend device properties.
+ *  o Finalize OS virtual device
+ *    instance.
+ *              |
+ *              |
+ *              V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ *       of transport parameters, can skip certain states in the state machine:
+ *
+ *       o A frontend may transition to XenbusStateInitialised without
+ *         waiting for the backend to enter XenbusStateInitWait.  In this
+ *         case, default transport parameters are in effect and any
+ *         transport parameters published by the frontend must contain
+ *         their default values.
+ *
+ *       o A backend may transition to XenbusStateInitialised, bypassing
+ *         XenbusStateInitWait, without waiting for the frontend to first
+ *         enter the XenbusStateInitialised state.  In this case, default
+ *         transport parameters are in effect and any transport parameters
+ *         published by the backend must contain their default values.
+ *
+ *       Drivers that support optional features and/or transport parameter
+ *       negotiation must tolerate these additional state transition paths.
+ *       In general this means performing the work of any skipped state
+ *       transition, if it has not already been performed, in addition to the
+ *       work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ              0
+#define BLKIF_OP_WRITE             1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request.  All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional.  See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER     2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional.  See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1        4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client.  If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ *
+ * Optional.  See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD           5
+
+/*
+ * Recognized if "feature-max-indirect-segments" is present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
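+
+/*
+ * For example (a sketch of the formula above, assuming 4 KiB pages, where
+ * each indirect page holds 4096 / sizeof(struct blkif_request_segment)
+ * == 512 segments), a frontend needing nr_segs indirect segments would
+ * use:
+ *
+ *     unsigned int segs_per_page =
+ *             PAGE_SIZE / sizeof(struct blkif_request_segment);
+ *     unsigned int nr_pages = (nr_segs + segs_per_page - 1) / segs_per_page;
+ *
+ * i.e. ceil(nr_segs / segs_per_page), capped at
+ * BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST.
+ */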
+
+/*
+ * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
+ * 'sector_number' in blkif_request, blkif_request_discard and
+ * blkif_request_indirect are sector-based quantities. See the description
+ * of the "feature-large-sector-size" frontend xenbus node above for
+ * more information.
+ */
+struct blkif_request_segment {
+	grant_ref_t gref;        /* reference to I/O buffer frame        */
+	/* @first_sect: first sector in frame to transfer (inclusive).   */
+	/* @last_sect: last sector in frame to transfer (inclusive).     */
+	u8     first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+	u8        operation;    /* BLKIF_OP_???                         */
+	u8        nr_segments;  /* number of segments                   */
+	blkif_vdev_t   handle;       /* only for read/write requests         */
+	u64       id;           /* private guest value, echoed in resp  */
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+typedef struct blkif_request blkif_request_t;
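+
+/*
+ * A minimal sketch (assuming a connected blkif_front_ring_t "ring", as
+ * generated by DEFINE_RING_TYPES() below, and an already granted buffer
+ * page "gref") of queueing a one-segment, one-page read:
+ *
+ *     struct blkif_request *req;
+ *     int notify;
+ *
+ *     req = RING_GET_REQUEST(&ring, ring.req_prod_pvt);
+ *     req->operation         = BLKIF_OP_READ;
+ *     req->nr_segments       = 1;
+ *     req->handle            = handle;
+ *     req->id                = id;           (echoed in the response)
+ *     req->sector_number     = start_sector;
+ *     req->seg[0].gref       = gref;
+ *     req->seg[0].first_sect = 0;
+ *     req->seg[0].last_sect  = 7;            (8 * 512 bytes == one page)
+ *     ring.req_prod_pvt++;
+ *     RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
+ *
+ * and, if "notify" is set, signalling the backend's event channel.
+ */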
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+	u8        operation;    /* BLKIF_OP_DISCARD                     */
+	u8        flag;         /* BLKIF_DISCARD_SECURE or zero         */
+#define BLKIF_DISCARD_SECURE (1 << 0)  /* ignored if discard-secure=0      */
+	blkif_vdev_t   handle;       /* same as for read/write requests      */
+	u64       id;           /* private guest value, echoed in resp  */
+	blkif_sector_t sector_number;/* start sector idx on disk             */
+	u64       nr_sectors;   /* number of contiguous sectors to discard*/
+};
+
+typedef struct blkif_request_discard blkif_request_discard_t;
+
+struct blkif_request_indirect {
+	u8        operation;    /* BLKIF_OP_INDIRECT                    */
+	u8        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
+	u16       nr_segments;  /* number of segments                   */
+	u64       id;           /* private guest value, echoed in resp  */
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+	blkif_vdev_t   handle;       /* same as for read/write requests      */
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef __i386__
+	u64       pad;          /* Make it 64 byte aligned on i386      */
+#endif
+};
+
+typedef struct blkif_request_indirect blkif_request_indirect_t;
+
+struct blkif_response {
+	u64        id;              /* copied from request */
+	u8         operation;       /* copied from request */
+	s16        status;          /* BLKIF_RSP_???       */
+};
+
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP  -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR       -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY         0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
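+
+/*
+ * A minimal sketch (assuming a connected blkif_front_ring_t "ring" and
+ * caller-supplied complete()/handle_error() routines) of draining
+ * responses after an event-channel upcall:
+ *
+ *     struct blkif_response *rsp;
+ *     int more;
+ *
+ *     do {
+ *             RING_IDX prod = ring.sring->rsp_prod;
+ *
+ *             xen_mb();   (responses visible before we read them)
+ *             while (ring.rsp_cons != prod) {
+ *                     rsp = RING_GET_RESPONSE(&ring, ring.rsp_cons);
+ *                     ring.rsp_cons++;
+ *                     if (rsp->status == BLKIF_RSP_OKAY)
+ *                             complete(rsp->id);
+ *                     else
+ *                             handle_error(rsp->id, rsp->status);
+ *             }
+ *             RING_FINAL_CHECK_FOR_RESPONSES(&ring, more);
+ *     } while (more);
+ */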
+
+#define VDISK_CDROM        0x1
+#define VDISK_REMOVABLE    0x2
+#define VDISK_READONLY     0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h
new file mode 100644
index 0000000000..3489fc7a60
--- /dev/null
+++ b/include/xen/interface/io/console.h
@@ -0,0 +1,56 @@ 
+/******************************************************************************
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef u32 XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1))
+
+struct xencons_interface {
+	char in[1024];
+	char out[2048];
+	XENCONS_RING_IDX in_cons, in_prod;
+	XENCONS_RING_IDX out_cons, out_prod;
+};
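+
+/*
+ * A minimal sketch (assuming "intf" maps the shared console page and
+ * xen_wmb() from ring.h is available) of emitting one character "ch":
+ *
+ *     XENCONS_RING_IDX prod = intf->out_prod;
+ *
+ *     if (prod - intf->out_cons < sizeof(intf->out)) {
+ *             intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = ch;
+ *             xen_wmb();   (data visible before the producer update)
+ *             intf->out_prod = prod;
+ *     }
+ *
+ * followed by an event-channel notification to the console backend.
+ */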
+
+#ifdef XEN_WANT_FLEX_CONSOLE_RING
+#include "ring.h"
+DEFINE_XEN_FLEX_RING(xencons);
+#endif
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
new file mode 100644
index 0000000000..52b4de0f81
--- /dev/null
+++ b/include/xen/interface/io/protocols.h
@@ -0,0 +1,42 @@ 
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2008, Keir Fraser
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
new file mode 100644
index 0000000000..4e02678e3c
--- /dev/null
+++ b/include/xen/interface/io/ring.h
@@ -0,0 +1,479 @@ 
+/******************************************************************************
+ * ring.h
+ *
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+/*
+ * When #include'ing this header, you need to provide the following
+ * declarations upfront:
+ * - standard integer types (u8, u16, etc.)
+ * They are provided by stdint.h of the standard headers.
+ *
+ * In addition, if you intend to use the FLEX macros, you also need to
+ * provide the following, before invoking the FLEX macros:
+ * - size_t
+ * - memcpy
+ * - grant_ref_t
+ * These declarations are provided by string.h of the standard headers,
+ * and grant_table.h from the Xen public headers.
+ */
+
+#include <xen/interface/grant_table.h>
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
+#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
+#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __CONST_RING_SIZE(_s, _sz) \
+	(__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+		sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
+#define __RING_SIZE(_s, _sz) \
+	(__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
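+
+/*
+ * Worked example (assuming 4 KiB pages and the 64-bit ABI layout): for the
+ * blkif ring, each union entry is 112 bytes and the indexes plus padding
+ * occupy 64 bytes, so a single-page ring gives (4096 - 64) / 112 == 36
+ * entries, rounded down to the nearest power of two: 32.
+ */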
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
+ *
+ * To initialise a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ *     mytag_front_ring_t front_ring;
+ *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initialising the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                               \
+										  \
+/* Shared ring entry */                                                           \
+union __name##_sring_entry {                                                      \
+	__req_t req;                                                              \
+	__rsp_t rsp;                                                              \
+};                                                                                \
+										  \
+/* Shared ring page */                                                            \
+struct __name##_sring {                                                           \
+	RING_IDX req_prod, req_event;                                             \
+	RING_IDX rsp_prod, rsp_event;                                             \
+	union {                                                                   \
+		struct {                                                          \
+			u8 smartpoll_active;                                      \
+		} netif;                                                          \
+		struct {                                                          \
+			u8 msg;                                                   \
+		} tapif_user;                                                     \
+		u8 pvt_pad[4];                                                    \
+	} pvt;                                                                    \
+	u8 __pad[44];                                                             \
+	union __name##_sring_entry ring[1]; /* variable-length */                 \
+};                                                                                \
+										  \
+/* "Front" end's private variables */                                             \
+struct __name##_front_ring {                                                      \
+	RING_IDX req_prod_pvt;                                                    \
+	RING_IDX rsp_cons;                                                        \
+	unsigned int nr_ents;                                                     \
+	struct __name##_sring *sring;                                             \
+};                                                                                \
+										  \
+/* "Back" end's private variables */                                              \
+struct __name##_back_ring {                                                       \
+	RING_IDX rsp_prod_pvt;                                                    \
+	RING_IDX req_cons;                                                        \
+	unsigned int nr_ents;                                                     \
+	struct __name##_sring *sring;                                             \
+};                                                                                \
+										  \
+/* Syntactic sugar */                                                             \
+typedef struct __name##_sring __name##_sring_t;                                   \
+typedef struct __name##_front_ring __name##_front_ring_t;                         \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do {                                                 \
+	(_s)->req_prod  = (_s)->rsp_prod  = 0;                                    \
+	(_s)->req_event = (_s)->rsp_event = 1;                                    \
+	(void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad));            \
+	(void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                        \
+} while (0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do {                                      \
+	(_r)->req_prod_pvt = 0;                                                   \
+	(_r)->rsp_cons = 0;                                                       \
+	(_r)->nr_ents = __RING_SIZE(_s, __size);                                  \
+	(_r)->sring = (_s);                                                       \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do {                                       \
+	(_r)->rsp_prod_pvt = 0;                                                   \
+	(_r)->req_cons = 0;                                                       \
+	(_r)->nr_ents = __RING_SIZE(_s, __size);                                  \
+	(_r)->sring = (_s);                                                       \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r)                                                             \
+	((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r)                                                    \
+	(RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front.)
+ */
+#define RING_FULL(_r)                                                             \
+	(RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r)                                         \
+	((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                                       \
+	unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;                \
+	unsigned int rsp = RING_SIZE(_r) -                                        \
+		((_r)->req_cons - (_r)->rsp_prod_pvt);                            \
+	req < rsp ? req : rsp;                                                    \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                          \
+	((((_r)->sring->req_prod - (_r)->req_cons) <                              \
+	  (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?              \
+	 ((_r)->sring->req_prod - (_r)->req_cons) :                               \
+	 (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx)                                                \
+	(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+/*
+ * Get a local copy of a request.
+ *
+ * Use this in preference to RING_GET_REQUEST() so all processing is
+ * done on a local copy that cannot be modified by the other end.
+ *
+ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
+ * to be ineffective where _req is a struct which consists of only bitfields.
+ */
+#define RING_COPY_REQUEST(_r, _idx, _req) do {				          \
+	/* Use volatile to force the copy into _req. */			          \
+	*(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);	          \
+} while (0)
+
+#define RING_GET_RESPONSE(_r, _idx)                                               \
+	(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                                     \
+	(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                                     \
+	(((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do {                                               \
+	xen_wmb(); /* back sees requests /before/ updated producer index */       \
+	(_r)->sring->req_prod = (_r)->req_prod_pvt;                               \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do {                                              \
+	xen_wmb(); /* front sees resps /before/ updated producer index */         \
+	(_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                               \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ *  is a boolean return value. True indicates that the receiver requires an
+ *  asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ *  The second argument is a boolean return value. True indicates that there
+ *  are pending messages on the ring (i.e., the connection should not be put
+ *  to sleep).
+ *
+ *  These macros will set the req_event/rsp_event field to trigger a
+ *  notification on the very next message that is enqueued. If you want to
+ *  create batches of work (i.e., only receive a notification after several
+ *  messages have been enqueued) then you will need to create a customised
+ *  version of the FINAL_CHECK macro in your own code, which sets the event
+ *  field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {                     \
+	RING_IDX __old = (_r)->sring->req_prod;                                   \
+	RING_IDX __new = (_r)->req_prod_pvt;                                      \
+	xen_wmb(); /* back sees requests /before/ updated producer index */       \
+	(_r)->sring->req_prod = __new;                                            \
+	xen_mb(); /* back sees new requests /before/ we check req_event */        \
+	(_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <                 \
+				 (RING_IDX)(__new - __old));                      \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {                    \
+	RING_IDX __old = (_r)->sring->rsp_prod;                                   \
+	RING_IDX __new = (_r)->rsp_prod_pvt;                                      \
+	xen_wmb(); /* front sees resps /before/ updated producer index */         \
+	(_r)->sring->rsp_prod = __new;                                            \
+	xen_mb(); /* front sees new resps /before/ we check rsp_event */          \
+	(_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <                 \
+				 (RING_IDX)(__new - __old));                      \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {                       \
+	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                         \
+	if (_work_to_do)							  \
+		break;                                                            \
+	(_r)->sring->req_event = (_r)->req_cons + 1;                              \
+	xen_mb();                                                                 \
+	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                         \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {                      \
+	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                        \
+	if (_work_to_do)							  \
+		break;                                                            \
+	(_r)->sring->rsp_event = (_r)->rsp_cons + 1;                              \
+	xen_mb();                                                                 \
+	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                        \
+} while (0)
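+
+/*
+ * A minimal sketch of a backend consumer loop built from the macros above
+ * (assuming a ring defined with DEFINE_RING_TYPES(mytag, request_t,
+ * response_t), a connected mytag_back_ring_t "ring" and a caller-supplied
+ * process() routine):
+ *
+ *     request_t req;
+ *     int work;
+ *
+ *     do {
+ *             while (RING_HAS_UNCONSUMED_REQUESTS(&ring)) {
+ *                     RING_COPY_REQUEST(&ring, ring.req_cons, &req);
+ *                     ring.req_cons++;
+ *                     process(&req);   (queue a response, push, notify)
+ *             }
+ *             RING_FINAL_CHECK_FOR_REQUESTS(&ring, work);
+ *     } while (work);
+ */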
+
+/*
+ * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
+ * functions to check if there is data on the ring, and to read and
+ * write to them.
+ *
+ * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
+ * does not define the indexes page. As different protocols can have
+ * extensions to the basic format, this macro allows them to define their
+ * own struct.
+ *
+ * XEN_FLEX_RING_SIZE
+ *   Convenience macro to calculate the size of one of the two rings
+ *   from the overall order.
+ *
+ * $NAME_mask
+ *   Function to apply the size mask to an index, reducing the index
+ *   to the range [0, size-1].
+ *
+ * $NAME_read_packet
+ *   Function to read data from the ring. The amount of data to read is
+ *   specified by the "size" argument.
+ *
+ * $NAME_write_packet
+ *   Function to write data to the ring. The amount of data to write is
+ *   specified by the "size" argument.
+ *
+ * $NAME_get_ring_ptr
+ *   Convenience function that returns a pointer to read/write to the
+ *   ring at the right location.
+ *
+ * $NAME_data_intf
+ *   Indexes page, shared between frontend and backend. It also
+ *   contains the array of grant refs.
+ *
+ * $NAME_queued
+ *   Function to calculate how many bytes are currently on the ring,
+ *   ready to be read. It can also be used to calculate how much free
+ *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
+ *   $NAME_queued()).
+ */
+
+#ifndef XEN_PAGE_SHIFT
+/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
+ * 4K, regardless of the architecture and the page granularity chosen
+ * by operating systems.
+ */
+#define XEN_PAGE_SHIFT 12
+#endif
+#define XEN_FLEX_RING_SIZE(order)                                                 \
+	(1UL << ((order) + XEN_PAGE_SHIFT - 1))
+
+#define DEFINE_XEN_FLEX_RING(name)                                                \
+static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size)              \
+{                                                                                 \
+	return idx & (ring_size - 1);                                             \
+}                                                                                 \
+										  \
+static inline unsigned char *name##_get_ring_ptr(unsigned char *buf,              \
+						 RING_IDX idx,                    \
+						 RING_IDX ring_size)              \
+{                                                                                 \
+	return buf + name##_mask(idx, ring_size);                                 \
+}                                                                                 \
+										  \
+static inline void name##_read_packet(void *opaque,                               \
+				      const unsigned char *buf,                   \
+				      size_t size,                                \
+				      RING_IDX masked_prod,                       \
+				      RING_IDX *masked_cons,                      \
+				      RING_IDX ring_size)                         \
+{                                                                                 \
+	if (*masked_cons < masked_prod ||                                         \
+		size <= ring_size - *masked_cons) {                               \
+		memcpy(opaque, buf + *masked_cons, size);                         \
+	} else {                                                                  \
+		memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons);     \
+		memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf,   \
+			   size - (ring_size - *masked_cons));                    \
+	}                                                                         \
+	*masked_cons = name##_mask(*masked_cons + size, ring_size);               \
+}                                                                                 \
+										  \
+static inline void name##_write_packet(unsigned char *buf,                        \
+				       const void *opaque,                        \
+				       size_t size,                               \
+				       RING_IDX *masked_prod,                     \
+				       RING_IDX masked_cons,                      \
+				       RING_IDX ring_size)                        \
+{                                                                                 \
+	if (*masked_prod < masked_cons ||                                         \
+		size <= ring_size - *masked_prod) {                               \
+		memcpy(buf + *masked_prod, opaque, size);                         \
+	} else {                                                                  \
+		memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod);     \
+		memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \
+		       size - (ring_size - *masked_prod));                        \
+	}                                                                         \
+	*masked_prod = name##_mask(*masked_prod + size, ring_size);               \
+}                                                                                 \
+										  \
+static inline RING_IDX name##_queued(RING_IDX prod,                               \
+				     RING_IDX cons,                               \
+				     RING_IDX ring_size)                          \
+{                                                                                 \
+	RING_IDX size;                                                            \
+										  \
+	if (prod == cons)                                                         \
+		return 0;                                                         \
+										  \
+	prod = name##_mask(prod, ring_size);                                      \
+	cons = name##_mask(cons, ring_size);                                      \
+										  \
+	if (prod == cons)                                                         \
+		return ring_size;                                                 \
+										  \
+	if (prod > cons)                                                          \
+		size = prod - cons;                                               \
+	else                                                                      \
+		size = ring_size - (cons - prod);                                 \
+	return size;                                                              \
+}                                                                                 \
+										  \
+struct name##_data {                                                              \
+	unsigned char *in; /* half of the allocation */                           \
+	unsigned char *out; /* half of the allocation */                          \
+}
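+
+/*
+ * A minimal sketch (assuming DEFINE_XEN_FLEX_RING(xencons) as in console.h,
+ * a "ring_order" negotiated via xenstore, a mapped struct xencons_data
+ * "data", and prod/cons read from the protocol's indexes page) of reading
+ * the queued bytes into "dst":
+ *
+ *     RING_IDX size = XEN_FLEX_RING_SIZE(ring_order);
+ *     RING_IDX masked_prod = xencons_mask(prod, size);
+ *     RING_IDX masked_cons = xencons_mask(cons, size);
+ *     RING_IDX len = xencons_queued(prod, cons, size);
+ *
+ *     xencons_read_packet(dst, data->in, len, masked_prod, &masked_cons,
+ *                         size);
+ */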
+
+#define DEFINE_XEN_FLEX_RING_AND_INTF(name)                                       \
+struct name##_data_intf {                                                         \
+	RING_IDX in_cons, in_prod;                                                \
+										  \
+	u8 pad1[56];                                                              \
+										  \
+	RING_IDX out_cons, out_prod;                                              \
+										  \
+	u8 pad2[56];                                                              \
+										  \
+	RING_IDX ring_order;                                                      \
+	grant_ref_t ref[];                                                        \
+};                                                                                \
+DEFINE_XEN_FLEX_RING(name)
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
new file mode 100644
index 0000000000..f452748b03
--- /dev/null
+++ b/include/xen/interface/io/xenbus.h
@@ -0,0 +1,81 @@ 
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+	XenbusStateUnknown       = 0,
+
+	XenbusStateInitialising  = 1,
+
+	/*
+	 * InitWait: Finished early initialisation but waiting for information
+	 * from the peer or hotplug scripts.
+	 */
+	XenbusStateInitWait      = 2,
+
+	/*
+	 * Initialised: Waiting for a connection from the peer.
+	 */
+	XenbusStateInitialised   = 3,
+
+	XenbusStateConnected     = 4,
+
+	/*
+	 * Closing: The device is being closed due to an error or an unplug event.
+	 */
+	XenbusStateClosing       = 5,
+
+	XenbusStateClosed        = 6,
+
+	/*
+	 * Reconfiguring: The device is being reconfigured.
+	 */
+	XenbusStateReconfiguring = 7,
+
+	XenbusStateReconfigured  = 8
+};
+
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
new file mode 100644
index 0000000000..87987334bf
--- /dev/null
+++ b/include/xen/interface/io/xs_wire.h
@@ -0,0 +1,151 @@ 
+/*
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type {
+	XS_CONTROL,
+#define XS_DEBUG XS_CONTROL
+	XS_DIRECTORY,
+	XS_READ,
+	XS_GET_PERMS,
+	XS_WATCH,
+	XS_UNWATCH,
+	XS_TRANSACTION_START,
+	XS_TRANSACTION_END,
+	XS_INTRODUCE,
+	XS_RELEASE,
+	XS_GET_DOMAIN_PATH,
+	XS_WRITE,
+	XS_MKDIR,
+	XS_RM,
+	XS_SET_PERMS,
+	XS_WATCH_EVENT,
+	XS_ERROR,
+	XS_IS_DOMAIN_INTRODUCED,
+	XS_RESUME,
+	XS_SET_TARGET,
+	/* XS_RESTRICT has been removed */
+	XS_RESET_WATCHES = XS_SET_TARGET + 2,
+	XS_DIRECTORY_PART,
+
+	XS_TYPE_COUNT,      /* Number of valid types. */
+
+	XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors {
+	int errnum;
+	const char *errstring;
+};
+
+#ifdef EINVAL
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+	= {
+	XSD_ERROR(EINVAL),
+	XSD_ERROR(EACCES),
+	XSD_ERROR(EEXIST),
+	XSD_ERROR(EISDIR),
+	XSD_ERROR(ENOENT),
+	XSD_ERROR(ENOMEM),
+	XSD_ERROR(ENOSPC),
+	XSD_ERROR(EIO),
+	XSD_ERROR(ENOTEMPTY),
+	XSD_ERROR(ENOSYS),
+	XSD_ERROR(EROFS),
+	XSD_ERROR(EBUSY),
+	XSD_ERROR(EAGAIN),
+	XSD_ERROR(EISCONN),
+	XSD_ERROR(E2BIG)
+};
+#endif
+
+struct xsd_sockmsg {
+	u32 type;  /* XS_??? */
+	u32 req_id; /* Request identifier, echoed in daemon's response. */
+	u32 tx_id; /* Transaction id (0 if not related to a transaction). */
+	u32 len;   /* Length of data following this. */
+
+	/* Generally followed by nul-terminated string(s). */
+};
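
For illustration only, not part of the imported header: a sketch of how a
client frames an XS_READ request in this wire format -- the fixed header
immediately followed by a NUL-terminated path (buffer sizing and error
handling elided).

#include <linux/string.h>
#include <xen/interface/io/xs_wire.h>

/* Fill buf with an XS_READ request; returns the total length in bytes. */
static u32 xs_frame_read(void *buf, const char *path, u32 req_id)
{
	struct xsd_sockmsg *hdr = buf;

	hdr->type = XS_READ;
	hdr->req_id = req_id;		/* echoed back in the reply */
	hdr->tx_id = 0;			/* not part of a transaction */
	hdr->len = strlen(path) + 1;	/* payload includes trailing NUL */
	memcpy(hdr + 1, path, hdr->len);
	return sizeof(*hdr) + hdr->len;
}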
+
+enum xs_watch_type {
+	XS_WATCH_PATH = 0,
+	XS_WATCH_TOKEN
+};
+
+/*
+ * `incontents 150 xenstore_struct XenStore wire protocol.
+ *
+ * Inter-domain shared memory communications.
+ */
+#define XENSTORE_RING_SIZE 1024
+typedef u32 XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1))
+struct xenstore_domain_interface {
+	char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+	char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+	XENSTORE_RING_IDX req_cons, req_prod;
+	XENSTORE_RING_IDX rsp_cons, rsp_prod;
+	u32 server_features; /* Bitmap of features supported by the server */
+	u32 connection;
+};
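
For illustration only, not part of the imported header: a sketch of a
producer writing into the request ring above. Real code must also place
memory barriers between the index checks and the copy, and notify the peer
through the XenStore event channel afterwards.

static int xs_ring_write(struct xenstore_domain_interface *intf,
			 const char *data, u32 len)
{
	XENSTORE_RING_IDX cons = intf->req_cons;
	XENSTORE_RING_IDX prod = intf->req_prod;
	u32 i;

	if (XENSTORE_RING_SIZE - (prod - cons) < len)
		return -1;	/* not enough free space in the ring */
	for (i = 0; i < len; i++)
		intf->req[MASK_XENSTORE_IDX(prod++)] = data[i];
	intf->req_prod = prod;	/* publish only after the copy is done */
	return 0;
}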
+
+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+/* The ability to reconnect a ring */
+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+
+/* Valid values for the connection field */
+#define XENSTORE_CONNECTED 0 /* the steady-state */
+#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
new file mode 100644
index 0000000000..19959da8b4
--- /dev/null
+++ b/include/xen/interface/memory.h
@@ -0,0 +1,332 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/******************************************************************************
+ * memory.h
+ *
+ * Memory reservation and information.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns a
+ * -ve errcode on failure, or the # extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap     6
+struct xen_memory_reservation {
+	/*
+	 * XENMEM_increase_reservation:
+	 *   OUT: MFN (*not* GMFN) bases of extents that were allocated
+	 * XENMEM_decrease_reservation:
+	 *   IN:  GMFN bases of extents to free
+	 * XENMEM_populate_physmap:
+	 *   IN:  GPFN bases of extents to populate with memory
+	 *   OUT: GMFN bases of extents that were allocated
+	 *   (NB. This command also updates the mach_to_phys translation table)
+	 */
+	GUEST_HANDLE(xen_pfn_t) extent_start;
+
+	/* Number of extents, and size/alignment of each (2^extent_order pages). */
+	xen_ulong_t  nr_extents;
+	unsigned int   extent_order;
+
+	/*
+	 * Maximum # bits addressable by the user of the allocated region (e.g.,
+	 * I/O devices often have a 32-bit limitation even in 64-bit systems). If
+	 * zero then the user has no addressing restriction.
+	 * This field is not used by XENMEM_decrease_reservation.
+	 */
+	unsigned int   address_bits;
+
+	/*
+	 * Domain whose reservation is being changed.
+	 * Unprivileged domains can specify only DOMID_SELF.
+	 */
+	domid_t        domid;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
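
For illustration only, not part of the imported header: a sketch of asking
Xen to back one guest page via XENMEM_populate_physmap, assuming the
HYPERVISOR_memory_op() wrapper and the set_xen_guest_handle() helper that
the hypercall support in this series provides.

static int populate_one_page(xen_pfn_t gpfn)
{
	struct xen_memory_reservation res = {
		.nr_extents   = 1,
		.extent_order = 0,	/* one page per extent */
		.address_bits = 0,	/* no addressing restriction */
		.domid        = DOMID_SELF,
	};

	set_xen_guest_handle(res.extent_start, &gpfn);
	/* The hypercall returns the number of extents populated. */
	return HYPERVISOR_memory_op(XENMEM_populate_physmap, &res) == 1 ?
	       0 : -1;
}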
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success, @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange             11
+struct xen_memory_exchange {
+	/*
+	 * [IN] Details of memory extents to be exchanged (GMFN bases).
+	 * Note that @in.address_bits is ignored and unused.
+	 */
+	struct xen_memory_reservation in;
+
+	/*
+	 * [IN/OUT] Details of new memory extents.
+	 * We require that:
+	 *  1. @in.domid == @out.domid
+	 *  2. @in.nr_extents  << @in.extent_order ==
+	 *     @out.nr_extents << @out.extent_order
+	 *  3. @in.extent_start and @out.extent_start lists must not overlap
+	 *  4. @out.extent_start lists GPFN bases to be populated
+	 *  5. @out.extent_start is overwritten with allocated GMFN bases
+	 */
+	struct xen_memory_reservation out;
+
+	/*
+	 * [OUT] Number of input extents that were successfully exchanged:
+	 *  1. The first @nr_exchanged input extents were successfully
+	 *     deallocated.
+	 *  2. The corresponding first entries in the output extent list correctly
+	 *     indicate the GMFNs that were successfully exchanged.
+	 *  3. All other input and output extents are untouched.
+	 *  4. If not all input extents are exchanged then the return code of this
+	 *     command will be non-zero.
+	 *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+	 */
+	xen_ulong_t nr_exchanged;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page     2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation  3
+#define XENMEM_maximum_reservation  4
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list    5
+struct xen_machphys_mfn_list {
+	/*
+	 * Size of the 'extent_start' array. Fewer entries will be filled if the
+	 * machphys table is smaller than max_extents * 2MB.
+	 */
+	unsigned int max_extents;
+
+	/*
+	 * Pointer to buffer to fill with list of extent starts. If there are
+	 * any large discontiguities in the machine address space, 2MB gaps in
+	 * the machphys table will be represented by an MFN base of zero.
+	 */
+	GUEST_HANDLE(xen_pfn_t) extent_start;
+
+	/*
+	 * Number of extents written to the above array. This will be smaller
+	 * than 'max_extents' if the machphys table is smaller than
+	 * max_extents * 2MB.
+	 */
+	unsigned int nr_extents;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+	xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+	xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping);
+
+#define XENMAPSPACE_shared_info  0 /* shared info page */
+#define XENMAPSPACE_grant_table  1 /* grant table page */
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+				    * XENMEM_add_to_physmap_range only.
+				    */
+#define XENMAPSPACE_dev_mmio     5 /* device mmio region */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap      7
+struct xen_add_to_physmap {
+	/* Which domain to change the mapping for. */
+	domid_t domid;
+
+	/* Number of pages to go through for gmfn_range */
+	u16    size;
+
+	/* Source mapping space. */
+	unsigned int space;
+
+	/* Index into source mapping space. */
+	xen_ulong_t idx;
+
+	/* GPFN where the source mapping page should appear. */
+	xen_pfn_t gpfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
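
For illustration only, not part of the imported header: the typical use of
XENMEM_add_to_physmap -- asking Xen to make the shared info page appear at
a guest-chosen GPFN (same assumed hypercall wrapper as above).

static int map_shared_info_page(xen_pfn_t gpfn)
{
	struct xen_add_to_physmap xatp = {
		.domid = DOMID_SELF,
		.space = XENMAPSPACE_shared_info,
		.idx   = 0,	/* there is a single shared info page */
		.gpfn  = gpfn,	/* where the guest wants to see it */
	};

	return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
}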
+
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list  8*/
+
+#define XENMEM_add_to_physmap_range 23
+struct xen_add_to_physmap_range {
+	/* IN */
+	/* Which domain to change the mapping for. */
+	domid_t domid;
+	u16 space; /* => enum phys_map_space */
+
+	/* Number of pages to go through */
+	u16 size;
+	domid_t foreign_domid; /* IFF gmfn_foreign */
+
+	/* Indexes into space being mapped. */
+	GUEST_HANDLE(xen_ulong_t) idxs;
+
+	/* GPFN in domid where the source mapping page should appear. */
+	GUEST_HANDLE(xen_pfn_t) gpfns;
+
+	/* OUT */
+
+	/* Per index error code. */
+	GUEST_HANDLE(int) errs;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of struct xen_memory_map.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+	/*
+	 * On call the number of entries which can be stored in buffer. On
+	 * return the number of entries which have been stored in
+	 * buffer.
+	 */
+	unsigned int nr_entries;
+
+	/*
+	 * Entries in the buffer are in the same format as returned by the
+	 * BIOS INT 0x15 EAX=0xE820 call.
+	 */
+	GUEST_HANDLE(void) buffer;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of struct xen_memory_map.
+ */
+#define XENMEM_machine_memory_map   10
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap      15
+struct xen_remove_from_physmap {
+	/* Which domain to change the mapping for. */
+	domid_t domid;
+
+	/* GPFN of the current mapping of the page. */
+	xen_pfn_t gpfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
+/*
+ * Get the pages for a particular guest resource, so that they can be
+ * mapped directly by a tools domain.
+ */
+#define XENMEM_acquire_resource 28
+struct xen_mem_acquire_resource {
+	/* IN - The domain whose resource is to be mapped */
+	domid_t domid;
+	/* IN - the type of resource */
+	u16 type;
+
+#define XENMEM_resource_ioreq_server 0
+#define XENMEM_resource_grant_table 1
+
+	/*
+	 * IN - a type-specific resource identifier, which must be zero
+	 *      unless stated otherwise.
+	 *
+	 * type == XENMEM_resource_ioreq_server -> id == ioreq server id
+	 * type == XENMEM_resource_grant_table -> id defined below
+	 */
+	u32 id;
+
+#define XENMEM_resource_grant_table_id_shared 0
+#define XENMEM_resource_grant_table_id_status 1
+
+	/*
+	 * IN/OUT - As an IN parameter, the number of frames of the resource
+	 *          to be mapped. However, if the specified value is 0 and
+	 *          frame_list is NULL then this field will be set to the
+	 *          maximum value supported by the implementation on return.
+	 */
+	u32 nr_frames;
+	/*
+	 * OUT - Must be zero on entry. On return this may contain a bitwise
+	 *       OR of the following values.
+	 */
+	u32 flags;
+
+	/* The resource pages have been assigned to the calling domain */
+#define _XENMEM_rsrc_acq_caller_owned 0
+#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned)
+
+	/*
+	 * IN - the index of the initial frame to be mapped. This parameter
+	 *      is ignored if nr_frames is 0.
+	 */
+	u64 frame;
+
+#define XENMEM_resource_ioreq_server_frame_bufioreq 0
+#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
+
+	/*
+	 * IN/OUT - If the tools domain is PV then, upon return, frame_list
+	 *          will be populated with the MFNs of the resource.
+	 *          If the tools domain is HVM then it is expected that, on
+	 *          entry, frame_list will be populated with a list of GFNs
+	 *          that will be mapped to the MFNs of the resource.
+	 *          If -EIO is returned then the frame_list has only been
+	 *          partially mapped and it is up to the caller to unmap all
+	 *          the GFNs.
+	 *          This parameter may be NULL if nr_frames is 0.
+	 */
+	GUEST_HANDLE(xen_pfn_t) frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource);
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
new file mode 100644
index 0000000000..0f12dcf267
--- /dev/null
+++ b/include/xen/interface/sched.h
@@ -0,0 +1,188 @@ 
+/******************************************************************************
+ * sched.h
+ *
+ * Scheduler state interactions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include <xen/interface/event_channel.h>
+
+/*
+ * Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
+ * The prototype for this hypercall is:
+ * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ * ...  == Additional Operation-specific extra arguments, described below.
+ *
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ *  long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
+ *      == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
+ */
+
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield       0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block       1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
+ * of the guest's start info page.  RDX/EDX is the third hypercall
+ * argument.
+ *
+ * In addition, when the reason is SHUTDOWN_suspend, this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
+ */
+#define SCHEDOP_shutdown    2
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll structure.
+ */
+#define SCHEDOP_poll        3
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown structure.
+ */
+#define SCHEDOP_remote_shutdown        4
+
+/*
+ * Latch a shutdown code, so that when the domain later shuts down it
+ * reports this code to the control tools.
+ * @arg == sched_shutdown, as for SCHEDOP_shutdown.
+ */
+#define SCHEDOP_shutdown_code 5
+
+/*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ *               after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog    6
+
+/*
+ * Override the current vcpu affinity by pinning it to one physical cpu or
+ * undo this override, restoring the previous affinity.
+ * @arg == pointer to sched_pin_override structure.
+ *
+ * A negative pcpu value will undo a previous pin override and restore the
+ * previous cpu affinity.
+ * This call is allowed for the hardware domain only and requires the cpu
+ * to be part of the domain's cpupool.
+ */
+#define SCHEDOP_pin_override 7
+
+struct sched_shutdown {
+	unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
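
For illustration only, not part of the imported header: a clean poweroff
request through SCHEDOP_shutdown, assuming the HYPERVISOR_sched_op()
wrapper added by this series.

static void guest_poweroff(void)
{
	struct sched_shutdown shutdown = {
		.reason = SHUTDOWN_poweroff,	/* clean up and kill */
	};

	HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
}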
+
+struct sched_poll {
+	GUEST_HANDLE(evtchn_port_t) ports;
+	unsigned int nr_ports;
+	u64 timeout;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
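
For illustration only, not part of the imported header: blocking on a
single event-channel port with SCHEDOP_poll. As we read the interface, the
timeout is an absolute system time in nanoseconds and 0 means no timeout.

static int wait_for_port(evtchn_port_t port, u64 deadline_ns)
{
	struct sched_poll poll = {
		.nr_ports = 1,
		.timeout  = deadline_ns,	/* 0 = block until an event */
	};

	set_xen_guest_handle(poll.ports, &port);
	return HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
}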
+
+struct sched_remote_shutdown {
+	domid_t domain_id;         /* Remote domain ID */
+	unsigned int reason;       /* SHUTDOWN_* => shutdown reason */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
+
+struct sched_watchdog {
+	u32 id;                /* watchdog ID */
+	u32 timeout;           /* timeout */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
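
For illustration only, not part of the imported header: the three watchdog
operations the comment above describes, driven through the same assumed
wrapper.

/* id == 0: create a watchdog; the hypercall returns the new watchdog id. */
static int watchdog_setup(u32 timeout)
{
	struct sched_watchdog wd = { .id = 0, .timeout = timeout };

	return HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
}

/* id != 0, timeout != 0: poke the timer; timeout == 0 would destroy it. */
static int watchdog_poke(u32 id, u32 timeout)
{
	struct sched_watchdog wd = { .id = id, .timeout = timeout };

	return HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
}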
+
+struct sched_pin_override {
+	s32 pcpu;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
+
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
+#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
+#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
+#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
+
+/*
+ * The domain has asked to perform a 'soft reset'. The expected behaviour is
+ * to reset internal Xen state for the domain, returning it to the point where
+ * it was created, but leaving the domain's memory contents and vCPU contexts
+ * intact. This allows the domain to start over and set up all Xen-specific
+ * interfaces again.
+ */
+#define SHUTDOWN_soft_reset 5
+#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason.             */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
new file mode 100644
index 0000000000..964daaedfb
--- /dev/null
+++ b/include/xen/interface/xen.h
@@ -0,0 +1,225 @@ 
+/******************************************************************************
+ * xen.h
+ *
+ * Guest OS interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include <xen/arm/interface.h>
+
+/*
+ * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
+ */
+
+/*
+ * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
+ *         EAX = return value
+ *         (argument registers may be clobbered on return)
+ * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
+ *         RAX = return value
+ *         (argument registers not clobbered on return; RCX, R11 are)
+ */
+#define __HYPERVISOR_set_trap_table        0
+#define __HYPERVISOR_mmu_update            1
+#define __HYPERVISOR_set_gdt               2
+#define __HYPERVISOR_stack_switch          3
+#define __HYPERVISOR_set_callbacks         4
+#define __HYPERVISOR_fpu_taskswitch        5
+#define __HYPERVISOR_sched_op_compat       6
+#define __HYPERVISOR_platform_op           7
+#define __HYPERVISOR_set_debugreg          8
+#define __HYPERVISOR_get_debugreg          9
+#define __HYPERVISOR_update_descriptor    10
+#define __HYPERVISOR_memory_op            12
+#define __HYPERVISOR_multicall            13
+#define __HYPERVISOR_update_va_mapping    14
+#define __HYPERVISOR_set_timer_op         15
+#define __HYPERVISOR_event_channel_op_compat 16
+#define __HYPERVISOR_xen_version          17
+#define __HYPERVISOR_console_io           18
+#define __HYPERVISOR_physdev_op_compat    19
+#define __HYPERVISOR_grant_table_op       20
+#define __HYPERVISOR_vm_assist            21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret                 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op              24
+#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_xsm_op               27
+#define __HYPERVISOR_nmi_op               28
+#define __HYPERVISOR_sched_op             29
+#define __HYPERVISOR_callback_op          30
+#define __HYPERVISOR_xenoprof_op          31
+#define __HYPERVISOR_event_channel_op     32
+#define __HYPERVISOR_physdev_op           33
+#define __HYPERVISOR_hvm_op               34
+#define __HYPERVISOR_sysctl               35
+#define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
+#define __HYPERVISOR_tmem_op              38
+#define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
+#define __HYPERVISOR_xenpmu_op            40
+#define __HYPERVISOR_dm_op                41
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0               48
+#define __HYPERVISOR_arch_1               49
+#define __HYPERVISOR_arch_2               50
+#define __HYPERVISOR_arch_3               51
+#define __HYPERVISOR_arch_4               52
+#define __HYPERVISOR_arch_5               53
+#define __HYPERVISOR_arch_6               54
+#define __HYPERVISOR_arch_7               55
+
+#ifndef __ASSEMBLY__
+
+typedef u16 domid_t;
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED (0x7FF0U)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF (0x7FF0U)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
+ * be specified by any calling domain.
+ */
+#define DOMID_IO   (0x7FF1U)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
+ * the caller is privileged.
+ */
+#define DOMID_XEN  (0x7FF2U)
+
+/* DOMID_COW is used as the owner of sharable pages */
+#define DOMID_COW  (0x7FF3U)
+
+/* DOMID_INVALID is used to identify pages with unknown owner. */
+#define DOMID_INVALID (0x7FF4U)
+
+/* Idle domain. */
+#define DOMID_IDLE (0x7FFFU)
+
+struct vcpu_info {
+	/*
+	 * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+	 * a pending notification for a particular VCPU. It is then cleared
+	 * by the guest OS /before/ checking for pending work, thus avoiding
+	 * a set-and-check race. Note that the mask is only accessed by Xen
+	 * on the CPU that is currently hosting the VCPU. This means that the
+	 * pending and mask flags can be updated by the guest without special
+	 * synchronisation (i.e., no need for the x86 LOCK prefix).
+	 * This may seem suboptimal because if the pending flag is set by
+	 * a different CPU then an IPI may be scheduled even when the mask
+	 * is set. However, note:
+	 *  1. The task of 'interrupt holdoff' is covered by the per-event-
+	 *     channel mask bits. A 'noisy' event that is continually being
+	 *     triggered can be masked at source at this very precise
+	 *     granularity.
+	 *  2. The main purpose of the per-VCPU mask is therefore to restrict
+	 *     reentrant execution: whether for concurrency control, or to
+	 *     prevent unbounded stack usage. Whatever the purpose, we expect
+	 *     that the mask will be asserted only for short periods at a time,
+	 *     and so the likelihood of a 'spurious' IPI is suitably small.
+	 * The mask is read before making an event upcall to the guest: a
+	 * non-zero mask therefore guarantees that the VCPU will not receive
+	 * an upcall activation. The mask is cleared when the VCPU requests
+	 * to block: this avoids wakeup-waiting races.
+	 */
+	u8 evtchn_upcall_pending;
+	u8 evtchn_upcall_mask;
+	xen_ulong_t evtchn_pending_sel;
+	struct arch_vcpu_info arch;
+	struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
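
For illustration only, not part of the imported header: the check order the
comment above prescribes -- clear the pending flag before scanning for
work, so a notification arriving mid-scan re-runs the loop instead of being
lost (barriers elided for brevity).

static void process_pending_events(struct vcpu_info *vcpu)
{
	while (vcpu->evtchn_upcall_pending) {
		vcpu->evtchn_upcall_pending = 0;
		/* ...scan evtchn_pending_sel and dispatch handlers... */
	}
}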
+
+/*
+ * Xen/kernel shared data -- pointer provided in start_info.
+ * NB. We expect that this struct is smaller than a page.
+ */
+struct shared_info {
+	struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
+
+	/*
+	 * A domain can create "event channels" on which it can send and receive
+	 * asynchronous event notifications. There are three classes of event that
+	 * are delivered by this mechanism:
+	 *  1. Bi-directional inter- and intra-domain connections. Domains must
+	 *     arrange out-of-band to set up a connection (usually by allocating
+	 *     an unbound 'listener' port and advertising that via a storage service
+	 *     such as xenstore).
+	 *  2. Physical interrupts. A domain with suitable hardware-access
+	 *     privileges can bind an event-channel port to a physical interrupt
+	 *     source.
+	 *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+	 *     port to a virtual interrupt source, such as the virtual-timer
+	 *     device or the emergency console.
+	 *
+	 * Event channels are addressed by a "port index". Each channel is
+	 * associated with two bits of information:
+	 *  1. PENDING -- notifies the domain that there is a pending notification
+	 *     to be processed. This bit is cleared by the guest.
+	 *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+	 *     will cause an asynchronous upcall to be scheduled. This bit is only
+	 *     updated by the guest. It is read-only within Xen. If a channel
+	 *     becomes pending while the channel is masked then the 'edge' is lost
+	 *     (i.e., when the channel is unmasked, the guest must manually handle
+	 *     pending notifications as no upcall will be scheduled by Xen).
+	 *
+	 * To expedite scanning of pending notifications, any 0->1 pending
+	 * transition on an unmasked channel causes a corresponding bit in a
+	 * per-vcpu selector word to be set. Each bit in the selector covers a
+	 * 'C long' in the PENDING bitfield array.
+	 */
+	xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+	xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+	/*
+	 * Wallclock time: updated only by control software. Guests should base
+	 * their gettimeofday() syscall on this wallclock-base value.
+	 */
+	struct pvclock_wall_clock wc;
+
+	struct arch_shared_info arch;
+};
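
For illustration only, not part of the imported header: walking the
two-level pending structure described above. Each selector bit covers one
word of evtchn_pending, masked channels are skipped, and handle_port()
stands in for the driver's dispatch routine; clearing the per-channel
pending bits and the required barriers are left out of this sketch.

#define BITS_PER_XEN_ULONG (sizeof(xen_ulong_t) * 8)

static void handle_port(unsigned int port);	/* driver's dispatch hook */

static void scan_pending_ports(struct shared_info *s, struct vcpu_info *v)
{
	xen_ulong_t sel = v->evtchn_pending_sel;

	v->evtchn_pending_sel = 0;
	while (sel) {
		unsigned int word = __builtin_ctzl(sel);
		xen_ulong_t pending;

		sel &= sel - 1;		/* consume this selector bit */
		pending = s->evtchn_pending[word] & ~s->evtchn_mask[word];
		while (pending) {
			unsigned int bit = __builtin_ctzl(pending);

			pending &= pending - 1;
			handle_port(word * BITS_PER_XEN_ULONG + bit);
		}
	}
}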
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_XEN_H__ */