diff mbox

[28/40] xenner: libxc emu: evtchn

Message ID 1288623713-28062-29-git-send-email-agraf@suse.de
State New
Headers show

Commit Message

Alexander Graf Nov. 1, 2010, 3:01 p.m. UTC
Xenner emulates parts of libxc, so we can not use the real xen infrastructure
when running xen pv guests without xen.

This patch adds support for event channel communication.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/xenner_libxc_evtchn.c |  467 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 467 insertions(+), 0 deletions(-)
 create mode 100644 hw/xenner_libxc_evtchn.c

Comments

Anthony Liguori Nov. 1, 2010, 3:45 p.m. UTC | #1
On 11/01/2010 10:01 AM, Alexander Graf wrote:
> Xenner emulates parts of libxc, so we can not use the real xen infrastructure
> when running xen pv guests without xen.
>
> This patch adds support for event channel communication.
>
> Signed-off-by: Alexander Graf<agraf@suse.de>
>    

Has anyone checked with the Xen folks about supporting this type of 
functionality in libxc directly?

Regards,

Anthony Liguori

> ---
>   hw/xenner_libxc_evtchn.c |  467 ++++++++++++++++++++++++++++++++++++++++++++++
>   1 files changed, 467 insertions(+), 0 deletions(-)
>   create mode 100644 hw/xenner_libxc_evtchn.c
>
> diff --git a/hw/xenner_libxc_evtchn.c b/hw/xenner_libxc_evtchn.c
> new file mode 100644
> index 0000000..bb1984c
> --- /dev/null
> +++ b/hw/xenner_libxc_evtchn.c
> @@ -0,0 +1,467 @@
> +/*
> + *  Copyright (C) Red Hat 2007
> + *  Copyright (C) Novell Inc. 2010
> + *
> + *  Author(s): Gerd Hoffmann<kraxel@redhat.com>
> + *             Alexander Graf<agraf@suse.de>
> + *
> + *  Xenner emulation -- event channels
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; under version 2 of the License.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License along
> + *  with this program; if not, see<http://www.gnu.org/licenses/>.
> + */
> +
> +#include<assert.h>
> +#include<xenctrl.h>
> +
> +#include "hw.h"
> +#include "qemu-log.h"
> +#include "console.h"
> +#include "monitor.h"
> +#include "xen.h"
> +#include "xen_interfaces.h"
> +
> +/* ------------------------------------------------------------- */
> +
> +struct evtpriv;
> +
> +struct port {
> +    struct evtpriv   *priv;
> +    struct port      *peer;
> +    int              port;
> +    int              pending;
> +    int              count_snd;
> +    int              count_fwd;
> +    int              count_msg;
> +};
> +
> +struct domain {
> +    int              domid;
> +    struct port      p[NR_EVENT_CHANNELS];
> +};
> +static struct domain dom0;  /* host  */
> +static struct domain domU;  /* guest */
> +
> +struct evtpriv {
> +    int                      fd_read, fd_write;
> +    struct domain            *domain;
> +    int                      ports;
> +    int                      pending;
> +    QTAILQ_ENTRY(evtpriv)    list;
> +};
> +static QTAILQ_HEAD(evtpriv_head, evtpriv) privs = QTAILQ_HEAD_INITIALIZER(privs);
> +
> +static int debug = 0;
> +
> +/* ------------------------------------------------------------- */
> +
> +static struct evtpriv *getpriv(int handle)
> +{
> +    struct evtpriv *priv;
> +
> +    QTAILQ_FOREACH(priv,&privs, list) {
> +        if (priv->fd_read == handle) {
> +            return priv;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static struct domain *get_domain(int domid)
> +{
> +    if (domid == 0) {
> +        return&dom0;
> +    }
> +    if (!domU.domid) {
> +        domU.domid = domid;
> +    }
> +    assert(domU.domid == domid);
> +    return&domU;
> +}
> +
> +static struct port *alloc_port(struct evtpriv *priv, const char *reason)
> +{
> +    struct port *p = NULL;
> +    int i;
> +
> +    for (i = 1; i<  NR_EVENT_CHANNELS; i++) {
> +#ifdef DEBUG
> +        /* debug hack */
> +#define EA_START 20
> +        if (priv->domain->domid&&  i<  EA_START)
> +            i = EA_START;
> +#undef EA_START
> +#endif
> +        if (priv->domain->p[i].priv != NULL) {
> +            continue;
> +        }
> +        p = priv->domain->p+i;
> +        p->port = i;
> +        p->priv = priv;
> +        p->count_snd = 0;
> +        p->count_fwd = 0;
> +        p->count_msg = 1;
> +        priv->ports++;
> +        if (debug) {
> +            qemu_log("xen ev:%3d: alloc port %d, domain %d (%s)\n",
> +                     priv->fd_read, p->port, priv->domain->domid, reason);
> +        }
> +        return p;
> +    }
> +    return NULL;
> +}
> +
> +static void bind_port_peer(struct port *p, int domid, int port)
> +{
> +    struct domain *domain;
> +    struct port *o;
> +    const char *msg = "ok";
> +
> +    domain = get_domain(domid);
> +    o = domain->p+port;
> +    if (!o->priv) {
> +        msg = "peer not allocated";
> +    } else if (o->peer) {
> +        msg = "peer already bound";
> +    } else if (p->peer) {
> +        msg = "port already bound";
> +    } else {
> +        o->peer = p;
> +        p->peer = o;
> +    }
> +    if (debug) {
> +        qemu_log("xen ev:%3d: bind port %d domain %d<->   port %d domain %d : %s\n",
> +                 p->priv->fd_read,
> +                 p->port, p->priv->domain->domid,
> +                 port, domid, msg);
> +    }
> +}
> +
> +static void unbind_port(struct port *p)
> +{
> +    struct port *o;
> +
> +    o = p->peer;
> +    if (o) {
> +        if (debug) {
> +            fprintf(stderr,"xen ev:%3d: unbind port %d domain %d<->   port %d domain %d\n",
> +                    p->priv->fd_read,
> +                    p->port, p->priv->domain->domid,
> +                    o->port, o->priv->domain->domid);
> +        }
> +        o->peer = NULL;
> +        p->peer = NULL;
> +    }
> +}
> +
> +static void notify_send_peer(struct port *peer)
> +{
> +    uint32_t evtchn = peer->port;
> +    int r;
> +
> +    peer->count_snd++;
> +    if (peer->pending) {
> +        return;
> +    }
> +
> +    r = write(peer->priv->fd_write,&evtchn, sizeof(evtchn));
> +    if (r != sizeof(evtchn)) {
> +        // XXX break
> +    }
> +    peer->count_fwd++;
> +    peer->pending++;
> +    peer->priv->pending++;
> +}
> +
> +static void notify_port(struct port *p)
> +{
> +    if (p->peer) {
> +        notify_send_peer(p->peer);
> +        if (debug&&  p->peer->count_snd>= p->peer->count_msg) {
> +            fprintf(stderr, "xen ev:%3d: notify port %d domain %d  ->   port %d "
> +                            "domain %d  |  counts %d/%d\n",
> +                     p->priv->fd_read, p->port, p->priv->domain->domid,
> +                     p->peer->port, p->peer->priv->domain->domid,
> +                     p->peer->count_fwd, p->peer->count_snd);
> +            p->peer->count_msg *= 10;
> +        }
> +    } else {
> +        if (debug) {
> +            fprintf(stderr, "xen ev:%3d: notify port %d domain %d  ->   unconnected\n",
> +                    p->priv->fd_read, p->port, p->priv->domain->domid);
> +        }
> +    }
> +}
> +
> +static void unmask_port(struct port *p)
> +{
> +    /* nothing to do */
> +}
> +
> +static void release_port(struct port *p)
> +{
> +    if (debug) {
> +        fprintf(stderr,"xen ev:%3d: release port %d, domain %d\n",
> +                p->priv->fd_read, p->port, p->priv->domain->domid);
> +    }
> +    unbind_port(p);
> +    p->priv->ports--;
> +    p->port = 0;
> +    p->priv = 0;
> +}
> +
> +/* ------------------------------------------------------------- */
> +
> +static int qemu_xopen(void)
> +{
> +    struct evtpriv *priv;
> +    int fd[2];
> +
> +    priv = qemu_mallocz(sizeof(*priv));
> +    QTAILQ_INSERT_TAIL(&privs, priv, list);
> +
> +    if (pipe(fd)<  0) {
> +        goto err;
> +    }
> +    priv->fd_read  = fd[0];
> +    priv->fd_write = fd[1];
> +    fcntl(priv->fd_read,F_SETFL,O_NONBLOCK);
> +
> +    priv->domain = get_domain(0);
> +    return priv->fd_read;
> +
> +err:
> +    qemu_free(priv);
> +    return -1;
> +}
> +
> +static int qemu_close(int handle)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +    int i;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +
> +    for (i = 1; i<  NR_EVENT_CHANNELS; i++) {
> +        p = priv->domain->p+i;
> +        if (priv != p->priv) {
> +            continue;
> +        }
> +        release_port(p);
> +    }
> +
> +    close(priv->fd_read);
> +    close(priv->fd_write);
> +    QTAILQ_REMOVE(&privs, priv, list);
> +    qemu_free(priv);
> +    return 0;
> +}
> +
> +static int qemu_fd(int handle)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    return priv->fd_read;
> +}
> +
> +static int qemu_notify(int handle, evtchn_port_t port)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    if (port>= NR_EVENT_CHANNELS) {
> +        return -1;
> +    }
> +    p = priv->domain->p + port;
> +    notify_port(p);
> +    return -1;
> +}
> +
> +static evtchn_port_or_error_t qemu_bind_unbound_port(int handle, int domid)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    p = alloc_port(priv, "unbound");
> +    if (!p) {
> +        return -1;
> +    }
> +    return p->port;
> +}
> +
> +static evtchn_port_or_error_t qemu_bind_interdomain(int handle, int domid,
> +                                                    evtchn_port_t remote_port)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    if (remote_port>= NR_EVENT_CHANNELS) {
> +        return -1;
> +    }
> +    p = alloc_port(priv, "interdomain");
> +    if (!p) {
> +        return -1;
> +    }
> +    bind_port_peer(p, domid, remote_port);
> +    return p->port;
> +}
> +
> +static evtchn_port_or_error_t qemu_bind_virq(int handle, unsigned int virq)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    p = alloc_port(priv, "virq");
> +    if (!p) {
> +        return -1;
> +    }
> +    /*
> +     * Note: port not linked here, we only allocate some port.
> +     */
> +    return p->port;
> +}
> +
> +static int qemu_unbind(int handle, evtchn_port_t port)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    if (port>= NR_EVENT_CHANNELS) {
> +        return -1;
> +    }
> +    p = priv->domain->p + port;
> +    unbind_port(p);
> +    release_port(p);
> +    return 0;
> +}
> +
> +static evtchn_port_or_error_t qemu_pending(int handle)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    uint32_t evtchn;
> +    int rc;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    rc = read(priv->fd_read,&evtchn, sizeof(evtchn));
> +    if (rc != sizeof(evtchn)) {
> +        return -1;
> +    }
> +    priv->pending--;
> +    priv->domain->p[evtchn].pending--;
> +    return evtchn;
> +}
> +
> +static int qemu_unmask(int handle, evtchn_port_t port)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +    struct port *p;
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    if (port>= NR_EVENT_CHANNELS) {
> +        return -1;
> +    }
> +    p = priv->domain->p + port;
> +    unmask_port(p);
> +    return 0;
> +}
> +
> +static int qemu_domid(int handle, int domid)
> +{
> +    struct evtpriv *priv = getpriv(handle);
> +
> +    if (!priv) {
> +        return -1;
> +    }
> +    if (priv->ports) {
> +        return -1;
> +    }
> +    priv->domain = get_domain(domid);
> +    return 0;
> +}
> +
> +struct XenEvtOps xc_evtchn_xenner = {
> +    .open               = qemu_xopen,
> +    .domid              = qemu_domid,
> +    .close              = qemu_close,
> +    .fd                 = qemu_fd,
> +    .notify             = qemu_notify,
> +    .bind_unbound_port  = qemu_bind_unbound_port,
> +    .bind_interdomain   = qemu_bind_interdomain,
> +    .bind_virq          = qemu_bind_virq,
> +    .unbind             = qemu_unbind,
> +    .pending            = qemu_pending,
> +    .unmask             = qemu_unmask,
> +};
> +
> +/* ------------------------------------------------------------- */
> +
> +#if 0
> +
> +void do_info_evtchn(Monitor *mon)
> +{
> +    struct evtpriv *priv;
> +    struct port *port;
> +    int i;
> +
> +    if (xen_mode != XEN_EMULATE) {
> +        monitor_printf(mon, "Not emulating xen event channels.\n");
> +        return;
> +    }
> +
> +    QTAILQ_FOREACH(priv,&privs, list) {
> +        monitor_printf(mon, "%p: domid %d, fds %d,%d\n", priv,
> +                       priv->domain->domid,
> +                       priv->fd_read, priv->fd_write);
> +        for (i = 1; i<  NR_EVENT_CHANNELS; i++) {
> +            port = priv->domain->p + i;
> +            if (port->priv != priv) {
> +                continue;
> +            }
> +            monitor_printf(mon, "  port #%d: ", port->port);
> +            if (port->peer) {
> +                monitor_printf(mon, "peer #%d (%p, domid %d)\n",
> +                               port->peer->port, port->peer->priv,
> +                               port->peer->priv->domain->domid);
> +            } else {
> +                monitor_printf(mon, "no peer\n");
> +            }
> +        }
> +    }
> +}
> +
> +#endif
> +
>
Alexander Graf Nov. 1, 2010, 3:49 p.m. UTC | #2
On 01.11.2010, at 11:45, Anthony Liguori wrote:

> On 11/01/2010 10:01 AM, Alexander Graf wrote:
>> Xenner emulates parts of libxc, so we can not use the real xen infrastructure
>> when running xen pv guests without xen.
>> 
>> This patch adds support for event channel communication.
>> 
>> Signed-off-by: Alexander Graf<agraf@suse.de>
>>   
> 
> Has anyone checked with the Xen folks about supporting this type of functionality in libxc directly?


The issue I have with libxc is that it runs orthogonal to the way the qemu infrastructure does things. If we base this on libxc, we will never be able to do cross-architecture execution of xen pv guests. Do we really want to go that way?


Alex
Anthony Liguori Nov. 1, 2010, 4:01 p.m. UTC | #3
On 11/01/2010 10:49 AM, Alexander Graf wrote:
> On 01.11.2010, at 11:45, Anthony Liguori wrote:
>
>    
>> On 11/01/2010 10:01 AM, Alexander Graf wrote:
>>      
>>> Xenner emulates parts of libxc, so we can not use the real xen infrastructure
>>> when running xen pv guests without xen.
>>>
>>> This patch adds support for event channel communication.
>>>
>>> Signed-off-by: Alexander Graf<agraf@suse.de>
>>>
>>>        
>> Has anyone checked with the Xen folks about supporting this type of functionality in libxc directly?
>>      
>
> The issue I have with libxc is that it goes orthogonal to the qemu infrastructure way of doing things. If we base on libxc, we will never be able to do cross-architecture execution of xen pv guests. Do we really want to go that way?
>    

IIUC, this is a mini-libxc that you enable by mucking with 
LD_LIBRARY_PATH such that you can run things like xenstored unmodified.  
What I'm really asking is whether there has been a discussion about a 
more pleasant way to do this that the Xen guys would feel comfortable with.

I'd feel a little weird if someone was replacing a part of QEMU via 
LD_LIBRARY_PATH trickery.  It's better to try to work out a proper 
solution with the upstream community than to do trickery.

I'm not entirely opposed to this if the Xen guys say they don't want 
anything to do with Xenner, but we should have the discussion at least.

Regards,

Anthony Liguori

>
> Alex
>
>
Alexander Graf Nov. 1, 2010, 4:07 p.m. UTC | #4
On 01.11.2010, at 12:01, Anthony Liguori wrote:

> On 11/01/2010 10:49 AM, Alexander Graf wrote:
>> On 01.11.2010, at 11:45, Anthony Liguori wrote:
>> 
>>   
>>> On 11/01/2010 10:01 AM, Alexander Graf wrote:
>>>     
>>>> Xenner emulates parts of libxc, so we can not use the real xen infrastructure
>>>> when running xen pv guests without xen.
>>>> 
>>>> This patch adds support for event channel communication.
>>>> 
>>>> Signed-off-by: Alexander Graf<agraf@suse.de>
>>>> 
>>>>       
>>> Has anyone checked with the Xen folks about supporting this type of functionality in libxc directly?
>>>     
>> 
>> The issue I have with libxc is that it goes orthogonal to the qemu infrastructure way of doing things. If we base on libxc, we will never be able to do cross-architecture execution of xen pv guests. Do we really want to go that way?
>>   
> 
> IIUC, this is a mini-libxc that you enable by mucking with LD_LIBRARY_PATH such that you can run things like xenstored unmodified.  What I'm really asking is whether there has been a discussion about a more pleasant way to do this that the Xen guys would feel comfortable with.
> 
> I'd feel a little weird if someone was replacing a part of QEMU via LD_LIBRARY_PATH trickery.  It's better to try to work out a proper solution with the upstream community than to do trickery.
> 
> I'm not entirely opposed to this if the Xen guys say they don't want anything to do with Xenner, but we should have the discussion at least.

I agree about the discussion part, that's why we're all gathering in Boston this week, right?

But technically, this code really just bumps all libxc calls to indirect function calls that go through a struct. If we're using xenner, we use our own implementation, if we're using xen, we use xen's. The thing is that with xenner we usually don't have xen infrastructure available and most likely don't want to start any either.


Alex
Anthony Liguori Nov. 1, 2010, 4:14 p.m. UTC | #5
On 11/01/2010 11:07 AM, Alexander Graf wrote:
> On 01.11.2010, at 12:01, Anthony Liguori wrote:
>
>    
>> On 11/01/2010 10:49 AM, Alexander Graf wrote:
>>      
>>> On 01.11.2010, at 11:45, Anthony Liguori wrote:
>>>
>>>
>>>        
>>>> On 11/01/2010 10:01 AM, Alexander Graf wrote:
>>>>
>>>>          
>>>>> Xenner emulates parts of libxc, so we can not use the real xen infrastructure
>>>>> when running xen pv guests without xen.
>>>>>
>>>>> This patch adds support for event channel communication.
>>>>>
>>>>> Signed-off-by: Alexander Graf<agraf@suse.de>
>>>>>
>>>>>
>>>>>            
>>>> Has anyone checked with the Xen folks about supporting this type of functionality in libxc directly?
>>>>
>>>>          
>>> The issue I have with libxc is that it goes orthogonal to the qemu infrastructure way of doing things. If we base on libxc, we will never be able to do cross-architecture execution of xen pv guests. Do we really want to go that way?
>>>
>>>        
>> IIUC, this is a mini-libxc that you enable by mucking with LD_LIBRARY_PATH such that you can run things like xenstored unmodified.  What I'm really asking is whether there has been a discussion about a more pleasant way to do this that the Xen guys would feel comfortable with.
>>
>> I'd feel a little weird if someone was replacing a part of QEMU via LD_LIBRARY_PATH trickery.  It's better to try to work out a proper solution with the upstream community than to do trickery.
>>
>> I'm not entirely opposed to this if the Xen guys say they don't want anything to do with Xenner, but we should have the discussion at least.
>>      
> I agree about the discussion part, that's why we're all gathering in Boston this week, right?
>    

Fair enough :-)

> But technically, this code really just bumps all libxc calls to indirect function calls that go through a struct. If we're using xenner, we use our own implementation, if we're using xen, we use xen's. The thing is that with xenner we usually don't have xen infrastructure available and most likely don't want to start any either.
>    

Yeah, I guess I'd just like to see a more "polite" solution.

Regards,

Anthony Liguori

> Alex
>
>
Alexander Graf Nov. 1, 2010, 4:15 p.m. UTC | #6
On 01.11.2010, at 12:14, Anthony Liguori wrote:

> On 11/01/2010 11:07 AM, Alexander Graf wrote:
>> On 01.11.2010, at 12:01, Anthony Liguori wrote:
>> 
>>   
>>> On 11/01/2010 10:49 AM, Alexander Graf wrote:
>>>     
>>>> On 01.11.2010, at 11:45, Anthony Liguori wrote:
>>>> 
>>>> 
>>>>       
>>>>> On 11/01/2010 10:01 AM, Alexander Graf wrote:
>>>>> 
>>>>>         
>>>>>> Xenner emulates parts of libxc, so we can not use the real xen infrastructure
>>>>>> when running xen pv guests without xen.
>>>>>> 
>>>>>> This patch adds support for event channel communication.
>>>>>> 
>>>>>> Signed-off-by: Alexander Graf<agraf@suse.de>
>>>>>> 
>>>>>> 
>>>>>>           
>>>>> Has anyone checked with the Xen folks about supporting this type of functionality in libxc directly?
>>>>> 
>>>>>         
>>>> The issue I have with libxc is that it goes orthogonal to the qemu infrastructure way of doing things. If we base on libxc, we will never be able to do cross-architecture execution of xen pv guests. Do we really want to go that way?
>>>> 
>>>>       
>>> IIUC, this is a mini-libxc that you enable by mucking with LD_LIBRARY_PATH such that you can run things like xenstored unmodified.  What I'm really asking is whether there has been a discussion about a more pleasant way to do this that the Xen guys would feel comfortable with.
>>> 
>>> I'd feel a little weird if someone was replacing a part of QEMU via LD_LIBRARY_PATH trickery.  It's better to try to work out a proper solution with the upstream community than to do trickery.
>>> 
>>> I'm not entirely opposed to this if the Xen guys say they don't want anything to do with Xenner, but we should have the discussion at least.
>>>     
>> I agree about the discussion part, that's why we're all gathering in Boston this week, right?
>>   
> 
> Fair enough :-)
> 
>> But technically, this code really just bumps all libxc calls to indirect function calls that go through a struct. If we're using xenner, we use our own implementation, if we're using xen, we use xen's. The thing is that with xenner we usually don't have xen infrastructure available and most likely don't want to start any either.
>>   
> 
> Yeah, I guess I'd just like to see a more "polite" solution.

We can try and see if we can maybe reuse parts of the event channel and xenstored stuff, but when it comes to memory mappings or grant tables, we have to have our own code since we're the ones owning the ram.

But yeah, let's move that discussion to LPC :). That way the xen folks can participate!


Alex
Paolo Bonzini Nov. 1, 2010, 7:39 p.m. UTC | #7
On 11/01/2010 05:01 PM, Anthony Liguori wrote:
>
> IIUC, this is a mini-libxc that you enable by mucking with
> LD_LIBRARY_PATH such that you can run things like xenstored unmodified.
> What I'm really asking is whether there has been a discussion about a
> more pleasant way to do this that the Xen guys would feel comfortable with.

I don't know if it's Alex or Gerd who did the switch, but this version 
of the code doesn't have the separate mini-libxc.  The code of the 
mini-libxc is embedded in QEMU, just like xenstored, blkback and 
netback.  See patch 31/40, which includes both the "mini xenstored" and 
the "mini libxenstore".

It's not clear where xenconsoled is; is the PV console functionality 
missing in this version of xenner?

Paolo
Anthony Liguori Nov. 1, 2010, 7:41 p.m. UTC | #8
On 11/01/2010 02:39 PM, Paolo Bonzini wrote:
> On 11/01/2010 05:01 PM, Anthony Liguori wrote:
>>
>> IIUC, this is a mini-libxc that you enable by mucking with
>> LD_LIBRARY_PATH such that you can run things like xenstored unmodified.
>> What I'm really asking is whether there has been a discussion about a
>> more pleasant way to do this that the Xen guys would feel comfortable 
>> with.
>
> I don't know if it's Alex or Gerd who did the switch, but this version 
> of the code doesn't have the separate mini-libxc.  The code of the 
> mini-libxc is embedded in QEMU, just like xenstored, blkback and 
> netback.  See patch 31/40, which includes both the "mini xenstored" 
> and the "mini libxenstore".

Oh, I'm still missing some of it.  That's a curious choice.

What's the logic for duplicating xenstored/xenconsoled?  I understand 
blkback/netback.

Regards,

Anthony Liguori

> It's not clear where is xenconsoled, is the PV console functionality 
> missing in this version of xenner?
>
> Paolo
Alexander Graf Nov. 1, 2010, 7:47 p.m. UTC | #9
On 01.11.2010, at 15:41, Anthony Liguori wrote:

> On 11/01/2010 02:39 PM, Paolo Bonzini wrote:
>> On 11/01/2010 05:01 PM, Anthony Liguori wrote:
>>> 
>>> IIUC, this is a mini-libxc that you enable by mucking with
>>> LD_LIBRARY_PATH such that you can run things like xenstored unmodified.
>>> What I'm really asking is whether there has been a discussion about a
>>> more pleasant way to do this that the Xen guys would feel comfortable with.
>> 
>> I don't know if it's Alex or Gerd who did the switch, but this version of the code doesn't have the separate mini-libxc.  The code of the mini-libxc is embedded in QEMU, just like xenstored, blkback and netback.  See patch 31/40, which includes both the "mini xenstored" and the "mini libxenstore".
> 
> Oh, I'm still missing some of it.  That's a curious choice.
> 
> What's the logic for duplicating xenstored/xenconsoled?  I understand blkback/netback.

Where else would it belong? Qemu is an emulator. Device emulation belongs to qemu code. The xen PV machine is nothing but a special case of the pc machine with custom firmware and odd devices :).

As I stated in my cover letter, the goal of all this should be to have the qemu pieces be 100% independent of any xen headers or libraries, so we can eventually isolate it well enough that it even works on non-x86. Then we're at the point qemu code usually is.

I'm sure there are also practical implications btw. But I don't really care about those too much, because the architectural ones outweigh them for me.


Alex
Anthony Liguori Nov. 1, 2010, 8:32 p.m. UTC | #10
On 11/01/2010 02:47 PM, Alexander Graf wrote:
> Where else would it belong? Qemu is an emulator. Device emulation belongs to qemu code. The xen PV machine is nothing but a special case of the pc machine with custom firmware and odd devices :).
>
> As I stated in my cover letter, the goal of all this should be to have the qemu pieces be 100% independent of any xen headers or libraries,

I'm not sure I agree with the goal.  I think wherever possible we 
should reuse code with the Xen project when it makes sense.  Reusing 
blkback/netback is impossible because we want userspace implementations 
and the current implementations are in the kernel.  blktap also doesn't 
tie into the QEMU block layer and making it tie into the QEMU block 
layer would probably result in more code than it saved.

OTOH, xenstored and xenconsoled have very little direct dependence on 
Xen.  I'm not saying that we shouldn't make things Just Work in QEMU, so 
if that means spawning xenconsoled/xenstored automagically from QEMU 
with special options, that's perfectly fine.

But to replicate the functionality of this code solely because of NIH 
seems like a waste of effort.

Regards,

Anthony Liguori

>   so we can eventually isolate it well enough that it even works on non-x86. Then we're at the point qemu code usually is.
>
> I'm sure there are also practical implications btw. But I don't really care about those too much, because the architectural ones outweigh that to me.
>
>
> Alex
>
>
Paolo Bonzini Nov. 1, 2010, 9:47 p.m. UTC | #11
On 11/01/2010 09:32 PM, Anthony Liguori wrote:
>
> I'm not sure I agree with the goal.  I think where ever possible we
> should reuse code with the Xen project when it makes sense.  Reusing
> blkback/netback is impossible because we want userspace implementations
> and the current implementations are in the kernel.  blktap also doesn't
> tie into the QEMU block layer and making it tie into the QEMU block
> layer would probably result in more code than it saved.
>
> OTOH, xenstored and xenconsoled have very little direct dependence on
> Xen.  I'm not saying that we shouldn't make things Just Work in QEMU, so
> if that means spawning xenconsoled/xenstored automagically from QEMU
> with special options, that's perfectly fine.

xenstored is 3 times bigger than what Alex submitted, however.  The code 
is much simpler because _this_ xenstore only serves one domain.  So it 
doesn't have to implement permissions, it doesn't have complicated 
threading to handle multiple instances of libxs accessing the daemon, 
and so on.  Besides the data structures implementing the tree, there's 
really very little in common, and the xenner code is almost trivial.

The situation is similar for the console.  There is only one console to 
track here.  In fact, maybe it's simplest to implement it as a small 
8250A driver in the xenner kernel, reading from the serial console at 
0x3f8 and writing to the ring buffer and vice versa.

Paolo
Anthony Liguori Nov. 1, 2010, 10 p.m. UTC | #12
On 11/01/2010 04:47 PM, Paolo Bonzini wrote:
> On 11/01/2010 09:32 PM, Anthony Liguori wrote:
>>
>> I'm not sure I agree with the goal.  I think where ever possible we
>> should reuse code with the Xen project when it makes sense.  Reusing
>> blkback/netback is impossible because we want userspace implementations
>> and the current implementations are in the kernel.  blktap also doesn't
>> tie into the QEMU block layer and making it tie into the QEMU block
>> layer would probably result in more code than it saved.
>>
>> OTOH, xenstored and xenconsoled have very little direct dependence on
>> Xen.  I'm not saying that we shouldn't make things Just Work in QEMU, so
>> if that means spawning xenconsoled/xenstored automagically from QEMU
>> with special options, that's perfectly fine.
>
> xenstored is 3 times bigger than what Alex submitted, however.  The 
> code is much simpler because _this_ xenstore only serves one domain.  
> So it doesn't have to implement permissions, it doesn't have 
> complicated threading to handle multiple instances of libxs accessing 
> the daemon, and so on.  Besides the data structures implementing the 
> tree, there's really very little in common, and the xenner code is 
> almost trivial.
>
> The situation is similar for the console.  There is only one console 
> to track here.  In fact, maybe it's simplest to implement it as a 
> small 8250A driver in the xenner kernel, reading from the serial 
> console at 0x3f8 and writing to the ring buffer and vice versa.

Okay, so does the same apply for xenstored?  Does it make more sense to 
move that into the xenner kernel?

The big advantage of the xenner kernel is that it runs in guest mode so 
it's no concern from a security PoV.  While xenstored is 3x bigger than 
Alex's version, it also has had an awful lot more validation from a 
security point of view.  Since this is guest facing code, that's important.

Regards,

Anthony Liguori

>
> Paolo
Paolo Bonzini Nov. 1, 2010, 10:08 p.m. UTC | #13
On 11/01/2010 11:00 PM, Anthony Liguori wrote:
>
> Okay, so does the same apply for xenstored?  Does it make more sense to
> move that into the xenner kernel?

I think not, because the backend devices do use xenstore, so they would 
need a way to talk to the guest.  It's the same conceptually for the 
console, but in that case the "way to talk to the guest" is the 8250A 
device model that already exists.  In the case of xenstore it would be 
yet another protocol to devise and scrutinize.

Paolo
Anthony Liguori Nov. 1, 2010, 10:29 p.m. UTC | #14
On 11/01/2010 05:08 PM, Paolo Bonzini wrote:
> On 11/01/2010 11:00 PM, Anthony Liguori wrote:
>>
>> Okay, so does the same apply for xenstored?  Does it make more sense to
>> move that into the xenner kernel?
>
> I think no, because the backend devices do use xenstore, so they would 
> need a way to talk to the guest.

Yeah, I was thinking fw_cfg but that's only after not thinking too much 
about it so that may be naive.

Regards,

Anthony Liguori

>   It's the same conceptually for the console, but in that case the 
> "way to talk to the guest" is the 8250A device model that already 
> exists.  In the case of xenstore it would be yet another protocol to 
> devise and scrutinize.
>
> Paolo
Stefano Stabellini Nov. 2, 2010, 4:33 a.m. UTC | #15
On Mon, 1 Nov 2010, Anthony Liguori wrote:
> On 11/01/2010 02:47 PM, Alexander Graf wrote:
> > Where else would it belong? Qemu is an emulator. Device emulation belongs to qemu code. The xen PV machine is nothing but a special case of the pc machine with custom firmware and odd devices :).
> >
> > As I stated in my cover letter, the goal of all this should be to have the qemu pieces be 100% independent of any xen headers or libraries,
> 
> I'm not sure I agree with the goal.  I think where ever possible we 
> should reuse code with the Xen project when it makes sense.  Reusing 
> blkback/netback is impossible because we want userspace implementations 
> and the current implementations are in the kernel.  blktap also doesn't 
> tie into the QEMU block layer and making it tie into the QEMU block 
> layer would probably result in more code than it saved.
> 
> OTOH, xenstored and xenconsoled have very little direct dependence on 
> Xen.  I'm not saying that we shouldn't make things Just Work in QEMU, so 
> if that means spawning xenconsoled/xenstored automagically from QEMU 
> with special options, that's perfectly fine.
> 
> But to replicate the functionality of this code solely because of NIH 
> seems like a waste of effort.
> 

I have been traveling so I haven't had a chance to carefully read the
series yet, however these are my early observations:

I don't mind xenner, of course I think the best way to run a PV guest is
to use Xen, but Xenner can be useful in many ways. I would love to see
an x86_32 PV guest run on PowerPC, or even in a Xen HVM domain!
It would be very useful for testing too, it would shorten my dev & test
cycle by quite a bit.

I am a strong proponent of code sharing and reuse so I agree with
Anthony on this: we should reuse Xen libraries and daemons as much as
possible. If you need some patches to port xenstored and/or xenconsoled
to PowerPC we would gladly accept them.
That said, many Xen components are obviously tied to the Xen
architecture, so it might not be easy to reuse them outside a Xen
environment. For example: making xenstored work without Xen shouldn't be
too difficult but porting libxc to KVM/QEMU I think would be harder.

I am looking forward to talking with you in Boston,

Stefano
Paolo Bonzini Nov. 2, 2010, 10:06 a.m. UTC | #16
On 11/02/2010 05:33 AM, Stefano Stabellini wrote:
> On Mon, 1 Nov 2010, Anthony Liguori wrote:
>> On 11/01/2010 02:47 PM, Alexander Graf wrote:
>>> Where else would it belong? Qemu is an emulator. Device emulation belongs to qemu code. The xen PV machine is nothing but a special case of the pc machine with custom firmware and odd devices :).
>>>
>>> As I stated in my cover letter, the goal of all this should be to have the qemu pieces be 100% independent of any xen headers or libraries,
>>
>> I'm not sure I agree with the goal.  I think where ever possible we
>> should reuse code with the Xen project when it makes sense.  Reusing
>> blkback/netback is impossible because we want userspace implementations
>> and the current implementations are in the kernel.  blktap also doesn't
>> tie into the QEMU block layer and making it tie into the QEMU block
>> layer would probably result in more code than it saved.
>>
>> OTOH, xenstored and xenconsoled have very little direct dependence on
>> Xen.  I'm not saying that we shouldn't make things Just Work in QEMU, so
>> if that means spawning xenconsoled/xenstored automagically from QEMU
>> with special options, that's perfectly fine.
>>
>> But to replicate the functionality of this code solely because of NIH
>> seems like a waste of effort.
>
> I am a strong proponent of code sharing and reuse so I agree with
> Anthony on this: we should reuse Xen libraries and daemons as much as
> possible. If you need some patches to port xenstored and/or xenconsoled
> to PowerPC we would gladly accept them.

The question is, how much do the Xen userspace and Xenner have in common?

If you remove code that Xen runs in the hypervisor or in the dom0 
kernel, or code that (like xenconsoled) is IMHO best moved to the Xenner 
kernel, what remains is the domain builder and of course xenstore 
handling.  The domain builder is in libxc, which makes it hard to share, 
and this leaves xenstore.

Now, half of it (the ring buffer protocol) already has a million 
duplicate implementations in userspace, in the kernel, in Windows PV 
drivers (at least three independent versions), and is pretty much set in 
stone.

So, what remains is actually parsing the xenstore messages and handling 
the tree data structure.  Which is actually a _very_ small part of 
xenstored: xenstored has to work across multiple domains and clients, be 
careful about inter-domain security, and so on.  Xenner has the _big_ 
advantage of having total independence between domUs (it's like if each 
domU had its own little dom0, its own little xenstore and so on).  While 
it doesn't mean there are no security concerns with guest-facing code, 
it simplifies the code to the point where effectively it makes no sense 
to share anything but the APIs.

I took a look at recent changes to libxs and xenstored in 
xen-unstable.hg. Here are some subjects going back to c/s 17400 (about 
30 months):

- xenstore: libxenstore: fix threading bug which cause xend startup hang
- xenstore: correctly handle errors from read_message
- xenstore: Make sure that libxs reports an error if xenstored drops
- xenstore: Fix cleanup_pop() definition for some (buggy) pthread.h headers.
- xs: avoid pthread_join deadlock in xs_daemon_close
- xs: make sure mutexes are cleaned up and memory freed if the read 
thread is cancelled
- xenstore,libxl: cleanup of xenstore connections across fork()
- xenstored: fix use-after free bug
- xenstore: Fix a memory leak in 'xs_is_domain_introduced'.
- xenstored: Fix xenstored abort when connection dropped.
- xenstore: fix canonicalize for metanodes

Almost all of them are about threading or error conditions, and even 
those that aren't wouldn't apply to a simple implementation like 
Xenner's.  This shows that the risk of missing bugfixes in guest-facing 
code is much smaller than one would think (including what I thought).

(BTW, I noticed that Xenner does not limit guest segments like Xen does. 
  Does it mean the guest can overwrite the Xenner kernel and effectively 
run ring0?)

Paolo
Gerd Hoffmann Nov. 2, 2010, 10:31 a.m. UTC | #17
Hi,

> (BTW, I noticed that Xenner does not limit guest segments like Xen does.
> Does it mean the guest can overwrite the Xenner kernel and effectively
> run ring0?)

Yes.  The guest also can modify page tables as it pleases.  It is the 
vmx/svm container which protects the host, not the xenner kernel.

cheers,
   Gerd
Paolo Bonzini Nov. 2, 2010, 10:38 a.m. UTC | #18
On 11/02/2010 11:31 AM, Gerd Hoffmann wrote:
>   Hi,
>
>> (BTW, I noticed that Xenner does not limit guest segments like Xen does.
>> Does it mean the guest can overwrite the Xenner kernel and effectively
>> run ring0?)
>
> Yes. The guest also can modify page tables as it pleases. It is the
> vmx/svm container which protects the host, not the xenner kernel.

Yes, got it.  I was trying to understand exactly which parts are 
guest-facing (the answer is "everything") and which are only 
xenner-facing (and here the answer is "none" :)).

Paolo
Stefano Stabellini Nov. 2, 2010, 1:55 p.m. UTC | #19
On Tue, 2 Nov 2010, Paolo Bonzini wrote:
> The question is, how much do the Xen userspace and Xenner have in common?
> 
> If you remove code that Xen runs in the hypervisor or in the dom0 
> kernel, or code that (like xenconsoled) is IMHO best moved to the Xenner 
> kernel, what remains is the domain builder and of course xenstore 
> handling.  The domain builder is in libxc, which makes it hard to share, 
> and this leaves xenstore.
> 

There is a xen console backend in qemu already (xen_console.c).


> Now, half of it (the ring buffer protocol) already has a million 
> duplicate implementation in userspace, in the kernel, in Windows PV 
> drivers (at least three independent versions), and is pretty much set in 
> stone.
> 
> So, what remains is actually parsing the xenstore messages and handling 
> the tree data structure.  Which is actually a _very_ small part of 
> xenstored: xenstored has to work across multiple domains and clients, be 
> careful about inter-domain security, and so on.  Xenner has the _big_ 
> advantage of having total independence between domUs (it's like if each 
> domU had its own little dom0, its own little xenstore and so on).  While 
> it doesn't mean there are no security concerns with guest-facing code, 
> it simplifies the code to the point where effectively it makes no sense 
> to share anything but the APIs.
> 

All right, if you feel that it would be easier for you to use your own
simplified version, I am OK with that.
However it is important that the mini-libxc, the mini-xenstored and the
qemu domain builder are disabled when using xen as accelerator.
As I said before, running pure PV guests in a xen HVM domain should be one of
the targets of the series, and in that case we do want to use the full
featured xenstored and libxc and the libxenlight domain builder.
Alexander Graf Nov. 2, 2010, 3:48 p.m. UTC | #20
On 02.11.2010, at 09:55, Stefano Stabellini wrote:

> On Tue, 2 Nov 2010, Paolo Bonzini wrote:
>> The question is, how much do the Xen userspace and Xenner have in common?
>> 
>> If you remove code that Xen runs in the hypervisor or in the dom0 
>> kernel, or code that (like xenconsoled) is IMHO best moved to the Xenner 
>> kernel, what remains is the domain builder and of course xenstore 
>> handling.  The domain builder is in libxc, which makes it hard to share, 
>> and this leaves xenstore.
>> 
> 
> There is a xen console backend in qemu already (xen_console.c).
> 
> 
>> Now, half of it (the ring buffer protocol) already has a million 
>> duplicate implementation in userspace, in the kernel, in Windows PV 
>> drivers (at least three independent versions), and is pretty much set in 
>> stone.
>> 
>> So, what remains is actually parsing the xenstore messages and handling 
>> the tree data structure.  Which is actually a _very_ small part of 
>> xenstored: xenstored has to work across multiple domains and clients, be 
>> careful about inter-domain security, and so on.  Xenner has the _big_ 
>> advantage of having total independence between domUs (it's like if each 
>> domU had its own little dom0, its own little xenstore and so on).  While 
>> it doesn't mean there are no security concerns with guest-facing code, 
>> it simplifies the code to the point where effectively it makes no sense 
>> to share anything but the APIs.
>> 
> 
> All right, if you feel that it would be easier for you to use your own
> simplified version, I am OK with that.
> However it is important that the mini-libxc, the mini-xenstored and the
> qemu domain builder are disable when using xen as accelerator.
> As I said before, running pure PV guests in a xen HVM domain should be one of
> the targets of the series, and in that case we do want to use the full
> featured xenstored and libxc and the libxenlight domain buider.

This is getting confusing :). There are multiple ways of spawning a Xen PV instance I'm aware of:

1) Xen PV context
2) Xen PV context in SVM/VMX container, maintained by Xen
3) Xenner on TCG/KVM
4) Xenner on Xen HVM

For 1 and 2 the way to go is definitely to reuse the xen infrastructure. For 3 I'm very reluctant in requiring dependencies. One of qemu's strong points is that it does not have too many dependencies on other code. If there are strong points for it however, I gladly change my position :).

For 4 however, I haven't fully made up my mind on if it's useful to people (if you say it is, I'm more than glad to get this rolling!) and what the best way to implement it would be.

So I suppose your suggestion is to use the xen infrastructure for case 4? That might work out. Fortunately, all the detection on which backend we use happens at runtime. Since in that case Xen does own the guest's memory, we might even be safe on using its memory mapping functionality. Maybe.

I'm looking very much forward to talking to you about this in Boston. Are you around already?


Alex
Stefano Stabellini Nov. 2, 2010, 7:20 p.m. UTC | #21
On Tue, 2 Nov 2010, Alexander Graf wrote:
> This is getting confusing :). There are multiple ways of spawning a Xen PV instance I'm aware of:
> 
> 1) Xen PV context
> 2) Xen PV context in SVM/VMX container, maintained by Xen
> 3) Xenner on TCG/KVM
> 4) Xenner on Xen HVM
> 
> For 1 and 2 the way to go is definitely to reuse the xen infrastructure. For 3 I'm very reluctant in requiring dependencies. One of qemu's strong points is that it does not have too many dependencies on other code. If there are strong points for it however, I gladly change my position :).
> 
> For 4 however, I haven't fully made up my mind on if it's useful to people (if you say it is, I'm more than glad to get this rolling!) and what the best way to implement it would be.
> 

I am guessing that with 2) you are referring to Linux PV on HVM guests.
If so 2) and 4) are very different: a Linux PV on HVM guest is a normal
Linux kernel that would boot just fine on native, but is also able to
enable some Xen PV interfaces when running in a Xen HVM domain.
Linux PV on HVM guests are new and support has been in the kernel for less
than a year.
However Linux PV guests have been around for a long time and
traditionally are unable to boot on native or in a Xen HVM container.
So 4) would allow these kernels to boot in a Xen HVM container
unmodified, this is why it would be useful.


> So I suppose your suggestion is to use the xen infrastructure for case 4? That might work out. Fortunately, all the detection on which backend we use happens at runtime. Since in that case Xen does own the guest's memory, we might even be safe on using its memory mapping functionality. Maybe.
> 

Yes. Case 4) is just a normal Xen HVM domain from the Xen point of view,
so it needs all the rest of the Xen infrastructure. There is no need to
replace xenstored or libxc when the real xenstored and libxc are
available.


> I'm looking very much forward to talking to you about this in Boston. Are you around already?
> 

Yep!
diff mbox

Patch

diff --git a/hw/xenner_libxc_evtchn.c b/hw/xenner_libxc_evtchn.c
new file mode 100644
index 0000000..bb1984c
--- /dev/null
+++ b/hw/xenner_libxc_evtchn.c
@@ -0,0 +1,467 @@ 
+/*
+ *  Copyright (C) Red Hat 2007
+ *  Copyright (C) Novell Inc. 2010
+ *
+ *  Author(s): Gerd Hoffmann <kraxel@redhat.com>
+ *             Alexander Graf <agraf@suse.de>
+ *
+ *  Xenner emulation -- event channels
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <xenctrl.h>
+
+#include "hw.h"
+#include "qemu-log.h"
+#include "console.h"
+#include "monitor.h"
+#include "xen.h"
+#include "xen_interfaces.h"
+
+/* ------------------------------------------------------------- */
+
+struct evtpriv;
+
+struct port {
+    struct evtpriv   *priv;
+    struct port      *peer;
+    int              port;
+    int              pending;
+    int              count_snd;
+    int              count_fwd;
+    int              count_msg;
+};
+
+struct domain {
+    int              domid;
+    struct port      p[NR_EVENT_CHANNELS];
+};
+static struct domain dom0;  /* host  */
+static struct domain domU;  /* guest */
+
+struct evtpriv {
+    int                      fd_read, fd_write;
+    struct domain            *domain;
+    int                      ports;
+    int                      pending;
+    QTAILQ_ENTRY(evtpriv)    list;
+};
+static QTAILQ_HEAD(evtpriv_head, evtpriv) privs = QTAILQ_HEAD_INITIALIZER(privs);
+
+static int debug = 0;
+
+/* ------------------------------------------------------------- */
+
+static struct evtpriv *getpriv(int handle)
+{
+    struct evtpriv *priv;
+
+    QTAILQ_FOREACH(priv, &privs, list) {
+        if (priv->fd_read == handle) {
+            return priv;
+        }
+    }
+    return NULL;
+}
+
+static struct domain *get_domain(int domid)
+{
+    if (domid == 0) {
+        return &dom0;
+    }
+    if (!domU.domid) {
+        domU.domid = domid;
+    }
+    assert(domU.domid == domid);
+    return &domU;
+}
+
+static struct port *alloc_port(struct evtpriv *priv, const char *reason)
+{
+    struct port *p = NULL;
+    int i;
+
+    for (i = 1; i < NR_EVENT_CHANNELS; i++) {
+#ifdef DEBUG
+        /* debug hack */
+#define EA_START 20
+        if (priv->domain->domid && i < EA_START)
+            i = EA_START;
+#undef EA_START
+#endif
+        if (priv->domain->p[i].priv != NULL) {
+            continue;
+        }
+        p = priv->domain->p+i;
+        p->port = i;
+        p->priv = priv;
+        p->count_snd = 0;
+        p->count_fwd = 0;
+        p->count_msg = 1;
+        priv->ports++;
+        if (debug) {
+            qemu_log("xen ev:%3d: alloc port %d, domain %d (%s)\n",
+                     priv->fd_read, p->port, priv->domain->domid, reason);
+        }
+        return p;
+    }
+    return NULL;
+}
+
+static void bind_port_peer(struct port *p, int domid, int port)
+{
+    struct domain *domain;
+    struct port *o;
+    const char *msg = "ok";
+
+    domain = get_domain(domid);
+    o = domain->p+port;
+    if (!o->priv) {
+        msg = "peer not allocated";
+    } else if (o->peer) {
+        msg = "peer already bound";
+    } else if (p->peer) {
+        msg = "port already bound";
+    } else {
+        o->peer = p;
+        p->peer = o;
+    }
+    if (debug) {
+        qemu_log("xen ev:%3d: bind port %d domain %d  <->  port %d domain %d : %s\n",
+                 p->priv->fd_read,
+                 p->port, p->priv->domain->domid,
+                 port, domid, msg);
+    }
+}
+
+static void unbind_port(struct port *p)
+{
+    struct port *o;
+
+    o = p->peer;
+    if (o) {
+        if (debug) {
+            fprintf(stderr,"xen ev:%3d: unbind port %d domain %d  <->  port %d domain %d\n",
+                    p->priv->fd_read,
+                    p->port, p->priv->domain->domid,
+                    o->port, o->priv->domain->domid);
+        }
+        o->peer = NULL;
+        p->peer = NULL;
+    }
+}
+
+/* Queue one event for @peer on its owner's pipe, unless one is pending. */
+static void notify_send_peer(struct port *peer)
+{
+    uint32_t evtchn = peer->port;
+    int r;
+
+    peer->count_snd++;
+    if (peer->pending) {
+        return;
+    }
+    r = write(peer->priv->fd_write, &evtchn, sizeof(evtchn));
+    if (r != sizeof(evtchn)) {
+        return; /* nothing queued: marking it pending would wedge the port */
+    }
+    peer->count_fwd++;
+    peer->pending++;
+    peer->priv->pending++;
+}
+
+static void notify_port(struct port *p)
+{
+    if (p->peer) {
+        notify_send_peer(p->peer);
+        if (debug && p->peer->count_snd >= p->peer->count_msg) {
+            fprintf(stderr, "xen ev:%3d: notify port %d domain %d  ->  port %d "
+                            "domain %d  |  counts %d/%d\n",
+                     p->priv->fd_read, p->port, p->priv->domain->domid,
+                     p->peer->port, p->peer->priv->domain->domid,
+                     p->peer->count_fwd, p->peer->count_snd);
+            p->peer->count_msg *= 10;
+        }
+    } else {
+        if (debug) {
+            fprintf(stderr, "xen ev:%3d: notify port %d domain %d  ->  unconnected\n",
+                    p->priv->fd_read, p->port, p->priv->domain->domid);
+        }
+    }
+}
+
+static void unmask_port(struct port *p)
+{
+    /* nothing to do */
+}
+
+static void release_port(struct port *p)
+{
+    if (debug) {
+        fprintf(stderr,"xen ev:%3d: release port %d, domain %d\n",
+                p->priv->fd_read, p->port, p->priv->domain->domid);
+    }
+    unbind_port(p);
+    p->priv->ports--;
+    p->port = 0;
+    p->priv = 0;
+}
+
+/* ------------------------------------------------------------- */
+
+static int qemu_xopen(void)
+{
+    struct evtpriv *priv;
+    int fd[2];
+
+    priv = qemu_mallocz(sizeof(*priv));
+    QTAILQ_INSERT_TAIL(&privs, priv, list);
+
+    if (pipe(fd) < 0) {
+        goto err;
+    }
+    priv->fd_read  = fd[0];
+    priv->fd_write = fd[1];
+    fcntl(priv->fd_read,F_SETFL,O_NONBLOCK);
+
+    priv->domain = get_domain(0);
+    return priv->fd_read;
+
+err:
+    qemu_free(priv);
+    return -1;
+}
+
+static int qemu_close(int handle)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+    int i;
+
+    if (!priv) {
+        return -1;
+    }
+
+    for (i = 1; i < NR_EVENT_CHANNELS; i++) {
+        p = priv->domain->p+i;
+        if (priv != p->priv) {
+            continue;
+        }
+        release_port(p);
+    }
+
+    close(priv->fd_read);
+    close(priv->fd_write);
+    QTAILQ_REMOVE(&privs, priv, list);
+    qemu_free(priv);
+    return 0;
+}
+
+static int qemu_fd(int handle)
+{
+    struct evtpriv *priv = getpriv(handle);
+
+    if (!priv) {
+        return -1;
+    }
+    return priv->fd_read;
+}
+
+static int qemu_notify(int handle, evtchn_port_t port)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+
+    if (!priv) {
+        return -1;
+    }
+    if (port >= NR_EVENT_CHANNELS) {
+        return -1;
+    }
+    p = priv->domain->p + port;
+    notify_port(p);
+    return -1;
+}
+
+static evtchn_port_or_error_t qemu_bind_unbound_port(int handle, int domid)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+
+    if (!priv) {
+        return -1;
+    }
+    p = alloc_port(priv, "unbound");
+    if (!p) {
+        return -1;
+    }
+    return p->port;
+}
+
+static evtchn_port_or_error_t qemu_bind_interdomain(int handle, int domid,
+                                                    evtchn_port_t remote_port)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+
+    if (!priv) {
+        return -1;
+    }
+    if (remote_port >= NR_EVENT_CHANNELS) {
+        return -1;
+    }
+    p = alloc_port(priv, "interdomain");
+    if (!p) {
+        return -1;
+    }
+    bind_port_peer(p, domid, remote_port);
+    return p->port;
+}
+
+static evtchn_port_or_error_t qemu_bind_virq(int handle, unsigned int virq)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+
+    if (!priv) {
+        return -1;
+    }
+    p = alloc_port(priv, "virq");
+    if (!p) {
+        return -1;
+    }
+    /*
+     * Note: port not linked here, we only allocate some port.
+     */
+    return p->port;
+}
+
+/* Release @port if it belongs to @handle; returns 0 on success, else -1. */
+static int qemu_unbind(int handle, evtchn_port_t port)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+
+    if (!priv || port >= NR_EVENT_CHANNELS) {
+        return -1;
+    }
+    p = priv->domain->p + port;
+    if (p->priv != priv) {
+        return -1;  /* unallocated (NULL priv) or owned by another handle */
+    }
+    release_port(p);  /* release_port() unbinds the peer itself */
+    return 0;
+}
+
+static evtchn_port_or_error_t qemu_pending(int handle)
+{
+    struct evtpriv *priv = getpriv(handle);
+    uint32_t evtchn;
+    int rc;
+
+    if (!priv) {
+        return -1;
+    }
+    rc = read(priv->fd_read, &evtchn, sizeof(evtchn));
+    if (rc != sizeof(evtchn)) {
+        return -1;
+    }
+    priv->pending--;
+    priv->domain->p[evtchn].pending--;
+    return evtchn;
+}
+
+static int qemu_unmask(int handle, evtchn_port_t port)
+{
+    struct evtpriv *priv = getpriv(handle);
+    struct port *p;
+
+    if (!priv) {
+        return -1;
+    }
+    if (port >= NR_EVENT_CHANNELS) {
+        return -1;
+    }
+    p = priv->domain->p + port;
+    unmask_port(p);
+    return 0;
+}
+
+static int qemu_domid(int handle, int domid)
+{
+    struct evtpriv *priv = getpriv(handle);
+
+    if (!priv) {
+        return -1;
+    }
+    if (priv->ports) {
+        return -1;
+    }
+    priv->domain = get_domain(domid);
+    return 0;
+}
+
+struct XenEvtOps xc_evtchn_xenner = {
+    .open               = qemu_xopen,
+    .domid              = qemu_domid,
+    .close              = qemu_close,
+    .fd                 = qemu_fd,
+    .notify             = qemu_notify,
+    .bind_unbound_port  = qemu_bind_unbound_port,
+    .bind_interdomain   = qemu_bind_interdomain,
+    .bind_virq          = qemu_bind_virq,
+    .unbind             = qemu_unbind,
+    .pending            = qemu_pending,
+    .unmask             = qemu_unmask,
+};
+
+/* ------------------------------------------------------------- */
+
+#if 0
+
+void do_info_evtchn(Monitor *mon)
+{
+    struct evtpriv *priv;
+    struct port *port;
+    int i;
+
+    if (xen_mode != XEN_EMULATE) {
+        monitor_printf(mon, "Not emulating xen event channels.\n");
+        return;
+    }
+
+    QTAILQ_FOREACH(priv, &privs, list) {
+        monitor_printf(mon, "%p: domid %d, fds %d,%d\n", priv,
+                       priv->domain->domid,
+                       priv->fd_read, priv->fd_write);
+        for (i = 1; i < NR_EVENT_CHANNELS; i++) {
+            port = priv->domain->p + i;
+            if (port->priv != priv) {
+                continue;
+            }
+            monitor_printf(mon, "  port #%d: ", port->port);
+            if (port->peer) {
+                monitor_printf(mon, "peer #%d (%p, domid %d)\n",
+                               port->peer->port, port->peer->priv,
+                               port->peer->priv->domain->domid);
+            } else {
+                monitor_printf(mon, "no peer\n");
+            }
+        }
+    }
+}
+
+#endif
+