diff mbox

[v10,09/10] netfilter: add a netbuffer filter

Message ID 1441783481-17698-10-git-send-email-yanghy@cn.fujitsu.com
State New
Headers show

Commit Message

Yang Hongyang Sept. 9, 2015, 7:24 a.m. UTC
This filter buffers and releases packets. It can be used with
MicroCheckpointing or other Remus-like VM FT solutions, and it can
also be used to simulate network delay.
It has an interval option; if supplied, the filter releases the
buffered packets once every interval.

Usage:
 -netdev tap,id=bn0
 -object filter-buffer,id=f0,netdev=bn0,chain=in,interval=1000

NOTE:
 the unit of interval is microseconds.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
v10: use NetQueue flush api to flush packets
     sent_cb can not be called when we already return size
v9: adjustment due to the qapi change
v7: use QTAILQ_FOREACH_SAFE() when flush packets
v6: move the interval check earlier and some comment adjust
v5: remove dummy sent_cb
    change interval type from int64 to uint32
    check interval!=0 when initialise
    rename FILTERBUFFERState to FilterBufferState
v4: remove bh
    pass the packet to next filter instead of receiver
v3: check packet's sender and sender->peer when flush it
---
 net/Makefile.objs   |   1 +
 net/filter-buffer.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-options.hx     |  18 ++++++
 vl.c                |   7 ++-
 4 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 net/filter-buffer.c

Comments

Daniel P. Berrangé Sept. 14, 2015, 9:04 a.m. UTC | #1
On Wed, Sep 09, 2015 at 03:24:40PM +0800, Yang Hongyang wrote:
> This filter is to buffer/release packets, this feature can be used
> when using MicroCheckpointing, or other Remus like VM FT solutions, you
> can also use it to simulate the network delay.
> It has an interval option, if supplied, this filter will release
> packets by interval.
> 
> Usage:
>  -netdev tap,id=bn0
>  -object filter-buffer,id=f0,netdev=bn0,chain=in,interval=1000
> 
> NOTE:
>  the scale of interval is microsecond.
> 
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>

> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
> new file mode 100644
> index 0000000..26698d9
> --- /dev/null
> +++ b/net/filter-buffer.c
> @@ -0,0 +1,169 @@
> +/*
> + * Copyright (c) 2015 FUJITSU LIMITED
> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "net/filter.h"
> +#include "net/queue.h"
> +#include "qemu-common.h"
> +#include "qemu/timer.h"
> +#include "qemu/iov.h"
> +#include "qapi/qmp/qerror.h"
> +#include "qapi-visit.h"
> +#include "qom/object.h"
> +
> +#define TYPE_FILTER_BUFFER "filter-buffer"
> +
> +#define FILTER_BUFFER(obj) \
> +    OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
> +
> +typedef struct FilterBufferState {
> +    NetFilterState parent_obj;
> +
> +    NetQueue *incoming_queue;
> +    uint32_t interval;
> +    QEMUTimer release_timer;
> +} FilterBufferState;
> +
> +static void filter_buffer_flush(NetFilterState *nf)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    if (!qemu_net_queue_flush(s->incoming_queue)) {
> +        /* Unable to empty the queue, purge remaining packets */
> +        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
> +    }
> +}
> +
> +static void filter_buffer_release_timer(void *opaque)
> +{
> +    NetFilterState *nf = opaque;
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +    filter_buffer_flush(nf);
> +    timer_mod(&s->release_timer,
> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
> +}
> +
> +/* filter APIs */
> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
> +                                         NetClientState *sender,
> +                                         unsigned flags,
> +                                         const struct iovec *iov,
> +                                         int iovcnt,
> +                                         NetPacketSent *sent_cb)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    /*
> +     * we return size when buffer a packet, the sender will take it as
> +     * a already sent packet, so sent_cb should not be called later
> +     */
> +    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
> +                              iov, iovcnt, NULL);
> +    return iov_size(iov, iovcnt);
> +}
> +
> +static void filter_buffer_cleanup(NetFilterState *nf)

Same comment about s/cleanup/finalize/

> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    if (s->interval) {
> +        timer_del(&s->release_timer);
> +    }
> +
> +    /* flush packets */
> +    if (s->incoming_queue) {
> +        filter_buffer_flush(nf);
> +        g_free(s->incoming_queue);
> +    }
> +}

> diff --git a/vl.c b/vl.c
> index 672f8b2..30196e4 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2783,7 +2783,12 @@ static bool object_create_initial(const char *type)
>      if (g_str_equal(type, "rng-egd")) {
>          return false;
>      }
> -    /* TODO: reture false for concrete netfilters */

Oh, I missed the typo in your earlier patch - s/reture/return/

> +
> +    /* reture false for concrete netfilters */

And again in the line you changed here.

> +    if (g_str_equal(type, "filter-buffer")) {
> +        return false;
> +    }
> +


Regards,
Daniel
Yang Hongyang Sept. 14, 2015, 9:53 a.m. UTC | #2
On 09/14/2015 05:04 PM, Daniel P. Berrange wrote:
> On Wed, Sep 09, 2015 at 03:24:40PM +0800, Yang Hongyang wrote:
>> This filter is to buffer/release packets, this feature can be used
>> when using MicroCheckpointing, or other Remus like VM FT solutions, you
>> can also use it to simulate the network delay.
>> It has an interval option, if supplied, this filter will release
>> packets by interval.
>>
>> Usage:
>>   -netdev tap,id=bn0
>>   -object filter-buffer,id=f0,netdev=bn0,chain=in,interval=1000
>>
>> NOTE:
>>   the scale of interval is microsecond.
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>
>> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
>> new file mode 100644
>> index 0000000..26698d9
>> --- /dev/null
>> +++ b/net/filter-buffer.c
>> @@ -0,0 +1,169 @@
>> +/*
>> + * Copyright (c) 2015 FUJITSU LIMITED
>> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "net/filter.h"
>> +#include "net/queue.h"
>> +#include "qemu-common.h"
>> +#include "qemu/timer.h"
>> +#include "qemu/iov.h"
>> +#include "qapi/qmp/qerror.h"
>> +#include "qapi-visit.h"
>> +#include "qom/object.h"
>> +
>> +#define TYPE_FILTER_BUFFER "filter-buffer"
>> +
>> +#define FILTER_BUFFER(obj) \
>> +    OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
>> +
>> +typedef struct FilterBufferState {
>> +    NetFilterState parent_obj;
>> +
>> +    NetQueue *incoming_queue;
>> +    uint32_t interval;
>> +    QEMUTimer release_timer;
>> +} FilterBufferState;
>> +
>> +static void filter_buffer_flush(NetFilterState *nf)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    if (!qemu_net_queue_flush(s->incoming_queue)) {
>> +        /* Unable to empty the queue, purge remaining packets */
>> +        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
>> +    }
>> +}
>> +
>> +static void filter_buffer_release_timer(void *opaque)
>> +{
>> +    NetFilterState *nf = opaque;
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +    filter_buffer_flush(nf);
>> +    timer_mod(&s->release_timer,
>> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>> +}
>> +
>> +/* filter APIs */
>> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
>> +                                         NetClientState *sender,
>> +                                         unsigned flags,
>> +                                         const struct iovec *iov,
>> +                                         int iovcnt,
>> +                                         NetPacketSent *sent_cb)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    /*
>> +     * we return size when buffer a packet, the sender will take it as
>> +     * a already sent packet, so sent_cb should not be called later
>> +     */
>> +    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
>> +                              iov, iovcnt, NULL);
>> +    return iov_size(iov, iovcnt);
>> +}
>> +
>> +static void filter_buffer_cleanup(NetFilterState *nf)
>
> Same comment about s/cleanup/finalize/

This is not the .instance_finalize function. Although it is called
from netfilter_finalize, it is the cleanup virtual method of the
filter class:
include/net/filter.h
  39 typedef struct NetFilterClass {
  40     ObjectClass parent_class;
  41
  42     FilterSetup *setup;
  43     FilterCleanup *cleanup;
  44     FilterReceiveIOV *receive_iov;
  45 } NetFilterClass;

>
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    if (s->interval) {
>> +        timer_del(&s->release_timer);
>> +    }
>> +
>> +    /* flush packets */
>> +    if (s->incoming_queue) {
>> +        filter_buffer_flush(nf);
>> +        g_free(s->incoming_queue);
>> +    }
>> +}
>
>> diff --git a/vl.c b/vl.c
>> index 672f8b2..30196e4 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -2783,7 +2783,12 @@ static bool object_create_initial(const char *type)
>>       if (g_str_equal(type, "rng-egd")) {
>>           return false;
>>       }
>> -    /* TODO: reture false for concrete netfilters */
>
> Oh, I missed the typo in your earlier patch - s/reture/return/
>
>> +
>> +    /* reture false for concrete netfilters */
>
> And again in the line you changed here.

All fixed as well as your comments in patch 2, thank you.

>
>> +    if (g_str_equal(type, "filter-buffer")) {
>> +        return false;
>> +    }
>> +
>
>
> Regards,
> Daniel
>
Jason Wang Sept. 16, 2015, 9:42 a.m. UTC | #3
On 09/09/2015 03:24 PM, Yang Hongyang wrote:
> This filter is to buffer/release packets, this feature can be used
> when using MicroCheckpointing, or other Remus like VM FT solutions, you
> can also use it to simulate the network delay.
> It has an interval option, if supplied, this filter will release
> packets by interval.
>
> Usage:
>  -netdev tap,id=bn0
>  -object filter-buffer,id=f0,netdev=bn0,chain=in,interval=1000
>
> NOTE:
>  the scale of interval is microsecond.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
> v10: use NetQueue flush api to flush packets
>      sent_cb can not be called when we already return size
> v9: adjustment due to the qapi change
> v7: use QTAILQ_FOREACH_SAFE() when flush packets
> v6: move the interval check earlier and some comment adjust
> v5: remove dummy sent_cb
>     change interval type from int64 to uint32
>     check interval!=0 when initialise
>     rename FILTERBUFFERState to FilterBufferState
> v4: remove bh
>     pass the packet to next filter instead of receiver
> v3: check packet's sender and sender->peer when flush it
> ---
>  net/Makefile.objs   |   1 +
>  net/filter-buffer.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  qemu-options.hx     |  18 ++++++
>  vl.c                |   7 ++-
>  4 files changed, 194 insertions(+), 1 deletion(-)
>  create mode 100644 net/filter-buffer.c
>
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index 914aec0..5fa2f97 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o
>  common-obj-$(CONFIG_VDE) += vde.o
>  common-obj-$(CONFIG_NETMAP) += netmap.o
>  common-obj-y += filter.o
> +common-obj-y += filter-buffer.o
> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
> new file mode 100644
> index 0000000..26698d9
> --- /dev/null
> +++ b/net/filter-buffer.c
> @@ -0,0 +1,169 @@
> +/*
> + * Copyright (c) 2015 FUJITSU LIMITED
> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "net/filter.h"
> +#include "net/queue.h"
> +#include "qemu-common.h"
> +#include "qemu/timer.h"
> +#include "qemu/iov.h"
> +#include "qapi/qmp/qerror.h"
> +#include "qapi-visit.h"
> +#include "qom/object.h"
> +
> +#define TYPE_FILTER_BUFFER "filter-buffer"
> +
> +#define FILTER_BUFFER(obj) \
> +    OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
> +
> +typedef struct FilterBufferState {
> +    NetFilterState parent_obj;
> +
> +    NetQueue *incoming_queue;
> +    uint32_t interval;
> +    QEMUTimer release_timer;
> +} FilterBufferState;
> +
> +static void filter_buffer_flush(NetFilterState *nf)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    if (!qemu_net_queue_flush(s->incoming_queue)) {
> +        /* Unable to empty the queue, purge remaining packets */
> +        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
> +    }
> +}
> +
> +static void filter_buffer_release_timer(void *opaque)
> +{
> +    NetFilterState *nf = opaque;
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +    filter_buffer_flush(nf);
> +    timer_mod(&s->release_timer,
> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
> +}
> +
> +/* filter APIs */
> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
> +                                         NetClientState *sender,
> +                                         unsigned flags,
> +                                         const struct iovec *iov,
> +                                         int iovcnt,
> +                                         NetPacketSent *sent_cb)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    /*
> +     * we return size when buffer a packet, the sender will take it as
> +     * a already sent packet, so sent_cb should not be called later
> +     */
> +    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
> +                              iov, iovcnt, NULL);
> +    return iov_size(iov, iovcnt);

A small issue here: even if the guest can't receive packets for some
reason, the filter will still accept packets until its internal queue
is full. Consider solving this in the future.

> +}
> +
> +static void filter_buffer_cleanup(NetFilterState *nf)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    if (s->interval) {
> +        timer_del(&s->release_timer);
> +    }
> +
> +    /* flush packets */
> +    if (s->incoming_queue) {
> +        filter_buffer_flush(nf);
> +        g_free(s->incoming_queue);
> +    }
> +}
> +
> +static void filter_buffer_setup(NetFilterState *nf, Error **errp)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(nf);
> +
> +    /*
> +     * this check should be dropped when there're VM FT solutions like MC
> +     * or COLO use this filter to release packets on demand.
> +     */
> +    if (!s->interval) {
> +        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval",
> +                   "a non-zero interval");
> +        return;
> +    }
> +
> +    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
> +    if (s->interval) {
> +        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
> +                      filter_buffer_release_timer, nf);
> +        timer_mod(&s->release_timer,
> +                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
> +        snprintf(nf->info_str, sizeof(nf->info_str),
> +                 "interval=%d", s->interval);
> +    }
> +}
> +
> +static void filter_buffer_class_init(ObjectClass *oc, void *data)
> +{
> +    NetFilterClass *nfc = NETFILTER_CLASS(oc);
> +
> +    nfc->setup = filter_buffer_setup;
> +    nfc->cleanup = filter_buffer_cleanup;
> +    nfc->receive_iov = filter_buffer_receive_iov;
> +}
> +
> +static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque,
> +                                       const char *name, Error **errp)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(obj);
> +    uint32_t value = s->interval;
> +
> +    visit_type_uint32(v, &value, name, errp);
> +}
> +
> +static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque,
> +                                       const char *name, Error **errp)
> +{
> +    FilterBufferState *s = FILTER_BUFFER(obj);
> +    Error *local_err = NULL;
> +    uint32_t value;
> +
> +    visit_type_uint32(v, &value, name, &local_err);
> +    if (local_err) {
> +        goto out;
> +    }
> +    if (!value) {
> +        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
> +                   PRIu32 "'", object_get_typename(obj), name, value);
> +        goto out;
> +    }
> +    s->interval = value;
> +
> +out:
> +    error_propagate(errp, local_err);
> +}
> +
> +static void filter_buffer_init(Object *obj)
> +{
> +    object_property_add(obj, "interval", "int",
> +                        filter_buffer_get_interval,
> +                        filter_buffer_set_interval, NULL, NULL, NULL);
> +}
> +
> +static const TypeInfo filter_buffer_info = {
> +    .name = TYPE_FILTER_BUFFER,
> +    .parent = TYPE_NETFILTER,
> +    .class_init = filter_buffer_class_init,
> +    .instance_init = filter_buffer_init,
> +    .instance_size = sizeof(FilterBufferState),
> +};
> +
> +static void register_types(void)
> +{
> +    type_register_static(&filter_buffer_info);
> +}
> +
> +type_init(register_types);
> diff --git a/qemu-options.hx b/qemu-options.hx
> index efce775..1dc2680 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -3568,6 +3568,24 @@ the @option{virtio-rng} device. The @option{chardev} parameter is
>  the unique ID of a character device backend that provides the connection
>  to the RNG daemon.
>  
> +@item -object filter-buffer,id=@var{id},netdev=@var{netdevid}[,chain=@var{all|in|out}][,interval=@var{t}]
> +
> +Buffer network packets on netdev @var{netdevid}.
> +If interval @var{t} provided, will release packets by interval.
> +Interval scale: microsecond.
> +
> +If interval @var{t} not provided, you have to make sure the packets can be
> +released, either by manually remove this filter or call the release buffer API,
> +otherwise, the packets will be buffered forever. Use with caution.
> +
> +chain @var{all|in|out} is an option that can be applied to any netfilter, default is @option{all}.
> +
> +@option{all} means this filter will receive packets both sent to/from the netdev
> +
> +@option{in} means this filter will receive packets sent to the netdev
> +
> +@option{out} means this filter will receive packets sent from the netdev
> +
>  @end table
>  
>  ETEXI
> diff --git a/vl.c b/vl.c
> index 672f8b2..30196e4 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2783,7 +2783,12 @@ static bool object_create_initial(const char *type)
>      if (g_str_equal(type, "rng-egd")) {
>          return false;
>      }
> -    /* TODO: reture false for concrete netfilters */
> +
> +    /* reture false for concrete netfilters */
> +    if (g_str_equal(type, "filter-buffer")) {
> +        return false;
> +    }
> +
>      return true;
>  }
>
Yang Hongyang Sept. 16, 2015, 11:19 a.m. UTC | #4
On 09/16/2015 05:42 PM, Jason Wang wrote:
>
>
> On 09/09/2015 03:24 PM, Yang Hongyang wrote:
>> This filter is to buffer/release packets, this feature can be used
>> when using MicroCheckpointing, or other Remus like VM FT solutions, you
>> can also use it to simulate the network delay.
>> It has an interval option, if supplied, this filter will release
>> packets by interval.
>>
>> Usage:
>>   -netdev tap,id=bn0
>>   -object filter-buffer,id=f0,netdev=bn0,chain=in,interval=1000
>>
>> NOTE:
>>   the scale of interval is microsecond.
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>> ---
>> v10: use NetQueue flush api to flush packets
>>       sent_cb can not be called when we already return size
>> v9: adjustment due to the qapi change
>> v7: use QTAILQ_FOREACH_SAFE() when flush packets
>> v6: move the interval check earlier and some comment adjust
>> v5: remove dummy sent_cb
>>      change interval type from int64 to uint32
>>      check interval!=0 when initialise
>>      rename FILTERBUFFERState to FilterBufferState
>> v4: remove bh
>>      pass the packet to next filter instead of receiver
>> v3: check packet's sender and sender->peer when flush it
>> ---
>>   net/Makefile.objs   |   1 +
>>   net/filter-buffer.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   qemu-options.hx     |  18 ++++++
>>   vl.c                |   7 ++-
>>   4 files changed, 194 insertions(+), 1 deletion(-)
>>   create mode 100644 net/filter-buffer.c
>>
>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>> index 914aec0..5fa2f97 100644
>> --- a/net/Makefile.objs
>> +++ b/net/Makefile.objs
>> @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o
>>   common-obj-$(CONFIG_VDE) += vde.o
>>   common-obj-$(CONFIG_NETMAP) += netmap.o
>>   common-obj-y += filter.o
>> +common-obj-y += filter-buffer.o
>> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
>> new file mode 100644
>> index 0000000..26698d9
>> --- /dev/null
>> +++ b/net/filter-buffer.c
>> @@ -0,0 +1,169 @@
>> +/*
>> + * Copyright (c) 2015 FUJITSU LIMITED
>> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "net/filter.h"
>> +#include "net/queue.h"
>> +#include "qemu-common.h"
>> +#include "qemu/timer.h"
>> +#include "qemu/iov.h"
>> +#include "qapi/qmp/qerror.h"
>> +#include "qapi-visit.h"
>> +#include "qom/object.h"
>> +
>> +#define TYPE_FILTER_BUFFER "filter-buffer"
>> +
>> +#define FILTER_BUFFER(obj) \
>> +    OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
>> +
>> +typedef struct FilterBufferState {
>> +    NetFilterState parent_obj;
>> +
>> +    NetQueue *incoming_queue;
>> +    uint32_t interval;
>> +    QEMUTimer release_timer;
>> +} FilterBufferState;
>> +
>> +static void filter_buffer_flush(NetFilterState *nf)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    if (!qemu_net_queue_flush(s->incoming_queue)) {
>> +        /* Unable to empty the queue, purge remaining packets */
>> +        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
>> +    }
>> +}
>> +
>> +static void filter_buffer_release_timer(void *opaque)
>> +{
>> +    NetFilterState *nf = opaque;
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +    filter_buffer_flush(nf);
>> +    timer_mod(&s->release_timer,
>> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>> +}
>> +
>> +/* filter APIs */
>> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
>> +                                         NetClientState *sender,
>> +                                         unsigned flags,
>> +                                         const struct iovec *iov,
>> +                                         int iovcnt,
>> +                                         NetPacketSent *sent_cb)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    /*
>> +     * we return size when buffer a packet, the sender will take it as
>> +     * a already sent packet, so sent_cb should not be called later
>> +     */
>> +    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
>> +                              iov, iovcnt, NULL);
>> +    return iov_size(iov, iovcnt);
>
> Then a small issue here is, even if guest can't receive packet for some
> reasons. Filter can still accept packet until its internal queue is
> full. May consider to solve this in the future.

Sure, will add your comment above into the code comment as a FIXME, thank you!

>
>> +}
>> +
>> +static void filter_buffer_cleanup(NetFilterState *nf)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    if (s->interval) {
>> +        timer_del(&s->release_timer);
>> +    }
>> +
>> +    /* flush packets */
>> +    if (s->incoming_queue) {
>> +        filter_buffer_flush(nf);
>> +        g_free(s->incoming_queue);
>> +    }
>> +}
>> +
>> +static void filter_buffer_setup(NetFilterState *nf, Error **errp)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(nf);
>> +
>> +    /*
>> +     * this check should be dropped when there're VM FT solutions like MC
>> +     * or COLO use this filter to release packets on demand.
>> +     */
>> +    if (!s->interval) {
>> +        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval",
>> +                   "a non-zero interval");
>> +        return;
>> +    }
>> +
>> +    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
>> +    if (s->interval) {
>> +        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
>> +                      filter_buffer_release_timer, nf);
>> +        timer_mod(&s->release_timer,
>> +                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>> +        snprintf(nf->info_str, sizeof(nf->info_str),
>> +                 "interval=%d", s->interval);
>> +    }
>> +}
>> +
>> +static void filter_buffer_class_init(ObjectClass *oc, void *data)
>> +{
>> +    NetFilterClass *nfc = NETFILTER_CLASS(oc);
>> +
>> +    nfc->setup = filter_buffer_setup;
>> +    nfc->cleanup = filter_buffer_cleanup;
>> +    nfc->receive_iov = filter_buffer_receive_iov;
>> +}
>> +
>> +static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque,
>> +                                       const char *name, Error **errp)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(obj);
>> +    uint32_t value = s->interval;
>> +
>> +    visit_type_uint32(v, &value, name, errp);
>> +}
>> +
>> +static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque,
>> +                                       const char *name, Error **errp)
>> +{
>> +    FilterBufferState *s = FILTER_BUFFER(obj);
>> +    Error *local_err = NULL;
>> +    uint32_t value;
>> +
>> +    visit_type_uint32(v, &value, name, &local_err);
>> +    if (local_err) {
>> +        goto out;
>> +    }
>> +    if (!value) {
>> +        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
>> +                   PRIu32 "'", object_get_typename(obj), name, value);
>> +        goto out;
>> +    }
>> +    s->interval = value;
>> +
>> +out:
>> +    error_propagate(errp, local_err);
>> +}
>> +
>> +static void filter_buffer_init(Object *obj)
>> +{
>> +    object_property_add(obj, "interval", "int",
>> +                        filter_buffer_get_interval,
>> +                        filter_buffer_set_interval, NULL, NULL, NULL);
>> +}
>> +
>> +static const TypeInfo filter_buffer_info = {
>> +    .name = TYPE_FILTER_BUFFER,
>> +    .parent = TYPE_NETFILTER,
>> +    .class_init = filter_buffer_class_init,
>> +    .instance_init = filter_buffer_init,
>> +    .instance_size = sizeof(FilterBufferState),
>> +};
>> +
>> +static void register_types(void)
>> +{
>> +    type_register_static(&filter_buffer_info);
>> +}
>> +
>> +type_init(register_types);
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index efce775..1dc2680 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -3568,6 +3568,24 @@ the @option{virtio-rng} device. The @option{chardev} parameter is
>>   the unique ID of a character device backend that provides the connection
>>   to the RNG daemon.
>>
>> +@item -object filter-buffer,id=@var{id},netdev=@var{netdevid}[,chain=@var{all|in|out}][,interval=@var{t}]
>> +
>> +Buffer network packets on netdev @var{netdevid}.
>> +If interval @var{t} provided, will release packets by interval.
>> +Interval scale: microsecond.
>> +
>> +If interval @var{t} not provided, you have to make sure the packets can be
>> +released, either by manually remove this filter or call the release buffer API,
>> +otherwise, the packets will be buffered forever. Use with caution.
>> +
>> +chain @var{all|in|out} is an option that can be applied to any netfilter, default is @option{all}.
>> +
>> +@option{all} means this filter will receive packets both sent to/from the netdev
>> +
>> +@option{in} means this filter will receive packets sent to the netdev
>> +
>> +@option{out} means this filter will receive packets sent from the netdev
>> +
>>   @end table
>>
>>   ETEXI
>> diff --git a/vl.c b/vl.c
>> index 672f8b2..30196e4 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -2783,7 +2783,12 @@ static bool object_create_initial(const char *type)
>>       if (g_str_equal(type, "rng-egd")) {
>>           return false;
>>       }
>> -    /* TODO: reture false for concrete netfilters */
>> +
>> +    /* reture false for concrete netfilters */
>> +    if (g_str_equal(type, "filter-buffer")) {
>> +        return false;
>> +    }
>> +
>>       return true;
>>   }
>>
>
> .
>
diff mbox

Patch

diff --git a/net/Makefile.objs b/net/Makefile.objs
index 914aec0..5fa2f97 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -14,3 +14,4 @@  common-obj-$(CONFIG_SLIRP) += slirp.o
 common-obj-$(CONFIG_VDE) += vde.o
 common-obj-$(CONFIG_NETMAP) += netmap.o
 common-obj-y += filter.o
+common-obj-y += filter-buffer.o
diff --git a/net/filter-buffer.c b/net/filter-buffer.c
new file mode 100644
index 0000000..26698d9
--- /dev/null
+++ b/net/filter-buffer.c
@@ -0,0 +1,169 @@ 
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "net/filter.h"
+#include "net/queue.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/iov.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi-visit.h"
+#include "qom/object.h"
+
+#define TYPE_FILTER_BUFFER "filter-buffer"
+
+#define FILTER_BUFFER(obj) \
+    OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
+
+typedef struct FilterBufferState {
+    NetFilterState parent_obj;
+
+    NetQueue *incoming_queue;
+    uint32_t interval;
+    QEMUTimer release_timer;
+} FilterBufferState;
+
+static void filter_buffer_flush(NetFilterState *nf)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    if (!qemu_net_queue_flush(s->incoming_queue)) {
+        /* Unable to empty the queue, purge remaining packets */
+        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
+    }
+}
+
+static void filter_buffer_release_timer(void *opaque)
+{
+    NetFilterState *nf = opaque;
+    FilterBufferState *s = FILTER_BUFFER(nf);
+    filter_buffer_flush(nf);
+    timer_mod(&s->release_timer,
+              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+}
+
+/* filter APIs */
+static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
+                                         NetClientState *sender,
+                                         unsigned flags,
+                                         const struct iovec *iov,
+                                         int iovcnt,
+                                         NetPacketSent *sent_cb)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    /*
+     * we return size when buffer a packet, the sender will take it as
+     * a already sent packet, so sent_cb should not be called later
+     */
+    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
+                              iov, iovcnt, NULL);
+    return iov_size(iov, iovcnt);
+}
+
+static void filter_buffer_cleanup(NetFilterState *nf)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    if (s->interval) {
+        timer_del(&s->release_timer);
+    }
+
+    /* flush packets */
+    if (s->incoming_queue) {
+        filter_buffer_flush(nf);
+        g_free(s->incoming_queue);
+    }
+}
+
+static void filter_buffer_setup(NetFilterState *nf, Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    /*
+     * this check should be dropped when there're VM FT solutions like MC
+     * or COLO use this filter to release packets on demand.
+     */
+    if (!s->interval) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval",
+                   "a non-zero interval");
+        return;
+    }
+
+    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+    if (s->interval) {
+        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
+                      filter_buffer_release_timer, nf);
+        timer_mod(&s->release_timer,
+                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+        snprintf(nf->info_str, sizeof(nf->info_str),
+                 "interval=%d", s->interval);
+    }
+}
+
+static void filter_buffer_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_buffer_setup;
+    nfc->cleanup = filter_buffer_cleanup;
+    nfc->receive_iov = filter_buffer_receive_iov;
+}
+
+static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque,
+                                       const char *name, Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(obj);
+    uint32_t value = s->interval;
+
+    visit_type_uint32(v, &value, name, errp);
+}
+
+static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque,
+                                       const char *name, Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(obj);
+    Error *local_err = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, &value, name, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    if (!value) {
+        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
+                   PRIu32 "'", object_get_typename(obj), name, value);
+        goto out;
+    }
+    s->interval = value;
+
+out:
+    error_propagate(errp, local_err);
+}
+
+static void filter_buffer_init(Object *obj)
+{
+    object_property_add(obj, "interval", "int",
+                        filter_buffer_get_interval,
+                        filter_buffer_set_interval, NULL, NULL, NULL);
+}
+
+static const TypeInfo filter_buffer_info = {
+    .name = TYPE_FILTER_BUFFER,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_buffer_class_init,
+    .instance_init = filter_buffer_init,
+    .instance_size = sizeof(FilterBufferState),
+};
+
+static void register_types(void)
+{
+    type_register_static(&filter_buffer_info);
+}
+
+type_init(register_types);
diff --git a/qemu-options.hx b/qemu-options.hx
index efce775..1dc2680 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3568,6 +3568,24 @@  the @option{virtio-rng} device. The @option{chardev} parameter is
 the unique ID of a character device backend that provides the connection
 to the RNG daemon.
 
+@item -object filter-buffer,id=@var{id},netdev=@var{netdevid}[,chain=@var{all|in|out}][,interval=@var{t}]
+
+Buffer network packets on netdev @var{netdevid}.
+If interval @var{t} is provided, the buffered packets are released every
+@var{t} microseconds.
+
+If interval @var{t} is not provided, you have to make sure the packets can be
+released, either by manually removing this filter or by calling the release buffer
+API; otherwise, the packets will be buffered forever. Use with caution.
+
+chain @var{all|in|out} is an option that can be applied to any netfilter, default is @option{all}.
+
+@option{all} means this filter will receive packets both sent to/from the netdev
+
+@option{in} means this filter will receive packets sent to the netdev
+
+@option{out} means this filter will receive packets sent from the netdev
+
 @end table
 
 ETEXI
diff --git a/vl.c b/vl.c
index 672f8b2..30196e4 100644
--- a/vl.c
+++ b/vl.c
@@ -2783,7 +2783,12 @@  static bool object_create_initial(const char *type)
     if (g_str_equal(type, "rng-egd")) {
         return false;
     }
-    /* TODO: reture false for concrete netfilters */
+
+    /* return false for concrete netfilters */
+    if (g_str_equal(type, "filter-buffer")) {
+        return false;
+    }
+
     return true;
 }