diff mbox

[v4,09/11] netfilter: add a netbuffer filter

Message ID 1438677044-13030-10-git-send-email-yanghy@cn.fujitsu.com
State New
Headers show

Commit Message

Yang Hongyang Aug. 4, 2015, 8:30 a.m. UTC
This filter is to buffer/release packets, this feature can be used
when using MicroCheckpointing, or other Remus like VM FT solutions, you
can also use it to simulate the network delay.
It has an interval option, if supplied, this filter will release
packets by interval.

Usage:
 -netdev tap,id=bn0
 -netfilter buffer,id=f0,netdev=bn0,chain=in,interval=1000

NOTE:
 the scale of interval is microsecond.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
v4: remove bh
    pass the packet to next filter instead of receiver
v3: check packet's sender and sender->peer when flush it
---
 net/Makefile.objs   |   1 +
 net/filter-buffer.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/filter.c        |   2 +
 net/filters.h       |  17 ++++++++
 qapi-schema.json    |  18 +++++++-
 5 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100644 net/filter-buffer.c
 create mode 100644 net/filters.h

Comments

Jason Wang Aug. 6, 2015, 7:21 a.m. UTC | #1
On 08/04/2015 04:30 PM, Yang Hongyang wrote:
> This filter is to buffer/release packets, this feature can be used
> when using MicroCheckpointing, or other Remus like VM FT solutions, you
> can also use it to simulate the network delay.
> It has an interval option, if supplied, this filter will release
> packets by interval.
>
> Usage:
>  -netdev tap,id=bn0
>  -netfilter buffer,id=f0,netdev=bn0,chain=in,interval=1000
>
> NOTE:
>  the scale of interval is microsecond.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
> v4: remove bh
>     pass the packet to next filter instead of receiver
> v3: check packet's sender and sender->peer when flush it
> ---
>  net/Makefile.objs   |   1 +
>  net/filter-buffer.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  net/filter.c        |   2 +
>  net/filters.h       |  17 ++++++++
>  qapi-schema.json    |  18 +++++++-
>  5 files changed, 157 insertions(+), 1 deletion(-)
>  create mode 100644 net/filter-buffer.c
>  create mode 100644 net/filters.h
>
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index 914aec0..5fa2f97 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o
>  common-obj-$(CONFIG_VDE) += vde.o
>  common-obj-$(CONFIG_NETMAP) += netmap.o
>  common-obj-y += filter.o
> +common-obj-y += filter-buffer.o
> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
> new file mode 100644
> index 0000000..fd467db
> --- /dev/null
> +++ b/net/filter-buffer.c
> @@ -0,0 +1,120 @@
> +/*
> + * Copyright (c) 2015 FUJITSU LIMITED
> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "net/filter.h"
> +#include "net/queue.h"
> +#include "filters.h"
> +#include "qemu-common.h"
> +#include "qemu/timer.h"
> +#include "qemu/iov.h"
> +
> +typedef struct FILTERBUFFERState {
> +    NetFilterState nf;
> +    NetQueue *incoming_queue;
> +    int64_t interval;

Can interval be negative? If not please uint. And not sure you really
need a 64 bit integer, if not, please use uint32_t.

> +    QEMUTimer release_timer;
> +} FILTERBUFFERState;

Filter buffer is not abbreviation. So better name it as FilterBufferState.

> +
> +static void packet_send_completed(NetClientState *nc, ssize_t len)
> +{
> +    return;
> +}

Why need a dummy sent cb?

> +
> +static void filter_buffer_flush(NetFilterState *nf)
> +{
> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
> +    NetQueue *queue = s->incoming_queue;
> +    NetPacket *packet;
> +
> +    while (queue && !QTAILQ_EMPTY(&queue->packets)) {
> +        packet = QTAILQ_FIRST(&queue->packets);
> +        QTAILQ_REMOVE(&queue->packets, packet, entry);
> +        queue->nq_count--;
> +
> +        if (packet->sender && packet->sender->peer) {
> +            qemu_netfilter_pass_to_next(nf, packet->sender, packet->flags,
> +                                        packet->data, packet->size);
> +        }
> +
> +        /*
> +         * now that we pass the packet to next filter, we don't care the
> +         * reture value here, because the filter layer or other filter
> +         * will take care of this packet
> +         */
> +        g_free(packet);

This seems wrong, since packet could be queued into incoming queue.
Doing this may cause use after free.

> +    }
> +}
> +
> +static void filter_buffer_release_timer(void *opaque)
> +{
> +    FILTERBUFFERState *s = opaque;
> +    filter_buffer_flush(&s->nf);
> +    timer_mod(&s->release_timer,
> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
> +}
> +
> +/* filter APIs */
> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
> +                                         NetClientState *sender,
> +                                         unsigned flags,
> +                                         const struct iovec *iov,
> +                                         int iovcnt)
> +{
> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
> +    NetQueue *queue = s->incoming_queue;
> +
> +    qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt,
> +                              packet_send_completed);
> +    return iov_size(iov, iovcnt);

So if interval is zero, packet will be blocked forever and memory will
be exhausted.

> +}
> +
> +static void filter_buffer_cleanup(NetFilterState *nf)
> +{
> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
> +
> +    if (s->interval) {
> +        timer_del(&s->release_timer);
> +    }
> +
> +    /* flush packets */
> +    filter_buffer_flush(nf);
> +    g_free(s->incoming_queue);
> +    return;
> +}
> +
> +static NetFilterInfo net_filter_buffer_info = {
> +    .type = NET_FILTER_OPTIONS_KIND_BUFFER,
> +    .size = sizeof(FILTERBUFFERState),
> +    .receive_iov = filter_buffer_receive_iov,
> +    .cleanup = filter_buffer_cleanup,
> +};
> +
> +int net_init_filter_buffer(const NetFilterOptions *opts, const char *name,
> +                           int chain, NetClientState *netdev, Error **errp)
> +{
> +    NetFilterState *nf;
> +    FILTERBUFFERState *s;
> +    const NetFilterBufferOptions *bufferopt;
> +
> +    assert(opts->kind == NET_FILTER_OPTIONS_KIND_BUFFER);
> +    bufferopt = opts->buffer;
> +
> +    nf = qemu_new_net_filter(&net_filter_buffer_info,
> +                             netdev, "buffer", name, chain);
> +    s = DO_UPCAST(FILTERBUFFERState, nf, nf);
> +    s->incoming_queue = qemu_new_net_queue(nf);
> +    s->interval = bufferopt->has_interval ? bufferopt->interval : 0;
> +    if (s->interval) {
> +        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
> +                      filter_buffer_release_timer, s);
> +        timer_mod(&s->release_timer,
> +                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
> +    }
> +
> +    return 0;
> +}
> diff --git a/net/filter.c b/net/filter.c
> index 166b851..02f3e76 100644
> --- a/net/filter.c
> +++ b/net/filter.c
> @@ -19,6 +19,7 @@
>  #include "net/filter.h"
>  #include "net/net.h"
>  #include "net/queue.h"
> +#include "filters.h"
>  
>  static QTAILQ_HEAD(, NetFilterState) net_filters;
>  
> @@ -179,6 +180,7 @@ typedef int (NetFilterInit)(const NetFilterOptions *opts,
>  
>  static
>  NetFilterInit * const net_filter_init_fun[NET_FILTER_OPTIONS_KIND_MAX] = {
> +    [NET_FILTER_OPTIONS_KIND_BUFFER] = net_init_filter_buffer,
>  };
>  
>  static int net_filter_init1(const NetFilter *netfilter, Error **errp)
> diff --git a/net/filters.h b/net/filters.h
> new file mode 100644
> index 0000000..3b546db
> --- /dev/null
> +++ b/net/filters.h
> @@ -0,0 +1,17 @@
> +/*
> + * Copyright (c) 2015 FUJITSU LIMITED
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_NET_FILTERS_H
> +#define QEMU_NET_FILTERS_H
> +
> +#include "net/net.h"
> +#include "net/filter.h"
> +
> +int net_init_filter_buffer(const NetFilterOptions *opts, const char *name,
> +                           int chain, NetClientState *netdev, Error **errp);
> +
> +#endif /* QEMU_NET_FILTERS_H */
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 9d97c21..e51bb59 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2584,6 +2584,21 @@
>  { 'command': 'netfilter_del', 'data': {'id': 'str'} }
>  
>  ##
> +# @NetFilterBufferOptions
> +#
> +# a netbuffer filter for network backend.
> +#
> +# @interval: #optional release packets by interval, if no interval supplied,
> +#            will release packets when filter_buffer_release_all been called.
> +#            scale: microsecond
> +#
> +# Since 2.5
> +##
> +{ 'struct': 'NetFilterBufferOptions',
> +  'data': {
> +    '*interval':     'int64' } }
> +
> +##
>  # @NetFilterOptions
>  #
>  # A discriminated record of network filters.
> @@ -2592,7 +2607,8 @@
>  #
>  ##
>  { 'union': 'NetFilterOptions',
> -  'data': { } }
> +  'data': {
> +    'buffer':     'NetFilterBufferOptions'} }
>  
>  ##
>  # @NetFilter
Yang Hongyang Aug. 6, 2015, 8:19 a.m. UTC | #2
On 08/06/2015 03:21 PM, Jason Wang wrote:
>
>
> On 08/04/2015 04:30 PM, Yang Hongyang wrote:
>> This filter is to buffer/release packets, this feature can be used
>> when using MicroCheckpointing, or other Remus like VM FT solutions, you
>> can also use it to simulate the network delay.
>> It has an interval option, if supplied, this filter will release
>> packets by interval.
>>
>> Usage:
>>   -netdev tap,id=bn0
>>   -netfilter buffer,id=f0,netdev=bn0,chain=in,interval=1000
>>
>> NOTE:
>>   the scale of interval is microsecond.
>>
>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>> ---
>> v4: remove bh
>>      pass the packet to next filter instead of receiver
>> v3: check packet's sender and sender->peer when flush it
>> ---
>>   net/Makefile.objs   |   1 +
>>   net/filter-buffer.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   net/filter.c        |   2 +
>>   net/filters.h       |  17 ++++++++
>>   qapi-schema.json    |  18 +++++++-
>>   5 files changed, 157 insertions(+), 1 deletion(-)
>>   create mode 100644 net/filter-buffer.c
>>   create mode 100644 net/filters.h
>>
>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>> index 914aec0..5fa2f97 100644
>> --- a/net/Makefile.objs
>> +++ b/net/Makefile.objs
>> @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o
>>   common-obj-$(CONFIG_VDE) += vde.o
>>   common-obj-$(CONFIG_NETMAP) += netmap.o
>>   common-obj-y += filter.o
>> +common-obj-y += filter-buffer.o
>> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
>> new file mode 100644
>> index 0000000..fd467db
>> --- /dev/null
>> +++ b/net/filter-buffer.c
>> @@ -0,0 +1,120 @@
>> +/*
>> + * Copyright (c) 2015 FUJITSU LIMITED
>> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "net/filter.h"
>> +#include "net/queue.h"
>> +#include "filters.h"
>> +#include "qemu-common.h"
>> +#include "qemu/timer.h"
>> +#include "qemu/iov.h"
>> +
>> +typedef struct FILTERBUFFERState {
>> +    NetFilterState nf;
>> +    NetQueue *incoming_queue;
>> +    int64_t interval;
>
> Can interval be negative?

No.

> If not please uint. And not sure you really
> need a 64 bit integer, if not, please use uint32_t.

Ok, will use uint32_t instead.

>
>> +    QEMUTimer release_timer;
>> +} FILTERBUFFERState;
>
> Filter buffer is not abbreviation. So better name it as FilterBufferState.

Ok, thanks.

>
>> +
>> +static void packet_send_completed(NetClientState *nc, ssize_t len)
>> +{
>> +    return;
>> +}
>
> Why need a dummy sent cb?

Need to work around with queue_append, if there's no sent_cb, queue_append
will simply drop the packet...Even we provide a sent_cb param to receive_iov,
this dummy might still be needed because sent_cb might be null.

>
>> +
>> +static void filter_buffer_flush(NetFilterState *nf)
>> +{
>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>> +    NetQueue *queue = s->incoming_queue;
>> +    NetPacket *packet;
>> +
>> +    while (queue && !QTAILQ_EMPTY(&queue->packets)) {
>> +        packet = QTAILQ_FIRST(&queue->packets);
>> +        QTAILQ_REMOVE(&queue->packets, packet, entry);
>> +        queue->nq_count--;
>> +
>> +        if (packet->sender && packet->sender->peer) {
>> +            qemu_netfilter_pass_to_next(nf, packet->sender, packet->flags,
>> +                                        packet->data, packet->size);
>> +        }
>> +
>> +        /*
>> +         * now that we pass the packet to next filter, we don't care the
>> +         * reture value here, because the filter layer or other filter
>> +         * will take care of this packet
>> +         */
>> +        g_free(packet);
>
> This seems wrong, since packet could be queued into incoming queue.
> Doing this may cause use after free.

The incoming queue will make a copy of packet data when queued a packet.
So I think it's ok to free this packet here...because this packet is
alloced by this filter when calling qemu_net_queue_append_iov.

>
>> +    }
>> +}
>> +
>> +static void filter_buffer_release_timer(void *opaque)
>> +{
>> +    FILTERBUFFERState *s = opaque;
>> +    filter_buffer_flush(&s->nf);
>> +    timer_mod(&s->release_timer,
>> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>> +}
>> +
>> +/* filter APIs */
>> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
>> +                                         NetClientState *sender,
>> +                                         unsigned flags,
>> +                                         const struct iovec *iov,
>> +                                         int iovcnt)
>> +{
>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>> +    NetQueue *queue = s->incoming_queue;
>> +
>> +    qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt,
>> +                              packet_send_completed);
>> +    return iov_size(iov, iovcnt);
>
> So if interval is zero, packet will be blocked forever and memory will
> be exhausted.

Yes, but this supposed to be used by FT solutions, so it will
be released periodically. Currently only used with interval makes sense.

>
>> +}
>> +
>> +static void filter_buffer_cleanup(NetFilterState *nf)
>> +{
>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>> +
>> +    if (s->interval) {
>> +        timer_del(&s->release_timer);
>> +    }
>> +
>> +    /* flush packets */
>> +    filter_buffer_flush(nf);
>> +    g_free(s->incoming_queue);
>> +    return;
>> +}
>> +
>> +static NetFilterInfo net_filter_buffer_info = {
>> +    .type = NET_FILTER_OPTIONS_KIND_BUFFER,
>> +    .size = sizeof(FILTERBUFFERState),
>> +    .receive_iov = filter_buffer_receive_iov,
>> +    .cleanup = filter_buffer_cleanup,
>> +};
>> +
>> +int net_init_filter_buffer(const NetFilterOptions *opts, const char *name,
>> +                           int chain, NetClientState *netdev, Error **errp)
>> +{
>> +    NetFilterState *nf;
>> +    FILTERBUFFERState *s;
>> +    const NetFilterBufferOptions *bufferopt;
>> +
>> +    assert(opts->kind == NET_FILTER_OPTIONS_KIND_BUFFER);
>> +    bufferopt = opts->buffer;
>> +
>> +    nf = qemu_new_net_filter(&net_filter_buffer_info,
>> +                             netdev, "buffer", name, chain);
>> +    s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>> +    s->incoming_queue = qemu_new_net_queue(nf);
>> +    s->interval = bufferopt->has_interval ? bufferopt->interval : 0;
>> +    if (s->interval) {
>> +        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
>> +                      filter_buffer_release_timer, s);
>> +        timer_mod(&s->release_timer,
>> +                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>> +    }
>> +
>> +    return 0;
>> +}
>> diff --git a/net/filter.c b/net/filter.c
>> index 166b851..02f3e76 100644
>> --- a/net/filter.c
>> +++ b/net/filter.c
>> @@ -19,6 +19,7 @@
>>   #include "net/filter.h"
>>   #include "net/net.h"
>>   #include "net/queue.h"
>> +#include "filters.h"
>>
>>   static QTAILQ_HEAD(, NetFilterState) net_filters;
>>
>> @@ -179,6 +180,7 @@ typedef int (NetFilterInit)(const NetFilterOptions *opts,
>>
>>   static
>>   NetFilterInit * const net_filter_init_fun[NET_FILTER_OPTIONS_KIND_MAX] = {
>> +    [NET_FILTER_OPTIONS_KIND_BUFFER] = net_init_filter_buffer,
>>   };
>>
>>   static int net_filter_init1(const NetFilter *netfilter, Error **errp)
>> diff --git a/net/filters.h b/net/filters.h
>> new file mode 100644
>> index 0000000..3b546db
>> --- /dev/null
>> +++ b/net/filters.h
>> @@ -0,0 +1,17 @@
>> +/*
>> + * Copyright (c) 2015 FUJITSU LIMITED
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef QEMU_NET_FILTERS_H
>> +#define QEMU_NET_FILTERS_H
>> +
>> +#include "net/net.h"
>> +#include "net/filter.h"
>> +
>> +int net_init_filter_buffer(const NetFilterOptions *opts, const char *name,
>> +                           int chain, NetClientState *netdev, Error **errp);
>> +
>> +#endif /* QEMU_NET_FILTERS_H */
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index 9d97c21..e51bb59 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -2584,6 +2584,21 @@
>>   { 'command': 'netfilter_del', 'data': {'id': 'str'} }
>>
>>   ##
>> +# @NetFilterBufferOptions
>> +#
>> +# a netbuffer filter for network backend.
>> +#
>> +# @interval: #optional release packets by interval, if no interval supplied,
>> +#            will release packets when filter_buffer_release_all been called.
>> +#            scale: microsecond
>> +#
>> +# Since 2.5
>> +##
>> +{ 'struct': 'NetFilterBufferOptions',
>> +  'data': {
>> +    '*interval':     'int64' } }
>> +
>> +##
>>   # @NetFilterOptions
>>   #
>>   # A discriminated record of network filters.
>> @@ -2592,7 +2607,8 @@
>>   #
>>   ##
>>   { 'union': 'NetFilterOptions',
>> -  'data': { } }
>> +  'data': {
>> +    'buffer':     'NetFilterBufferOptions'} }
>>
>>   ##
>>   # @NetFilter
>
> .
>
Jason Wang Aug. 6, 2015, 9:09 a.m. UTC | #3
On 08/06/2015 04:19 PM, Yang Hongyang wrote:
>
>
> On 08/06/2015 03:21 PM, Jason Wang wrote:
>>
>>
>> On 08/04/2015 04:30 PM, Yang Hongyang wrote:
>>> This filter is to buffer/release packets, this feature can be used
>>> when using MicroCheckpointing, or other Remus like VM FT solutions, you
>>> can also use it to simulate the network delay.
>>> It has an interval option, if supplied, this filter will release
>>> packets by interval.
>>>
>>> Usage:
>>>   -netdev tap,id=bn0
>>>   -netfilter buffer,id=f0,netdev=bn0,chain=in,interval=1000
>>>
>>> NOTE:
>>>   the scale of interval is microsecond.
>>>
>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>> ---
>>> v4: remove bh
>>>      pass the packet to next filter instead of receiver
>>> v3: check packet's sender and sender->peer when flush it
>>> ---
>>>   net/Makefile.objs   |   1 +
>>>   net/filter-buffer.c | 120
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   net/filter.c        |   2 +
>>>   net/filters.h       |  17 ++++++++
>>>   qapi-schema.json    |  18 +++++++-
>>>   5 files changed, 157 insertions(+), 1 deletion(-)
>>>   create mode 100644 net/filter-buffer.c
>>>   create mode 100644 net/filters.h
>>>
>>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>>> index 914aec0..5fa2f97 100644
>>> --- a/net/Makefile.objs
>>> +++ b/net/Makefile.objs
>>> @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o
>>>   common-obj-$(CONFIG_VDE) += vde.o
>>>   common-obj-$(CONFIG_NETMAP) += netmap.o
>>>   common-obj-y += filter.o
>>> +common-obj-y += filter-buffer.o
>>> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
>>> new file mode 100644
>>> index 0000000..fd467db
>>> --- /dev/null
>>> +++ b/net/filter-buffer.c
>>> @@ -0,0 +1,120 @@
>>> +/*
>>> + * Copyright (c) 2015 FUJITSU LIMITED
>>> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>> + * later.  See the COPYING file in the top-level directory.
>>> + */
>>> +
>>> +#include "net/filter.h"
>>> +#include "net/queue.h"
>>> +#include "filters.h"
>>> +#include "qemu-common.h"
>>> +#include "qemu/timer.h"
>>> +#include "qemu/iov.h"
>>> +
>>> +typedef struct FILTERBUFFERState {
>>> +    NetFilterState nf;
>>> +    NetQueue *incoming_queue;
>>> +    int64_t interval;
>>
>> Can interval be negative?
>
> No.
>
>> If not please uint. And not sure you really
>> need a 64 bit integer, if not, please use uint32_t.
>
> Ok, will use uint32_t instead.
>
>>
>>> +    QEMUTimer release_timer;
>>> +} FILTERBUFFERState;
>>
>> Filter buffer is not abbreviation. So better name it as
>> FilterBufferState.
>
> Ok, thanks.
>
>>
>>> +
>>> +static void packet_send_completed(NetClientState *nc, ssize_t len)
>>> +{
>>> +    return;
>>> +}
>>
>> Why need a dummy sent cb?
>
> Need to work around with queue_append, if there's no sent_cb,
> queue_append
> will simply drop the packet...Even we provide a sent_cb param to
> receive_iov,
> this dummy might still be needed because sent_cb might be null.

Dropping happens only when the number of queued packet exceeds queue
limitation and no sent_cb. Isn't this just what we want? And like we've
discussed, we need track packet->sent_cb so it was not a problem?

>
>>
>>> +
>>> +static void filter_buffer_flush(NetFilterState *nf)
>>> +{
>>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>>> +    NetQueue *queue = s->incoming_queue;
>>> +    NetPacket *packet;
>>> +
>>> +    while (queue && !QTAILQ_EMPTY(&queue->packets)) {
>>> +        packet = QTAILQ_FIRST(&queue->packets);
>>> +        QTAILQ_REMOVE(&queue->packets, packet, entry);
>>> +        queue->nq_count--;
>>> +
>>> +        if (packet->sender && packet->sender->peer) {
>>> +            qemu_netfilter_pass_to_next(nf, packet->sender,
>>> packet->flags,
>>> +                                        packet->data, packet->size);
>>> +        }
>>> +
>>> +        /*
>>> +         * now that we pass the packet to next filter, we don't
>>> care the
>>> +         * reture value here, because the filter layer or other filter
>>> +         * will take care of this packet
>>> +         */
>>> +        g_free(packet);
>>
>> This seems wrong, since packet could be queued into incoming queue.
>> Doing this may cause use after free.
>
> The incoming queue will make a copy of packet data when queued a packet.
> So I think it's ok to free this packet here...because this packet is
> alloced by this filter when calling qemu_net_queue_append_iov.
>

You're right. I see.

>>
>>> +    }
>>> +}
>>> +
>>> +static void filter_buffer_release_timer(void *opaque)
>>> +{
>>> +    FILTERBUFFERState *s = opaque;
>>> +    filter_buffer_flush(&s->nf);
>>> +    timer_mod(&s->release_timer,
>>> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>>> +}
>>> +
>>> +/* filter APIs */
>>> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
>>> +                                         NetClientState *sender,
>>> +                                         unsigned flags,
>>> +                                         const struct iovec *iov,
>>> +                                         int iovcnt)
>>> +{
>>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>>> +    NetQueue *queue = s->incoming_queue;
>>> +
>>> +    qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt,
>>> +                              packet_send_completed);
>>> +    return iov_size(iov, iovcnt);
>>
>> So if interval is zero, packet will be blocked forever and memory will
>> be exhausted.
>
> Yes, but this supposed to be used by FT solutions, so it will
> be released periodically. Currently only used with interval makes sense.
>

Then you need terminate the initialization when interval is zero.

[...]
Yang Hongyang Aug. 6, 2015, 9:23 a.m. UTC | #4
On 08/06/2015 05:09 PM, Jason Wang wrote:
>
>
> On 08/06/2015 04:19 PM, Yang Hongyang wrote:
>>
>>
>> On 08/06/2015 03:21 PM, Jason Wang wrote:
>>>
>>>
>>> On 08/04/2015 04:30 PM, Yang Hongyang wrote:
>>>> This filter is to buffer/release packets, this feature can be used
>>>> when using MicroCheckpointing, or other Remus like VM FT solutions, you
>>>> can also use it to simulate the network delay.
>>>> It has an interval option, if supplied, this filter will release
>>>> packets by interval.
>>>>
>>>> Usage:
>>>>    -netdev tap,id=bn0
>>>>    -netfilter buffer,id=f0,netdev=bn0,chain=in,interval=1000
>>>>
>>>> NOTE:
>>>>    the scale of interval is microsecond.
>>>>
>>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>>> ---
>>>> v4: remove bh
>>>>       pass the packet to next filter instead of receiver
>>>> v3: check packet's sender and sender->peer when flush it
>>>> ---
>>>>    net/Makefile.objs   |   1 +
>>>>    net/filter-buffer.c | 120
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    net/filter.c        |   2 +
>>>>    net/filters.h       |  17 ++++++++
>>>>    qapi-schema.json    |  18 +++++++-
>>>>    5 files changed, 157 insertions(+), 1 deletion(-)
>>>>    create mode 100644 net/filter-buffer.c
>>>>    create mode 100644 net/filters.h
>>>>
>>>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>>>> index 914aec0..5fa2f97 100644
>>>> --- a/net/Makefile.objs
>>>> +++ b/net/Makefile.objs
>>>> @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o
>>>>    common-obj-$(CONFIG_VDE) += vde.o
>>>>    common-obj-$(CONFIG_NETMAP) += netmap.o
>>>>    common-obj-y += filter.o
>>>> +common-obj-y += filter-buffer.o
>>>> diff --git a/net/filter-buffer.c b/net/filter-buffer.c
>>>> new file mode 100644
>>>> index 0000000..fd467db
>>>> --- /dev/null
>>>> +++ b/net/filter-buffer.c
>>>> @@ -0,0 +1,120 @@
>>>> +/*
>>>> + * Copyright (c) 2015 FUJITSU LIMITED
>>>> + * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>> + * later.  See the COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#include "net/filter.h"
>>>> +#include "net/queue.h"
>>>> +#include "filters.h"
>>>> +#include "qemu-common.h"
>>>> +#include "qemu/timer.h"
>>>> +#include "qemu/iov.h"
>>>> +
>>>> +typedef struct FILTERBUFFERState {
>>>> +    NetFilterState nf;
>>>> +    NetQueue *incoming_queue;
>>>> +    int64_t interval;
>>>
>>> Can interval be negative?
>>
>> No.
>>
>>> If not please uint. And not sure you really
>>> need a 64 bit integer, if not, please use uint32_t.
>>
>> Ok, will use uint32_t instead.
>>
>>>
>>>> +    QEMUTimer release_timer;
>>>> +} FILTERBUFFERState;
>>>
>>> Filter buffer is not abbreviation. So better name it as
>>> FilterBufferState.
>>
>> Ok, thanks.
>>
>>>
>>>> +
>>>> +static void packet_send_completed(NetClientState *nc, ssize_t len)
>>>> +{
>>>> +    return;
>>>> +}
>>>
>>> Why need a dummy sent cb?
>>
>> Need to work around with queue_append, if there's no sent_cb,
>> queue_append
>> will simply drop the packet...Even we provide a sent_cb param to
>> receive_iov,
>> this dummy might still be needed because sent_cb might be null.
>
> Dropping happens only when the number of queued packet exceeds queue
> limitation and no sent_cb. Isn't this just what we want?

Yes, you are right.

> And like we've
> discussed, we need track packet->sent_cb so it was not a problem?

Ok, we can drop this dummy callback, thank you.

>
>>
>>>
>>>> +
>>>> +static void filter_buffer_flush(NetFilterState *nf)
>>>> +{
>>>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>>>> +    NetQueue *queue = s->incoming_queue;
>>>> +    NetPacket *packet;
>>>> +
>>>> +    while (queue && !QTAILQ_EMPTY(&queue->packets)) {
>>>> +        packet = QTAILQ_FIRST(&queue->packets);
>>>> +        QTAILQ_REMOVE(&queue->packets, packet, entry);
>>>> +        queue->nq_count--;
>>>> +
>>>> +        if (packet->sender && packet->sender->peer) {
>>>> +            qemu_netfilter_pass_to_next(nf, packet->sender,
>>>> packet->flags,
>>>> +                                        packet->data, packet->size);
>>>> +        }
>>>> +
>>>> +        /*
>>>> +         * now that we pass the packet to next filter, we don't
>>>> care the
>>>> +         * reture value here, because the filter layer or other filter
>>>> +         * will take care of this packet
>>>> +         */
>>>> +        g_free(packet);
>>>
>>> This seems wrong, since packet could be queued into incoming queue.
>>> Doing this may cause use after free.
>>
>> The incoming queue will make a copy of packet data when queued a packet.
>> So I think it's ok to free this packet here...because this packet is
>> alloced by this filter when calling qemu_net_queue_append_iov.
>>
>
> You're right. I see.
>
>>>
>>>> +    }
>>>> +}
>>>> +
>>>> +static void filter_buffer_release_timer(void *opaque)
>>>> +{
>>>> +    FILTERBUFFERState *s = opaque;
>>>> +    filter_buffer_flush(&s->nf);
>>>> +    timer_mod(&s->release_timer,
>>>> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
>>>> +}
>>>> +
>>>> +/* filter APIs */
>>>> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
>>>> +                                         NetClientState *sender,
>>>> +                                         unsigned flags,
>>>> +                                         const struct iovec *iov,
>>>> +                                         int iovcnt)
>>>> +{
>>>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>>>> +    NetQueue *queue = s->incoming_queue;
>>>> +
>>>> +    qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt,
>>>> +                              packet_send_completed);
>>>> +    return iov_size(iov, iovcnt);
>>>
>>> So if interval is zero, packet will be blocked forever and memory will
>>> be exhausted.
>>
>> Yes, but this supposed to be used by FT solutions, so it will
>> be released periodically. Currently only used with interval makes sense.
>>
>
> Then you need terminate the initialization when interval is zero.

Ok, but I'd like to also write a comment with it to mention when there're
users, the interval check can be dropped.

>
> [...]
> .
>
Jason Wang Aug. 6, 2015, 9:35 a.m. UTC | #5
On 08/06/2015 05:23 PM, Yang Hongyang wrote:
>
>
> On 08/06/2015 05:09 PM, Jason Wang wrote:
>>
>>
>> On 08/06/2015 04:19 PM, Yang Hongyang wrote:
>>>
>>>
>>> On 08/06/2015 03:21 PM, Jason Wang wrote:
>>>>
>>>>
>>>> On 08/04/2015 04:30 PM, Yang Hongyang wrote:
>>>>> This filter is to buffer/release packets, this feature can be used
>>>>> when using MicroCheckpointing, or other Remus like VM FT
>>>>> solutions, you
>>>>> can also use it to simulate the network delay.
>>>>> It has an interval option, if supplied, this filter will release
>>>>> packets by interval.
>>>>>
>>>>> Usage:
>>>>>    -netdev tap,id=bn0
>>>>>    -netfilter buffer,id=f0,netdev=bn0,chain=in,interval=1000
>>>>>
>>>>> NOTE:
>>>>>    the scale of interval is microsecond.
>>>>>
>>>>> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
>>>>> ---
>>>>> v4: remove bh
>>>>>       pass the packet to next filter instead of receiver
>>>>> v3: check packet's sender and sender->peer when flush it
>>>>> ---
>>>>>   

[...]

>>>>>>>
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void filter_buffer_release_timer(void *opaque)
>>>>>>>> +{
>>>>>>>> +    FILTERBUFFERState *s = opaque;
>>>>>>>> +    filter_buffer_flush(&s->nf);
>>>>>>>> +    timer_mod(&s->release_timer,
>>>>>>>> +              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) +
>>>>>>>> s->interval);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* filter APIs */
>>>>>>>> +static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
>>>>>>>> +                                         NetClientState *sender,
>>>>>>>> +                                         unsigned flags,
>>>>>>>> +                                         const struct iovec *iov,
>>>>>>>> +                                         int iovcnt)
>>>>>>>> +{
>>>>>>>> +    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
>>>>>>>> +    NetQueue *queue = s->incoming_queue;
>>>>>>>> +
>>>>>>>> +    qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt,
>>>>>>>> +                              packet_send_completed);
>>>>>>>> +    return iov_size(iov, iovcnt);
>>>>>>>
>>>>>>> So if interval is zero, packet will be blocked forever and
>>>>>>> memory will
>>>>>>> be exhausted.
>>>>>>
>>>>>> Yes, but this supposed to be used by FT solutions, so it will
>>>>>> be released periodically. Currently only used with interval makes
>>>>>> sense.
>>>>>>
>>>>>
>>>>> Then you need terminate the initialization when interval is zero.
>
> Ok, but I'd like to also write a comment with it to mention when there're
> users, the interval check can be dropped. 

Ok.
diff mbox

Patch

diff --git a/net/Makefile.objs b/net/Makefile.objs
index 914aec0..5fa2f97 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -14,3 +14,4 @@  common-obj-$(CONFIG_SLIRP) += slirp.o
 common-obj-$(CONFIG_VDE) += vde.o
 common-obj-$(CONFIG_NETMAP) += netmap.o
 common-obj-y += filter.o
+common-obj-y += filter-buffer.o
diff --git a/net/filter-buffer.c b/net/filter-buffer.c
new file mode 100644
index 0000000..fd467db
--- /dev/null
+++ b/net/filter-buffer.c
@@ -0,0 +1,120 @@ 
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "net/filter.h"
+#include "net/queue.h"
+#include "filters.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/iov.h"
+
+typedef struct FILTERBUFFERState {
+    NetFilterState nf;
+    NetQueue *incoming_queue;
+    int64_t interval;
+    QEMUTimer release_timer;
+} FILTERBUFFERState;
+
+static void packet_send_completed(NetClientState *nc, ssize_t len)
+{
+    return;
+}
+
+static void filter_buffer_flush(NetFilterState *nf)
+{
+    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
+    NetQueue *queue = s->incoming_queue;
+    NetPacket *packet;
+
+    while (queue && !QTAILQ_EMPTY(&queue->packets)) {
+        packet = QTAILQ_FIRST(&queue->packets);
+        QTAILQ_REMOVE(&queue->packets, packet, entry);
+        queue->nq_count--;
+
+        if (packet->sender && packet->sender->peer) {
+            qemu_netfilter_pass_to_next(nf, packet->sender, packet->flags,
+                                        packet->data, packet->size);
+        }
+
+        /*
+         * now that we pass the packet to next filter, we don't care the
+         * reture value here, because the filter layer or other filter
+         * will take care of this packet
+         */
+        g_free(packet);
+    }
+}
+
+static void filter_buffer_release_timer(void *opaque)
+{
+    FILTERBUFFERState *s = opaque;
+    filter_buffer_flush(&s->nf);
+    timer_mod(&s->release_timer,
+              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+}
+
+/* filter APIs */
+static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
+                                         NetClientState *sender,
+                                         unsigned flags,
+                                         const struct iovec *iov,
+                                         int iovcnt)
+{
+    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
+    NetQueue *queue = s->incoming_queue;
+
+    qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt,
+                              packet_send_completed);
+    return iov_size(iov, iovcnt);
+}
+
+static void filter_buffer_cleanup(NetFilterState *nf)
+{
+    FILTERBUFFERState *s = DO_UPCAST(FILTERBUFFERState, nf, nf);
+
+    if (s->interval) {
+        timer_del(&s->release_timer);
+    }
+
+    /* flush packets */
+    filter_buffer_flush(nf);
+    g_free(s->incoming_queue);
+    return;
+}
+
+static NetFilterInfo net_filter_buffer_info = {
+    .type = NET_FILTER_OPTIONS_KIND_BUFFER,
+    .size = sizeof(FILTERBUFFERState),
+    .receive_iov = filter_buffer_receive_iov,
+    .cleanup = filter_buffer_cleanup,
+};
+
+int net_init_filter_buffer(const NetFilterOptions *opts, const char *name,
+                           int chain, NetClientState *netdev, Error **errp)
+{
+    NetFilterState *nf;
+    FILTERBUFFERState *s;
+    const NetFilterBufferOptions *bufferopt;
+
+    assert(opts->kind == NET_FILTER_OPTIONS_KIND_BUFFER);
+    bufferopt = opts->buffer;
+
+    nf = qemu_new_net_filter(&net_filter_buffer_info,
+                             netdev, "buffer", name, chain);
+    s = DO_UPCAST(FILTERBUFFERState, nf, nf);
+    s->incoming_queue = qemu_new_net_queue(nf);
+    s->interval = bufferopt->has_interval ? bufferopt->interval : 0;
+    if (s->interval) {
+        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
+                      filter_buffer_release_timer, s);
+        timer_mod(&s->release_timer,
+                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+    }
+
+    return 0;
+}
diff --git a/net/filter.c b/net/filter.c
index 166b851..02f3e76 100644
--- a/net/filter.c
+++ b/net/filter.c
@@ -19,6 +19,7 @@ 
 #include "net/filter.h"
 #include "net/net.h"
 #include "net/queue.h"
+#include "filters.h"
 
 static QTAILQ_HEAD(, NetFilterState) net_filters;
 
@@ -179,6 +180,7 @@  typedef int (NetFilterInit)(const NetFilterOptions *opts,
 
 static
 NetFilterInit * const net_filter_init_fun[NET_FILTER_OPTIONS_KIND_MAX] = {
+    [NET_FILTER_OPTIONS_KIND_BUFFER] = net_init_filter_buffer,
 };
 
 static int net_filter_init1(const NetFilter *netfilter, Error **errp)
diff --git a/net/filters.h b/net/filters.h
new file mode 100644
index 0000000..3b546db
--- /dev/null
+++ b/net/filters.h
@@ -0,0 +1,17 @@ 
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_NET_FILTERS_H
+#define QEMU_NET_FILTERS_H
+
+#include "net/net.h"
+#include "net/filter.h"
+
+int net_init_filter_buffer(const NetFilterOptions *opts, const char *name,
+                           int chain, NetClientState *netdev, Error **errp);
+
+#endif /* QEMU_NET_FILTERS_H */
diff --git a/qapi-schema.json b/qapi-schema.json
index 9d97c21..e51bb59 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2584,6 +2584,21 @@ 
 { 'command': 'netfilter_del', 'data': {'id': 'str'} }
 
 ##
+# @NetFilterBufferOptions
+#
+# a netbuffer filter for network backend.
+#
+# @interval: #optional release packets by interval, if no interval supplied,
+#            will release packets when filter_buffer_release_all been called.
+#            scale: microsecond
+#
+# Since 2.5
+##
+{ 'struct': 'NetFilterBufferOptions',
+  'data': {
+    '*interval':     'int64' } }
+
+##
 # @NetFilterOptions
 #
 # A discriminated record of network filters.
@@ -2592,7 +2607,8 @@ 
 #
 ##
 { 'union': 'NetFilterOptions',
-  'data': { } }
+  'data': {
+    'buffer':     'NetFilterBufferOptions'} }
 
 ##
 # @NetFilter