diff mbox series

[v4,2/2] net/colo-compare.c: handling of the full primary or secondary queue

Message ID 20200328124646.7778-3-dereksu@qnap.com
State New
Headers show
Series COLO: handling of the full primary or secondary queue | expand

Commit Message

Derek Su March 28, 2020, 12:46 p.m. UTC
The previous handling of a full primary or secondary queue only dropped
the packet. If there are lots of clients connected to the guest VM,
the "drop" will lead to the loss of network connections
until the next checkpoint.

To address the issue, this patch first drops the packet,
then triggers a checkpoint and flushes the packets.

Signed-off-by: Derek Su <dereksu@qnap.com>
---
 net/colo-compare.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

Comments

Zhang, Chen March 31, 2020, 1:15 a.m. UTC | #1
> Subject: [PATCH v4 2/2] net/colo-compare.c: handling of the full primary or
> secondary queue
> 
> The pervious handling of the full primary or queue is only dropping the
> packet. If there are lots of clients to the guest VM, the "drop" will lead to the
> lost of the networking connection until next checkpoint.
> 
> To address the issue, this patch drops the packet firstly.
> Then, do checkpoint and flush packets.
> 
> Signed-off-by: Derek Su <dereksu@qnap.com>

Looks good to me.
Reviewed-by: Zhang Chen <chen.zhang@intel.com>

Thanks
Zhang Chen

> ---
>  net/colo-compare.c | 39 ++++++++++++++++++++++++++++-----------
>  1 file changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/net/colo-compare.c b/net/colo-compare.c index
> cdd87b2aa8..fe8779cf2d 100644
> --- a/net/colo-compare.c
> +++ b/net/colo-compare.c
> @@ -125,6 +125,12 @@ static const char *colo_mode[] = {
>      [SECONDARY_IN] = "secondary",
>  };
> 
> +enum {
> +    QUEUE_INSERT_ERR = -1,
> +    QUEUE_INSERT_OK = 0,
> +    QUEUE_INSERT_FULL = 1,
> +};
> +
>  static int compare_chr_send(CompareState *s,
>                              const uint8_t *buf,
>                              uint32_t size, @@ -211,8 +217,10 @@ static int
> colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)  }
> 
>  /*
> - * Return 0 on success, if return -1 means the pkt
> - * is unsupported(arp and ipv6) and will be sent later
> + * Return QUEUE_INSERT_OK on success.
> + * If return QUEUE_INSERT_FULL means list is full, and
> + * QUEUE_INSERT_ERR means the pkt is unsupported(arp and ipv6) and
> + * will be sent later
>   */
>  static int packet_enqueue(CompareState *s, int mode, Connection **con)
> { @@ -234,7 +242,7 @@ static int packet_enqueue(CompareState *s, int
> mode, Connection **con)
>      if (parse_packet_early(pkt)) {
>          packet_destroy(pkt, NULL);
>          pkt = NULL;
> -        return -1;
> +        return QUEUE_INSERT_ERR;
>      }
>      fill_connection_key(pkt, &key);
> 
> @@ -258,11 +266,12 @@ static int packet_enqueue(CompareState *s, int
> mode, Connection **con)
>                       "drop packet", colo_mode[mode]);
>          packet_destroy(pkt, NULL);
>          pkt = NULL;
> +        return QUEUE_INSERT_FULL;
>      }
> 
>      *con = conn;
> 
> -    return 0;
> +    return QUEUE_INSERT_OK;
>  }
> 
>  static inline bool after(uint32_t seq1, uint32_t seq2) @@ -995,17 +1004,21
> @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)  {
>      CompareState *s = container_of(pri_rs, CompareState, pri_rs);
>      Connection *conn = NULL;
> +    int ret;
> 
> -    if (packet_enqueue(s, PRIMARY_IN, &conn)) {
> +    ret = packet_enqueue(s, PRIMARY_IN, &conn);
> +    if (ret == QUEUE_INSERT_OK) {
> +        /* compare packet in the specified connection */
> +        colo_compare_connection(conn, s);
> +    } else if (ret == QUEUE_INSERT_FULL) {
> +        colo_compare_inconsistency_notify(s);
> +    } else {
>          trace_colo_compare_main("primary: unsupported packet in");
>          compare_chr_send(s,
>                           pri_rs->buf,
>                           pri_rs->packet_len,
>                           pri_rs->vnet_hdr_len,
>                           false);
> -    } else {
> -        /* compare packet in the specified connection */
> -        colo_compare_connection(conn, s);
>      }
>  }
> 
> @@ -1013,12 +1026,16 @@ static void
> compare_sec_rs_finalize(SocketReadState *sec_rs)  {
>      CompareState *s = container_of(sec_rs, CompareState, sec_rs);
>      Connection *conn = NULL;
> +    int ret;
> 
> -    if (packet_enqueue(s, SECONDARY_IN, &conn)) {
> -        trace_colo_compare_main("secondary: unsupported packet in");
> -    } else {
> +    ret = packet_enqueue(s, SECONDARY_IN, &conn);
> +    if (ret == QUEUE_INSERT_OK) {
>          /* compare packet in the specified connection */
>          colo_compare_connection(conn, s);
> +    } else if (ret == QUEUE_INSERT_FULL) {
> +        colo_compare_inconsistency_notify(s);
> +    } else {
> +        trace_colo_compare_main("secondary: unsupported packet in");
>      }
>  }
> 
> --
> 2.17.1
Lukas Straub April 5, 2020, 10:11 p.m. UTC | #2
On Sat, 28 Mar 2020 20:46:46 +0800
Derek Su <dereksu@qnap.com> wrote:

> The pervious handling of the full primary or queue is only dropping
> the packet. If there are lots of clients to the guest VM,
> the "drop" will lead to the lost of the networking connection
> until next checkpoint.
> 
> To address the issue, this patch drops the packet firstly.
> Then, do checkpoint and flush packets.
> 
> Signed-off-by: Derek Su <dereksu@qnap.com>

Looks good and works well in my tests.
Reviewed-by: Lukas Straub <lukasstraub2@web.de>
Tested-by: Lukas Straub <lukasstraub2@web.de>

Regards,
Lukas Straub

> ---
>  net/colo-compare.c | 39 ++++++++++++++++++++++++++++-----------
>  1 file changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/net/colo-compare.c b/net/colo-compare.c
> index cdd87b2aa8..fe8779cf2d 100644
> --- a/net/colo-compare.c
> +++ b/net/colo-compare.c
> @@ -125,6 +125,12 @@ static const char *colo_mode[] = {
>      [SECONDARY_IN] = "secondary",
>  };
>  
> +enum {
> +    QUEUE_INSERT_ERR = -1,
> +    QUEUE_INSERT_OK = 0,
> +    QUEUE_INSERT_FULL = 1,
> +};
> +
>  static int compare_chr_send(CompareState *s,
>                              const uint8_t *buf,
>                              uint32_t size,
> @@ -211,8 +217,10 @@ static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
>  }
>  
>  /*
> - * Return 0 on success, if return -1 means the pkt
> - * is unsupported(arp and ipv6) and will be sent later
> + * Return QUEUE_INSERT_OK on success.
> + * If return QUEUE_INSERT_FULL means list is full, and
> + * QUEUE_INSERT_ERR means the pkt is unsupported(arp and ipv6) and
> + * will be sent later
>   */
>  static int packet_enqueue(CompareState *s, int mode, Connection **con)
>  {
> @@ -234,7 +242,7 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
>      if (parse_packet_early(pkt)) {
>          packet_destroy(pkt, NULL);
>          pkt = NULL;
> -        return -1;
> +        return QUEUE_INSERT_ERR;
>      }
>      fill_connection_key(pkt, &key);
>  
> @@ -258,11 +266,12 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
>                       "drop packet", colo_mode[mode]);
>          packet_destroy(pkt, NULL);
>          pkt = NULL;
> +        return QUEUE_INSERT_FULL;
>      }
>  
>      *con = conn;
>  
> -    return 0;
> +    return QUEUE_INSERT_OK;
>  }
>  
>  static inline bool after(uint32_t seq1, uint32_t seq2)
> @@ -995,17 +1004,21 @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
>  {
>      CompareState *s = container_of(pri_rs, CompareState, pri_rs);
>      Connection *conn = NULL;
> +    int ret;
>  
> -    if (packet_enqueue(s, PRIMARY_IN, &conn)) {
> +    ret = packet_enqueue(s, PRIMARY_IN, &conn);
> +    if (ret == QUEUE_INSERT_OK) {
> +        /* compare packet in the specified connection */
> +        colo_compare_connection(conn, s);
> +    } else if (ret == QUEUE_INSERT_FULL) {
> +        colo_compare_inconsistency_notify(s);
> +    } else {
>          trace_colo_compare_main("primary: unsupported packet in");
>          compare_chr_send(s,
>                           pri_rs->buf,
>                           pri_rs->packet_len,
>                           pri_rs->vnet_hdr_len,
>                           false);
> -    } else {
> -        /* compare packet in the specified connection */
> -        colo_compare_connection(conn, s);
>      }
>  }
>  
> @@ -1013,12 +1026,16 @@ static void compare_sec_rs_finalize(SocketReadState *sec_rs)
>  {
>      CompareState *s = container_of(sec_rs, CompareState, sec_rs);
>      Connection *conn = NULL;
> +    int ret;
>  
> -    if (packet_enqueue(s, SECONDARY_IN, &conn)) {
> -        trace_colo_compare_main("secondary: unsupported packet in");
> -    } else {
> +    ret = packet_enqueue(s, SECONDARY_IN, &conn);
> +    if (ret == QUEUE_INSERT_OK) {
>          /* compare packet in the specified connection */
>          colo_compare_connection(conn, s);
> +    } else if (ret == QUEUE_INSERT_FULL) {
> +        colo_compare_inconsistency_notify(s);
> +    } else {
> +        trace_colo_compare_main("secondary: unsupported packet in");
>      }
>  }
>
Lukas Straub April 8, 2020, 7:18 p.m. UTC | #3
On Sat, 28 Mar 2020 20:46:46 +0800
Derek Su <dereksu@qnap.com> wrote:

> The pervious handling of the full primary or queue is only dropping
> the packet. If there are lots of clients to the guest VM,
> the "drop" will lead to the lost of the networking connection
> until next checkpoint.
> 
> To address the issue, this patch drops the packet firstly.
> Then, do checkpoint and flush packets.
> 
> Signed-off-by: Derek Su <dereksu@qnap.com>

Hello,
I had a look at this again and did some benchmarking.
First just qemu 5.0-rc1 with my bugfixes ( https://lists.nongnu.org/archive/html/qemu-devel/2020-04/msg01432.html )
Then qemu 5.0-rc1 with my bugfixes and this patch series.

This commit hurts performance too much:
Client-to-server bandwidth falls from ~45.9 Mbit/s to 22.9 Mbit/s.
Server-to-client bandwidth falls from ~6.3 Mbit/s to just ~674 Kbit/s.
Average latency rises from ~197ms to ~397ms.

Meanwhile the packet loss without this commit is negligible,
only 1-2 ping packets got lost during each test run.

Instead I think we should just turn the error message
into a trace so it doesn't flood the logs.

Regards,
Lukas Straub
5.0-rc1 with bugfixes:

*** iperf -c 192.168.178.65 ***

[  3] local 192.168.178.68 port 33418 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=53 ttl=64 time=2.57 ms
[ ID] Interval       Transfer     Bandwidth                                                             64 bytes from 192.168.178.65: icmp_seq=54 ttl=64 time=142 ms
[  3]  0.0-10.2 sec  60.1 MBytes  49.4 Mbits/sec                                                        64 bytes from 192.168.178.65: icmp_seq=55 ttl=64 time=2.64 ms

[  3] Sent 892 datagrams                                                                                rtt min/avg/max/mdev = 1.920/235.514/1810.653/411.394 ms, pipe 4

[  3] local 192.168.178.68 port 33420 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=55 ttl=64 time=24.3 ms
[ ID] Interval       Transfer     Bandwidth                                                             64 bytes from 192.168.178.65: icmp_seq=56 ttl=64 time=505 ms
[  3]  0.0-10.3 sec  51.0 MBytes  41.7 Mbits/sec                                                        64 bytes from 192.168.178.65: icmp_seq=57 ttl=64 time=11.7 ms

[  3] Sent 892 datagrams                                                                                rtt min/avg/max/mdev = 1.962/218.713/1365.510/344.164 ms, pipe 3

[  3] local 192.168.178.68 port 33422 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=54 ttl=64 time=49.2 ms
[ ID] Interval       Transfer     Bandwidth                                                             64 bytes from 192.168.178.65: icmp_seq=55 ttl=64 time=587 ms
[  3]  0.0-10.8 sec  60.1 MBytes  46.6 Mbits/sec                                                        64 bytes from 192.168.178.65: icmp_seq=56 ttl=64 time=167 ms

[  3] Sent 892 datagrams                                                                                rtt min/avg/max/mdev = 1.573/276.375/1384.085/362.027 ms, pipe 3


*** iperf -c 192.168.178.65 -d ***

[  4] local 192.168.178.68 port 33424 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=60 ttl=64 time=2.27 ms
[  5] local 192.168.178.68 port 5001 connected with 192.168.178.65 port 48440                           ^C
[ ID] Interval       Transfer     Bandwidth                                                             --- 192.168.178.65 ping statistics ---
[  4]  0.0-10.1 sec  60.8 MBytes  50.4 Mbits/sec                                                        60 packets transmitted, 59 received, 1.66667% packet loss, time 29595ms
[  5]  0.0-11.7 sec  11.2 MBytes  8.05 Mbits/sec                                                        rtt min/avg/max/mdev = 1.785/254.258/1352.916/395.070 ms, pipe 3

[  5] local 192.168.178.68 port 33426 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=59 ttl=64 time=2.98 ms
[  4] local 192.168.178.68 port 5001 connected with 192.168.178.65 port 48442                           ^C
[ ID] Interval       Transfer     Bandwidth                                                             --- 192.168.178.65 ping statistics ---
[  5]  0.0-10.3 sec  58.6 MBytes  47.6 Mbits/sec                                                        59 packets transmitted, 59 received, 0% packet loss, time 29088ms
[  4]  0.0-11.9 sec  13.8 MBytes  9.77 Mbits/sec                                                        rtt min/avg/max/mdev = 2.026/207.507/1518.013/331.972 ms, pipe 4

[  5] local 192.168.178.68 port 33432 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=93 ttl=64 time=12.6 ms
[  4] local 192.168.178.68 port 5001 connected with 192.168.178.65 port 48446                           ^C
[ ID] Interval       Transfer     Bandwidth                                                             --- 192.168.178.65 ping statistics ---
[  5]  0.0-10.2 sec  59.4 MBytes  49.0 Mbits/sec                                                        93 packets transmitted, 92 received, 1.07527% packet loss, time 46137ms
[  4]  0.0-31.0 sec  4.41 MBytes  1.19 Mbits/sec                                                        rtt min/avg/max/mdev = 2.107/131.212/1405.670/287.854 ms, pipe 3


5.0-rc1 with bugfixes and checkpoint on queue full:

*** iperf -c 192.168.178.65 ***

[  3] local 192.168.178.68 port 33402 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=52 ttl=64 time=1329 ms
[ ID] Interval       Transfer     Bandwidth                                                             64 bytes from 192.168.178.65: icmp_seq=53 ttl=64 time=828 ms
[  3]  0.0-10.1 sec  35.9 MBytes  29.9 Mbits/sec                                                        64 bytes from 192.168.178.65: icmp_seq=54 ttl=64 time=619 ms

[  3] Sent 892 datagrams                                                                                rtt min/avg/max/mdev = 2.183/788.828/3329.045/958.683 ms, pipe 7

[  3] local 192.168.178.68 port 33404 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=57 ttl=64 time=2.58 ms
[ ID] Interval       Transfer     Bandwidth                                                             64 bytes from 192.168.178.65: icmp_seq=58 ttl=64 time=322 ms
[  3]  0.0-11.5 sec  36.5 MBytes  26.6 Mbits/sec                                                        64 bytes from 192.168.178.65: icmp_seq=59 ttl=64 time=10.2 ms

[  3] Sent 892 datagrams                                                                                rtt min/avg/max/mdev = 1.962/610.122/3303.544/907.708 ms, pipe 7

[  3] local 192.168.178.68 port 33406 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=55 ttl=64 time=2.60 ms
[ ID] Interval       Transfer     Bandwidth                                                             64 bytes from 192.168.178.65: icmp_seq=56 ttl=64 time=112 ms
[  3]  0.0-11.1 sec  16.1 MBytes  12.2 Mbits/sec                                                        64 bytes from 192.168.178.65: icmp_seq=57 ttl=64 time=118 ms

[  3] Sent 892 datagrams                                                                                rtt min/avg/max/mdev = 2.088/593.139/2734.770/857.021 ms, pipe 6

*** iperf -c 192.168.178.65 -d ***

[  5] local 192.168.178.68 port 33408 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=75 ttl=64 time=2.64 ms
[  4] local 192.168.178.68 port 5001 connected with 192.168.178.65 port 47492                           ^C
[ ID] Interval       Transfer     Bandwidth                                                             --- 192.168.178.65 ping statistics ---
[  5]  0.0-10.3 sec  51.2 MBytes  41.9 Mbits/sec                                                        75 packets transmitted, 75 received, 0% packet loss, time 37122ms
[  4]  0.0-25.0 sec  1.00 MBytes   336 Kbits/sec                                                        rtt min/avg/max/mdev = 1.830/563.652/1996.992/651.117 ms, pipe 4

[  4] local 192.168.178.68 port 33410 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=127 ttl=64 time=2.41 ms
[  5] local 192.168.178.68 port 5001 connected with 192.168.178.65 port 47494                           ^C
[ ID] Interval       Transfer     Bandwidth                                                             --- 192.168.178.65 ping statistics ---
[  4]  0.0-10.1 sec  39.0 MBytes  32.4 Mbits/sec                                                        127 packets transmitted, 126 received, 0.787402% packet loss, time 63205ms
[  5]  0.0-50.8 sec  5.01 MBytes   826 Kbits/sec                                                        rtt min/avg/max/mdev = 1.791/239.200/2105.867/513.022 ms, pipe 5

[  4] local 192.168.178.68 port 33414 connected with 192.168.178.65 port 5001                           64 bytes from 192.168.178.65: icmp_seq=90 ttl=64 time=69.4 ms
[  5] local 192.168.178.68 port 5001 connected with 192.168.178.65 port 47496                           ^C
[ ID] Interval       Transfer     Bandwidth                                                             --- 192.168.178.65 ping statistics ---
[  4]  0.0-10.5 sec  46.1 MBytes  37.0 Mbits/sec                                                        90 packets transmitted, 90 received, 0% packet loss, time 44643ms
[  5]  0.0-31.8 sec  3.26 MBytes   861 Kbits/sec                                                        rtt min/avg/max/mdev = 1.909/391.216/1878.484/645.897 ms, pipe 4
Zhang, Chen April 9, 2020, 6:59 a.m. UTC | #4
> -----Original Message-----
> From: Lukas Straub <lukasstraub2@web.de>
> Sent: Thursday, April 9, 2020 3:19 AM
> To: Derek Su <dereksu@qnap.com>
> Cc: qemu-devel@nongnu.org; lizhijian@cn.fujitsu.com; chyang@qnap.com;
> jasowang@redhat.com; ctcheng@qnap.com; Zhang, Chen
> <chen.zhang@intel.com>; jwsu1986@gmail.com
> Subject: Re: [PATCH v4 2/2] net/colo-compare.c: handling of the full primary
> or secondary queue
> 
> On Sat, 28 Mar 2020 20:46:46 +0800
> Derek Su <dereksu@qnap.com> wrote:
> 
> > The pervious handling of the full primary or queue is only dropping
> > the packet. If there are lots of clients to the guest VM, the "drop"
> > will lead to the lost of the networking connection until next
> > checkpoint.
> >
> > To address the issue, this patch drops the packet firstly.
> > Then, do checkpoint and flush packets.
> >
> > Signed-off-by: Derek Su <dereksu@qnap.com>
> 
> Hello,
> I had a look at this again and did some benchmarking.
> First just qemu 5.0-rc1 with my bugfixes
> ( https://lists.nongnu.org/archive/html/qemu-devel/2020-
> 04/msg01432.html ) Then qemu 5.0-rc1 with my bugfixes and this patch
> series.
> 
> This commit hurts performance too much:
> Client-to-server bandwidth falls from ~45.9 Mbit/s to 22.9 Mbit/s.
> Server-to-client bandwidth falls from ~6.3 Mbit/s to just ~674 Kbit/s.
> Average latency rises from ~197ms to ~397ms.
> 
> Meanwhile the packet loss without this commit is negligible, only 1-2 ping
> packets got lost during each test run.
> 
> Instead I think we should just turn the error message into a trace so it
> doesn't flood the logs.

We re-tested this patch, and Lukas is right.
Sorry about the original idea; it looks like it did not show better performance in the test.
I agree with Lukas's comments. Derek, can you please change it?

Thanks
Zhang Chen


> 
> Regards,
> Lukas Straub
Derek Su April 9, 2020, 7:10 a.m. UTC | #5
Hello, Zhang and Lukas

Sure. My re-test confirms that the performance is hurt. I will update it later.

By the way, could I also move the "error_report("colo compare
primary/secondary queue size too big, drop packet");" to trace?
The use of error_report is a little strange and floods the log.

May I also make "MAX_QUEUE_SIZE" user-configurable in this series?

Thanks,
Derek Su




Zhang, Chen <chen.zhang@intel.com> 於 2020年4月9日 週四 下午2:59寫道:
>
>
>
> > -----Original Message-----
> > From: Lukas Straub <lukasstraub2@web.de>
> > Sent: Thursday, April 9, 2020 3:19 AM
> > To: Derek Su <dereksu@qnap.com>
> > Cc: qemu-devel@nongnu.org; lizhijian@cn.fujitsu.com; chyang@qnap.com;
> > jasowang@redhat.com; ctcheng@qnap.com; Zhang, Chen
> > <chen.zhang@intel.com>; jwsu1986@gmail.com
> > Subject: Re: [PATCH v4 2/2] net/colo-compare.c: handling of the full primary
> > or secondary queue
> >
> > On Sat, 28 Mar 2020 20:46:46 +0800
> > Derek Su <dereksu@qnap.com> wrote:
> >
> > > The pervious handling of the full primary or queue is only dropping
> > > the packet. If there are lots of clients to the guest VM, the "drop"
> > > will lead to the lost of the networking connection until next
> > > checkpoint.
> > >
> > > To address the issue, this patch drops the packet firstly.
> > > Then, do checkpoint and flush packets.
> > >
> > > Signed-off-by: Derek Su <dereksu@qnap.com>
> >
> > Hello,
> > I had a look at this again and did some benchmarking.
> > First just qemu 5.0-rc1 with my bugfixes
> > ( https://lists.nongnu.org/archive/html/qemu-devel/2020-
> > 04/msg01432.html ) Then qemu 5.0-rc1 with my bugfixes and this patch
> > series.
> >
> > This commit hurts performance too much:
> > Client-to-server bandwidth falls from ~45.9 Mbit/s to 22.9 Mbit/s.
> > Server-to-client bandwidth falls from ~6.3 Mbit/s to just ~674 Kbit/s.
> > Average latency rises from ~197ms to ~397ms.
> >
> > Meanwhile the packet loss without this commit is negligible, only 1-2 ping
> > packets got lost during each test run.
> >
> > Instead I think we should just turn the error message into a trace so it
> > doesn't flood the logs.
>
> We re-test this patch, Lukas is right.
> Sorry for the original idea, looks like it did not show better performance in the test.
> Agree with Lukas's comments. Derek, can you please change it?
>
> Thanks
> Zhang Chen
>
>
> >
> > Regards,
> > Lukas Straub
Zhang, Chen April 9, 2020, 9:02 a.m. UTC | #6
> -----Original Message-----
> From: Derek Su <jwsu1986@gmail.com>
> Sent: Thursday, April 9, 2020 3:11 PM
> To: Zhang, Chen <chen.zhang@intel.com>
> Cc: Lukas Straub <lukasstraub2@web.de>; Derek Su <dereksu@qnap.com>;
> qemu-devel@nongnu.org; lizhijian@cn.fujitsu.com; chyang@qnap.com;
> jasowang@redhat.com; ctcheng@qnap.com
> Subject: Re: [PATCH v4 2/2] net/colo-compare.c: handling of the full primary
> or secondary queue
> 
> Hello, Zhang and Lukas
> 
> Sure, after my re-test, the performance is hurt. Will update it later.
> 
> By the way, could I also move the "error_report("colo compare
> primary/secondary queue size too big, drop packet");" to trace?

Yes.

> The use of error_report is a little strange and make a flood in log.
> 
> May I  also make "MAX_QUEUE_SIZE"  be user-configurable in this series?

I have this patch internally and will send it later.

Thanks
Zhang Chen

> 
> Thanks,
> Derek Su
> 
> 
> 
> 
> Zhang, Chen <chen.zhang@intel.com> 於 2020年4月9日 週四 下午2:59
> 寫道:
> >
> >
> >
> > > -----Original Message-----
> > > From: Lukas Straub <lukasstraub2@web.de>
> > > Sent: Thursday, April 9, 2020 3:19 AM
> > > To: Derek Su <dereksu@qnap.com>
> > > Cc: qemu-devel@nongnu.org; lizhijian@cn.fujitsu.com;
> > > chyang@qnap.com; jasowang@redhat.com; ctcheng@qnap.com; Zhang,
> Chen
> > > <chen.zhang@intel.com>; jwsu1986@gmail.com
> > > Subject: Re: [PATCH v4 2/2] net/colo-compare.c: handling of the full
> > > primary or secondary queue
> > >
> > > On Sat, 28 Mar 2020 20:46:46 +0800
> > > Derek Su <dereksu@qnap.com> wrote:
> > >
> > > > The pervious handling of the full primary or queue is only
> > > > dropping the packet. If there are lots of clients to the guest VM, the
> "drop"
> > > > will lead to the lost of the networking connection until next
> > > > checkpoint.
> > > >
> > > > To address the issue, this patch drops the packet firstly.
> > > > Then, do checkpoint and flush packets.
> > > >
> > > > Signed-off-by: Derek Su <dereksu@qnap.com>
> > >
> > > Hello,
> > > I had a look at this again and did some benchmarking.
> > > First just qemu 5.0-rc1 with my bugfixes (
> > > https://lists.nongnu.org/archive/html/qemu-devel/2020-
> > > 04/msg01432.html ) Then qemu 5.0-rc1 with my bugfixes and this patch
> > > series.
> > >
> > > This commit hurts performance too much:
> > > Client-to-server bandwidth falls from ~45.9 Mbit/s to 22.9 Mbit/s.
> > > Server-to-client bandwidth falls from ~6.3 Mbit/s to just ~674 Kbit/s.
> > > Average latency rises from ~197ms to ~397ms.
> > >
> > > Meanwhile the packet loss without this commit is negligible, only
> > > 1-2 ping packets got lost during each test run.
> > >
> > > Instead I think we should just turn the error message into a trace
> > > so it doesn't flood the logs.
> >
> > We re-test this patch, Lukas is right.
> > Sorry for the original idea, looks like it did not show better performance in
> the test.
> > Agree with Lukas's comments. Derek, can you please change it?
> >
> > Thanks
> > Zhang Chen
> >
> >
> > >
> > > Regards,
> > > Lukas Straub
diff mbox series

Patch

diff --git a/net/colo-compare.c b/net/colo-compare.c
index cdd87b2aa8..fe8779cf2d 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -125,6 +125,12 @@  static const char *colo_mode[] = {
     [SECONDARY_IN] = "secondary",
 };
 
+enum {
+    QUEUE_INSERT_ERR = -1,
+    QUEUE_INSERT_OK = 0,
+    QUEUE_INSERT_FULL = 1,
+};
+
 static int compare_chr_send(CompareState *s,
                             const uint8_t *buf,
                             uint32_t size,
@@ -211,8 +217,10 @@  static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
 }
 
 /*
- * Return 0 on success, if return -1 means the pkt
- * is unsupported(arp and ipv6) and will be sent later
+ * Return QUEUE_INSERT_OK on success.
+ * If return QUEUE_INSERT_FULL means list is full, and
+ * QUEUE_INSERT_ERR means the pkt is unsupported(arp and ipv6) and
+ * will be sent later
  */
 static int packet_enqueue(CompareState *s, int mode, Connection **con)
 {
@@ -234,7 +242,7 @@  static int packet_enqueue(CompareState *s, int mode, Connection **con)
     if (parse_packet_early(pkt)) {
         packet_destroy(pkt, NULL);
         pkt = NULL;
-        return -1;
+        return QUEUE_INSERT_ERR;
     }
     fill_connection_key(pkt, &key);
 
@@ -258,11 +266,12 @@  static int packet_enqueue(CompareState *s, int mode, Connection **con)
                      "drop packet", colo_mode[mode]);
         packet_destroy(pkt, NULL);
         pkt = NULL;
+        return QUEUE_INSERT_FULL;
     }
 
     *con = conn;
 
-    return 0;
+    return QUEUE_INSERT_OK;
 }
 
 static inline bool after(uint32_t seq1, uint32_t seq2)
@@ -995,17 +1004,21 @@  static void compare_pri_rs_finalize(SocketReadState *pri_rs)
 {
     CompareState *s = container_of(pri_rs, CompareState, pri_rs);
     Connection *conn = NULL;
+    int ret;
 
-    if (packet_enqueue(s, PRIMARY_IN, &conn)) {
+    ret = packet_enqueue(s, PRIMARY_IN, &conn);
+    if (ret == QUEUE_INSERT_OK) {
+        /* compare packet in the specified connection */
+        colo_compare_connection(conn, s);
+    } else if (ret == QUEUE_INSERT_FULL) {
+        colo_compare_inconsistency_notify(s);
+    } else {
         trace_colo_compare_main("primary: unsupported packet in");
         compare_chr_send(s,
                          pri_rs->buf,
                          pri_rs->packet_len,
                          pri_rs->vnet_hdr_len,
                          false);
-    } else {
-        /* compare packet in the specified connection */
-        colo_compare_connection(conn, s);
     }
 }
 
@@ -1013,12 +1026,16 @@  static void compare_sec_rs_finalize(SocketReadState *sec_rs)
 {
     CompareState *s = container_of(sec_rs, CompareState, sec_rs);
     Connection *conn = NULL;
+    int ret;
 
-    if (packet_enqueue(s, SECONDARY_IN, &conn)) {
-        trace_colo_compare_main("secondary: unsupported packet in");
-    } else {
+    ret = packet_enqueue(s, SECONDARY_IN, &conn);
+    if (ret == QUEUE_INSERT_OK) {
         /* compare packet in the specified connection */
         colo_compare_connection(conn, s);
+    } else if (ret == QUEUE_INSERT_FULL) {
+        colo_compare_inconsistency_notify(s);
+    } else {
+        trace_colo_compare_main("secondary: unsupported packet in");
     }
 }