diff mbox

[RFC,V3,3/3] filter-rewriter: rewrite tcp packet to keep secondary connection

Message ID 1467714580-17581-4-git-send-email-zhangchen.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

Zhang Chen July 5, 2016, 10:29 a.m. UTC
When the COLO guest is a tcp server, we rewrite the tcp packets that the
secondary guest receives and sends.

First, the client starts a tcp handshake. The packet has seq=client_seq,
ack=0,flag=SYN. The COLO primary guest gets this pkt and mirrors it
(filter-mirror) to the secondary guest, which receives it via filter-redirector.
Then, the primary guest responds with pkt
(seq=primary_seq,ack=client_seq+1,flag=ACK|SYN).
The secondary guest responds with pkt
(seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
Here, filter-rewriter saves the secondary_seq in its tcp connection.
To finish the handshake, the client sends pkt
(seq=client_seq+1,ack=primary_seq+1,flag=ACK).
Here, filter-rewriter can get primary_seq, rewrites ack from primary_seq+1
to secondary_seq+1 and recalculates the checksum. So the secondary tcp
connection stays valid.

When data packets are sent/received:
the client sends pkt(seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH).
filter-rewriter rewrites the ack and sends the packet to the secondary guest.

The primary guest responds with pkt
(seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK).
The secondary guest responds with pkt
(seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK).
We rewrite the secondary guest's seq from secondary_seq+1 to primary_seq+1,
so the tcp connection stays valid.

In the code we use offset (= secondary_seq - primary_seq)
to rewrite seq or ack:
handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 net/colo-base.c       |   2 +
 net/colo-base.h       |   7 ++++
 net/filter-rewriter.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++-
 trace-events          |   5 +++
 4 files changed, 120 insertions(+), 2 deletions(-)

Comments

Jason Wang July 27, 2016, 7:03 a.m. UTC | #1
On 2016年07月05日 18:29, Zhang Chen wrote:
> We will rewrite tcp packet secondary received and sent.
> When colo guest is a tcp server.
>
> Firstly, client start a tcp handshake. the packet's seq=client_seq,
> ack=0,flag=SYN. COLO primary guest get this pkt and mirror(filter-mirror)
> to secondary guest, secondary get it use filter-redirector.
> Then,primary guest response pkt
> (seq=primary_seq,ack=client_seq+1,flag=ACK|SYN).
> secondary guest response pkt
> (seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
> In here,we use filter-rewriter save the secondary_seq to it's tcp connection.
> Finally handshake,client send pkt
> (seq=client_seq+1,ack=primary_seq+1,flag=ACK).
> Here,filter-rewriter can get primary_seq, and rewrite ack from primary_seq+1
> to secondary_seq+1, recalculate checksum. So the secondary tcp connection
> kept good.
>
> When we send/recv packet.
> client send pkt(seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH).
> filter-rewriter rewrite ack and send to secondary guest.
>
> primary guest response pkt
> (seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK)
> secondary guest response pkt
> (seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK)
> we rewrite secondary guest seq from secondary_seq+1 to primary_seq+1.
> So tcp connection kept good.
>
> In code We use offset( = secondary_seq - primary_seq )
> to rewrite seq or ack.
> handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
> handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;
>
> Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>   net/colo-base.c       |   2 +
>   net/colo-base.h       |   7 ++++
>   net/filter-rewriter.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++-
>   trace-events          |   5 +++
>   4 files changed, 120 insertions(+), 2 deletions(-)
>
> diff --git a/net/colo-base.c b/net/colo-base.c
> index 9673661..58fbd9d 100644
> --- a/net/colo-base.c
> +++ b/net/colo-base.c
> @@ -123,6 +123,8 @@ Connection *connection_new(ConnectionKey *key)
>   
>       conn->ip_proto = key->ip_proto;
>       conn->processing = false;
> +    conn->offset = 0;
> +    conn->syn_flag = 0;
>       g_queue_init(&conn->primary_list);
>       g_queue_init(&conn->secondary_list);
>   
> diff --git a/net/colo-base.h b/net/colo-base.h
> index 62460c5..353bd55 100644
> --- a/net/colo-base.h
> +++ b/net/colo-base.h
> @@ -71,6 +71,13 @@ typedef struct Connection {
>       uint8_t ip_proto;
>       /* be used by filter-rewriter */
>       colo_conn_state state;
> +    /* offset = secondary_seq - primary_seq */
> +    tcp_seq  offset;
> +    /*
> +     * we use this flag update offset func
> +     * run once in independent tcp connection
> +     */
> +    int syn_flag;
>   } Connection;
>   
>   uint32_t connection_key_hash(const void *opaque);
> diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
> index 7f0da2c..f911f99 100644
> --- a/net/filter-rewriter.c
> +++ b/net/filter-rewriter.c
> @@ -21,6 +21,7 @@
>   #include "qemu/main-loop.h"
>   #include "qemu/iov.h"
>   #include "net/checksum.h"
> +#include "trace.h"
>   
>   #define FILTER_COLO_REWRITER(obj) \
>       OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
> @@ -62,6 +63,89 @@ static int is_tcp_packet(Packet *pkt)
>       }
>   }
>   
> +/* handle tcp packet from primary guest */
> +static int handle_primary_tcp_pkt(NetFilterState *nf,
> +                                  Connection *conn,
> +                                  Packet *pkt)
> +{
> +    struct tcphdr *tcp_pkt;
> +
> +    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
> +    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
> +        char *sdebug, *ddebug;
> +        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
> +        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
> +        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
> +                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
> +                    tcp_pkt->th_flags);
> +        trace_colo_filter_rewriter_conn_offset(conn->offset);
> +        g_free(sdebug);
> +        g_free(ddebug);
> +    }
> +
> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
> +        /*
> +         * we use this flag update offset func
> +         * run once in independent tcp connection
> +         */
> +        conn->syn_flag = 1;
> +    }
> +
> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
> +        if (conn->syn_flag) {
> +            /* offset = secondary_seq - primary seq */
> +            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);

The code here conflicts with your comment above. Why is the - 1 needed here?

> +            conn->syn_flag = 0;
> +        }
> +        /* handle packets to the secondary from the primary */
> +        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
> +
> +        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
> +    }
> +
> +    return 0;
> +}
> +
> +/* handle tcp packet from secondary guest */
> +static int handle_secondary_tcp_pkt(NetFilterState *nf,
> +                                    Connection *conn,
> +                                    Packet *pkt)
> +{
> +    struct tcphdr *tcp_pkt;
> +
> +    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
> +
> +    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
> +        char *sdebug, *ddebug;
> +        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
> +        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
> +        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
> +                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
> +                    tcp_pkt->th_flags);
> +        trace_colo_filter_rewriter_conn_offset(conn->offset);
> +        g_free(sdebug);
> +        g_free(ddebug);
> +    }
> +
> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
> +        /*
> +         * save offset = secondary_seq and then
> +         * in handle_primary_tcp_pkt make offset
> +         * = secondary_seq - primary_seq
> +         */
> +        conn->offset = ntohl(tcp_pkt->th_seq);
> +    }
> +
> +    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
> +        /* handle packets to the primary from the secondary*/
> +        tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
> +
> +        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
> +    }
> +
> +    return 0;
> +}
> +
>   static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
>                                            NetClientState *sender,
>                                            unsigned flags,
> @@ -101,10 +185,30 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
>   
>           if (sender == nf->netdev) {
>               /* NET_FILTER_DIRECTION_TX */
> -            /* handle_primary_tcp_pkt */
> +            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
> +                qemu_net_queue_send(s->incoming_queue, sender, 0,
> +                (const uint8_t *)pkt->data, pkt->size, NULL);
> +                packet_destroy(pkt, NULL);
> +                pkt = NULL;
> +                /*
> +                 * We block the packet here,after rewrite pkt
> +                 * and will send it
> +                 */
> +                return 1;
> +            }
>           } else {
>               /* NET_FILTER_DIRECTION_RX */
> -            /* handle_secondary_tcp_pkt */
> +            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
> +                qemu_net_queue_send(s->incoming_queue, sender, 0,
> +                (const uint8_t *)pkt->data, pkt->size, NULL);
> +                packet_destroy(pkt, NULL);
> +                pkt = NULL;
> +                /*
> +                 * We block the packet here,after rewrite pkt
> +                 * and will send it
> +                 */
> +                return 1;
> +            }
>           }
>       }
>   
> diff --git a/trace-events b/trace-events
> index 6686cdf..5ac56f6 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1927,3 +1927,8 @@ colo_compare_icmp_miscompare_mtu(const char *sta, int size) ": %s  %d"
>   colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
>   colo_old_packet_check_found(int64_t old_time) "%" PRId64
>   colo_compare_miscompare(void) ""
> +
> +# net/filter-rewriter.c
> +colo_filter_rewriter_debug(void) ""
> +colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u  flags=%x\n"
> +colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n"
Zhang Chen July 27, 2016, 9:05 a.m. UTC | #2
On 07/27/2016 03:03 PM, Jason Wang wrote:
>
>
> On 2016年07月05日 18:29, Zhang Chen wrote:
>> We will rewrite tcp packet secondary received and sent.
>> When colo guest is a tcp server.
>>
>> Firstly, client start a tcp handshake. the packet's seq=client_seq,
>> ack=0,flag=SYN. COLO primary guest get this pkt and 
>> mirror(filter-mirror)
>> to secondary guest, secondary get it use filter-redirector.
>> Then,primary guest response pkt
>> (seq=primary_seq,ack=client_seq+1,flag=ACK|SYN).
>> secondary guest response pkt
>> (seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
>> In here,we use filter-rewriter save the secondary_seq to it's tcp 
>> connection.
>> Finally handshake,client send pkt
>> (seq=client_seq+1,ack=primary_seq+1,flag=ACK).
>> Here,filter-rewriter can get primary_seq, and rewrite ack from 
>> primary_seq+1
>> to secondary_seq+1, recalculate checksum. So the secondary tcp 
>> connection
>> kept good.
>>
>> When we send/recv packet.
>> client send 
>> pkt(seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH).
>> filter-rewriter rewrite ack and send to secondary guest.
>>
>> primary guest response pkt
>> (seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK)
>> secondary guest response pkt
>> (seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK)
>> we rewrite secondary guest seq from secondary_seq+1 to primary_seq+1.
>> So tcp connection kept good.
>>
>> In code We use offset( = secondary_seq - primary_seq )
>> to rewrite seq or ack.
>> handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
>> handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;
>>
>> Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>   net/colo-base.c       |   2 +
>>   net/colo-base.h       |   7 ++++
>>   net/filter-rewriter.c | 108 
>> +++++++++++++++++++++++++++++++++++++++++++++++++-
>>   trace-events          |   5 +++
>>   4 files changed, 120 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/colo-base.c b/net/colo-base.c
>> index 9673661..58fbd9d 100644
>> --- a/net/colo-base.c
>> +++ b/net/colo-base.c
>> @@ -123,6 +123,8 @@ Connection *connection_new(ConnectionKey *key)
>>         conn->ip_proto = key->ip_proto;
>>       conn->processing = false;
>> +    conn->offset = 0;
>> +    conn->syn_flag = 0;
>>       g_queue_init(&conn->primary_list);
>>       g_queue_init(&conn->secondary_list);
>>   diff --git a/net/colo-base.h b/net/colo-base.h
>> index 62460c5..353bd55 100644
>> --- a/net/colo-base.h
>> +++ b/net/colo-base.h
>> @@ -71,6 +71,13 @@ typedef struct Connection {
>>       uint8_t ip_proto;
>>       /* be used by filter-rewriter */
>>       colo_conn_state state;
>> +    /* offset = secondary_seq - primary_seq */
>> +    tcp_seq  offset;
>> +    /*
>> +     * we use this flag update offset func
>> +     * run once in independent tcp connection
>> +     */
>> +    int syn_flag;
>>   } Connection;
>>     uint32_t connection_key_hash(const void *opaque);
>> diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
>> index 7f0da2c..f911f99 100644
>> --- a/net/filter-rewriter.c
>> +++ b/net/filter-rewriter.c
>> @@ -21,6 +21,7 @@
>>   #include "qemu/main-loop.h"
>>   #include "qemu/iov.h"
>>   #include "net/checksum.h"
>> +#include "trace.h"
>>     #define FILTER_COLO_REWRITER(obj) \
>>       OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
>> @@ -62,6 +63,89 @@ static int is_tcp_packet(Packet *pkt)
>>       }
>>   }
>>   +/* handle tcp packet from primary guest */
>> +static int handle_primary_tcp_pkt(NetFilterState *nf,
>> +                                  Connection *conn,
>> +                                  Packet *pkt)
>> +{
>> +    struct tcphdr *tcp_pkt;
>> +
>> +    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
>> +    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
>> +        char *sdebug, *ddebug;
>> +        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
>> +        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
>> +        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
>> +                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
>> +                    tcp_pkt->th_flags);
>> + trace_colo_filter_rewriter_conn_offset(conn->offset);
>> +        g_free(sdebug);
>> +        g_free(ddebug);
>> +    }
>> +
>> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
>> +        /*
>> +         * we use this flag update offset func
>> +         * run once in independent tcp connection
>> +         */
>> +        conn->syn_flag = 1;
>> +    }
>> +
>> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
>> +        if (conn->syn_flag) {
>> +            /* offset = secondary_seq - primary seq */
>> +            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
>
> The code here is conflict with your comment above. Why need - 1 here?

(ntohl(tcp_pkt->th_ack) - 1) is the primary seq
So, we need -1 here.

Thanks
Zhang Chen

>
>> +            conn->syn_flag = 0;
>> +        }
>> +        /* handle packets to the secondary from the primary */
>> +        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
>> +
>> +        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* handle tcp packet from secondary guest */
>> +static int handle_secondary_tcp_pkt(NetFilterState *nf,
>> +                                    Connection *conn,
>> +                                    Packet *pkt)
>> +{
>> +    struct tcphdr *tcp_pkt;
>> +
>> +    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
>> +
>> +    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
>> +        char *sdebug, *ddebug;
>> +        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
>> +        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
>> +        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
>> +                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
>> +                    tcp_pkt->th_flags);
>> + trace_colo_filter_rewriter_conn_offset(conn->offset);
>> +        g_free(sdebug);
>> +        g_free(ddebug);
>> +    }
>> +
>> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | 
>> TH_SYN))) {
>> +        /*
>> +         * save offset = secondary_seq and then
>> +         * in handle_primary_tcp_pkt make offset
>> +         * = secondary_seq - primary_seq
>> +         */
>> +        conn->offset = ntohl(tcp_pkt->th_seq);
>> +    }
>> +
>> +    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
>> +        /* handle packets to the primary from the secondary*/
>> +        tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
>> +
>> +        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
>>                                            NetClientState *sender,
>>                                            unsigned flags,
>> @@ -101,10 +185,30 @@ static ssize_t 
>> colo_rewriter_receive_iov(NetFilterState *nf,
>>             if (sender == nf->netdev) {
>>               /* NET_FILTER_DIRECTION_TX */
>> -            /* handle_primary_tcp_pkt */
>> +            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
>> +                qemu_net_queue_send(s->incoming_queue, sender, 0,
>> +                (const uint8_t *)pkt->data, pkt->size, NULL);
>> +                packet_destroy(pkt, NULL);
>> +                pkt = NULL;
>> +                /*
>> +                 * We block the packet here,after rewrite pkt
>> +                 * and will send it
>> +                 */
>> +                return 1;
>> +            }
>>           } else {
>>               /* NET_FILTER_DIRECTION_RX */
>> -            /* handle_secondary_tcp_pkt */
>> +            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
>> +                qemu_net_queue_send(s->incoming_queue, sender, 0,
>> +                (const uint8_t *)pkt->data, pkt->size, NULL);
>> +                packet_destroy(pkt, NULL);
>> +                pkt = NULL;
>> +                /*
>> +                 * We block the packet here,after rewrite pkt
>> +                 * and will send it
>> +                 */
>> +                return 1;
>> +            }
>>           }
>>       }
>>   diff --git a/trace-events b/trace-events
>> index 6686cdf..5ac56f6 100644
>> --- a/trace-events
>> +++ b/trace-events
>> @@ -1927,3 +1927,8 @@ colo_compare_icmp_miscompare_mtu(const char 
>> *sta, int size) ": %s  %d"
>>   colo_compare_ip_info(int psize, const char *sta, const char *stb, 
>> int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src 
>> = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
>>   colo_old_packet_check_found(int64_t old_time) "%" PRId64
>>   colo_compare_miscompare(void) ""
>> +
>> +# net/filter-rewriter.c
>> +colo_filter_rewriter_debug(void) ""
>> +colo_filter_rewriter_pkt_info(const char *func, const char *src, 
>> const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: 
>> src/dst: %s/%s p: seq/ack=%u/%u  flags=%x\n"
>> +colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n"
>
>
>
> .
>
Jason Wang July 28, 2016, 2:04 a.m. UTC | #3
On 2016年07月27日 17:05, Zhang Chen wrote:
>>> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
>>> +        if (conn->syn_flag) {
>>> +            /* offset = secondary_seq - primary seq */
>>> +            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
>>
>> The code here is conflict with your comment above. Why need - 1 here?
>
> (ntohl(tcp_pkt->th_ack) - 1) is the primary seq
> So, we need -1 here.
>
> Thanks
> Zhang Chen

Aha, right, since we can't get the SYN|ACK packet sent by the guest from the
primary node.

Better to add a comment explaining this.

Thanks
diff mbox

Patch

diff --git a/net/colo-base.c b/net/colo-base.c
index 9673661..58fbd9d 100644
--- a/net/colo-base.c
+++ b/net/colo-base.c
@@ -123,6 +123,8 @@  Connection *connection_new(ConnectionKey *key)
 
     conn->ip_proto = key->ip_proto;
     conn->processing = false;
+    conn->offset = 0;
+    conn->syn_flag = 0;
     g_queue_init(&conn->primary_list);
     g_queue_init(&conn->secondary_list);
 
diff --git a/net/colo-base.h b/net/colo-base.h
index 62460c5..353bd55 100644
--- a/net/colo-base.h
+++ b/net/colo-base.h
@@ -71,6 +71,13 @@  typedef struct Connection {
     uint8_t ip_proto;
     /* be used by filter-rewriter */
     colo_conn_state state;
+    /* offset = secondary_seq - primary_seq */
+    tcp_seq  offset;
+    /*
+     * we use this flag update offset func
+     * run once in independent tcp connection
+     */
+    int syn_flag;
 } Connection;
 
 uint32_t connection_key_hash(const void *opaque);
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index 7f0da2c..f911f99 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -21,6 +21,7 @@ 
 #include "qemu/main-loop.h"
 #include "qemu/iov.h"
 #include "net/checksum.h"
+#include "trace.h"
 
 #define FILTER_COLO_REWRITER(obj) \
     OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
@@ -62,6 +63,89 @@  static int is_tcp_packet(Packet *pkt)
     }
 }
 
+/*
+ * Handle a tcp packet coming from the primary side: when ACK is set and SYN
+ * is clear, th_ack is shifted by conn->offset. Always returns 0.
+ */
+static int handle_primary_tcp_pkt(NetFilterState *nf,
+                                  Connection *conn,
+                                  Packet *pkt)
+{
+    struct tcphdr *tcp_pkt = (struct tcphdr *)pkt->transport_layer;
+
+    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        char *sdebug = g_strdup(inet_ntoa(pkt->ip->ip_src));
+        char *ddebug = g_strdup(inet_ntoa(pkt->ip->ip_dst));
+        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+        g_free(sdebug);
+        g_free(ddebug);
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
+        /* new handshake: the offset update below must run exactly once */
+        conn->syn_flag = 1;
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
+        if (conn->syn_flag) {
+            /*
+             * offset holds secondary_seq; the client ACKs primary_seq + 1,
+             * so th_ack - 1 is primary_seq => offset = secondary - primary
+             */
+            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
+            conn->syn_flag = 0;
+        }
+        /* handle packets to the secondary from the primary */
+        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
+        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
+/*
+ * Handle a tcp packet coming from the secondary guest: a SYN|ACK records its
+ * seq in conn->offset; with ACK set and SYN clear, th_seq is shifted back by
+ * conn->offset and the checksum is recalculated. Always returns 0.
+ */
+static int handle_secondary_tcp_pkt(NetFilterState *nf,
+                                    Connection *conn,
+                                    Packet *pkt)
+{
+    struct tcphdr *tcp_pkt = (struct tcphdr *)pkt->transport_layer;
+
+    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        char *sdebug = g_strdup(inet_ntoa(pkt->ip->ip_src));
+        char *ddebug = g_strdup(inet_ntoa(pkt->ip->ip_dst));
+        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+        g_free(sdebug);
+        g_free(ddebug);
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
+        /*
+         * Save secondary_seq in offset for now; handle_primary_tcp_pkt
+         * later subtracts primary_seq, which gives
+         * offset = secondary_seq - primary_seq
+         */
+        conn->offset = ntohl(tcp_pkt->th_seq);
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
+        /* handle packets to the primary from the secondary*/
+        tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
+        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
 static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
                                          NetClientState *sender,
                                          unsigned flags,
@@ -101,10 +185,30 @@  static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
 
         if (sender == nf->netdev) {
             /* NET_FILTER_DIRECTION_TX */
-            /* handle_primary_tcp_pkt */
+            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * We block the packet here,after rewrite pkt
+                 * and will send it
+                 */
+                return 1;
+            }
         } else {
             /* NET_FILTER_DIRECTION_RX */
-            /* handle_secondary_tcp_pkt */
+            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * We block the packet here,after rewrite pkt
+                 * and will send it
+                 */
+                return 1;
+            }
         }
     }
 
diff --git a/trace-events b/trace-events
index 6686cdf..5ac56f6 100644
--- a/trace-events
+++ b/trace-events
@@ -1927,3 +1927,8 @@  colo_compare_icmp_miscompare_mtu(const char *sta, int size) ": %s  %d"
 colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
 colo_old_packet_check_found(int64_t old_time) "%" PRId64
 colo_compare_miscompare(void) ""
+
+# net/filter-rewriter.c
+colo_filter_rewriter_debug(void) ""
+colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u  flags=%x\n"
+colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n"