diff mbox series

[ovs-dev,v4] conntrack: Properly unNAT inner header of related traffic

Message ID 20230127135232.364284-1-amusil@redhat.com
State Superseded
Headers show
Series [ovs-dev,v4] conntrack: Properly unNAT inner header of related traffic | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Ales Musil Jan. 27, 2023, 1:52 p.m. UTC
The inner header was not handled properly.
Simplify the code which allows proper handling
of the inner headers.

Reported-at: https://bugzilla.redhat.com/2137754
Signed-off-by: Ales Musil <amusil@redhat.com>
---
v4: Rebase on top of current master.
    Use output of ovs-pcap in tests rather then tcpdump.
v3: Rebase on top of current master.
    Update the BZ reference.
    Update the test case.
---
 lib/conntrack.c         | 252 ++++++++++++++--------------------------
 tests/system-traffic.at |  66 +++++++++++
 2 files changed, 155 insertions(+), 163 deletions(-)

Comments

Dumitru Ceara Feb. 3, 2023, 10:37 a.m. UTC | #1
On 1/27/23 14:52, Ales Musil wrote:
> The inner header was not handled properly.
> Simplify the code which allows proper handling
> of the inner headers.
> 
> Reported-at: https://bugzilla.redhat.com/2137754
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---

Hi, Ales,

The changes look mostly good to me.  I only have a few (minor) comments
below.

> v4: Rebase on top of current master.
>     Use output of ovs-pcap in tests rather then tcpdump.
> v3: Rebase on top of current master.
>     Update the BZ reference.
>     Update the test case.
> ---
>  lib/conntrack.c         | 252 ++++++++++++++--------------------------
>  tests/system-traffic.at |  66 +++++++++++
>  2 files changed, 155 insertions(+), 163 deletions(-)
> 
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 550b2be9b..9a9959efc 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>  }
>  
>  static void
> -pat_packet(struct dp_packet *pkt, const struct conn *conn)
> +pat_packet(struct dp_packet *pkt, const struct conn_key *key)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            struct tcp_header *th = dp_packet_l4(pkt);
> -            packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            struct udp_header *uh = dp_packet_l4(pkt);
> -            packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
> -                                conn->rev_key.src.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
> -                                conn->rev_key.src.port);
> -        }
> +    if (key->nw_proto == IPPROTO_TCP) {
> +        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
> +    } else if (key->nw_proto == IPPROTO_UDP) {
> +        packet_set_udp_port(pkt, key->dst.port, key->src.port);
>      }
>  }
>  
> -static void
> -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> +static uint16_t
> +nat_action_reverse(uint16_t nat_action)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        pkt->md.ct_state |= CS_SRC_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> -                                 conn->rev_key.dst.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_src.be32,
> -                                 &conn->rev_key.dst.addr.ipv6, true);
> -        }
> -        if (!related) {
> -            pat_packet(pkt, conn);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        pkt->md.ct_state |= CS_DST_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> -                                 conn->rev_key.src.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_dst.be32,
> -                                 &conn->rev_key.src.addr.ipv6, true);
> -        }
> -        if (!related) {
> -            pat_packet(pkt, conn);
> -        }
> +    if (nat_action & NAT_ACTION_SRC) {
> +        nat_action ^= NAT_ACTION_SRC;
> +        nat_action |= NAT_ACTION_DST;
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        nat_action ^= NAT_ACTION_DST;
> +        nat_action |= NAT_ACTION_SRC;
>      }
> +    return nat_action;
>  }
>  
>  static void
> -un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
> +                uint16_t nat_action)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            struct tcp_header *th = dp_packet_l4(pkt);
> -            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            struct udp_header *uh = dp_packet_l4(pkt);
> -            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            packet_set_udp_port(pkt, conn->key.dst.port, conn->key.src.port);
> -        }
> +    struct ip_header *nh = dp_packet_l3(pkt);
> +
> +    if (nat_action & NAT_ACTION_SRC) {
> +        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
>      }
>  }
>  
>  static void
> -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
> +                uint16_t nat_action)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            struct tcp_header *th_in = dp_packet_l4(pkt);
> -            packet_set_tcp_port(pkt, conn->key.src.port,
> -                                th_in->tcp_dst);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            struct udp_header *uh_in = dp_packet_l4(pkt);
> -            packet_set_udp_port(pkt, conn->key.src.port,
> -                                uh_in->udp_dst);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            packet_set_tcp_port(pkt, conn->key.src.port,
> -                                conn->key.dst.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            packet_set_udp_port(pkt, conn->key.src.port,
> -                                conn->key.dst.port);
> -        }
> +    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> +
> +    if (nat_action & NAT_ACTION_SRC) {
> +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
> +                             &key->dst.addr.ipv6, true);
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
> +                             &key->src.addr.ipv6, true);
>      }
>  }
>  
>  static void
> -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
> +                 uint16_t nat_action)
>  {
>      char *tail = dp_packet_tail(pkt);
>      uint16_t pad = dp_packet_l2_pad_size(pkt);
> @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>      uint16_t orig_l3_ofs = pkt->l3_ofs;
>      uint16_t orig_l4_ofs = pkt->l4_ofs;
>  
> -    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -        struct ip_header *nh = dp_packet_l3(pkt);
> -        struct icmp_header *icmp = dp_packet_l4(pkt);
> -        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
> -        /* This call is already verified to succeed during the code path from
> -         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
> -        extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad,
> +    void *l3 = dp_packet_l3(pkt);
> +    void *l4 = dp_packet_l4(pkt);
> +    void *inner_l3 = (char *) l4 + 8;

I understand you hardcoded this because the size of the ICMPv4 and v6
headers is in both cases 8 but it should be quite easy to avoid this if
you set inner_l3 on both branches of the "if" statement below.

> +
> +    /* These calls are already verified to succeed during the code path from
> +     * 'conn_key_extract()' which calls
> +     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
> +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> +        extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad,
>                          &inner_l4, false);
> -        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> -        pkt->l4_ofs += inner_l4 - (char *) icmp;
> +    } else {
> +        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad,
> +                        &inner_l4);
> +    }
> +    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
> +    pkt->l4_ofs += inner_l4 - (char *) l4;
>  
> -        if (conn->nat_action & NAT_ACTION_SRC) {
> -            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> -                                 conn->key.src.addr.ipv4);
> -        } else if (conn->nat_action & NAT_ACTION_DST) {
> -            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> -                                 conn->key.dst.addr.ipv4);
> -        }
> +    /* Reverse the key for inner packet. */
> +    conn_key_reverse(key);

Isn't it more clear if we use a copy of the key, e.g.:

struct conn_key rev_key = *key;
conn_key_reverse(&rev_key);

Then we don't need to re-reverse the key at the end of this function.
Performance wise I think there's no impact eitherway.

>  
> -        reverse_pat_packet(pkt, conn);
> +    pat_packet(pkt, key);
> +
> +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> +        nat_packet_ipv4(pkt, key, nat_action);
> +
> +        struct icmp_header *icmp = (struct icmp_header *) l4;
>          icmp->icmp_csum = 0;
>          icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
>      } else {
> -        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
> -        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
> -            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
> -        /* This call is already verified to succeed during the code path from
> -         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
> -        extract_l3_ipv6(&inner_key, inner_l3_6,
> -                        tail - ((char *)inner_l3_6) - pad,
> -                        &inner_l4);
> -        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> -        pkt->l4_ofs += inner_l4 - (char *) icmp6;
> -
> -        if (conn->nat_action & NAT_ACTION_SRC) {
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 inner_l3_6->ip6_src.be32,
> -                                 &conn->key.src.addr.ipv6, true);
> -        } else if (conn->nat_action & NAT_ACTION_DST) {
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 inner_l3_6->ip6_dst.be32,
> -                                 &conn->key.dst.addr.ipv6, true);
> -        }
> -        reverse_pat_packet(pkt, conn);
> +        nat_packet_ipv6(pkt, key, nat_action);
> +
> +        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4;
>          icmp6->icmp6_base.icmp6_cksum = 0;
> -        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6,
> -            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
> +        icmp6->icmp6_base.icmp6_cksum =
> +            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
> +                                    tail - (char *) icmp6 - pad);
>      }
> +
> +    /* Return the key and offset back. */
> +    conn_key_reverse(key);
>      pkt->l3_ofs = orig_l3_ofs;
>      pkt->l4_ofs = orig_l4_ofs;
>  }
>  
>  static void
> -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> -              bool related)
> +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        pkt->md.ct_state |= CS_DST_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> -                                 conn->key.src.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_dst.be32,
> -                                 &conn->key.src.addr.ipv6, true);
> -        }
> +    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
> +    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
> +                                : conn->nat_action;
>  
> -        if (OVS_UNLIKELY(related)) {
> -            reverse_nat_packet(pkt, conn);
> -        } else {
> -            un_pat_packet(pkt, conn);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> +    /* Update ct_state. */
> +    if (nat_action & NAT_ACTION_SRC) {
>          pkt->md.ct_state |= CS_SRC_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> -                                 conn->key.dst.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_src.be32,
> -                                 &conn->key.dst.addr.ipv6, true);
> -        }
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        pkt->md.ct_state |= CS_DST_NAT;
> +    }
>  
> +    /* Reverse the key for outer header. */
> +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> +        nat_packet_ipv4(pkt, key, nat_action);
> +    } else {
> +        nat_packet_ipv6(pkt, key, nat_action);
> +    }
> +
> +    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
>          if (OVS_UNLIKELY(related)) {
> -            reverse_nat_packet(pkt, conn);
> +            nat_action  = nat_action_reverse(conn->nat_action);

Nit: one space too many.

> +            nat_inner_packet(pkt, key, nat_action);
>          } else {
> -            un_pat_packet(pkt, conn);
> +            pat_packet(pkt, key);
>          }
>      }
>  }
> @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>                  memcpy(nc, nat_conn, sizeof *nc);
>              }
>  
> -            nat_packet(pkt, nc, ctx->icmp_related);
> +            nat_packet(pkt, nc, false, ctx->icmp_related);
>              memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
>              memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
>              nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn *conn,
>          if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
>              pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
>          }
> -        if (reply) {
> -            un_nat_packet(pkt, conn, related);
> -        } else {
> -            nat_packet(pkt, conn, related);
> -        }
> +
> +        nat_packet(pkt, conn, reply, related);
>      }
>  }
>  
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 503455cc6..fe084e35a 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -7130,6 +7130,72 @@ recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +AT_SETUP([conntrack - ICMP from different source related with NAT])

Thanks for adding a test!

> +AT_SKIP_IF([test $HAVE_NC = no])
> +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_NAT()
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_NAMESPACES(client, server)
> +
> +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
> +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
> +
> +dnl Send traffic from client to CT, do DNAT if the traffic is new otherwise send it to server
> +AT_DATA([flows.txt], [dnl
> +table=0,ip,ct_state=-trk,actions=ct(table=1)

We didn't send to conntrack before, it feels a bit weird to match on
ct_state (even if we match on -trk).

Which makes me wonder, shouldn't we actually try to un-nat too?  And, to
make it more resilient we should probably use a non-default zone, to
avoid existing conntrack entries in the defauly kernel zone.

So, to be pedantic, I think this should be changed to something like:

table=0,ip,actions=ct(table=1,zone=42,nat)

> +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,nat(dst(192.168.10.20))
> +table=1,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=ct(commit,table=2,nat)
> +table=1,ip,actions=resubmit(,2)
> +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
> +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server
> +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
> +])
> +
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +rm server.pcap
> +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
> +sleep 1
> +
> +dnl Send UDP client->server
> +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
> +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"])
> +dnl Send UDP response server->client
> +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
> +packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"])
> +dnl Fake router sending ICMP need frag
> +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
> +packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\
> +actions=resubmit(,0)"
> +])
> +
> +AT_CHECK([ovs-appctl revalidator/purge], [0])
> +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl
> + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip actions=ct(table=1)
> + table=1, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=ct(commit,table=2,nat(dst=192.168.10.20))
> + table=1, n_packets=1, n_bytes=42, reset_counts ip actions=resubmit(,2)
> + table=1, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=ct(commit,table=2,nat)
> + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2
> + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1
> + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2
> +OFPST_FLOW reply (OF1.5):
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl
> +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
> +])
> +
> +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
> +
> +AT_CHECK([ovs-pcap server.pcap | grep 000000001020000000002000], [0], [dnl
> +000000001020000000002000080045000038011f0000ff011b40c0a81401c0a80a140304f778000005784500001c000040000a11d162c0a80a14c0a8140a0002000100080000
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_BANNER([IGMP])
>  
>  AT_SETUP([IGMP - flood under normal action])

Regards,
Dumitru
Ales Musil Feb. 3, 2023, 12:46 p.m. UTC | #2
On Fri, Feb 3, 2023 at 11:37 AM Dumitru Ceara <dceara@redhat.com> wrote:

> On 1/27/23 14:52, Ales Musil wrote:
> > The inner header was not handled properly.
> > Simplify the code which allows proper handling
> > of the inner headers.
> >
> > Reported-at: https://bugzilla.redhat.com/2137754
> > Signed-off-by: Ales Musil <amusil@redhat.com>
> > ---
>
> Hi, Ales,
>
> The changes look mostly good to me.  I only have a few (minor) comments
> below.
>

Hi Dumitru,
thank you for the review. Everything should be addressed in v5.


>
> > v4: Rebase on top of current master.
> >     Use output of ovs-pcap in tests rather then tcpdump.
> > v3: Rebase on top of current master.
> >     Update the BZ reference.
> >     Update the test case.
> > ---
> >  lib/conntrack.c         | 252 ++++++++++++++--------------------------
> >  tests/system-traffic.at |  66 +++++++++++
> >  2 files changed, 155 insertions(+), 163 deletions(-)
> >
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 550b2be9b..9a9959efc 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const struct
> conn_lookup_ctx *ctx,
> >  }
> >
> >  static void
> > -pat_packet(struct dp_packet *pkt, const struct conn *conn)
> > +pat_packet(struct dp_packet *pkt, const struct conn_key *key)
> >  {
> > -    if (conn->nat_action & NAT_ACTION_SRC) {
> > -        if (conn->key.nw_proto == IPPROTO_TCP) {
> > -            struct tcp_header *th = dp_packet_l4(pkt);
> > -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
> th->tcp_dst);
> > -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> > -            struct udp_header *uh = dp_packet_l4(pkt);
> > -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
> uh->udp_dst);
> > -        }
> > -    } else if (conn->nat_action & NAT_ACTION_DST) {
> > -        if (conn->key.nw_proto == IPPROTO_TCP) {
> > -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
> > -                                conn->rev_key.src.port);
> > -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> > -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
> > -                                conn->rev_key.src.port);
> > -        }
> > +    if (key->nw_proto == IPPROTO_TCP) {
> > +        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
> > +    } else if (key->nw_proto == IPPROTO_UDP) {
> > +        packet_set_udp_port(pkt, key->dst.port, key->src.port);
> >      }
> >  }
> >
> > -static void
> > -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> > +static uint16_t
> > +nat_action_reverse(uint16_t nat_action)
> >  {
> > -    if (conn->nat_action & NAT_ACTION_SRC) {
> > -        pkt->md.ct_state |= CS_SRC_NAT;
> > -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -            struct ip_header *nh = dp_packet_l3(pkt);
> > -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> > -                                 conn->rev_key.dst.addr.ipv4);
> > -        } else {
> > -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> > -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> > -                                 nh6->ip6_src.be32,
> > -                                 &conn->rev_key.dst.addr.ipv6, true);
> > -        }
> > -        if (!related) {
> > -            pat_packet(pkt, conn);
> > -        }
> > -    } else if (conn->nat_action & NAT_ACTION_DST) {
> > -        pkt->md.ct_state |= CS_DST_NAT;
> > -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -            struct ip_header *nh = dp_packet_l3(pkt);
> > -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> > -                                 conn->rev_key.src.addr.ipv4);
> > -        } else {
> > -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> > -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> > -                                 nh6->ip6_dst.be32,
> > -                                 &conn->rev_key.src.addr.ipv6, true);
> > -        }
> > -        if (!related) {
> > -            pat_packet(pkt, conn);
> > -        }
> > +    if (nat_action & NAT_ACTION_SRC) {
> > +        nat_action ^= NAT_ACTION_SRC;
> > +        nat_action |= NAT_ACTION_DST;
> > +    } else if (nat_action & NAT_ACTION_DST) {
> > +        nat_action ^= NAT_ACTION_DST;
> > +        nat_action |= NAT_ACTION_SRC;
> >      }
> > +    return nat_action;
> >  }
> >
> >  static void
> > -un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> > +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
> > +                uint16_t nat_action)
> >  {
> > -    if (conn->nat_action & NAT_ACTION_SRC) {
> > -        if (conn->key.nw_proto == IPPROTO_TCP) {
> > -            struct tcp_header *th = dp_packet_l4(pkt);
> > -            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> > -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> > -            struct udp_header *uh = dp_packet_l4(pkt);
> > -            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
> > -        }
> > -    } else if (conn->nat_action & NAT_ACTION_DST) {
> > -        if (conn->key.nw_proto == IPPROTO_TCP) {
> > -            packet_set_tcp_port(pkt, conn->key.dst.port,
> conn->key.src.port);
> > -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> > -            packet_set_udp_port(pkt, conn->key.dst.port,
> conn->key.src.port);
> > -        }
> > +    struct ip_header *nh = dp_packet_l3(pkt);
> > +
> > +    if (nat_action & NAT_ACTION_SRC) {
> > +        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
> > +    } else if (nat_action & NAT_ACTION_DST) {
> > +        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
> >      }
> >  }
> >
> >  static void
> > -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> > +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
> > +                uint16_t nat_action)
> >  {
> > -    if (conn->nat_action & NAT_ACTION_SRC) {
> > -        if (conn->key.nw_proto == IPPROTO_TCP) {
> > -            struct tcp_header *th_in = dp_packet_l4(pkt);
> > -            packet_set_tcp_port(pkt, conn->key.src.port,
> > -                                th_in->tcp_dst);
> > -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> > -            struct udp_header *uh_in = dp_packet_l4(pkt);
> > -            packet_set_udp_port(pkt, conn->key.src.port,
> > -                                uh_in->udp_dst);
> > -        }
> > -    } else if (conn->nat_action & NAT_ACTION_DST) {
> > -        if (conn->key.nw_proto == IPPROTO_TCP) {
> > -            packet_set_tcp_port(pkt, conn->key.src.port,
> > -                                conn->key.dst.port);
> > -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> > -            packet_set_udp_port(pkt, conn->key.src.port,
> > -                                conn->key.dst.port);
> > -        }
> > +    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> > +
> > +    if (nat_action & NAT_ACTION_SRC) {
> > +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
> > +                             &key->dst.addr.ipv6, true);
> > +    } else if (nat_action & NAT_ACTION_DST) {
> > +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
> > +                             &key->src.addr.ipv6, true);
> >      }
> >  }
> >
> >  static void
> > -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> > +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
> > +                 uint16_t nat_action)
> >  {
> >      char *tail = dp_packet_tail(pkt);
> >      uint16_t pad = dp_packet_l2_pad_size(pkt);
> > @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, const
> struct conn *conn)
> >      uint16_t orig_l3_ofs = pkt->l3_ofs;
> >      uint16_t orig_l4_ofs = pkt->l4_ofs;
> >
> > -    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -        struct ip_header *nh = dp_packet_l3(pkt);
> > -        struct icmp_header *icmp = dp_packet_l4(pkt);
> > -        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
> > -        /* This call is already verified to succeed during the code
> path from
> > -         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
> > -        extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3)
> - pad,
> > +    void *l3 = dp_packet_l3(pkt);
> > +    void *l4 = dp_packet_l4(pkt);
> > +    void *inner_l3 = (char *) l4 + 8;
>
> I understand you hardcoded this because the size of the ICMPv4 and v6
> headers is in both cases 8 but it should be quite easy to avoid this if
> you set inner_l3 on both branches of the "if" statement below.
>

Ack, it makes it more explicit, done in v5.


>
> > +
> > +    /* These calls are already verified to succeed during the code path
> from
> > +     * 'conn_key_extract()' which calls
> > +     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
> > +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> > +        extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)
> inner_l3) - pad,
> >                          &inner_l4, false);
> > -        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> > -        pkt->l4_ofs += inner_l4 - (char *) icmp;
> > +    } else {
> > +        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)
> inner_l3) - pad,
> > +                        &inner_l4);
> > +    }
> > +    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
> > +    pkt->l4_ofs += inner_l4 - (char *) l4;
> >
> > -        if (conn->nat_action & NAT_ACTION_SRC) {
> > -            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> > -                                 conn->key.src.addr.ipv4);
> > -        } else if (conn->nat_action & NAT_ACTION_DST) {
> > -            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> > -                                 conn->key.dst.addr.ipv4);
> > -        }
> > +    /* Reverse the key for inner packet. */
> > +    conn_key_reverse(key);
>
> Isn't it more clear if we use a copy of the key, e.g.:
>
> struct conn_key rev_key = *key;
> conn_key_reverse(&rev_key);
>
> Then we don't need to re-reverse the key at the end of this function.
> Performance wise I think there's no impact eitherway.
>

You are right it might be less confusing and less error prone if anything
will change within this area.


>
> >
> > -        reverse_pat_packet(pkt, conn);
> > +    pat_packet(pkt, key);
> > +
> > +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> > +        nat_packet_ipv4(pkt, key, nat_action);
> > +
> > +        struct icmp_header *icmp = (struct icmp_header *) l4;
> >          icmp->icmp_csum = 0;
> >          icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
> >      } else {
> > -        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> > -        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
> > -        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
> > -            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
> > -        /* This call is already verified to succeed during the code
> path from
> > -         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
> > -        extract_l3_ipv6(&inner_key, inner_l3_6,
> > -                        tail - ((char *)inner_l3_6) - pad,
> > -                        &inner_l4);
> > -        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> > -        pkt->l4_ofs += inner_l4 - (char *) icmp6;
> > -
> > -        if (conn->nat_action & NAT_ACTION_SRC) {
> > -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> > -                                 inner_l3_6->ip6_src.be32,
> > -                                 &conn->key.src.addr.ipv6, true);
> > -        } else if (conn->nat_action & NAT_ACTION_DST) {
> > -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> > -                                 inner_l3_6->ip6_dst.be32,
> > -                                 &conn->key.dst.addr.ipv6, true);
> > -        }
> > -        reverse_pat_packet(pkt, conn);
> > +        nat_packet_ipv6(pkt, key, nat_action);
> > +
> > +        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *)
> l4;
> >          icmp6->icmp6_base.icmp6_cksum = 0;
> > -        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6,
> icmp6,
> > -            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
> > +        icmp6->icmp6_base.icmp6_cksum =
> > +            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
> > +                                    tail - (char *) icmp6 - pad);
> >      }
> > +
> > +    /* Return the key and offset back. */
> > +    conn_key_reverse(key);
> >      pkt->l3_ofs = orig_l3_ofs;
> >      pkt->l4_ofs = orig_l4_ofs;
> >  }
> >
> >  static void
> > -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> > -              bool related)
> > +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool
> related)
> >  {
> > -    if (conn->nat_action & NAT_ACTION_SRC) {
> > -        pkt->md.ct_state |= CS_DST_NAT;
> > -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -            struct ip_header *nh = dp_packet_l3(pkt);
> > -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> > -                                 conn->key.src.addr.ipv4);
> > -        } else {
> > -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> > -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> > -                                 nh6->ip6_dst.be32,
> > -                                 &conn->key.src.addr.ipv6, true);
> > -        }
> > +    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
> > +    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
> > +                                : conn->nat_action;
> >
> > -        if (OVS_UNLIKELY(related)) {
> > -            reverse_nat_packet(pkt, conn);
> > -        } else {
> > -            un_pat_packet(pkt, conn);
> > -        }
> > -    } else if (conn->nat_action & NAT_ACTION_DST) {
> > +    /* Update ct_state. */
> > +    if (nat_action & NAT_ACTION_SRC) {
> >          pkt->md.ct_state |= CS_SRC_NAT;
> > -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -            struct ip_header *nh = dp_packet_l3(pkt);
> > -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> > -                                 conn->key.dst.addr.ipv4);
> > -        } else {
> > -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> > -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> > -                                 nh6->ip6_src.be32,
> > -                                 &conn->key.dst.addr.ipv6, true);
> > -        }
> > +    } else if (nat_action & NAT_ACTION_DST) {
> > +        pkt->md.ct_state |= CS_DST_NAT;
> > +    }
> >
> > +    /* Reverse the key for outer header. */
> > +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> > +        nat_packet_ipv4(pkt, key, nat_action);
> > +    } else {
> > +        nat_packet_ipv6(pkt, key, nat_action);
> > +    }
> > +
> > +    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
> >          if (OVS_UNLIKELY(related)) {
> > -            reverse_nat_packet(pkt, conn);
> > +            nat_action  = nat_action_reverse(conn->nat_action);
>
> Nit: one space too many.
>

Interesting I can see it aligned, not sure why the diff here shows it with
extra space.


>
> > +            nat_inner_packet(pkt, key, nat_action);
> >          } else {
> > -            un_pat_packet(pkt, conn);
> > +            pat_packet(pkt, key);
> >          }
> >      }
> >  }
> > @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> >                  memcpy(nc, nat_conn, sizeof *nc);
> >              }
> >
> > -            nat_packet(pkt, nc, ctx->icmp_related);
> > +            nat_packet(pkt, nc, false, ctx->icmp_related);
> >              memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
> >              memcpy(&nat_conn->rev_key, &nc->key, sizeof
> nat_conn->rev_key);
> >              nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> > @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn
> *conn,
> >          if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
> >              pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
> >          }
> > -        if (reply) {
> > -            un_nat_packet(pkt, conn, related);
> > -        } else {
> > -            nat_packet(pkt, conn, related);
> > -        }
> > +
> > +        nat_packet(pkt, conn, reply, related);
> >      }
> >  }
> >
> > diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> > index 503455cc6..fe084e35a 100644
> > --- a/tests/system-traffic.at
> > +++ b/tests/system-traffic.at
> > @@ -7130,6 +7130,72 @@
> recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f
> >  OVS_TRAFFIC_VSWITCHD_STOP
> >  AT_CLEANUP
> >
> > +AT_SETUP([conntrack - ICMP from different source related with NAT])
>
> Thanks for adding a test!
>
> > +AT_SKIP_IF([test $HAVE_NC = no])
> > +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> > +CHECK_CONNTRACK()
> > +CHECK_CONNTRACK_NAT()
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +
> > +ADD_NAMESPACES(client, server)
> > +
> > +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
> > +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
> > +
> > +dnl Send traffic from client to CT, do DNAT if the traffic is new
> otherwise send it to server
> > +AT_DATA([flows.txt], [dnl
> > +table=0,ip,ct_state=-trk,actions=ct(table=1)
>
> We didn't send to conntrack before, it feels a bit weird to match on
> ct_state (even if we match on -trk).
>
> Which makes me wonder, shouldn't we actually try to un-nat too?  And, to
> make it more resilient we should probably use a non-default zone, to
> avoid existing conntrack entries in the defauly kernel zone.
>
> So, to be pedantic, I think this should be changed to something like:
>
> table=0,ip,actions=ct(table=1,zone=42,nat)
>

Ack, it definitely shouldn't hurt, added in v5.


>
> >
> +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,nat(dst(192.168.10.20))
> >
> +table=1,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=ct(commit,table=2,nat)
> > +table=1,ip,actions=resubmit(,2)
> >
> +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
> >
> +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server
> >
> +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
> > +])
> > +
> > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> > +
> > +rm server.pcap
> > +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
> > +sleep 1
> > +
> > +dnl Send UDP client->server
> > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
> >
> +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"])
> > +dnl Send UDP response server->client
> > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
> >
> +packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"])
> > +dnl Fake router sending ICMP need frag
> > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
> >
> +packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\
> > +actions=resubmit(,0)"
> > +])
> > +
> > +AT_CHECK([ovs-appctl revalidator/purge], [0])
> > +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort
> ], [0], [dnl
> > + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip
> actions=ct(table=1)
> > + table=1, n_packets=1, n_bytes=42, reset_counts
> ct_state=+new+trk,ip,in_port=1
> actions=ct(commit,table=2,nat(dst=192.168.10.20))
> > + table=1, n_packets=1, n_bytes=42, reset_counts ip actions=resubmit(,2)
> > + table=1, n_packets=1, n_bytes=70, reset_counts
> ct_state=+rel+trk,icmp,in_port=1 actions=ct(commit,table=2,nat)
> > + table=2, n_packets=1, n_bytes=42, reset_counts
> ct_state=+new+trk,ip,in_port=1 actions=output:2
> > + table=2, n_packets=1, n_bytes=42, reset_counts
> ct_state=+rpl+trk,ip,in_port=2 actions=output:1
> > + table=2, n_packets=1, n_bytes=70, reset_counts
> ct_state=+rel+trk,icmp,in_port=1 actions=output:2
> > +OFPST_FLOW reply (OF1.5):
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0],
> [dnl
> >
> +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
> > +])
> > +
> > +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
> > +
> > +AT_CHECK([ovs-pcap server.pcap | grep 000000001020000000002000], [0],
> [dnl
> >
> +000000001020000000002000080045000038011f0000ff011b40c0a81401c0a80a140304f778000005784500001c000040000a11d162c0a80a14c0a8140a0002000100080000
> > +])
> > +
> > +OVS_TRAFFIC_VSWITCHD_STOP
> > +AT_CLEANUP
> > +
> >  AT_BANNER([IGMP])
> >
> >  AT_SETUP([IGMP - flood under normal action])
>
> Regards,
> Dumitru
>
>
Thanks,
Ales
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 550b2be9b..9a9959efc 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -764,109 +764,59 @@  handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
 }
 
 static void
-pat_packet(struct dp_packet *pkt, const struct conn *conn)
+pat_packet(struct dp_packet *pkt, const struct conn_key *key)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            struct tcp_header *th = dp_packet_l4(pkt);
-            packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            struct udp_header *uh = dp_packet_l4(pkt);
-            packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
-                                conn->rev_key.src.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            packet_set_udp_port(pkt, conn->rev_key.dst.port,
-                                conn->rev_key.src.port);
-        }
+    if (key->nw_proto == IPPROTO_TCP) {
+        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
+    } else if (key->nw_proto == IPPROTO_UDP) {
+        packet_set_udp_port(pkt, key->dst.port, key->src.port);
     }
 }
 
-static void
-nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
+static uint16_t
+nat_action_reverse(uint16_t nat_action)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        pkt->md.ct_state |= CS_SRC_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_src,
-                                 conn->rev_key.dst.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_src.be32,
-                                 &conn->rev_key.dst.addr.ipv6, true);
-        }
-        if (!related) {
-            pat_packet(pkt, conn);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        pkt->md.ct_state |= CS_DST_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_dst,
-                                 conn->rev_key.src.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_dst.be32,
-                                 &conn->rev_key.src.addr.ipv6, true);
-        }
-        if (!related) {
-            pat_packet(pkt, conn);
-        }
+    if (nat_action & NAT_ACTION_SRC) {
+        nat_action ^= NAT_ACTION_SRC;
+        nat_action |= NAT_ACTION_DST;
+    } else if (nat_action & NAT_ACTION_DST) {
+        nat_action ^= NAT_ACTION_DST;
+        nat_action |= NAT_ACTION_SRC;
     }
+    return nat_action;
 }
 
 static void
-un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
+nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
+                uint16_t nat_action)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            struct tcp_header *th = dp_packet_l4(pkt);
-            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            struct udp_header *uh = dp_packet_l4(pkt);
-            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            packet_set_udp_port(pkt, conn->key.dst.port, conn->key.src.port);
-        }
+    struct ip_header *nh = dp_packet_l3(pkt);
+
+    if (nat_action & NAT_ACTION_SRC) {
+        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
+    } else if (nat_action & NAT_ACTION_DST) {
+        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
     }
 }
 
 static void
-reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
+nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
+                uint16_t nat_action)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            struct tcp_header *th_in = dp_packet_l4(pkt);
-            packet_set_tcp_port(pkt, conn->key.src.port,
-                                th_in->tcp_dst);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            struct udp_header *uh_in = dp_packet_l4(pkt);
-            packet_set_udp_port(pkt, conn->key.src.port,
-                                uh_in->udp_dst);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            packet_set_tcp_port(pkt, conn->key.src.port,
-                                conn->key.dst.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            packet_set_udp_port(pkt, conn->key.src.port,
-                                conn->key.dst.port);
-        }
+    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
+
+    if (nat_action & NAT_ACTION_SRC) {
+        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
+                             &key->dst.addr.ipv6, true);
+    } else if (nat_action & NAT_ACTION_DST) {
+        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
+                             &key->src.addr.ipv6, true);
     }
 }
 
 static void
-reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
+nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
+                 uint16_t nat_action)
 {
     char *tail = dp_packet_tail(pkt);
     uint16_t pad = dp_packet_l2_pad_size(pkt);
@@ -875,98 +825,77 @@  reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
     uint16_t orig_l3_ofs = pkt->l3_ofs;
     uint16_t orig_l4_ofs = pkt->l4_ofs;
 
-    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-        struct ip_header *nh = dp_packet_l3(pkt);
-        struct icmp_header *icmp = dp_packet_l4(pkt);
-        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
-        /* This call is already verified to succeed during the code path from
-         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
-        extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad,
+    void *l3 = dp_packet_l3(pkt);
+    void *l4 = dp_packet_l4(pkt);
+    void *inner_l3 = (char *) l4 + 8;
+
+    /* These calls are already verified to succeed during the code path from
+     * 'conn_key_extract()' which calls
+     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad,
                         &inner_l4, false);
-        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
-        pkt->l4_ofs += inner_l4 - (char *) icmp;
+    } else {
+        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad,
+                        &inner_l4);
+    }
+    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
+    pkt->l4_ofs += inner_l4 - (char *) l4;
 
-        if (conn->nat_action & NAT_ACTION_SRC) {
-            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
-                                 conn->key.src.addr.ipv4);
-        } else if (conn->nat_action & NAT_ACTION_DST) {
-            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
-                                 conn->key.dst.addr.ipv4);
-        }
+    /* Reverse the key for inner packet. */
+    conn_key_reverse(key);
 
-        reverse_pat_packet(pkt, conn);
+    pat_packet(pkt, key);
+
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        nat_packet_ipv4(pkt, key, nat_action);
+
+        struct icmp_header *icmp = (struct icmp_header *) l4;
         icmp->icmp_csum = 0;
         icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
     } else {
-        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
-        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
-            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
-        /* This call is already verified to succeed during the code path from
-         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
-        extract_l3_ipv6(&inner_key, inner_l3_6,
-                        tail - ((char *)inner_l3_6) - pad,
-                        &inner_l4);
-        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
-        pkt->l4_ofs += inner_l4 - (char *) icmp6;
-
-        if (conn->nat_action & NAT_ACTION_SRC) {
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 inner_l3_6->ip6_src.be32,
-                                 &conn->key.src.addr.ipv6, true);
-        } else if (conn->nat_action & NAT_ACTION_DST) {
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 inner_l3_6->ip6_dst.be32,
-                                 &conn->key.dst.addr.ipv6, true);
-        }
-        reverse_pat_packet(pkt, conn);
+        nat_packet_ipv6(pkt, key, nat_action);
+
+        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4;
         icmp6->icmp6_base.icmp6_cksum = 0;
-        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6,
-            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
+        icmp6->icmp6_base.icmp6_cksum =
+            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
+                                    tail - (char *) icmp6 - pad);
     }
+
+    /* Return the key and offset back. */
+    conn_key_reverse(key);
     pkt->l3_ofs = orig_l3_ofs;
     pkt->l4_ofs = orig_l4_ofs;
 }
 
 static void
-un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
-              bool related)
+nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        pkt->md.ct_state |= CS_DST_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_dst,
-                                 conn->key.src.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_dst.be32,
-                                 &conn->key.src.addr.ipv6, true);
-        }
+    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
+    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
+                                : conn->nat_action;
 
-        if (OVS_UNLIKELY(related)) {
-            reverse_nat_packet(pkt, conn);
-        } else {
-            un_pat_packet(pkt, conn);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
+    /* Update ct_state. */
+    if (nat_action & NAT_ACTION_SRC) {
         pkt->md.ct_state |= CS_SRC_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_src,
-                                 conn->key.dst.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_src.be32,
-                                 &conn->key.dst.addr.ipv6, true);
-        }
+    } else if (nat_action & NAT_ACTION_DST) {
+        pkt->md.ct_state |= CS_DST_NAT;
+    }
 
+    /* Reverse the key for outer header. */
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        nat_packet_ipv4(pkt, key, nat_action);
+    } else {
+        nat_packet_ipv6(pkt, key, nat_action);
+    }
+
+    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
         if (OVS_UNLIKELY(related)) {
-            reverse_nat_packet(pkt, conn);
+            nat_action  = nat_action_reverse(conn->nat_action);
+            nat_inner_packet(pkt, key, nat_action);
         } else {
-            un_pat_packet(pkt, conn);
+            pat_packet(pkt, key);
         }
     }
 }
@@ -1082,7 +1011,7 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
                 memcpy(nc, nat_conn, sizeof *nc);
             }
 
-            nat_packet(pkt, nc, ctx->icmp_related);
+            nat_packet(pkt, nc, false, ctx->icmp_related);
             memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
             memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
             nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
@@ -1185,11 +1114,8 @@  handle_nat(struct dp_packet *pkt, struct conn *conn,
         if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
             pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
         }
-        if (reply) {
-            un_nat_packet(pkt, conn, related);
-        } else {
-            nat_packet(pkt, conn, related);
-        }
+
+        nat_packet(pkt, conn, reply, related);
     }
 }
 
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 503455cc6..fe084e35a 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -7130,6 +7130,72 @@  recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([conntrack - ICMP from different source related with NAT])
+AT_SKIP_IF([test $HAVE_NC = no])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(client, server)
+
+ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
+ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
+
+dnl Send traffic from client to CT, do DNAT if the traffic is new otherwise send it to server
+AT_DATA([flows.txt], [dnl
+table=0,ip,ct_state=-trk,actions=ct(table=1)
+table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,nat(dst(192.168.10.20))
+table=1,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=ct(commit,table=2,nat)
+table=1,ip,actions=resubmit(,2)
+table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
+table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server
+table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+rm server.pcap
+OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
+sleep 1
+
+dnl Send UDP client->server
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
+packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"])
+dnl Send UDP response server->client
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
+packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"])
+dnl Fake router sending ICMP need frag
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
+packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\
+actions=resubmit(,0)"
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0])
+AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl
+ n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip actions=ct(table=1)
+ table=1, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=ct(commit,table=2,nat(dst=192.168.10.20))
+ table=1, n_packets=1, n_bytes=42, reset_counts ip actions=resubmit(,2)
+ table=1, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=ct(commit,table=2,nat)
+ table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2
+ table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1
+ table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2
+OFPST_FLOW reply (OF1.5):
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl
+udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
+])
+
+OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
+
+AT_CHECK([ovs-pcap server.pcap | grep 000000001020000000002000], [0], [dnl
+000000001020000000002000080045000038011f0000ff011b40c0a81401c0a80a140304f778000005784500001c000040000a11d162c0a80a14c0a8140a0002000100080000
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_BANNER([IGMP])
 
 AT_SETUP([IGMP - flood under normal action])