diff mbox series

[ovs-dev] conntrack: Refactor nat handling functions

Message ID 20221027091413.173848-1-amusil@redhat.com
State Superseded
Headers show
Series [ovs-dev] conntrack: Refactor nat handling functions | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Ales Musil Oct. 27, 2022, 9:14 a.m. UTC
In order to support NAT of inner packet
for ICMP related traffic refactor the nat
functions. This fixes the issue that the
NAT was not performed on inner header in orig
direction and avoids some code duplication.

Reported-at: https://bugzilla.redhat.com/2120546
Signed-off-by: Ales Musil <amusil@redhat.com>
---
 lib/conntrack.c         | 250 ++++++++++++++--------------------------
 tests/system-traffic.at |  67 +++++++++++
 2 files changed, 155 insertions(+), 162 deletions(-)

Comments

0-day Robot Oct. 27, 2022, 9:19 a.m. UTC | #1
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate spacing around cast
#190 FILE: lib/conntrack.c:839:
        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad,

Lines checked: 421, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Ales Musil Oct. 27, 2022, 11:09 a.m. UTC | #2
On Thu, Oct 27, 2022 at 11:14 AM Ales Musil <amusil@redhat.com> wrote:

> In order to support NAT of inner packet
> for ICMP related traffic refactor the nat
> functions. This fixes the issue that the
> NAT was not performed on inner header in orig
> direction and avoids some code duplication.
>
> Reported-at: https://bugzilla.redhat.com/2120546
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
>  lib/conntrack.c         | 250 ++++++++++++++--------------------------
>  tests/system-traffic.at |  67 +++++++++++
>  2 files changed, 155 insertions(+), 162 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 13c5ab628..b8b9f9c49 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const struct
> conn_lookup_ctx *ctx,
>  }
>
>  static void
> -pat_packet(struct dp_packet *pkt, const struct conn *conn)
> +pat_packet(struct dp_packet *pkt, const struct conn_key *key)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            struct tcp_header *th = dp_packet_l4(pkt);
> -            packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            struct udp_header *uh = dp_packet_l4(pkt);
> -            packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
> -                                conn->rev_key.src.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
> -                                conn->rev_key.src.port);
> -        }
> +    if (key->nw_proto == IPPROTO_TCP) {
> +        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
> +    } else if (key->nw_proto == IPPROTO_UDP) {
> +        packet_set_udp_port(pkt, key->dst.port, key->src.port);
>      }
>  }
>
> -static void
> -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> +static uint16_t
> +nat_action_reverse(uint16_t nat_action)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        pkt->md.ct_state |= CS_SRC_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> -                                 conn->rev_key.dst.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_src.be32,
> -                                 &conn->rev_key.dst.addr.ipv6, true);
> -        }
> -        if (!related) {
> -            pat_packet(pkt, conn);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        pkt->md.ct_state |= CS_DST_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> -                                 conn->rev_key.src.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_dst.be32,
> -                                 &conn->rev_key.src.addr.ipv6, true);
> -        }
> -        if (!related) {
> -            pat_packet(pkt, conn);
> -        }
> +    if (nat_action & NAT_ACTION_SRC) {
> +        nat_action ^= NAT_ACTION_SRC;
> +        nat_action |= NAT_ACTION_DST;
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        nat_action ^= NAT_ACTION_DST;
> +        nat_action |= NAT_ACTION_SRC;
>      }
> +    return nat_action;
>  }
>
>  static void
> -un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
> +                uint16_t nat_action)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            struct tcp_header *th = dp_packet_l4(pkt);
> -            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            struct udp_header *uh = dp_packet_l4(pkt);
> -            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            packet_set_tcp_port(pkt, conn->key.dst.port,
> conn->key.src.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            packet_set_udp_port(pkt, conn->key.dst.port,
> conn->key.src.port);
> -        }
> +    struct ip_header *nh = dp_packet_l3(pkt);
> +
> +    if (nat_action & NAT_ACTION_SRC) {
> +        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
>      }
>  }
>
>  static void
> -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
> +                uint16_t nat_action)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            struct tcp_header *th_in = dp_packet_l4(pkt);
> -            packet_set_tcp_port(pkt, conn->key.src.port,
> -                                th_in->tcp_dst);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            struct udp_header *uh_in = dp_packet_l4(pkt);
> -            packet_set_udp_port(pkt, conn->key.src.port,
> -                                uh_in->udp_dst);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> -        if (conn->key.nw_proto == IPPROTO_TCP) {
> -            packet_set_tcp_port(pkt, conn->key.src.port,
> -                                conn->key.dst.port);
> -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> -            packet_set_udp_port(pkt, conn->key.src.port,
> -                                conn->key.dst.port);
> -        }
> +    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> +
> +    if (nat_action & NAT_ACTION_SRC) {
> +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
> +                             &key->dst.addr.ipv6, true);
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
> +                             &key->src.addr.ipv6, true);
>      }
>  }
>
>  static void
> -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
> +                 uint16_t nat_action)
>  {
>      char *tail = dp_packet_tail(pkt);
>      uint16_t pad = dp_packet_l2_pad_size(pkt);
> @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, const
> struct conn *conn)
>      uint16_t orig_l3_ofs = pkt->l3_ofs;
>      uint16_t orig_l4_ofs = pkt->l4_ofs;
>
> -    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -        struct ip_header *nh = dp_packet_l3(pkt);
> -        struct icmp_header *icmp = dp_packet_l4(pkt);
> -        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
> -        /* This call is already verified to succeed during the code path
> from
> -         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
> +    void *l3 = dp_packet_l3(pkt);
> +    void *l4 = dp_packet_l4(pkt);
> +    void *inner_l3 = (char *) l4 + 8;
> +
> +    /* These calls are already verified to succeed during the code path
> from
> +     * 'conn_key_extract()' which calls
> +     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
> +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>          extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) -
> pad,
>                          &inner_l4, false);
> -        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> -        pkt->l4_ofs += inner_l4 - (char *) icmp;
> +    } else {
> +        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)inner_l3) -
> pad,
> +                        &inner_l4);
> +    }
> +    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
> +    pkt->l4_ofs += inner_l4 - (char *) l4;
>
> -        if (conn->nat_action & NAT_ACTION_SRC) {
> -            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> -                                 conn->key.src.addr.ipv4);
> -        } else if (conn->nat_action & NAT_ACTION_DST) {
> -            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> -                                 conn->key.dst.addr.ipv4);
> -        }
> +    /* Reverse the key for inner packet. */
> +    conn_key_reverse(key);
>
> -        reverse_pat_packet(pkt, conn);
> +    pat_packet(pkt, key);
> +
> +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> +        nat_packet_ipv4(pkt, key, nat_action);
> +
> +        struct icmp_header *icmp = (struct icmp_header *) l4;
>          icmp->icmp_csum = 0;
>          icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
>      } else {
> -        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
> -        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
> -            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
> -        /* This call is already verified to succeed during the code path
> from
> -         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
> -        extract_l3_ipv6(&inner_key, inner_l3_6,
> -                        tail - ((char *)inner_l3_6) - pad,
> -                        &inner_l4);
> -        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> -        pkt->l4_ofs += inner_l4 - (char *) icmp6;
> -
> -        if (conn->nat_action & NAT_ACTION_SRC) {
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 inner_l3_6->ip6_src.be32,
> -                                 &conn->key.src.addr.ipv6, true);
> -        } else if (conn->nat_action & NAT_ACTION_DST) {
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 inner_l3_6->ip6_dst.be32,
> -                                 &conn->key.dst.addr.ipv6, true);
> -        }
> -        reverse_pat_packet(pkt, conn);
> +        nat_packet_ipv6(pkt, key, nat_action);
> +
> +        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4;
>          icmp6->icmp6_base.icmp6_cksum = 0;
> -        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6,
> icmp6,
> -            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
> +        icmp6->icmp6_base.icmp6_cksum =
> +            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
> +                                    tail - (char *) icmp6 - pad);
>      }
> +
> +    /* Return the key and offset back. */
> +    conn_key_reverse(key);
>      pkt->l3_ofs = orig_l3_ofs;
>      pkt->l4_ofs = orig_l4_ofs;
>  }
>
>  static void
> -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> -              bool related)
> +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool
> related)
>  {
> -    if (conn->nat_action & NAT_ACTION_SRC) {
> -        pkt->md.ct_state |= CS_DST_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> -                                 conn->key.src.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_dst.be32,
> -                                 &conn->key.src.addr.ipv6, true);
> -        }
> +    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
> +    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
> +                                : conn->nat_action;
>
> -        if (OVS_UNLIKELY(related)) {
> -            reverse_nat_packet(pkt, conn);
> -        } else {
> -            un_pat_packet(pkt, conn);
> -        }
> -    } else if (conn->nat_action & NAT_ACTION_DST) {
> +    /* Update ct_state. */
> +    if (nat_action & NAT_ACTION_SRC) {
>          pkt->md.ct_state |= CS_SRC_NAT;
> -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -            struct ip_header *nh = dp_packet_l3(pkt);
> -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> -                                 conn->key.dst.addr.ipv4);
> -        } else {
> -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> -                                 nh6->ip6_src.be32,
> -                                 &conn->key.dst.addr.ipv6, true);
> -        }
> +    } else if (nat_action & NAT_ACTION_DST) {
> +        pkt->md.ct_state |= CS_DST_NAT;
> +    }
>
> +    /* Reverse the key for outer header. */
> +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> +        nat_packet_ipv4(pkt, key, nat_action);
> +    } else {
> +        nat_packet_ipv6(pkt, key, nat_action);
> +    }
> +
> +    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
>          if (OVS_UNLIKELY(related)) {
> -            reverse_nat_packet(pkt, conn);
> +            nat_action  = nat_action_reverse(conn->nat_action);
> +            nat_inner_packet(pkt, key, nat_action);
>          } else {
> -            un_pat_packet(pkt, conn);
> +            pat_packet(pkt, key);
>          }
>      }
>  }
> @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
>                  memcpy(nc, nat_conn, sizeof *nc);
>              }
>
> -            nat_packet(pkt, nc, ctx->icmp_related);
> +            nat_packet(pkt, nc, false, ctx->icmp_related);
>              memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
>              memcpy(&nat_conn->rev_key, &nc->key, sizeof
> nat_conn->rev_key);
>              nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn *conn,
>          if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
>              pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
>          }
> -        if (reply) {
> -            un_nat_packet(pkt, conn, related);
> -        } else {
> -            nat_packet(pkt, conn, related);
> -        }
> +
> +        nat_packet(pkt, conn, reply, related);
>      }
>  }
>
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 731de439c..deb95eb49 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -6942,6 +6942,73 @@
> recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>
> +AT_SETUP([conntrack - ICMP from different source related with NAT])
> +AT_SKIP_IF([test $HAVE_NC = no])
> +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_NAT()
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_NAMESPACES(client, server)
> +
> +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
> +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
> +
> +dnl Send traffic from client to CT, do DNAT if the traffic is new
> otherwise send it to server
> +AT_DATA([flows.txt], [dnl
> +table=0,ip,ct_state=-trk,actions=ct(table=1,nat)
>
> +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,nat(dst(192.168.10.20))
> +table=1,ip,actions=resubmit(,2)
> +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
>
> +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server
> +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
> +])
> +
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +rm server.pcap
> +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
> +sleep 1
> +
> +dnl Send UDP client->server
> +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
>
> +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"])
> +dnl Send UDP response server->client
> +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
>
> +packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"])
> +dnl Fake router sending ICMP need frag
> +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
>
> +packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\
> +actions=resubmit(,0)"
> +])
> +
> +AT_CHECK([ovs-appctl revalidator/purge], [0])
> +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ],
> [0], [dnl
> + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip
> actions=ct(table=1,nat)
> + table=1, n_packets=1, n_bytes=42, reset_counts
> ct_state=+new+trk,ip,in_port=1
> actions=ct(commit,table=2,nat(dst=192.168.10.20))
> + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2)
> + table=2, n_packets=1, n_bytes=42, reset_counts
> ct_state=+new+trk,ip,in_port=1 actions=output:2
> + table=2, n_packets=1, n_bytes=42, reset_counts
> ct_state=+rpl+trk,ip,in_port=2 actions=output:1
> + table=2, n_packets=1, n_bytes=70, reset_counts
> ct_state=+rel+trk,icmp,in_port=1 actions=output:2
> +OFPST_FLOW reply (OF1.5):
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0],
> [dnl
>
> +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
> +])
> +
> +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
> +
> +AT_CHECK([tcpdump -tvnnne "icmp" -r server.pcap 2>/dev/null], [0], [dnl
> +00:00:00:00:20:00 > 00:00:00:00:10:20, ethertype IPv4 (0x0800), length
> 70: (tos 0x0, ttl 255, id 287, offset 0, flags [[none]], proto ICMP (1),
> length 56)
> +    192.168.20.1 > 192.168.10.20: ICMP 192.168.20.10 unreachable - need
> to frag (mtu 1400), length 36
> +    (tos 0x0, ttl 10, id 0, offset 0, flags [[DF]], proto UDP (17),
> length 28)
> +    192.168.10.20.2 > 192.168.20.10.1: UDP, length 0
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_BANNER([IGMP])
>
>  AT_SETUP([IGMP - flood under normal action])
> --
> 2.37.3
>
>
Ah there is an issue with the test spaces vs tabs. I'll fix that in v2 same
with the 0-day error.

Thanks,
Ales
Paolo Valerio Oct. 27, 2022, 11:17 a.m. UTC | #3
Ales Musil <amusil@redhat.com> writes:

> On Thu, Oct 27, 2022 at 11:14 AM Ales Musil <amusil@redhat.com> wrote:
>
>     In order to support NAT of inner packet
>     for ICMP related traffic refactor the nat
>     functions. This fixes the issue that the
>     NAT was not performed on inner header in orig
>     direction and avoids some code duplication.
>
>     Reported-at: https://bugzilla.redhat.com/2120546
>     Signed-off-by: Ales Musil <amusil@redhat.com>
>     ---
>      lib/conntrack.c         | 250 ++++++++++++++--------------------------
>      tests/system-traffic.at |  67 +++++++++++
>      2 files changed, 155 insertions(+), 162 deletions(-)
>
>     diff --git a/lib/conntrack.c b/lib/conntrack.c
>     index 13c5ab628..b8b9f9c49 100644
>     --- a/lib/conntrack.c
>     +++ b/lib/conntrack.c
>     @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const struct
>     conn_lookup_ctx *ctx,
>      }
>
>      static void
>     -pat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +pat_packet(struct dp_packet *pkt, const struct conn_key *key)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            struct tcp_header *th = dp_packet_l4(pkt);
>     -            packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            struct udp_header *uh = dp_packet_l4(pkt);
>     -            packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
>     -                                conn->rev_key.src.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
>     -                                conn->rev_key.src.port);
>     -        }
>     +    if (key->nw_proto == IPPROTO_TCP) {
>     +        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
>     +    } else if (key->nw_proto == IPPROTO_UDP) {
>     +        packet_set_udp_port(pkt, key->dst.port, key->src.port);
>          }
>      }
>
>     -static void
>     -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
>     +static uint16_t
>     +nat_action_reverse(uint16_t nat_action)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        pkt->md.ct_state |= CS_SRC_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_src,
>     -                                 conn->rev_key.dst.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_src.be32,
>     -                                 &conn->rev_key.dst.addr.ipv6, true);
>     -        }
>     -        if (!related) {
>     -            pat_packet(pkt, conn);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        pkt->md.ct_state |= CS_DST_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
>     -                                 conn->rev_key.src.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_dst.be32,
>     -                                 &conn->rev_key.src.addr.ipv6, true);
>     -        }
>     -        if (!related) {
>     -            pat_packet(pkt, conn);
>     -        }
>     +    if (nat_action & NAT_ACTION_SRC) {
>     +        nat_action ^= NAT_ACTION_SRC;
>     +        nat_action |= NAT_ACTION_DST;
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        nat_action ^= NAT_ACTION_DST;
>     +        nat_action |= NAT_ACTION_SRC;
>          }
>     +    return nat_action;
>      }
>
>      static void
>     -un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
>     +                uint16_t nat_action)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            struct tcp_header *th = dp_packet_l4(pkt);
>     -            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            struct udp_header *uh = dp_packet_l4(pkt);
>     -            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            packet_set_tcp_port(pkt, conn->key.dst.port, conn->
>     key.src.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            packet_set_udp_port(pkt, conn->key.dst.port, conn->
>     key.src.port);
>     -        }
>     +    struct ip_header *nh = dp_packet_l3(pkt);
>     +
>     +    if (nat_action & NAT_ACTION_SRC) {
>     +        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
>          }
>      }
>
>      static void
>     -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
>     +                uint16_t nat_action)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            struct tcp_header *th_in = dp_packet_l4(pkt);
>     -            packet_set_tcp_port(pkt, conn->key.src.port,
>     -                                th_in->tcp_dst);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            struct udp_header *uh_in = dp_packet_l4(pkt);
>     -            packet_set_udp_port(pkt, conn->key.src.port,
>     -                                uh_in->udp_dst);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            packet_set_tcp_port(pkt, conn->key.src.port,
>     -                                conn->key.dst.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            packet_set_udp_port(pkt, conn->key.src.port,
>     -                                conn->key.dst.port);
>     -        }
>     +    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     +
>     +    if (nat_action & NAT_ACTION_SRC) {
>     +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
>     +                             &key->dst.addr.ipv6, true);
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
>     +                             &key->src.addr.ipv6, true);
>          }
>      }
>
>      static void
>     -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
>     +                 uint16_t nat_action)
>      {
>          char *tail = dp_packet_tail(pkt);
>          uint16_t pad = dp_packet_l2_pad_size(pkt);
>     @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, const
>     struct conn *conn)
>          uint16_t orig_l3_ofs = pkt->l3_ofs;
>          uint16_t orig_l4_ofs = pkt->l4_ofs;
>
>     -    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -        struct ip_header *nh = dp_packet_l3(pkt);
>     -        struct icmp_header *icmp = dp_packet_l4(pkt);
>     -        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
>     -        /* This call is already verified to succeed during the code path
>     from
>     -         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
>     +    void *l3 = dp_packet_l3(pkt);
>     +    void *l4 = dp_packet_l4(pkt);
>     +    void *inner_l3 = (char *) l4 + 8;
>     +
>     +    /* These calls are already verified to succeed during the code path
>     from
>     +     * 'conn_key_extract()' which calls
>     +     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
>     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>              extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) -
>     pad,
>                              &inner_l4, false);
>     -        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
>     -        pkt->l4_ofs += inner_l4 - (char *) icmp;
>     +    } else {
>     +        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)inner_l3) -
>     pad,
>     +                        &inner_l4);
>     +    }
>     +    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
>     +    pkt->l4_ofs += inner_l4 - (char *) l4;
>
>     -        if (conn->nat_action & NAT_ACTION_SRC) {
>     -            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
>     -                                 conn->key.src.addr.ipv4);
>     -        } else if (conn->nat_action & NAT_ACTION_DST) {
>     -            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
>     -                                 conn->key.dst.addr.ipv4);
>     -        }
>     +    /* Reverse the key for inner packet. */
>     +    conn_key_reverse(key);
>
>     -        reverse_pat_packet(pkt, conn);
>     +    pat_packet(pkt, key);
>     +
>     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>     +        nat_packet_ipv4(pkt, key, nat_action);
>     +
>     +        struct icmp_header *icmp = (struct icmp_header *) l4;
>              icmp->icmp_csum = 0;
>              icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
>          } else {
>     -        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
>     -        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
>     -            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
>     -        /* This call is already verified to succeed during the code path
>     from
>     -         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
>     -        extract_l3_ipv6(&inner_key, inner_l3_6,
>     -                        tail - ((char *)inner_l3_6) - pad,
>     -                        &inner_l4);
>     -        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
>     -        pkt->l4_ofs += inner_l4 - (char *) icmp6;
>     -
>     -        if (conn->nat_action & NAT_ACTION_SRC) {
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 inner_l3_6->ip6_src.be32,
>     -                                 &conn->key.src.addr.ipv6, true);
>     -        } else if (conn->nat_action & NAT_ACTION_DST) {
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 inner_l3_6->ip6_dst.be32,
>     -                                 &conn->key.dst.addr.ipv6, true);
>     -        }
>     -        reverse_pat_packet(pkt, conn);
>     +        nat_packet_ipv6(pkt, key, nat_action);
>     +
>     +        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4;
>              icmp6->icmp6_base.icmp6_cksum = 0;
>     -        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6,
>     icmp6,
>     -            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
>     +        icmp6->icmp6_base.icmp6_cksum =
>     +            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
>     +                                    tail - (char *) icmp6 - pad);
>          }
>     +
>     +    /* Return the key and offset back. */
>     +    conn_key_reverse(key);
>          pkt->l3_ofs = orig_l3_ofs;
>          pkt->l4_ofs = orig_l4_ofs;
>      }
>
>      static void
>     -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
>     -              bool related)
>     +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool
>     related)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        pkt->md.ct_state |= CS_DST_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
>     -                                 conn->key.src.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_dst.be32,
>     -                                 &conn->key.src.addr.ipv6, true);
>     -        }
>     +    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
>     +    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
>     +                                : conn->nat_action;
>
>     -        if (OVS_UNLIKELY(related)) {
>     -            reverse_nat_packet(pkt, conn);
>     -        } else {
>     -            un_pat_packet(pkt, conn);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     +    /* Update ct_state. */
>     +    if (nat_action & NAT_ACTION_SRC) {
>              pkt->md.ct_state |= CS_SRC_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_src,
>     -                                 conn->key.dst.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_src.be32,
>     -                                 &conn->key.dst.addr.ipv6, true);
>     -        }
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        pkt->md.ct_state |= CS_DST_NAT;
>     +    }
>
>     +    /* Reverse the key for outer header. */
>     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>     +        nat_packet_ipv4(pkt, key, nat_action);
>     +    } else {
>     +        nat_packet_ipv6(pkt, key, nat_action);
>     +    }
>     +
>     +    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
>              if (OVS_UNLIKELY(related)) {
>     -            reverse_nat_packet(pkt, conn);
>     +            nat_action  = nat_action_reverse(conn->nat_action);
>     +            nat_inner_packet(pkt, key, nat_action);
>              } else {
>     -            un_pat_packet(pkt, conn);
>     +            pat_packet(pkt, key);
>              }
>          }
>      }
>     @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet
>     *pkt,
>                      memcpy(nc, nat_conn, sizeof *nc);
>                  }
>
>     -            nat_packet(pkt, nc, ctx->icmp_related);
>     +            nat_packet(pkt, nc, false, ctx->icmp_related);
>                  memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
>                  memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->
>     rev_key);
>                  nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
>     @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn *conn,
>              if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
>                  pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
>              }
>     -        if (reply) {
>     -            un_nat_packet(pkt, conn, related);
>     -        } else {
>     -            nat_packet(pkt, conn, related);
>     -        }
>     +
>     +        nat_packet(pkt, conn, reply, related);
>          }
>      }
>
>     diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>     index 731de439c..deb95eb49 100644
>     --- a/tests/system-traffic.at
>     +++ b/tests/system-traffic.at
>     @@ -6942,6 +6942,73 @@ recirc_id(0),in_port(br-underlay),ct_state(+trk),eth
>     (src=f0:00:00:01:01:02,dst=f
>      OVS_TRAFFIC_VSWITCHD_STOP
>      AT_CLEANUP
>
>     +AT_SETUP([conntrack - ICMP from different source related with NAT])
>     +AT_SKIP_IF([test $HAVE_NC = no])
>     +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
>     +CHECK_CONNTRACK()
>     +CHECK_CONNTRACK_NAT()
>     +OVS_TRAFFIC_VSWITCHD_START()
>     +
>     +ADD_NAMESPACES(client, server)
>     +
>     +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
>     +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
>     +
>     +dnl Send traffic from client to CT, do DNAT if the traffic is new
>     otherwise send it to server
>     +AT_DATA([flows.txt], [dnl
>     +table=0,ip,ct_state=-trk,actions=ct(table=1,nat)
>     +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=
>     2,nat(dst(192.168.10.20))
>     +table=1,ip,actions=resubmit(,2)
>     +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
>     +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=
>     output:ovs-server
>     +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
>     +])
>     +
>     +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
>     +
>     +rm server.pcap
>     +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
>     +sleep 1
>     +
>     +dnl Send UDP client->server
>     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
>     +packet=
>     00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions
>     =resubmit(,0)"])
>     +dnl Send UDP response server->client
>     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
>     +packet=
>     00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions
>     =resubmit(,0)"])
>     +dnl Fake router sending ICMP need frag
>     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
>     +packet=
>     000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,
>     \
>     +actions=resubmit(,0)"
>     +])
>     +
>     +AT_CHECK([ovs-appctl revalidator/purge], [0])
>     +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ],
>     [0], [dnl
>     + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip actions=ct(table=
>     1,nat)
>     + table=1, n_packets=1, n_bytes=42, reset_counts ct_state=
>     +new+trk,ip,in_port=1 actions=ct(commit,table=2,nat(dst=192.168.10.20))
>     + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2)
>     + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=
>     +new+trk,ip,in_port=1 actions=output:2
>     + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=
>     +rpl+trk,ip,in_port=2 actions=output:1
>     + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=
>     +rel+trk,icmp,in_port=1 actions=output:2
>     +OFPST_FLOW reply (OF1.5):
>     +])
>     +
>     +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0],
>     [dnl
>     +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=
>     192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
>     +])
>     +
>     +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
>     +
>     +AT_CHECK([tcpdump -tvnnne "icmp" -r server.pcap 2>/dev/null], [0], [dnl
>     +00:00:00:00:20:00 > 00:00:00:00:10:20, ethertype IPv4 (0x0800), length 70:
>     (tos 0x0, ttl 255, id 287, offset 0, flags [[none]], proto ICMP (1), length
>     56)
>     +    192.168.20.1 > 192.168.10.20: ICMP 192.168.20.10 unreachable - need to
>     frag (mtu 1400), length 36
>     +    (tos 0x0, ttl 10, id 0, offset 0, flags [[DF]], proto UDP (17), length
>     28)
>     +    192.168.10.20.2 > 192.168.20.10.1: UDP, length 0
>     +])
>     +
>     +OVS_TRAFFIC_VSWITCHD_STOP
>     +AT_CLEANUP
>     +
>      AT_BANNER([IGMP])
>
>      AT_SETUP([IGMP - flood under normal action])
>     --
>     2.37.3
>
>
>
> Ah there is an issue with the test spaces vs tabs. I'll fix that in v2 same
> with the 0-day error.
>

Ales, I didn't review it, but while at it can you please double check
the Reported-at link? I was expecting another BZ.

> Thanks,
> Ales
>
> --
>
> Ales Musil
>
> Senior Software Engineer - OVN Core
>
> Red Hat EMEA
>
> amusil@redhat.com    IM: amusil
>
> [logo]
Ales Musil Oct. 27, 2022, 11:22 a.m. UTC | #4
On Thu, Oct 27, 2022 at 1:17 PM Paolo Valerio <pvalerio@redhat.com> wrote:

> Ales Musil <amusil@redhat.com> writes:
>
> > On Thu, Oct 27, 2022 at 11:14 AM Ales Musil <amusil@redhat.com> wrote:
> >
> >     In order to support NAT of inner packet
> >     for ICMP related traffic refactor the nat
> >     functions. This fixes the issue that the
> >     NAT was not performed on inner header in orig
> >     direction and avoids some code duplication.
> >
> >     Reported-at: https://bugzilla.redhat.com/2120546
> >     Signed-off-by: Ales Musil <amusil@redhat.com>
> >     ---
> >      lib/conntrack.c         | 250
> ++++++++++++++--------------------------
> >      tests/system-traffic.at |  67 +++++++++++
> >      2 files changed, 155 insertions(+), 162 deletions(-)
> >
> >     diff --git a/lib/conntrack.c b/lib/conntrack.c
> >     index 13c5ab628..b8b9f9c49 100644
> >     --- a/lib/conntrack.c
> >     +++ b/lib/conntrack.c
> >     @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const
> struct
> >     conn_lookup_ctx *ctx,
> >      }
> >
> >      static void
> >     -pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >     +pat_packet(struct dp_packet *pkt, const struct conn_key *key)
> >      {
> >     -    if (conn->nat_action & NAT_ACTION_SRC) {
> >     -        if (conn->key.nw_proto == IPPROTO_TCP) {
> >     -            struct tcp_header *th = dp_packet_l4(pkt);
> >     -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
> th->tcp_dst);
> >     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> >     -            struct udp_header *uh = dp_packet_l4(pkt);
> >     -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
> uh->udp_dst);
> >     -        }
> >     -    } else if (conn->nat_action & NAT_ACTION_DST) {
> >     -        if (conn->key.nw_proto == IPPROTO_TCP) {
> >     -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
> >     -                                conn->rev_key.src.port);
> >     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> >     -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
> >     -                                conn->rev_key.src.port);
> >     -        }
> >     +    if (key->nw_proto == IPPROTO_TCP) {
> >     +        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
> >     +    } else if (key->nw_proto == IPPROTO_UDP) {
> >     +        packet_set_udp_port(pkt, key->dst.port, key->src.port);
> >          }
> >      }
> >
> >     -static void
> >     -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool
> related)
> >     +static uint16_t
> >     +nat_action_reverse(uint16_t nat_action)
> >      {
> >     -    if (conn->nat_action & NAT_ACTION_SRC) {
> >     -        pkt->md.ct_state |= CS_SRC_NAT;
> >     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >     -            struct ip_header *nh = dp_packet_l3(pkt);
> >     -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> >     -                                 conn->rev_key.dst.addr.ipv4);
> >     -        } else {
> >     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> >     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >     -                                 nh6->ip6_src.be32,
> >     -                                 &conn->rev_key.dst.addr.ipv6,
> true);
> >     -        }
> >     -        if (!related) {
> >     -            pat_packet(pkt, conn);
> >     -        }
> >     -    } else if (conn->nat_action & NAT_ACTION_DST) {
> >     -        pkt->md.ct_state |= CS_DST_NAT;
> >     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >     -            struct ip_header *nh = dp_packet_l3(pkt);
> >     -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> >     -                                 conn->rev_key.src.addr.ipv4);
> >     -        } else {
> >     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> >     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >     -                                 nh6->ip6_dst.be32,
> >     -                                 &conn->rev_key.src.addr.ipv6,
> true);
> >     -        }
> >     -        if (!related) {
> >     -            pat_packet(pkt, conn);
> >     -        }
> >     +    if (nat_action & NAT_ACTION_SRC) {
> >     +        nat_action ^= NAT_ACTION_SRC;
> >     +        nat_action |= NAT_ACTION_DST;
> >     +    } else if (nat_action & NAT_ACTION_DST) {
> >     +        nat_action ^= NAT_ACTION_DST;
> >     +        nat_action |= NAT_ACTION_SRC;
> >          }
> >     +    return nat_action;
> >      }
> >
> >      static void
> >     -un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >     +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
> >     +                uint16_t nat_action)
> >      {
> >     -    if (conn->nat_action & NAT_ACTION_SRC) {
> >     -        if (conn->key.nw_proto == IPPROTO_TCP) {
> >     -            struct tcp_header *th = dp_packet_l4(pkt);
> >     -            packet_set_tcp_port(pkt, th->tcp_src,
> conn->key.src.port);
> >     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> >     -            struct udp_header *uh = dp_packet_l4(pkt);
> >     -            packet_set_udp_port(pkt, uh->udp_src,
> conn->key.src.port);
> >     -        }
> >     -    } else if (conn->nat_action & NAT_ACTION_DST) {
> >     -        if (conn->key.nw_proto == IPPROTO_TCP) {
> >     -            packet_set_tcp_port(pkt, conn->key.dst.port, conn->
> >     key.src.port);
> >     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> >     -            packet_set_udp_port(pkt, conn->key.dst.port, conn->
> >     key.src.port);
> >     -        }
> >     +    struct ip_header *nh = dp_packet_l3(pkt);
> >     +
> >     +    if (nat_action & NAT_ACTION_SRC) {
> >     +        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
> >     +    } else if (nat_action & NAT_ACTION_DST) {
> >     +        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
> >          }
> >      }
> >
> >      static void
> >     -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >     +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
> >     +                uint16_t nat_action)
> >      {
> >     -    if (conn->nat_action & NAT_ACTION_SRC) {
> >     -        if (conn->key.nw_proto == IPPROTO_TCP) {
> >     -            struct tcp_header *th_in = dp_packet_l4(pkt);
> >     -            packet_set_tcp_port(pkt, conn->key.src.port,
> >     -                                th_in->tcp_dst);
> >     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> >     -            struct udp_header *uh_in = dp_packet_l4(pkt);
> >     -            packet_set_udp_port(pkt, conn->key.src.port,
> >     -                                uh_in->udp_dst);
> >     -        }
> >     -    } else if (conn->nat_action & NAT_ACTION_DST) {
> >     -        if (conn->key.nw_proto == IPPROTO_TCP) {
> >     -            packet_set_tcp_port(pkt, conn->key.src.port,
> >     -                                conn->key.dst.port);
> >     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
> >     -            packet_set_udp_port(pkt, conn->key.src.port,
> >     -                                conn->key.dst.port);
> >     -        }
> >     +    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> >     +
> >     +    if (nat_action & NAT_ACTION_SRC) {
> >     +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
> >     +                             &key->dst.addr.ipv6, true);
> >     +    } else if (nat_action & NAT_ACTION_DST) {
> >     +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
> >     +                             &key->src.addr.ipv6, true);
> >          }
> >      }
> >
> >      static void
> >     -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >     +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
> >     +                 uint16_t nat_action)
> >      {
> >          char *tail = dp_packet_tail(pkt);
> >          uint16_t pad = dp_packet_l2_pad_size(pkt);
> >     @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, const
> >     struct conn *conn)
> >          uint16_t orig_l3_ofs = pkt->l3_ofs;
> >          uint16_t orig_l4_ofs = pkt->l4_ofs;
> >
> >     -    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >     -        struct ip_header *nh = dp_packet_l3(pkt);
> >     -        struct icmp_header *icmp = dp_packet_l4(pkt);
> >     -        struct ip_header *inner_l3 = (struct ip_header *) (icmp +
> 1);
> >     -        /* This call is already verified to succeed during the code
> path
> >     from
> >     -         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
> >     +    void *l3 = dp_packet_l3(pkt);
> >     +    void *l4 = dp_packet_l4(pkt);
> >     +    void *inner_l3 = (char *) l4 + 8;
> >     +
> >     +    /* These calls are already verified to succeed during the code
> path
> >     from
> >     +     * 'conn_key_extract()' which calls
> >     +     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
> >     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> >              extract_l3_ipv4(&inner_key, inner_l3, tail - ((char
> *)inner_l3) -
> >     pad,
> >                              &inner_l4, false);
> >     -        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> >     -        pkt->l4_ofs += inner_l4 - (char *) icmp;
> >     +    } else {
> >     +        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char
> *)inner_l3) -
> >     pad,
> >     +                        &inner_l4);
> >     +    }
> >     +    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
> >     +    pkt->l4_ofs += inner_l4 - (char *) l4;
> >
> >     -        if (conn->nat_action & NAT_ACTION_SRC) {
> >     -            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> >     -                                 conn->key.src.addr.ipv4);
> >     -        } else if (conn->nat_action & NAT_ACTION_DST) {
> >     -            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> >     -                                 conn->key.dst.addr.ipv4);
> >     -        }
> >     +    /* Reverse the key for inner packet. */
> >     +    conn_key_reverse(key);
> >
> >     -        reverse_pat_packet(pkt, conn);
> >     +    pat_packet(pkt, key);
> >     +
> >     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> >     +        nat_packet_ipv4(pkt, key, nat_action);
> >     +
> >     +        struct icmp_header *icmp = (struct icmp_header *) l4;
> >              icmp->icmp_csum = 0;
> >              icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
> >          } else {
> >     -        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> >     -        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
> >     -        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
> >     -            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
> >     -        /* This call is already verified to succeed during the code
> path
> >     from
> >     -         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
> >     -        extract_l3_ipv6(&inner_key, inner_l3_6,
> >     -                        tail - ((char *)inner_l3_6) - pad,
> >     -                        &inner_l4);
> >     -        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> >     -        pkt->l4_ofs += inner_l4 - (char *) icmp6;
> >     -
> >     -        if (conn->nat_action & NAT_ACTION_SRC) {
> >     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >     -                                 inner_l3_6->ip6_src.be32,
> >     -                                 &conn->key.src.addr.ipv6, true);
> >     -        } else if (conn->nat_action & NAT_ACTION_DST) {
> >     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >     -                                 inner_l3_6->ip6_dst.be32,
> >     -                                 &conn->key.dst.addr.ipv6, true);
> >     -        }
> >     -        reverse_pat_packet(pkt, conn);
> >     +        nat_packet_ipv6(pkt, key, nat_action);
> >     +
> >     +        struct icmp6_data_header *icmp6 = (struct icmp6_data_header
> *) l4;
> >              icmp6->icmp6_base.icmp6_cksum = 0;
> >     -        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6,
> >     icmp6,
> >     -            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
> >     +        icmp6->icmp6_base.icmp6_cksum =
> >     +            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
> >     +                                    tail - (char *) icmp6 - pad);
> >          }
> >     +
> >     +    /* Return the key and offset back. */
> >     +    conn_key_reverse(key);
> >          pkt->l3_ofs = orig_l3_ofs;
> >          pkt->l4_ofs = orig_l4_ofs;
> >      }
> >
> >      static void
> >     -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> >     -              bool related)
> >     +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply,
> bool
> >     related)
> >      {
> >     -    if (conn->nat_action & NAT_ACTION_SRC) {
> >     -        pkt->md.ct_state |= CS_DST_NAT;
> >     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >     -            struct ip_header *nh = dp_packet_l3(pkt);
> >     -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
> >     -                                 conn->key.src.addr.ipv4);
> >     -        } else {
> >     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> >     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >     -                                 nh6->ip6_dst.be32,
> >     -                                 &conn->key.src.addr.ipv6, true);
> >     -        }
> >     +    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
> >     +    uint16_t nat_action = reply ?
> nat_action_reverse(conn->nat_action)
> >     +                                : conn->nat_action;
> >
> >     -        if (OVS_UNLIKELY(related)) {
> >     -            reverse_nat_packet(pkt, conn);
> >     -        } else {
> >     -            un_pat_packet(pkt, conn);
> >     -        }
> >     -    } else if (conn->nat_action & NAT_ACTION_DST) {
> >     +    /* Update ct_state. */
> >     +    if (nat_action & NAT_ACTION_SRC) {
> >              pkt->md.ct_state |= CS_SRC_NAT;
> >     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >     -            struct ip_header *nh = dp_packet_l3(pkt);
> >     -            packet_set_ipv4_addr(pkt, &nh->ip_src,
> >     -                                 conn->key.dst.addr.ipv4);
> >     -        } else {
> >     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
> >     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >     -                                 nh6->ip6_src.be32,
> >     -                                 &conn->key.dst.addr.ipv6, true);
> >     -        }
> >     +    } else if (nat_action & NAT_ACTION_DST) {
> >     +        pkt->md.ct_state |= CS_DST_NAT;
> >     +    }
> >
> >     +    /* Reverse the key for outer header. */
> >     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
> >     +        nat_packet_ipv4(pkt, key, nat_action);
> >     +    } else {
> >     +        nat_packet_ipv6(pkt, key, nat_action);
> >     +    }
> >     +
> >     +    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST)
> {
> >              if (OVS_UNLIKELY(related)) {
> >     -            reverse_nat_packet(pkt, conn);
> >     +            nat_action  = nat_action_reverse(conn->nat_action);
> >     +            nat_inner_packet(pkt, key, nat_action);
> >              } else {
> >     -            un_pat_packet(pkt, conn);
> >     +            pat_packet(pkt, key);
> >              }
> >          }
> >      }
> >     @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet
> >     *pkt,
> >                      memcpy(nc, nat_conn, sizeof *nc);
> >                  }
> >
> >     -            nat_packet(pkt, nc, ctx->icmp_related);
> >     +            nat_packet(pkt, nc, false, ctx->icmp_related);
> >                  memcpy(&nat_conn->key, &nc->rev_key, sizeof
> nat_conn->key);
> >                  memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->
> >     rev_key);
> >                  nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> >     @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn
> *conn,
> >              if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
> >                  pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
> >              }
> >     -        if (reply) {
> >     -            un_nat_packet(pkt, conn, related);
> >     -        } else {
> >     -            nat_packet(pkt, conn, related);
> >     -        }
> >     +
> >     +        nat_packet(pkt, conn, reply, related);
> >          }
> >      }
> >
> >     diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> >     index 731de439c..deb95eb49 100644
> >     --- a/tests/system-traffic.at
> >     +++ b/tests/system-traffic.at
> >     @@ -6942,6 +6942,73 @@
> recirc_id(0),in_port(br-underlay),ct_state(+trk),eth
> >     (src=f0:00:00:01:01:02,dst=f
> >      OVS_TRAFFIC_VSWITCHD_STOP
> >      AT_CLEANUP
> >
> >     +AT_SETUP([conntrack - ICMP from different source related with NAT])
> >     +AT_SKIP_IF([test $HAVE_NC = no])
> >     +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> >     +CHECK_CONNTRACK()
> >     +CHECK_CONNTRACK_NAT()
> >     +OVS_TRAFFIC_VSWITCHD_START()
> >     +
> >     +ADD_NAMESPACES(client, server)
> >     +
> >     +ADD_VETH(client, client, br0, "192.168.20.10/24",
> "00:00:00:00:20:10")
> >     +ADD_VETH(server, server, br0, "192.168.10.20/24",
> "00:00:00:00:10:20")
> >     +
> >     +dnl Send traffic from client to CT, do DNAT if the traffic is new
> >     otherwise send it to server
> >     +AT_DATA([flows.txt], [dnl
> >     +table=0,ip,ct_state=-trk,actions=ct(table=1,nat)
> >
>  +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=
> >     2,nat(dst(192.168.10.20))
> >     +table=1,ip,actions=resubmit(,2)
> >
>  +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
> >     +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=
> >     output:ovs-server
> >
>  +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
> >     +])
> >     +
> >     +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> >     +
> >     +rm server.pcap
> >     +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap],
> [tcpdump.pid])
> >     +sleep 1
> >     +
> >     +dnl Send UDP client->server
> >     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
> >     +packet=
> >
>  00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions
> >     =resubmit(,0)"])
> >     +dnl Send UDP response server->client
> >     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
> >     +packet=
> >
>  00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions
> >     =resubmit(,0)"])
> >     +dnl Fake router sending ICMP need frag
> >     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
> >     +packet=
> >
>  000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,
> >     \
> >     +actions=resubmit(,0)"
> >     +])
> >     +
> >     +AT_CHECK([ovs-appctl revalidator/purge], [0])
> >     +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip |
> sort ],
> >     [0], [dnl
> >     + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip
> actions=ct(table=
> >     1,nat)
> >     + table=1, n_packets=1, n_bytes=42, reset_counts ct_state=
> >     +new+trk,ip,in_port=1
> actions=ct(commit,table=2,nat(dst=192.168.10.20))
> >     + table=1, n_packets=2, n_bytes=112, reset_counts ip
> actions=resubmit(,2)
> >     + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=
> >     +new+trk,ip,in_port=1 actions=output:2
> >     + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=
> >     +rpl+trk,ip,in_port=2 actions=output:1
> >     + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=
> >     +rel+trk,icmp,in_port=1 actions=output:2
> >     +OFPST_FLOW reply (OF1.5):
> >     +])
> >     +
> >     +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"],
> [0],
> >     [dnl
> >
>  +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=
> >     192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
> >     +])
> >     +
> >     +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep
> 000000001020000000002000])
> >     +
> >     +AT_CHECK([tcpdump -tvnnne "icmp" -r server.pcap 2>/dev/null], [0],
> [dnl
> >     +00:00:00:00:20:00 > 00:00:00:00:10:20, ethertype IPv4 (0x0800),
> length 70:
> >     (tos 0x0, ttl 255, id 287, offset 0, flags [[none]], proto ICMP (1),
> length
> >     56)
> >     +    192.168.20.1 > 192.168.10.20: ICMP 192.168.20.10 unreachable -
> need to
> >     frag (mtu 1400), length 36
> >     +    (tos 0x0, ttl 10, id 0, offset 0, flags [[DF]], proto UDP (17),
> length
> >     28)
> >     +    192.168.10.20.2 > 192.168.20.10.1: UDP, length 0
> >     +])
> >     +
> >     +OVS_TRAFFIC_VSWITCHD_STOP
> >     +AT_CLEANUP
> >     +
> >      AT_BANNER([IGMP])
> >
> >      AT_SETUP([IGMP - flood under normal action])
> >     --
> >     2.37.3
> >
> >
> >
> > Ah there is an issue with the test spaces vs tabs. I'll fix that in v2
> same
> > with the 0-day error.
> >
>
> Ales, I didn't review it, but while at it can you please double check
> the Reported-at link? I was expecting another BZ.
>

Yeah copy-paste error, I'll fix it in v2 as well. Thanks


>
> > Thanks,
> > Ales
> >
> > --
> >
> > Ales Musil
> >
> > Senior Software Engineer - OVN Core
> >
> > Red Hat EMEA
> >
> > amusil@redhat.com    IM: amusil
> >
> > [logo]
>
>
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 13c5ab628..b8b9f9c49 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -764,109 +764,59 @@  handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
 }
 
 static void
-pat_packet(struct dp_packet *pkt, const struct conn *conn)
+pat_packet(struct dp_packet *pkt, const struct conn_key *key)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            struct tcp_header *th = dp_packet_l4(pkt);
-            packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            struct udp_header *uh = dp_packet_l4(pkt);
-            packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
-                                conn->rev_key.src.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            packet_set_udp_port(pkt, conn->rev_key.dst.port,
-                                conn->rev_key.src.port);
-        }
+    if (key->nw_proto == IPPROTO_TCP) {
+        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
+    } else if (key->nw_proto == IPPROTO_UDP) {
+        packet_set_udp_port(pkt, key->dst.port, key->src.port);
     }
 }
 
-static void
-nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
+static uint16_t
+nat_action_reverse(uint16_t nat_action)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        pkt->md.ct_state |= CS_SRC_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_src,
-                                 conn->rev_key.dst.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_src.be32,
-                                 &conn->rev_key.dst.addr.ipv6, true);
-        }
-        if (!related) {
-            pat_packet(pkt, conn);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        pkt->md.ct_state |= CS_DST_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_dst,
-                                 conn->rev_key.src.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_dst.be32,
-                                 &conn->rev_key.src.addr.ipv6, true);
-        }
-        if (!related) {
-            pat_packet(pkt, conn);
-        }
+    if (nat_action & NAT_ACTION_SRC) {
+        nat_action ^= NAT_ACTION_SRC;
+        nat_action |= NAT_ACTION_DST;
+    } else if (nat_action & NAT_ACTION_DST) {
+        nat_action ^= NAT_ACTION_DST;
+        nat_action |= NAT_ACTION_SRC;
     }
+    return nat_action;
 }
 
 static void
-un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
+nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
+                uint16_t nat_action)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            struct tcp_header *th = dp_packet_l4(pkt);
-            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            struct udp_header *uh = dp_packet_l4(pkt);
-            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            packet_set_udp_port(pkt, conn->key.dst.port, conn->key.src.port);
-        }
+    struct ip_header *nh = dp_packet_l3(pkt);
+
+    if (nat_action & NAT_ACTION_SRC) {
+        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
+    } else if (nat_action & NAT_ACTION_DST) {
+        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
     }
 }
 
 static void
-reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
+nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
+                uint16_t nat_action)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            struct tcp_header *th_in = dp_packet_l4(pkt);
-            packet_set_tcp_port(pkt, conn->key.src.port,
-                                th_in->tcp_dst);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            struct udp_header *uh_in = dp_packet_l4(pkt);
-            packet_set_udp_port(pkt, conn->key.src.port,
-                                uh_in->udp_dst);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
-        if (conn->key.nw_proto == IPPROTO_TCP) {
-            packet_set_tcp_port(pkt, conn->key.src.port,
-                                conn->key.dst.port);
-        } else if (conn->key.nw_proto == IPPROTO_UDP) {
-            packet_set_udp_port(pkt, conn->key.src.port,
-                                conn->key.dst.port);
-        }
+    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
+
+    if (nat_action & NAT_ACTION_SRC) {
+        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
+                             &key->dst.addr.ipv6, true);
+    } else if (nat_action & NAT_ACTION_DST) {
+        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
+                             &key->src.addr.ipv6, true);
     }
 }
 
 static void
-reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
+nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
+                 uint16_t nat_action)
 {
     char *tail = dp_packet_tail(pkt);
     uint16_t pad = dp_packet_l2_pad_size(pkt);
@@ -875,98 +825,77 @@  reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
     uint16_t orig_l3_ofs = pkt->l3_ofs;
     uint16_t orig_l4_ofs = pkt->l4_ofs;
 
-    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-        struct ip_header *nh = dp_packet_l3(pkt);
-        struct icmp_header *icmp = dp_packet_l4(pkt);
-        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
-        /* This call is already verified to succeed during the code path from
-         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
+    void *l3 = dp_packet_l3(pkt);
+    void *l4 = dp_packet_l4(pkt);
+    void *inner_l3 = (char *) l4 + 8;
+
+    /* These calls are already verified to succeed during the code path from
+     * 'conn_key_extract()' which calls
+     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
         extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad,
                         &inner_l4, false);
-        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
-        pkt->l4_ofs += inner_l4 - (char *) icmp;
+    } else {
+        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad,
+                        &inner_l4);
+    }
+    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
+    pkt->l4_ofs += inner_l4 - (char *) l4;
 
-        if (conn->nat_action & NAT_ACTION_SRC) {
-            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
-                                 conn->key.src.addr.ipv4);
-        } else if (conn->nat_action & NAT_ACTION_DST) {
-            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
-                                 conn->key.dst.addr.ipv4);
-        }
+    /* Reverse the key for inner packet. */
+    conn_key_reverse(key);
 
-        reverse_pat_packet(pkt, conn);
+    pat_packet(pkt, key);
+
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        nat_packet_ipv4(pkt, key, nat_action);
+
+        struct icmp_header *icmp = (struct icmp_header *) l4;
         icmp->icmp_csum = 0;
         icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
     } else {
-        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
-        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
-            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
-        /* This call is already verified to succeed during the code path from
-         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
-        extract_l3_ipv6(&inner_key, inner_l3_6,
-                        tail - ((char *)inner_l3_6) - pad,
-                        &inner_l4);
-        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
-        pkt->l4_ofs += inner_l4 - (char *) icmp6;
-
-        if (conn->nat_action & NAT_ACTION_SRC) {
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 inner_l3_6->ip6_src.be32,
-                                 &conn->key.src.addr.ipv6, true);
-        } else if (conn->nat_action & NAT_ACTION_DST) {
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 inner_l3_6->ip6_dst.be32,
-                                 &conn->key.dst.addr.ipv6, true);
-        }
-        reverse_pat_packet(pkt, conn);
+        nat_packet_ipv6(pkt, key, nat_action);
+
+        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4;
         icmp6->icmp6_base.icmp6_cksum = 0;
-        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6,
-            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
+        icmp6->icmp6_base.icmp6_cksum =
+            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
+                                    tail - (char *) icmp6 - pad);
     }
+
+    /* Return the key and offset back. */
+    conn_key_reverse(key);
     pkt->l3_ofs = orig_l3_ofs;
     pkt->l4_ofs = orig_l4_ofs;
 }
 
 static void
-un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
-              bool related)
+nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related)
 {
-    if (conn->nat_action & NAT_ACTION_SRC) {
-        pkt->md.ct_state |= CS_DST_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_dst,
-                                 conn->key.src.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_dst.be32,
-                                 &conn->key.src.addr.ipv6, true);
-        }
+    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
+    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
+                                : conn->nat_action;
 
-        if (OVS_UNLIKELY(related)) {
-            reverse_nat_packet(pkt, conn);
-        } else {
-            un_pat_packet(pkt, conn);
-        }
-    } else if (conn->nat_action & NAT_ACTION_DST) {
+    /* Update ct_state. */
+    if (nat_action & NAT_ACTION_SRC) {
         pkt->md.ct_state |= CS_SRC_NAT;
-        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-            struct ip_header *nh = dp_packet_l3(pkt);
-            packet_set_ipv4_addr(pkt, &nh->ip_src,
-                                 conn->key.dst.addr.ipv4);
-        } else {
-            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
-                                 nh6->ip6_src.be32,
-                                 &conn->key.dst.addr.ipv6, true);
-        }
+    } else if (nat_action & NAT_ACTION_DST) {
+        pkt->md.ct_state |= CS_DST_NAT;
+    }
 
+    /* Reverse the key for outer header. */
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        nat_packet_ipv4(pkt, key, nat_action);
+    } else {
+        nat_packet_ipv6(pkt, key, nat_action);
+    }
+
+    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
         if (OVS_UNLIKELY(related)) {
-            reverse_nat_packet(pkt, conn);
+            nat_action  = nat_action_reverse(conn->nat_action);
+            nat_inner_packet(pkt, key, nat_action);
         } else {
-            un_pat_packet(pkt, conn);
+            pat_packet(pkt, key);
         }
     }
 }
@@ -1082,7 +1011,7 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
                 memcpy(nc, nat_conn, sizeof *nc);
             }
 
-            nat_packet(pkt, nc, ctx->icmp_related);
+            nat_packet(pkt, nc, false, ctx->icmp_related);
             memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
             memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
             nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
@@ -1185,11 +1114,8 @@  handle_nat(struct dp_packet *pkt, struct conn *conn,
         if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
             pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
         }
-        if (reply) {
-            un_nat_packet(pkt, conn, related);
-        } else {
-            nat_packet(pkt, conn, related);
-        }
+
+        nat_packet(pkt, conn, reply, related);
     }
 }
 
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 731de439c..deb95eb49 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -6942,6 +6942,73 @@  recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([conntrack - ICMP from different source related with NAT])
+AT_SKIP_IF([test $HAVE_NC = no])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(client, server)
+
+ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
+ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
+
+dnl Send traffic from client to CT, do DNAT if the traffic is new otherwise send it to server
+AT_DATA([flows.txt], [dnl
+table=0,ip,ct_state=-trk,actions=ct(table=1,nat)
+table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,nat(dst(192.168.10.20))
+table=1,ip,actions=resubmit(,2)
+table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
+table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server
+table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+rm server.pcap
+OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
+sleep 1
+
+dnl Send UDP client->server
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
+packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"])
+dnl Send UDP response server->client
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
+packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"])
+dnl Fake router sending ICMP need frag
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
+packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\
+actions=resubmit(,0)"
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0])
+AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl
+ n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip actions=ct(table=1,nat)
+ table=1, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=ct(commit,table=2,nat(dst=192.168.10.20))
+ table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2)
+ table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2
+ table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1
+ table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2
+OFPST_FLOW reply (OF1.5):
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl
+udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
+])
+
+OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
+
+AT_CHECK([tcpdump -tvnnne "icmp" -r server.pcap 2>/dev/null], [0], [dnl
+00:00:00:00:20:00 > 00:00:00:00:10:20, ethertype IPv4 (0x0800), length 70: (tos 0x0, ttl 255, id 287, offset 0, flags [[none]], proto ICMP (1), length 56)
+    192.168.20.1 > 192.168.10.20: ICMP 192.168.20.10 unreachable - need to frag (mtu 1400), length 36
+    (tos 0x0, ttl 10, id 0, offset 0, flags [[DF]], proto UDP (17), length 28)
+    192.168.10.20.2 > 192.168.20.10.1: UDP, length 0
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_BANNER([IGMP])
 
 AT_SETUP([IGMP - flood under normal action])