Message ID | 20230206114610.142298-1-amusil@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v6] conntrack: Properly unNAT inner header of related traffic | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
ovsrobot/intel-ovs-compilation | success | test: success |
On Mon, Feb 06, 2023 at 12:46:10PM +0100, Ales Musil wrote: > The inner header was not handled properly. > Simplify the code which allows proper handling > of the inner headers. > > Reported-at: https://bugzilla.redhat.com/2137754 > Signed-off-by: Ales Musil <amusil@redhat.com> Nice clean-up too :) Reviewed-by: Simon Horman <simon.horman@corigine.com> > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 550b2be9b..3162924ca 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c ... > static void > -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, > + uint16_t nat_action) > { > char *tail = dp_packet_tail(pkt); > uint16_t pad = dp_packet_l2_pad_size(pkt); > @@ -875,98 +827,77 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > uint16_t orig_l3_ofs = pkt->l3_ofs; > uint16_t orig_l4_ofs = pkt->l4_ofs; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - struct icmp_header *icmp = dp_packet_l4(pkt); > - struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); > - /* This call is already verified to succeed during the code path from > - * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ > - extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad, > + void *l3 = dp_packet_l3(pkt); > + void *l4 = dp_packet_l4(pkt); > + void *inner_l3; > + /* These calls are already verified to succeed during the code path from > + * 'conn_key_extract()' which calls > + * 'extract_l4_icmp()'/'extract_l4_icmp6()'. 
*/ > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + inner_l3 = (char *) l4 + sizeof(struct icmp_header); > + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad, > &inner_l4, false); > - pkt->l3_ofs += (char *) inner_l3 - (char *) nh; > - pkt->l4_ofs += inner_l4 - (char *) icmp; > + } else { > + inner_l3 = (char *) l4 + sizeof(struct icmp6_data_header); > + extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad, > + &inner_l4); > + } > + pkt->l3_ofs += (char *) inner_l3 - (char *) l3; > + pkt->l4_ofs += inner_l4 - (char *) l4; > > - if (conn->nat_action & NAT_ACTION_SRC) { > - packet_set_ipv4_addr(pkt, &inner_l3->ip_src, > - conn->key.src.addr.ipv4); > - } else if (conn->nat_action & NAT_ACTION_DST) { > - packet_set_ipv4_addr(pkt, &inner_l3->ip_dst, > - conn->key.dst.addr.ipv4); > - } > + /* Reverse the key for inner packet. */ > + struct conn_key rev_key = *key; > + conn_key_reverse(&rev_key); > + > + pat_packet(pkt, &rev_key); > + > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + nat_packet_ipv4(pkt, &rev_key, nat_action); > > - reverse_pat_packet(pkt, conn); > + struct icmp_header *icmp = (struct icmp_header *) l4; > icmp->icmp_csum = 0; nit: not for this patch, but is the above line necessary? > icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad); > } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - struct icmp6_data_header *icmp6 = dp_packet_l4(pkt); > - struct ovs_16aligned_ip6_hdr *inner_l3_6 = > - (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); > - /* This call is already verified to succeed during the code path from > - * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. 
*/ > - extract_l3_ipv6(&inner_key, inner_l3_6, > - tail - ((char *)inner_l3_6) - pad, > - &inner_l4); > - pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6; > - pkt->l4_ofs += inner_l4 - (char *) icmp6; > - > - if (conn->nat_action & NAT_ACTION_SRC) { > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - inner_l3_6->ip6_src.be32, > - &conn->key.src.addr.ipv6, true); > - } else if (conn->nat_action & NAT_ACTION_DST) { > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - inner_l3_6->ip6_dst.be32, > - &conn->key.dst.addr.ipv6, true); > - } > - reverse_pat_packet(pkt, conn); > + nat_packet_ipv6(pkt, &rev_key, nat_action); > + > + struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4; > icmp6->icmp6_base.icmp6_cksum = 0; ditto. > - icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6, > - IPPROTO_ICMPV6, tail - (char *) icmp6 - pad); > + icmp6->icmp6_base.icmp6_cksum = > + packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6, > + tail - (char *) icmp6 - pad); > } > + > pkt->l3_ofs = orig_l3_ofs; > pkt->l4_ofs = orig_l4_ofs; > }
On Tue, Feb 7, 2023 at 11:54 AM Simon Horman <simon.horman@corigine.com> wrote: > On Mon, Feb 06, 2023 at 12:46:10PM +0100, Ales Musil wrote: > > The inner header was not handled properly. > > Simplify the code which allows proper handling > > of the inner headers. > > > > Reported-at: https://bugzilla.redhat.com/2137754 > > Signed-off-by: Ales Musil <amusil@redhat.com> > > Nice clean-up too :) > > Reviewed-by: Simon Horman <simon.horman@corigine.com> > Thank you for the review. > > > diff --git a/lib/conntrack.c b/lib/conntrack.c > > index 550b2be9b..3162924ca 100644 > > --- a/lib/conntrack.c > > +++ b/lib/conntrack.c > > ... > > > static void > > -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > > +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, > > + uint16_t nat_action) > > { > > char *tail = dp_packet_tail(pkt); > > uint16_t pad = dp_packet_l2_pad_size(pkt); > > @@ -875,98 +827,77 @@ reverse_nat_packet(struct dp_packet *pkt, const > struct conn *conn) > > uint16_t orig_l3_ofs = pkt->l3_ofs; > > uint16_t orig_l4_ofs = pkt->l4_ofs; > > > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *nh = dp_packet_l3(pkt); > > - struct icmp_header *icmp = dp_packet_l4(pkt); > > - struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); > > - /* This call is already verified to succeed during the code > path from > > - * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ > > - extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) > - pad, > > + void *l3 = dp_packet_l3(pkt); > > + void *l4 = dp_packet_l4(pkt); > > + void *inner_l3; > > + /* These calls are already verified to succeed during the code path > from > > + * 'conn_key_extract()' which calls > > + * 'extract_l4_icmp()'/'extract_l4_icmp6()'. 
*/ > > + if (key->dl_type == htons(ETH_TYPE_IP)) { > > + inner_l3 = (char *) l4 + sizeof(struct icmp_header); > > + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) > inner_l3) - pad, > > &inner_l4, false); > > - pkt->l3_ofs += (char *) inner_l3 - (char *) nh; > > - pkt->l4_ofs += inner_l4 - (char *) icmp; > > + } else { > > + inner_l3 = (char *) l4 + sizeof(struct icmp6_data_header); > > + extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) > inner_l3) - pad, > > + &inner_l4); > > + } > > + pkt->l3_ofs += (char *) inner_l3 - (char *) l3; > > + pkt->l4_ofs += inner_l4 - (char *) l4; > > > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - packet_set_ipv4_addr(pkt, &inner_l3->ip_src, > > - conn->key.src.addr.ipv4); > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - packet_set_ipv4_addr(pkt, &inner_l3->ip_dst, > > - conn->key.dst.addr.ipv4); > > - } > > + /* Reverse the key for inner packet. */ > > + struct conn_key rev_key = *key; > > + conn_key_reverse(&rev_key); > > + > > + pat_packet(pkt, &rev_key); > > + > > + if (key->dl_type == htons(ETH_TYPE_IP)) { > > + nat_packet_ipv4(pkt, &rev_key, nat_action); > > > > - reverse_pat_packet(pkt, conn); > > + struct icmp_header *icmp = (struct icmp_header *) l4; > > icmp->icmp_csum = 0; > > nit: not for this patch, but is the above line necessary? > It actually is, because the checksum is part of the data for which the checksum is computed. It is a bit confusing, but aligned with the specification: "For purposes of computing the checksum, the value of the checksum field is zero." 
[0] [0] https://datatracker.ietf.org/doc/html/rfc791 (RFC 791 states this for the IPv4 header checksum; the equivalent rule for the ICMP checksum is in RFC 792: https://datatracker.ietf.org/doc/html/rfc792) > > > icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad); > > } else { > > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > - struct icmp6_data_header *icmp6 = dp_packet_l4(pkt); > > - struct ovs_16aligned_ip6_hdr *inner_l3_6 = > > - (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); > > - /* This call is already verified to succeed during the code > path from > > - * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */ > > - extract_l3_ipv6(&inner_key, inner_l3_6, > > - tail - ((char *)inner_l3_6) - pad, > > - &inner_l4); > > - pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6; > > - pkt->l4_ofs += inner_l4 - (char *) icmp6; > > - > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - inner_l3_6->ip6_src.be32, > > - &conn->key.src.addr.ipv6, true); > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - inner_l3_6->ip6_dst.be32, > > - &conn->key.dst.addr.ipv6, true); > > - } > > - reverse_pat_packet(pkt, conn); > > + nat_packet_ipv6(pkt, &rev_key, nat_action); > > + > > + struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) > l4; > > icmp6->icmp6_base.icmp6_cksum = 0; > > ditto. > > > - icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, > icmp6, > > - IPPROTO_ICMPV6, tail - (char *) icmp6 - pad); > > + icmp6->icmp6_base.icmp6_cksum = > > + packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6, > > + tail - (char *) icmp6 - pad); > > } > > + > > pkt->l3_ofs = orig_l3_ofs; > > pkt->l4_ofs = orig_l4_ofs; > > } > > Thanks, Ales
On Wed, Feb 08, 2023 at 07:29:27AM +0100, Ales Musil wrote: > On Tue, Feb 7, 2023 at 11:54 AM Simon Horman <simon.horman@corigine.com> > wrote: > > > On Mon, Feb 06, 2023 at 12:46:10PM +0100, Ales Musil wrote: > > > The inner header was not handled properly. > > > Simplify the code which allows proper handling > > > of the inner headers. > > > > > > Reported-at: https://bugzilla.redhat.com/2137754 > > > Signed-off-by: Ales Musil <amusil@redhat.com> > > > > Nice clean-up too :) > > > > Reviewed-by: Simon Horman <simon.horman@corigine.com> > > > > Thank you for the review. > > > > > > > diff --git a/lib/conntrack.c b/lib/conntrack.c > > > index 550b2be9b..3162924ca 100644 > > > --- a/lib/conntrack.c > > > +++ b/lib/conntrack.c ... > > > + > > > + if (key->dl_type == htons(ETH_TYPE_IP)) { > > > + nat_packet_ipv4(pkt, &rev_key, nat_action); > > > > > > - reverse_pat_packet(pkt, conn); > > > + struct icmp_header *icmp = (struct icmp_header *) l4; > > > icmp->icmp_csum = 0; > > > > nit: not for this patch, but is the above line necessary? > > > > It actually is, because the checksum is part of the data for which the > checksum > is computed. It is a bit confusing, but aligned with the specification: > > "For purposes of computing the checksum, the value of the checksum field is > zero." [0] > > [0] https://datatracker.ietf.org/doc/html/rfc791 Yeah, right. I was looking for that, but couldn't see it yesterday. Now I do :)
Hi Ales, I just have two small nits, but other than that the patch LGTM. Acked-by: Paolo Valerio <pvalerio@redhat.com> Ales Musil <amusil@redhat.com> writes: > The inner header was not handled properly. > Simplify the code which allows proper handling > of the inner headers. > > Reported-at: https://bugzilla.redhat.com/2137754 > Signed-off-by: Ales Musil <amusil@redhat.com> > --- > v6: Rebase on top of current master. > Address comments from Paolo: > - Add test case for ICMP related in reply direction. > - Fix a mistake when the inner header was using > wrong nat_action. > v5: Rebase on top of current master. > Address comments from Dumitru: > - Use explicit struct sizes for inner_l3 pointer. > - Use copied conn_key for reverse operation instead > of double reverse of the original one. > - Update the test case to use separate zone instead > of default one. > v4: Rebase on top of current master. > Use output of ovs-pcap in tests rather than tcpdump. > v3: Rebase on top of current master. > Update the BZ reference. > Update the test case. 
> --- > lib/conntrack.c | 254 ++++++++++++++-------------------------- > tests/system-traffic.at | 107 +++++++++++++++++ > 2 files changed, 198 insertions(+), 163 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 550b2be9b..3162924ca 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -764,109 +764,61 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, > } > > static void > -pat_packet(struct dp_packet *pkt, const struct conn *conn) > +pat_packet(struct dp_packet *pkt, const struct conn_key *key) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - struct tcp_header *th = dp_packet_l4(pkt); > - packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - struct udp_header *uh = dp_packet_l4(pkt); > - packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - packet_set_tcp_port(pkt, conn->rev_key.dst.port, > - conn->rev_key.src.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - packet_set_udp_port(pkt, conn->rev_key.dst.port, > - conn->rev_key.src.port); > - } > + if (key->nw_proto == IPPROTO_TCP) { > + packet_set_tcp_port(pkt, key->dst.port, key->src.port); > + } else if (key->nw_proto == IPPROTO_UDP) { > + packet_set_udp_port(pkt, key->dst.port, key->src.port); > } > } > > -static void > -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related) > +static uint16_t > +nat_action_reverse(uint16_t nat_action) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - pkt->md.ct_state |= CS_SRC_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_src, > - conn->rev_key.dst.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, 
conn->key.nw_proto, > - nh6->ip6_src.be32, > - &conn->rev_key.dst.addr.ipv6, true); > - } > - if (!related) { > - pat_packet(pkt, conn); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - pkt->md.ct_state |= CS_DST_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_dst, > - conn->rev_key.src.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_dst.be32, > - &conn->rev_key.src.addr.ipv6, true); > - } > - if (!related) { > - pat_packet(pkt, conn); > - } > + if (nat_action & NAT_ACTION_SRC) { > + VLOG_INFO("original SRC"); Not sure this is useful. I'd remove it including the one below. > + nat_action ^= NAT_ACTION_SRC; > + nat_action |= NAT_ACTION_DST; > + } else if (nat_action & NAT_ACTION_DST) { > + VLOG_INFO("original DST"); > + nat_action ^= NAT_ACTION_DST; > + nat_action |= NAT_ACTION_SRC; > } > + return nat_action; > } > > static void > -un_pat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key, > + uint16_t nat_action) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - struct tcp_header *th = dp_packet_l4(pkt); > - packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - struct udp_header *uh = dp_packet_l4(pkt); > - packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - packet_set_udp_port(pkt, conn->key.dst.port, conn->key.src.port); > - } > + struct ip_header *nh = dp_packet_l3(pkt); > + > + if (nat_action & NAT_ACTION_SRC) { > + packet_set_ipv4_addr(pkt, 
&nh->ip_src, key->dst.addr.ipv4); > + } else if (nat_action & NAT_ACTION_DST) { > + packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4); > } > } > > static void > -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key, > + uint16_t nat_action) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - struct tcp_header *th_in = dp_packet_l4(pkt); > - packet_set_tcp_port(pkt, conn->key.src.port, > - th_in->tcp_dst); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - struct udp_header *uh_in = dp_packet_l4(pkt); > - packet_set_udp_port(pkt, conn->key.src.port, > - uh_in->udp_dst); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - packet_set_tcp_port(pkt, conn->key.src.port, > - conn->key.dst.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - packet_set_udp_port(pkt, conn->key.src.port, > - conn->key.dst.port); > - } > + struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > + > + if (nat_action & NAT_ACTION_SRC) { > + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32, > + &key->dst.addr.ipv6, true); > + } else if (nat_action & NAT_ACTION_DST) { > + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32, > + &key->src.addr.ipv6, true); > } > } > > static void > -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, > + uint16_t nat_action) > { > char *tail = dp_packet_tail(pkt); > uint16_t pad = dp_packet_l2_pad_size(pkt); > @@ -875,98 +827,77 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > uint16_t orig_l3_ofs = pkt->l3_ofs; > uint16_t orig_l4_ofs = pkt->l4_ofs; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - struct icmp_header *icmp = dp_packet_l4(pkt); > - struct ip_header *inner_l3 = (struct 
ip_header *) (icmp + 1); > - /* This call is already verified to succeed during the code path from > - * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ > - extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad, > + void *l3 = dp_packet_l3(pkt); > + void *l4 = dp_packet_l4(pkt); > + void *inner_l3; > + /* These calls are already verified to succeed during the code path from > + * 'conn_key_extract()' which calls > + * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */ > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + inner_l3 = (char *) l4 + sizeof(struct icmp_header); > + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad, > &inner_l4, false); > - pkt->l3_ofs += (char *) inner_l3 - (char *) nh; > - pkt->l4_ofs += inner_l4 - (char *) icmp; > + } else { > + inner_l3 = (char *) l4 + sizeof(struct icmp6_data_header); > + extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad, > + &inner_l4); > + } > + pkt->l3_ofs += (char *) inner_l3 - (char *) l3; > + pkt->l4_ofs += inner_l4 - (char *) l4; > > - if (conn->nat_action & NAT_ACTION_SRC) { > - packet_set_ipv4_addr(pkt, &inner_l3->ip_src, > - conn->key.src.addr.ipv4); > - } else if (conn->nat_action & NAT_ACTION_DST) { > - packet_set_ipv4_addr(pkt, &inner_l3->ip_dst, > - conn->key.dst.addr.ipv4); > - } > + /* Reverse the key for inner packet. 
*/ > + struct conn_key rev_key = *key; > + conn_key_reverse(&rev_key); > + > + pat_packet(pkt, &rev_key); > + > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + nat_packet_ipv4(pkt, &rev_key, nat_action); > > - reverse_pat_packet(pkt, conn); > + struct icmp_header *icmp = (struct icmp_header *) l4; > icmp->icmp_csum = 0; > icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad); > } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - struct icmp6_data_header *icmp6 = dp_packet_l4(pkt); > - struct ovs_16aligned_ip6_hdr *inner_l3_6 = > - (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); > - /* This call is already verified to succeed during the code path from > - * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */ > - extract_l3_ipv6(&inner_key, inner_l3_6, > - tail - ((char *)inner_l3_6) - pad, > - &inner_l4); > - pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6; > - pkt->l4_ofs += inner_l4 - (char *) icmp6; > - > - if (conn->nat_action & NAT_ACTION_SRC) { > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - inner_l3_6->ip6_src.be32, > - &conn->key.src.addr.ipv6, true); > - } else if (conn->nat_action & NAT_ACTION_DST) { > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - inner_l3_6->ip6_dst.be32, > - &conn->key.dst.addr.ipv6, true); > - } > - reverse_pat_packet(pkt, conn); > + nat_packet_ipv6(pkt, &rev_key, nat_action); > + > + struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4; > icmp6->icmp6_base.icmp6_cksum = 0; > - icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6, > - IPPROTO_ICMPV6, tail - (char *) icmp6 - pad); > + icmp6->icmp6_base.icmp6_cksum = > + packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6, > + tail - (char *) icmp6 - pad); > } > + > pkt->l3_ofs = orig_l3_ofs; > pkt->l4_ofs = orig_l4_ofs; > } > > static void > -un_nat_packet(struct dp_packet *pkt, const struct conn *conn, > - bool related) > +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related) > { > - if 
(conn->nat_action & NAT_ACTION_SRC) { > - pkt->md.ct_state |= CS_DST_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_dst, > - conn->key.src.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_dst.be32, > - &conn->key.src.addr.ipv6, true); > - } > + struct conn_key *key = reply ? &conn->key : &conn->rev_key; > + uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action) > + : conn->nat_action; > > - if (OVS_UNLIKELY(related)) { > - reverse_nat_packet(pkt, conn); > - } else { > - un_pat_packet(pkt, conn); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > + /* Update ct_state. */ > + if (nat_action & NAT_ACTION_SRC) { > pkt->md.ct_state |= CS_SRC_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_src, > - conn->key.dst.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_src.be32, > - &conn->key.dst.addr.ipv6, true); > - } > + } else if (nat_action & NAT_ACTION_DST) { > + pkt->md.ct_state |= CS_DST_NAT; > + } > + > + /* Reverse the key for outer header. 
*/ > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + nat_packet_ipv4(pkt, key, nat_action); > + } else { > + nat_packet_ipv6(pkt, key, nat_action); > + } > > + if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) { > if (OVS_UNLIKELY(related)) { > - reverse_nat_packet(pkt, conn); > + nat_action = nat_action_reverse(nat_action); > + nat_inner_packet(pkt, key, nat_action); > } else { > - un_pat_packet(pkt, conn); > + pat_packet(pkt, key); > } > } > } > @@ -1082,7 +1013,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, > memcpy(nc, nat_conn, sizeof *nc); > } > > - nat_packet(pkt, nc, ctx->icmp_related); > + nat_packet(pkt, nc, false, ctx->icmp_related); > memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); > memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key); > nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; > @@ -1185,11 +1116,8 @@ handle_nat(struct dp_packet *pkt, struct conn *conn, > if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) { > pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT); > } > - if (reply) { > - un_nat_packet(pkt, conn, related); > - } else { > - nat_packet(pkt, conn, related); > - } > + > + nat_packet(pkt, conn, reply, related); > } > } > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index b1b01380a..6c9e00fa3 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -7193,6 +7193,113 @@ recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([conntrack - ICMP from different source related with NAT]) > +AT_SKIP_IF([test $HAVE_NC = no]) > +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) > +CHECK_CONNTRACK() > +CHECK_CONNTRACK_NAT() > +OVS_TRAFFIC_VSWITCHD_START() > + > +ADD_NAMESPACES(client, server) > + > +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10") > +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20") > + > +dnl Send traffic from client to CT, do DNAT if 
the traffic is new otherwise send it to server > +AT_DATA([flows.txt], [dnl > +table=0,ip,actions=ct(table=1,zone=42,nat) > +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,zone=42,nat(dst(192.168.10.20)) nit: I'd stick with nat(dst=192.168...) > +table=1,icmp,ct_state=+trk+rel-rpl,actions=ct(commit,table=2,zone=42,nat) > +table=1,ip,actions=resubmit(,2) > +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server > +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server > +table=2,in_port=ovs-server,icmp,ct_state=+trk+rel,actions=output:ovs-client > +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client > +]) > + > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > + > +rm server.pcap > +OVS_DAEMONIZE([tcpdump -l -U -i ovs-server -w server.pcap 2>tcpdump0_err], [tcpdump0.pid]) > +OVS_WAIT_UNTIL([grep "listening" tcpdump0_err]) > + > +dnl Send UDP client->server > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"]) > +dnl Send UDP response server->client > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\ > +packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"]) > +dnl Fake router sending ICMP need frag router->server > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > +packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\ > +actions=resubmit(,0)" > +]) > + > +AT_CHECK([ovs-appctl revalidator/purge], [0]) > +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl > + n_packets=3, n_bytes=154, reset_counts ip actions=ct(table=1,zone=42,nat) > + table=1, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 
actions=ct(commit,table=2,zone=42,nat(dst=192.168.10.20)) > + table=1, n_packets=1, n_bytes=42, reset_counts ip actions=resubmit(,2) > + table=1, n_packets=1, n_bytes=70, reset_counts ct_state=+rel-rpl+trk,icmp actions=ct(commit,table=2,zone=42,nat) > + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2 > + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1 > + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2 > + table=2, reset_counts ct_state=+rel+trk,icmp,in_port=2 actions=output:1 > +OFPST_FLOW reply (OF1.5): > +]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl > +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1),zone=42 > +]) > + > +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000]) > + > +AT_CHECK([ovs-pcap server.pcap | grep 000000001020000000002000], [0], [dnl > +000000001020000000002000080045000038011f0000ff011b40c0a81401c0a80a140304f778000005784500001c000040000a11d162c0a80a14c0a8140a0002000100080000 > +]) > + > +dnl Check the ICMP error in reply direction > +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=42]) > + > +rm client.pcap > +OVS_DAEMONIZE([tcpdump -l -U -i ovs-client -w client.pcap 2>tcpdump1_err], [tcpdump1.pid]) > +OVS_WAIT_UNTIL([grep "listening" tcpdump1_err]) > + > +dnl Send UDP client->server > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"]) > +dnl Fake router sending ICMP need frag router->client > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\ > +packet=000000002010000000002000080045000038011F0000FF01114AC0A81401C0A8140A0304F778000005784500001C000040000A11D162C0A8140AC0A80A140001000200080000,\ > +actions=resubmit(,0)" > +]) > + > 
+AT_CHECK([ovs-appctl revalidator/purge], [0]) > +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl > + n_packets=5, n_bytes=266, reset_counts ip actions=ct(table=1,zone=42,nat) > + table=1, n_packets=1, n_bytes=70, reset_counts ct_state=+rel-rpl+trk,icmp actions=ct(commit,table=2,zone=42,nat) > + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2) > + table=1, n_packets=2, n_bytes=84, reset_counts ct_state=+new+trk,ip,in_port=1 actions=ct(commit,table=2,zone=42,nat(dst=192.168.10.20)) > + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1 > + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2 > + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=2 actions=output:1 > + table=2, n_packets=2, n_bytes=84, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2 > +OFPST_FLOW reply (OF1.5): > +]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl > +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1),zone=42 > +]) > + > +OVS_WAIT_UNTIL([ovs-pcap client.pcap | grep 000000002010000000002000]) > + > +AT_CHECK([ovs-pcap client.pcap | grep 000000002010000000002000], [0], [dnl > +000000002010000000002000080045000038011f0000ff011137c0a81414c0a8140a0304f778000005784500001c000040000a11c762c0a8140ac0a814140001000200080000 > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > AT_BANNER([IGMP]) > > AT_SETUP([IGMP - flood under normal action]) > -- > 2.39.1
On Thu, Feb 9, 2023 at 1:17 PM Paolo Valerio <pvalerio@redhat.com> wrote: > Hi Ales, > > I just have two small nits, but other than that the patch LGTM. > Hi Paolo, I've addressed both comments and added your ack to the patch. Thanks, Ales > > Acked-by: Paolo Valerio <pvalerio@redhat.com> > > Ales Musil <amusil@redhat.com> writes: > > > The inner header was not handled properly. > > Simplify the code which allows proper handling > > of the inner headers. > > > > Reported-at: https://bugzilla.redhat.com/2137754 > > Signed-off-by: Ales Musil <amusil@redhat.com> > > --- > > v6: Rebase on top of current master. > > Address comments from Paolo: > > - Add test case for ICMP related in reply direction. > > - Fix a mistake when the inner header was using > > wrong nat_action. > > v5: Rebase on top of current master. > > Address comments from Dumitru: > > - Use explicit struct sizes for inner_l3 pointer. > > - Use copied conn_key for reverse operation instead > > of double reverse of the original one. > > - Update the test case to use separate zone instead > > of default one. > > v4: Rebase on top of current master. > > Use output of ovs-pcap in tests rather than tcpdump. > > v3: Rebase on top of current master. > > Update the BZ reference. > > Update the test case. 
> > --- > > lib/conntrack.c | 254 ++++++++++++++-------------------------- > > tests/system-traffic.at | 107 +++++++++++++++++ > > 2 files changed, 198 insertions(+), 163 deletions(-) > > > > diff --git a/lib/conntrack.c b/lib/conntrack.c > > index 550b2be9b..3162924ca 100644 > > --- a/lib/conntrack.c > > +++ b/lib/conntrack.c > > @@ -764,109 +764,61 @@ handle_alg_ctl(struct conntrack *ct, const struct > conn_lookup_ctx *ctx, > > } > > > > static void > > -pat_packet(struct dp_packet *pkt, const struct conn *conn) > > +pat_packet(struct dp_packet *pkt, const struct conn_key *key) > > { > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - if (conn->key.nw_proto == IPPROTO_TCP) { > > - struct tcp_header *th = dp_packet_l4(pkt); > > - packet_set_tcp_port(pkt, conn->rev_key.dst.port, > th->tcp_dst); > > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > > - struct udp_header *uh = dp_packet_l4(pkt); > > - packet_set_udp_port(pkt, conn->rev_key.dst.port, > uh->udp_dst); > > - } > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - if (conn->key.nw_proto == IPPROTO_TCP) { > > - packet_set_tcp_port(pkt, conn->rev_key.dst.port, > > - conn->rev_key.src.port); > > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > > - packet_set_udp_port(pkt, conn->rev_key.dst.port, > > - conn->rev_key.src.port); > > - } > > + if (key->nw_proto == IPPROTO_TCP) { > > + packet_set_tcp_port(pkt, key->dst.port, key->src.port); > > + } else if (key->nw_proto == IPPROTO_UDP) { > > + packet_set_udp_port(pkt, key->dst.port, key->src.port); > > } > > } > > > > -static void > > -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related) > > +static uint16_t > > +nat_action_reverse(uint16_t nat_action) > > { > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - pkt->md.ct_state |= CS_SRC_NAT; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *nh = dp_packet_l3(pkt); > > - packet_set_ipv4_addr(pkt, &nh->ip_src, > > - conn->rev_key.dst.addr.ipv4); > > - } 
else { > > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - nh6->ip6_src.be32, > > - &conn->rev_key.dst.addr.ipv6, true); > > - } > > - if (!related) { > > - pat_packet(pkt, conn); > > - } > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - pkt->md.ct_state |= CS_DST_NAT; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *nh = dp_packet_l3(pkt); > > - packet_set_ipv4_addr(pkt, &nh->ip_dst, > > - conn->rev_key.src.addr.ipv4); > > - } else { > > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - nh6->ip6_dst.be32, > > - &conn->rev_key.src.addr.ipv6, true); > > - } > > - if (!related) { > > - pat_packet(pkt, conn); > > - } > > + if (nat_action & NAT_ACTION_SRC) { > > + VLOG_INFO("original SRC"); > > Not sure this is useful. I'd remove it including the one below. > > > + nat_action ^= NAT_ACTION_SRC; > > + nat_action |= NAT_ACTION_DST; > > + } else if (nat_action & NAT_ACTION_DST) { > > + VLOG_INFO("original DST"); > > + nat_action ^= NAT_ACTION_DST; > > + nat_action |= NAT_ACTION_SRC; > > } > > + return nat_action; > > } > > > > static void > > -un_pat_packet(struct dp_packet *pkt, const struct conn *conn) > > +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key, > > + uint16_t nat_action) > > { > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - if (conn->key.nw_proto == IPPROTO_TCP) { > > - struct tcp_header *th = dp_packet_l4(pkt); > > - packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port); > > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > > - struct udp_header *uh = dp_packet_l4(pkt); > > - packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port); > > - } > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - if (conn->key.nw_proto == IPPROTO_TCP) { > > - packet_set_tcp_port(pkt, conn->key.dst.port, > conn->key.src.port); > > - } else if (conn->key.nw_proto == 
IPPROTO_UDP) { > > - packet_set_udp_port(pkt, conn->key.dst.port, > conn->key.src.port); > > - } > > + struct ip_header *nh = dp_packet_l3(pkt); > > + > > + if (nat_action & NAT_ACTION_SRC) { > > + packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4); > > + } else if (nat_action & NAT_ACTION_DST) { > > + packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4); > > } > > } > > > > static void > > -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn) > > +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key, > > + uint16_t nat_action) > > { > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - if (conn->key.nw_proto == IPPROTO_TCP) { > > - struct tcp_header *th_in = dp_packet_l4(pkt); > > - packet_set_tcp_port(pkt, conn->key.src.port, > > - th_in->tcp_dst); > > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > > - struct udp_header *uh_in = dp_packet_l4(pkt); > > - packet_set_udp_port(pkt, conn->key.src.port, > > - uh_in->udp_dst); > > - } > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - if (conn->key.nw_proto == IPPROTO_TCP) { > > - packet_set_tcp_port(pkt, conn->key.src.port, > > - conn->key.dst.port); > > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > > - packet_set_udp_port(pkt, conn->key.src.port, > > - conn->key.dst.port); > > - } > > + struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > + > > + if (nat_action & NAT_ACTION_SRC) { > > + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32, > > + &key->dst.addr.ipv6, true); > > + } else if (nat_action & NAT_ACTION_DST) { > > + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32, > > + &key->src.addr.ipv6, true); > > } > > } > > > > static void > > -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > > +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, > > + uint16_t nat_action) > > { > > char *tail = dp_packet_tail(pkt); > > uint16_t pad = dp_packet_l2_pad_size(pkt); > > @@ -875,98 +827,77 @@ 
reverse_nat_packet(struct dp_packet *pkt, const > struct conn *conn) > > uint16_t orig_l3_ofs = pkt->l3_ofs; > > uint16_t orig_l4_ofs = pkt->l4_ofs; > > > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *nh = dp_packet_l3(pkt); > > - struct icmp_header *icmp = dp_packet_l4(pkt); > > - struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); > > - /* This call is already verified to succeed during the code > path from > > - * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ > > - extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) > - pad, > > + void *l3 = dp_packet_l3(pkt); > > + void *l4 = dp_packet_l4(pkt); > > + void *inner_l3; > > + /* These calls are already verified to succeed during the code path > from > > + * 'conn_key_extract()' which calls > > + * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */ > > + if (key->dl_type == htons(ETH_TYPE_IP)) { > > + inner_l3 = (char *) l4 + sizeof(struct icmp_header); > > + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) > inner_l3) - pad, > > &inner_l4, false); > > - pkt->l3_ofs += (char *) inner_l3 - (char *) nh; > > - pkt->l4_ofs += inner_l4 - (char *) icmp; > > + } else { > > + inner_l3 = (char *) l4 + sizeof(struct icmp6_data_header); > > + extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) > inner_l3) - pad, > > + &inner_l4); > > + } > > + pkt->l3_ofs += (char *) inner_l3 - (char *) l3; > > + pkt->l4_ofs += inner_l4 - (char *) l4; > > > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - packet_set_ipv4_addr(pkt, &inner_l3->ip_src, > > - conn->key.src.addr.ipv4); > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - packet_set_ipv4_addr(pkt, &inner_l3->ip_dst, > > - conn->key.dst.addr.ipv4); > > - } > > + /* Reverse the key for inner packet. 
*/ > > + struct conn_key rev_key = *key; > > + conn_key_reverse(&rev_key); > > + > > + pat_packet(pkt, &rev_key); > > + > > + if (key->dl_type == htons(ETH_TYPE_IP)) { > > + nat_packet_ipv4(pkt, &rev_key, nat_action); > > > > - reverse_pat_packet(pkt, conn); > > + struct icmp_header *icmp = (struct icmp_header *) l4; > > icmp->icmp_csum = 0; > > icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad); > > } else { > > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > - struct icmp6_data_header *icmp6 = dp_packet_l4(pkt); > > - struct ovs_16aligned_ip6_hdr *inner_l3_6 = > > - (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); > > - /* This call is already verified to succeed during the code > path from > > - * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */ > > - extract_l3_ipv6(&inner_key, inner_l3_6, > > - tail - ((char *)inner_l3_6) - pad, > > - &inner_l4); > > - pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6; > > - pkt->l4_ofs += inner_l4 - (char *) icmp6; > > - > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - inner_l3_6->ip6_src.be32, > > - &conn->key.src.addr.ipv6, true); > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - inner_l3_6->ip6_dst.be32, > > - &conn->key.dst.addr.ipv6, true); > > - } > > - reverse_pat_packet(pkt, conn); > > + nat_packet_ipv6(pkt, &rev_key, nat_action); > > + > > + struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) > l4; > > icmp6->icmp6_base.icmp6_cksum = 0; > > - icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, > icmp6, > > - IPPROTO_ICMPV6, tail - (char *) icmp6 - pad); > > + icmp6->icmp6_base.icmp6_cksum = > > + packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6, > > + tail - (char *) icmp6 - pad); > > } > > + > > pkt->l3_ofs = orig_l3_ofs; > > pkt->l4_ofs = orig_l4_ofs; > > } > > > > static void > > -un_nat_packet(struct dp_packet *pkt, const struct conn *conn, > > - bool 
related) > > +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool > related) > > { > > - if (conn->nat_action & NAT_ACTION_SRC) { > > - pkt->md.ct_state |= CS_DST_NAT; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *nh = dp_packet_l3(pkt); > > - packet_set_ipv4_addr(pkt, &nh->ip_dst, > > - conn->key.src.addr.ipv4); > > - } else { > > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - nh6->ip6_dst.be32, > > - &conn->key.src.addr.ipv6, true); > > - } > > + struct conn_key *key = reply ? &conn->key : &conn->rev_key; > > + uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action) > > + : conn->nat_action; > > > > - if (OVS_UNLIKELY(related)) { > > - reverse_nat_packet(pkt, conn); > > - } else { > > - un_pat_packet(pkt, conn); > > - } > > - } else if (conn->nat_action & NAT_ACTION_DST) { > > + /* Update ct_state. */ > > + if (nat_action & NAT_ACTION_SRC) { > > pkt->md.ct_state |= CS_SRC_NAT; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *nh = dp_packet_l3(pkt); > > - packet_set_ipv4_addr(pkt, &nh->ip_src, > > - conn->key.dst.addr.ipv4); > > - } else { > > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > > - nh6->ip6_src.be32, > > - &conn->key.dst.addr.ipv6, true); > > - } > > + } else if (nat_action & NAT_ACTION_DST) { > > + pkt->md.ct_state |= CS_DST_NAT; > > + } > > + > > + /* Reverse the key for outer header. 
*/ > > + if (key->dl_type == htons(ETH_TYPE_IP)) { > > + nat_packet_ipv4(pkt, key, nat_action); > > + } else { > > + nat_packet_ipv6(pkt, key, nat_action); > > + } > > > > + if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) { > > if (OVS_UNLIKELY(related)) { > > - reverse_nat_packet(pkt, conn); > > + nat_action = nat_action_reverse(nat_action); > > + nat_inner_packet(pkt, key, nat_action); > > } else { > > - un_pat_packet(pkt, conn); > > + pat_packet(pkt, key); > > } > > } > > } > > @@ -1082,7 +1013,7 @@ conn_not_found(struct conntrack *ct, struct > dp_packet *pkt, > > memcpy(nc, nat_conn, sizeof *nc); > > } > > > > - nat_packet(pkt, nc, ctx->icmp_related); > > + nat_packet(pkt, nc, false, ctx->icmp_related); > > memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); > > memcpy(&nat_conn->rev_key, &nc->key, sizeof > nat_conn->rev_key); > > nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; > > @@ -1185,11 +1116,8 @@ handle_nat(struct dp_packet *pkt, struct conn > *conn, > > if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) { > > pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT); > > } > > - if (reply) { > > - un_nat_packet(pkt, conn, related); > > - } else { > > - nat_packet(pkt, conn, related); > > - } > > + > > + nat_packet(pkt, conn, reply, related); > > } > > } > > > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > > index b1b01380a..6c9e00fa3 100644 > > --- a/tests/system-traffic.at > > +++ b/tests/system-traffic.at > > @@ -7193,6 +7193,113 @@ > recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > > > +AT_SETUP([conntrack - ICMP from different source related with NAT]) > > +AT_SKIP_IF([test $HAVE_NC = no]) > > +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) > > +CHECK_CONNTRACK() > > +CHECK_CONNTRACK_NAT() > > +OVS_TRAFFIC_VSWITCHD_START() > > + > > +ADD_NAMESPACES(client, server) > > + > > +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10") > > 
+ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20") > > + > > +dnl Send traffic from client to CT, do DNAT if the traffic is new > otherwise send it to server > > +AT_DATA([flows.txt], [dnl > > +table=0,ip,actions=ct(table=1,zone=42,nat) > > > +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,zone=42,nat(dst(192.168.10.20)) > > nit: I'd stick with nat(dst=192.168...) > > > > +table=1,icmp,ct_state=+trk+rel-rpl,actions=ct(commit,table=2,zone=42,nat) > > +table=1,ip,actions=resubmit(,2) > > > +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server > > > +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server > > > +table=2,in_port=ovs-server,icmp,ct_state=+trk+rel,actions=output:ovs-client > > > +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client > > +]) > > + > > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > > + > > +rm server.pcap > > +OVS_DAEMONIZE([tcpdump -l -U -i ovs-server -w server.pcap > 2>tcpdump0_err], [tcpdump0.pid]) > > +OVS_WAIT_UNTIL([grep "listening" tcpdump0_err]) > > + > > +dnl Send UDP client->server > > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > > > +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"]) > > +dnl Send UDP response server->client > > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\ > > > +packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"]) > > +dnl Fake router sending ICMP need frag router->server > > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > > > +packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\ > > +actions=resubmit(,0)" > > +]) > > + > > +AT_CHECK([ovs-appctl revalidator/purge], [0]) > > +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | 
sort > ], [0], [dnl > > + n_packets=3, n_bytes=154, reset_counts ip > actions=ct(table=1,zone=42,nat) > > + table=1, n_packets=1, n_bytes=42, reset_counts > ct_state=+new+trk,ip,in_port=1 > actions=ct(commit,table=2,zone=42,nat(dst=192.168.10.20)) > > + table=1, n_packets=1, n_bytes=42, reset_counts ip actions=resubmit(,2) > > + table=1, n_packets=1, n_bytes=70, reset_counts > ct_state=+rel-rpl+trk,icmp actions=ct(commit,table=2,zone=42,nat) > > + table=2, n_packets=1, n_bytes=42, reset_counts > ct_state=+new+trk,ip,in_port=1 actions=output:2 > > + table=2, n_packets=1, n_bytes=42, reset_counts > ct_state=+rpl+trk,ip,in_port=2 actions=output:1 > > + table=2, n_packets=1, n_bytes=70, reset_counts > ct_state=+rel+trk,icmp,in_port=1 actions=output:2 > > + table=2, reset_counts ct_state=+rel+trk,icmp,in_port=2 actions=output:1 > > +OFPST_FLOW reply (OF1.5): > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], > [dnl > > > +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1),zone=42 > > +]) > > + > > +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000]) > > + > > +AT_CHECK([ovs-pcap server.pcap | grep 000000001020000000002000], [0], > [dnl > > > +000000001020000000002000080045000038011f0000ff011b40c0a81401c0a80a140304f778000005784500001c000040000a11d162c0a80a14c0a8140a0002000100080000 > > +]) > > + > > +dnl Check the ICMP error in reply direction > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=42]) > > + > > +rm client.pcap > > +OVS_DAEMONIZE([tcpdump -l -U -i ovs-client -w client.pcap > 2>tcpdump1_err], [tcpdump1.pid]) > > +OVS_WAIT_UNTIL([grep "listening" tcpdump1_err]) > > + > > +dnl Send UDP client->server > > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > > > +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"]) > > +dnl Fake router sending ICMP need frag 
router->client > > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\ > > > +packet=000000002010000000002000080045000038011F0000FF01114AC0A81401C0A8140A0304F778000005784500001C000040000A11D162C0A8140AC0A80A140001000200080000,\ > > +actions=resubmit(,0)" > > +]) > > + > > +AT_CHECK([ovs-appctl revalidator/purge], [0]) > > +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort > ], [0], [dnl > > + n_packets=5, n_bytes=266, reset_counts ip > actions=ct(table=1,zone=42,nat) > > + table=1, n_packets=1, n_bytes=70, reset_counts > ct_state=+rel-rpl+trk,icmp actions=ct(commit,table=2,zone=42,nat) > > + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2) > > + table=1, n_packets=2, n_bytes=84, reset_counts > ct_state=+new+trk,ip,in_port=1 > actions=ct(commit,table=2,zone=42,nat(dst=192.168.10.20)) > > + table=2, n_packets=1, n_bytes=42, reset_counts > ct_state=+rpl+trk,ip,in_port=2 actions=output:1 > > + table=2, n_packets=1, n_bytes=70, reset_counts > ct_state=+rel+trk,icmp,in_port=1 actions=output:2 > > + table=2, n_packets=1, n_bytes=70, reset_counts > ct_state=+rel+trk,icmp,in_port=2 actions=output:1 > > + table=2, n_packets=2, n_bytes=84, reset_counts > ct_state=+new+trk,ip,in_port=1 actions=output:2 > > +OFPST_FLOW reply (OF1.5): > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], > [dnl > > > +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1),zone=42 > > +]) > > + > > +OVS_WAIT_UNTIL([ovs-pcap client.pcap | grep 000000002010000000002000]) > > + > > +AT_CHECK([ovs-pcap client.pcap | grep 000000002010000000002000], [0], > [dnl > > > +000000002010000000002000080045000038011f0000ff011137c0a81414c0a8140a0304f778000005784500001c000040000a11c762c0a8140ac0a814140001000200080000 > > +]) > > + > > +OVS_TRAFFIC_VSWITCHD_STOP > > +AT_CLEANUP > > + > > AT_BANNER([IGMP]) > > > > AT_SETUP([IGMP - flood under normal action]) > > 
-- > > 2.39.1 > >
diff --git a/lib/conntrack.c b/lib/conntrack.c index 550b2be9b..3162924ca 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -764,109 +764,61 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, } static void -pat_packet(struct dp_packet *pkt, const struct conn *conn) +pat_packet(struct dp_packet *pkt, const struct conn_key *key) { - if (conn->nat_action & NAT_ACTION_SRC) { - if (conn->key.nw_proto == IPPROTO_TCP) { - struct tcp_header *th = dp_packet_l4(pkt); - packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst); - } else if (conn->key.nw_proto == IPPROTO_UDP) { - struct udp_header *uh = dp_packet_l4(pkt); - packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst); - } - } else if (conn->nat_action & NAT_ACTION_DST) { - if (conn->key.nw_proto == IPPROTO_TCP) { - packet_set_tcp_port(pkt, conn->rev_key.dst.port, - conn->rev_key.src.port); - } else if (conn->key.nw_proto == IPPROTO_UDP) { - packet_set_udp_port(pkt, conn->rev_key.dst.port, - conn->rev_key.src.port); - } + if (key->nw_proto == IPPROTO_TCP) { + packet_set_tcp_port(pkt, key->dst.port, key->src.port); + } else if (key->nw_proto == IPPROTO_UDP) { + packet_set_udp_port(pkt, key->dst.port, key->src.port); } } -static void -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related) +static uint16_t +nat_action_reverse(uint16_t nat_action) { - if (conn->nat_action & NAT_ACTION_SRC) { - pkt->md.ct_state |= CS_SRC_NAT; - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = dp_packet_l3(pkt); - packet_set_ipv4_addr(pkt, &nh->ip_src, - conn->rev_key.dst.addr.ipv4); - } else { - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); - packet_set_ipv6_addr(pkt, conn->key.nw_proto, - nh6->ip6_src.be32, - &conn->rev_key.dst.addr.ipv6, true); - } - if (!related) { - pat_packet(pkt, conn); - } - } else if (conn->nat_action & NAT_ACTION_DST) { - pkt->md.ct_state |= CS_DST_NAT; - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = 
dp_packet_l3(pkt); - packet_set_ipv4_addr(pkt, &nh->ip_dst, - conn->rev_key.src.addr.ipv4); - } else { - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); - packet_set_ipv6_addr(pkt, conn->key.nw_proto, - nh6->ip6_dst.be32, - &conn->rev_key.src.addr.ipv6, true); - } - if (!related) { - pat_packet(pkt, conn); - } + if (nat_action & NAT_ACTION_SRC) { + VLOG_INFO("original SRC"); + nat_action ^= NAT_ACTION_SRC; + nat_action |= NAT_ACTION_DST; + } else if (nat_action & NAT_ACTION_DST) { + VLOG_INFO("original DST"); + nat_action ^= NAT_ACTION_DST; + nat_action |= NAT_ACTION_SRC; } + return nat_action; } static void -un_pat_packet(struct dp_packet *pkt, const struct conn *conn) +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key, + uint16_t nat_action) { - if (conn->nat_action & NAT_ACTION_SRC) { - if (conn->key.nw_proto == IPPROTO_TCP) { - struct tcp_header *th = dp_packet_l4(pkt); - packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port); - } else if (conn->key.nw_proto == IPPROTO_UDP) { - struct udp_header *uh = dp_packet_l4(pkt); - packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port); - } - } else if (conn->nat_action & NAT_ACTION_DST) { - if (conn->key.nw_proto == IPPROTO_TCP) { - packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port); - } else if (conn->key.nw_proto == IPPROTO_UDP) { - packet_set_udp_port(pkt, conn->key.dst.port, conn->key.src.port); - } + struct ip_header *nh = dp_packet_l3(pkt); + + if (nat_action & NAT_ACTION_SRC) { + packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4); + } else if (nat_action & NAT_ACTION_DST) { + packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4); } } static void -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn) +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key, + uint16_t nat_action) { - if (conn->nat_action & NAT_ACTION_SRC) { - if (conn->key.nw_proto == IPPROTO_TCP) { - struct tcp_header *th_in = dp_packet_l4(pkt); - 
packet_set_tcp_port(pkt, conn->key.src.port, - th_in->tcp_dst); - } else if (conn->key.nw_proto == IPPROTO_UDP) { - struct udp_header *uh_in = dp_packet_l4(pkt); - packet_set_udp_port(pkt, conn->key.src.port, - uh_in->udp_dst); - } - } else if (conn->nat_action & NAT_ACTION_DST) { - if (conn->key.nw_proto == IPPROTO_TCP) { - packet_set_tcp_port(pkt, conn->key.src.port, - conn->key.dst.port); - } else if (conn->key.nw_proto == IPPROTO_UDP) { - packet_set_udp_port(pkt, conn->key.src.port, - conn->key.dst.port); - } + struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); + + if (nat_action & NAT_ACTION_SRC) { + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32, + &key->dst.addr.ipv6, true); + } else if (nat_action & NAT_ACTION_DST) { + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32, + &key->src.addr.ipv6, true); } } static void -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, + uint16_t nat_action) { char *tail = dp_packet_tail(pkt); uint16_t pad = dp_packet_l2_pad_size(pkt); @@ -875,98 +827,77 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) uint16_t orig_l3_ofs = pkt->l3_ofs; uint16_t orig_l4_ofs = pkt->l4_ofs; - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = dp_packet_l3(pkt); - struct icmp_header *icmp = dp_packet_l4(pkt); - struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); - /* This call is already verified to succeed during the code path from - * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ - extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad, + void *l3 = dp_packet_l3(pkt); + void *l4 = dp_packet_l4(pkt); + void *inner_l3; + /* These calls are already verified to succeed during the code path from + * 'conn_key_extract()' which calls + * 'extract_l4_icmp()'/'extract_l4_icmp6()'. 
*/ + if (key->dl_type == htons(ETH_TYPE_IP)) { + inner_l3 = (char *) l4 + sizeof(struct icmp_header); + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad, &inner_l4, false); - pkt->l3_ofs += (char *) inner_l3 - (char *) nh; - pkt->l4_ofs += inner_l4 - (char *) icmp; + } else { + inner_l3 = (char *) l4 + sizeof(struct icmp6_data_header); + extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *) inner_l3) - pad, + &inner_l4); + } + pkt->l3_ofs += (char *) inner_l3 - (char *) l3; + pkt->l4_ofs += inner_l4 - (char *) l4; - if (conn->nat_action & NAT_ACTION_SRC) { - packet_set_ipv4_addr(pkt, &inner_l3->ip_src, - conn->key.src.addr.ipv4); - } else if (conn->nat_action & NAT_ACTION_DST) { - packet_set_ipv4_addr(pkt, &inner_l3->ip_dst, - conn->key.dst.addr.ipv4); - } + /* Reverse the key for inner packet. */ + struct conn_key rev_key = *key; + conn_key_reverse(&rev_key); + + pat_packet(pkt, &rev_key); + + if (key->dl_type == htons(ETH_TYPE_IP)) { + nat_packet_ipv4(pkt, &rev_key, nat_action); - reverse_pat_packet(pkt, conn); + struct icmp_header *icmp = (struct icmp_header *) l4; icmp->icmp_csum = 0; icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad); } else { - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); - struct icmp6_data_header *icmp6 = dp_packet_l4(pkt); - struct ovs_16aligned_ip6_hdr *inner_l3_6 = - (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); - /* This call is already verified to succeed during the code path from - * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. 
*/ - extract_l3_ipv6(&inner_key, inner_l3_6, - tail - ((char *)inner_l3_6) - pad, - &inner_l4); - pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6; - pkt->l4_ofs += inner_l4 - (char *) icmp6; - - if (conn->nat_action & NAT_ACTION_SRC) { - packet_set_ipv6_addr(pkt, conn->key.nw_proto, - inner_l3_6->ip6_src.be32, - &conn->key.src.addr.ipv6, true); - } else if (conn->nat_action & NAT_ACTION_DST) { - packet_set_ipv6_addr(pkt, conn->key.nw_proto, - inner_l3_6->ip6_dst.be32, - &conn->key.dst.addr.ipv6, true); - } - reverse_pat_packet(pkt, conn); + nat_packet_ipv6(pkt, &rev_key, nat_action); + + struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) l4; icmp6->icmp6_base.icmp6_cksum = 0; - icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6, - IPPROTO_ICMPV6, tail - (char *) icmp6 - pad); + icmp6->icmp6_base.icmp6_cksum = + packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6, + tail - (char *) icmp6 - pad); } + pkt->l3_ofs = orig_l3_ofs; pkt->l4_ofs = orig_l4_ofs; } static void -un_nat_packet(struct dp_packet *pkt, const struct conn *conn, - bool related) +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related) { - if (conn->nat_action & NAT_ACTION_SRC) { - pkt->md.ct_state |= CS_DST_NAT; - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = dp_packet_l3(pkt); - packet_set_ipv4_addr(pkt, &nh->ip_dst, - conn->key.src.addr.ipv4); - } else { - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); - packet_set_ipv6_addr(pkt, conn->key.nw_proto, - nh6->ip6_dst.be32, - &conn->key.src.addr.ipv6, true); - } + struct conn_key *key = reply ? &conn->key : &conn->rev_key; + uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action) + : conn->nat_action; - if (OVS_UNLIKELY(related)) { - reverse_nat_packet(pkt, conn); - } else { - un_pat_packet(pkt, conn); - } - } else if (conn->nat_action & NAT_ACTION_DST) { + /* Update ct_state. 
*/ + if (nat_action & NAT_ACTION_SRC) { pkt->md.ct_state |= CS_SRC_NAT; - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = dp_packet_l3(pkt); - packet_set_ipv4_addr(pkt, &nh->ip_src, - conn->key.dst.addr.ipv4); - } else { - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); - packet_set_ipv6_addr(pkt, conn->key.nw_proto, - nh6->ip6_src.be32, - &conn->key.dst.addr.ipv6, true); - } + } else if (nat_action & NAT_ACTION_DST) { + pkt->md.ct_state |= CS_DST_NAT; + } + + /* Reverse the key for outer header. */ + if (key->dl_type == htons(ETH_TYPE_IP)) { + nat_packet_ipv4(pkt, key, nat_action); + } else { + nat_packet_ipv6(pkt, key, nat_action); + } + if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) { if (OVS_UNLIKELY(related)) { - reverse_nat_packet(pkt, conn); + nat_action = nat_action_reverse(nat_action); + nat_inner_packet(pkt, key, nat_action); } else { - un_pat_packet(pkt, conn); + pat_packet(pkt, key); } } } @@ -1082,7 +1013,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, memcpy(nc, nat_conn, sizeof *nc); } - nat_packet(pkt, nc, ctx->icmp_related); + nat_packet(pkt, nc, false, ctx->icmp_related); memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key); nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; @@ -1185,11 +1116,8 @@ handle_nat(struct dp_packet *pkt, struct conn *conn, if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) { pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT); } - if (reply) { - un_nat_packet(pkt, conn, related); - } else { - nat_packet(pkt, conn, related); - } + + nat_packet(pkt, conn, reply, related); } } diff --git a/tests/system-traffic.at b/tests/system-traffic.at index b1b01380a..6c9e00fa3 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -7193,6 +7193,113 @@ recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - ICMP from 
different source related with NAT]) +AT_SKIP_IF([test $HAVE_NC = no]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(client, server) + +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10") +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20") + +dnl Send traffic from client to CT, do DNAT if the traffic is new otherwise send it to server +AT_DATA([flows.txt], [dnl +table=0,ip,actions=ct(table=1,zone=42,nat) +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=2,zone=42,nat(dst(192.168.10.20)) +table=1,icmp,ct_state=+trk+rel-rpl,actions=ct(commit,table=2,zone=42,nat) +table=1,ip,actions=resubmit(,2) +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=output:ovs-server +table=2,in_port=ovs-server,icmp,ct_state=+trk+rel,actions=output:ovs-client +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +rm server.pcap +OVS_DAEMONIZE([tcpdump -l -U -i ovs-server -w server.pcap 2>tcpdump0_err], [tcpdump0.pid]) +OVS_WAIT_UNTIL([grep "listening" tcpdump0_err]) + +dnl Send UDP client->server +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"]) +dnl Send UDP response server->client +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\ +packet=00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions=resubmit(,0)"]) +dnl Fake router sending ICMP need frag router->server +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ +packet=000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,\ +actions=resubmit(,0)" +]) + +AT_CHECK([ovs-appctl 
revalidator/purge], [0]) +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl + n_packets=3, n_bytes=154, reset_counts ip actions=ct(table=1,zone=42,nat) + table=1, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=ct(commit,table=2,zone=42,nat(dst=192.168.10.20)) + table=1, n_packets=1, n_bytes=42, reset_counts ip actions=resubmit(,2) + table=1, n_packets=1, n_bytes=70, reset_counts ct_state=+rel-rpl+trk,icmp actions=ct(commit,table=2,zone=42,nat) + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2 + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1 + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2 + table=2, reset_counts ct_state=+rel+trk,icmp,in_port=2 actions=output:1 +OFPST_FLOW reply (OF1.5): +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1),zone=42 +]) + +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000]) + +AT_CHECK([ovs-pcap server.pcap | grep 000000001020000000002000], [0], [dnl +000000001020000000002000080045000038011f0000ff011b40c0a81401c0a80a140304f778000005784500001c000040000a11d162c0a80a14c0a8140a0002000100080000 +]) + +dnl Check the ICMP error in reply direction +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=42]) + +rm client.pcap +OVS_DAEMONIZE([tcpdump -l -U -i ovs-client -w client.pcap 2>tcpdump1_err], [tcpdump1.pid]) +OVS_WAIT_UNTIL([grep "listening" tcpdump1_err]) + +dnl Send UDP client->server +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ +packet=00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions=resubmit(,0)"]) +dnl Fake router sending ICMP need frag router->client +AT_CHECK([ovs-ofctl packet-out br0 
"in_port=ovs-server,\ +packet=000000002010000000002000080045000038011F0000FF01114AC0A81401C0A8140A0304F778000005784500001C000040000A11D162C0A8140AC0A80A140001000200080000,\ +actions=resubmit(,0)" +]) + +AT_CHECK([ovs-appctl revalidator/purge], [0]) +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], [0], [dnl + n_packets=5, n_bytes=266, reset_counts ip actions=ct(table=1,zone=42,nat) + table=1, n_packets=1, n_bytes=70, reset_counts ct_state=+rel-rpl+trk,icmp actions=ct(commit,table=2,zone=42,nat) + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2) + table=1, n_packets=2, n_bytes=84, reset_counts ct_state=+new+trk,ip,in_port=1 actions=ct(commit,table=2,zone=42,nat(dst=192.168.10.20)) + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=+rpl+trk,ip,in_port=2 actions=output:1 + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=1 actions=output:2 + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=+rel+trk,icmp,in_port=2 actions=output:1 + table=2, n_packets=2, n_bytes=84, reset_counts ct_state=+new+trk,ip,in_port=1 actions=output:2 +OFPST_FLOW reply (OF1.5): +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], [dnl +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=192.168.10.20,dst=192.168.20.10,sport=2,dport=1),zone=42 +]) + +OVS_WAIT_UNTIL([ovs-pcap client.pcap | grep 000000002010000000002000]) + +AT_CHECK([ovs-pcap client.pcap | grep 000000002010000000002000], [0], [dnl +000000002010000000002000080045000038011f0000ff011137c0a81414c0a8140a0304f778000005784500001c000040000a11c762c0a8140ac0a814140001000200080000 +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([IGMP]) AT_SETUP([IGMP - flood under normal action])
The inner header of related traffic was not handled properly. Simplify the code, which allows proper handling of the inner headers. Reported-at: https://bugzilla.redhat.com/2137754 Signed-off-by: Ales Musil <amusil@redhat.com> --- v6: Rebase on top of current master. Address comments from Paolo: - Add a test case for ICMP related traffic in the reply direction. - Fix a mistake where the inner header was using the wrong nat_action. v5: Rebase on top of current master. Address comments from Dumitru: - Use explicit struct sizes for the inner_l3 pointer. - Use a copied conn_key for the reverse operation instead of reversing the original one twice. - Update the test case to use a separate zone instead of the default one. v4: Rebase on top of current master. Use the output of ovs-pcap in tests rather than tcpdump. v3: Rebase on top of current master. Update the BZ reference. Update the test case. --- lib/conntrack.c | 254 ++++++++++++++-------------------------- tests/system-traffic.at | 107 +++++++++++++++++ 2 files changed, 198 insertions(+), 163 deletions(-)