Message ID | 42a54f0abac560ad8a20991a05b30c5e0cdecc97.1623420545.git.lorenzo.bianconi@redhat.com |
---|---|
State | Not Applicable |
Headers | show |
Series | Introduce check_pkt_larger for ingress traffic | expand |
On Fri, Jun 11, 2021 at 10:32 AM Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote: > > Introduce check_pkt_larger action for ingress traffic > entering the cluster from a distributed gw router port > or from a gw router. This patch enables pMTU discovery > for ingress traffic. > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> Hi Lorenzo, Please update the ovn nb logical flow documentation. This patch calls check_pkt_larger() in two places - one in 'lr_in_admission' and the other in 'lr_in_chk_pkt_len'. Instead of this I'd suggest to first determine if check pkt larger is required or not by storing 1 in a register. And then you can call check_pkt_larger() if the register bit is set. Thanks Numan > --- > northd/ovn-northd.c | 166 ++++++++++++++++++++++++++------------------ > tests/ovn.at | 137 ++++++++++++++++++++++++++++++++++-- > 2 files changed, 231 insertions(+), 72 deletions(-) > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 23367dbb0..a9b046898 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -9501,6 +9501,10 @@ build_adm_ctrl_flows_for_lrouter( > } > } > > +static void > +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu, > + struct ds *actions); > + > /* Logical router ingress Table 0: L2 Admission Control > * This table drops packets that the router shouldn’t see at all based > * on their Ethernet headers. > @@ -9528,6 +9532,8 @@ build_adm_ctrl_flows_for_lrouter_port( > * the pipeline. 
> */ > ds_clear(actions); > + > + build_check_pkt_len_action_string(op, NULL, actions); > ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", > op->lrp_networks.ea_s); > > @@ -10422,32 +10428,110 @@ build_arp_resolve_flows_for_lrouter_port( > > } > > +static void > +build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows, > + struct ds *match, struct ds *actions, > + enum ovn_stage stage) > +{ > + if (op->lrp_networks.ipv4_addrs) { > + ds_clear(match); > + ds_put_format(match, > + "inport == %s && ip4 && "REGBIT_PKT_LARGER > + " && !"REGBIT_EGRESS_LOOPBACK, op->json_key); > + > + ds_clear(actions); > + /* Set icmp4.frag_mtu to gw_mtu */ > + ds_put_format(actions, > + "icmp4_error {" > + REGBIT_EGRESS_LOOPBACK" = 1; " > + REGBIT_PKT_LARGER" = 0; " > + "eth.dst = %s; " > + "ip4.dst = ip4.src; " > + "ip4.src = %s; " > + "ip.ttl = 255; " > + "icmp4.type = 3; /* Destination Unreachable. */ " > + "icmp4.code = 4; /* Frag Needed and DF was Set. */ " > + "icmp4.frag_mtu = %d; " > + "next(pipeline=ingress, table=%d); };", > + op->lrp_networks.ea_s, > + op->lrp_networks.ipv4_addrs[0].addr_s, > + mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); > + ovn_lflow_add_with_hint(lflows, op->od, stage, 150, > + ds_cstr(match), ds_cstr(actions), > + &op->nbrp->header_); > + } > + > + if (op->lrp_networks.ipv6_addrs) { > + ds_clear(match); > + ds_put_format(match, "inport == %s && ip6 && "REGBIT_PKT_LARGER > + " && !"REGBIT_EGRESS_LOOPBACK, op->json_key); > + > + ds_clear(actions); > + /* Set icmp6.frag_mtu to gw_mtu */ > + ds_put_format(actions, > + "icmp6_error {" > + REGBIT_EGRESS_LOOPBACK" = 1; " > + REGBIT_PKT_LARGER" = 0; " > + "eth.dst = %s; " > + "ip6.dst = ip6.src; " > + "ip6.src = %s; " > + "ip.ttl = 255; " > + "icmp6.type = 2; /* Packet Too Big. 
*/ " > + "icmp6.code = 0; " > + "icmp6.frag_mtu = %d; " > + "next(pipeline=ingress, table=%d); };", > + op->lrp_networks.ea_s, > + op->lrp_networks.ipv6_addrs[0].addr_s, > + mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); > + ovn_lflow_add_with_hint(lflows, op->od, stage, 150, > + ds_cstr(match), ds_cstr(actions), > + &op->nbrp->header_); > + } > +} > + > +static void > +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu, > + struct ds *actions) > +{ > + int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0); > + > + if (gw_mtu > 0) { > + /* Add the flows only if gateway_mtu is configured. */ > + ds_put_format(actions, > + REGBIT_PKT_LARGER" = check_pkt_larger(%d); ", > + gw_mtu + VLAN_ETH_HEADER_LEN); > + } > + if (pmtu) { > + *pmtu = gw_mtu; > + } > +} > + > static void > build_check_pkt_len_flows_for_lrp(struct ovn_port *op, > struct hmap *lflows, struct hmap *ports, > struct ds *match, struct ds *actions) > { > - int gw_mtu = 0; > + int gw_mtu; > > - if (op->nbrp) { > - gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0); > - } > - /* Add the flows only if gateway_mtu is configured. 
*/ > + ds_clear(actions); > + build_check_pkt_len_action_string(op, &gw_mtu, actions); > if (gw_mtu <= 0) { > return; > } > > + ds_put_format(actions, "next;"); > + > ds_clear(match); > ds_put_format(match, "outport == %s", op->json_key); > > - ds_clear(actions); > - ds_put_format(actions, > - REGBIT_PKT_LARGER" = check_pkt_larger(%d);" > - " next;", gw_mtu + VLAN_ETH_HEADER_LEN); > ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50, > ds_cstr(match), ds_cstr(actions), > &op->nbrp->header_); > > + /* ingress traffic */ > + build_icmperr_pkt_big_flows(op, gw_mtu, lflows, match, actions, > + S_ROUTER_IN_IP_INPUT); > + > for (size_t i = 0; i < op->od->nbr->n_ports; i++) { > struct ovn_port *rp = ovn_port_find(ports, > op->od->nbr->ports[i]->name); > @@ -10455,63 +10539,9 @@ build_check_pkt_len_flows_for_lrp(struct ovn_port *op, > continue; > } > > - if (rp->lrp_networks.ipv4_addrs) { > - ds_clear(match); > - ds_put_format(match, "inport == %s && outport == %s" > - " && ip4 && "REGBIT_PKT_LARGER, > - rp->json_key, op->json_key); > - > - ds_clear(actions); > - /* Set icmp4.frag_mtu to gw_mtu */ > - ds_put_format(actions, > - "icmp4_error {" > - REGBIT_EGRESS_LOOPBACK" = 1; " > - "eth.dst = %s; " > - "ip4.dst = ip4.src; " > - "ip4.src = %s; " > - "ip.ttl = 255; " > - "icmp4.type = 3; /* Destination Unreachable. */ " > - "icmp4.code = 4; /* Frag Needed and DF was Set. 
*/ " > - "icmp4.frag_mtu = %d; " > - "next(pipeline=ingress, table=%d); };", > - rp->lrp_networks.ea_s, > - rp->lrp_networks.ipv4_addrs[0].addr_s, > - gw_mtu, > - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); > - ovn_lflow_add_with_hint(lflows, op->od, > - S_ROUTER_IN_LARGER_PKTS, 50, > - ds_cstr(match), ds_cstr(actions), > - &rp->nbrp->header_); > - } > - > - if (rp->lrp_networks.ipv6_addrs) { > - ds_clear(match); > - ds_put_format(match, "inport == %s && outport == %s" > - " && ip6 && "REGBIT_PKT_LARGER, > - rp->json_key, op->json_key); > - > - ds_clear(actions); > - /* Set icmp6.frag_mtu to gw_mtu */ > - ds_put_format(actions, > - "icmp6_error {" > - REGBIT_EGRESS_LOOPBACK" = 1; " > - "eth.dst = %s; " > - "ip6.dst = ip6.src; " > - "ip6.src = %s; " > - "ip.ttl = 255; " > - "icmp6.type = 2; /* Packet Too Big. */ " > - "icmp6.code = 0; " > - "icmp6.frag_mtu = %d; " > - "next(pipeline=ingress, table=%d); };", > - rp->lrp_networks.ea_s, > - rp->lrp_networks.ipv6_addrs[0].addr_s, > - gw_mtu, > - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); > - ovn_lflow_add_with_hint(lflows, op->od, > - S_ROUTER_IN_LARGER_PKTS, 50, > - ds_cstr(match), ds_cstr(actions), > - &rp->nbrp->header_); > - } > + /* egress traffic */ > + build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, match, actions, > + S_ROUTER_IN_LARGER_PKTS); > } > } > > @@ -11579,8 +11609,10 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, > * down in the pipeline. 
> */ > ds_clear(actions); > + > + build_check_pkt_len_action_string(od->l3dgw_port, NULL, actions); > ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", > - od->l3dgw_port->lrp_networks.ea_s); > + od->l3dgw_port->lrp_networks.ea_s); > > ds_clear(match); > ds_put_format(match, > diff --git a/tests/ovn.at b/tests/ovn.at > index 5c3ed2633..cf3cbd4d3 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -16312,6 +16312,52 @@ test_ip_packet_larger() { > fi > } > > +test_ip_packet_larger_ext() { > + local mtu=$1 > + > + # Send ip packet from sw0-port1 to outside > + src_mac="00000012af11" # external mac > + dst_mac="000020201213" # lr0-public mac > + src_ip=`ip_to_hex 172 168 0 4` > + dst_ip=`ip_to_hex 172 168 0 100` > + # Set the packet length to 118. > + pkt_len=0076 > + packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf > + orig_packet_l3=${src_ip}${dst_ip}0900000000000000 > + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 > + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 > + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 > + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 > + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 > + packet=${packet}${orig_packet_l3} > + > + gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064 > + ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004 > + > + src_ip=`ip_to_hex 172 168 0 100` > + dst_ip=`ip_to_hex 172 168 0 4` > + # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload)) > + reply_pkt_len=0092 > + ip_csum=f397 > + icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2 > + icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu) > + icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf > + icmp_reply=${icmp_reply}${orig_packet_l3} > + echo $icmp_reply > 
br-phys_n1.expected > + > + echo $gw_ip_garp >> br-phys_n1.expected > + > + as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 > + as hv1 reset_pcap_file hv1-vif1 hv1/vif1 > + > + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp > + sleep 1 > + # Send packet from sw0-port1 to outside > + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet > + > + OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected]) > +} > + > test_ip6_packet_larger() { > local mtu=$1 > > @@ -16327,7 +16373,7 @@ test_ip6_packet_larger() { > local payload=${payload}0000000000000000000000000000000000000000 > local payload=${payload}0000000000000000000000000000000000000000 > > - local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst} > + local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst} > local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload} > > as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 > @@ -16344,11 +16390,11 @@ test_ip6_packet_larger() { > mtu_needed=$(expr ${packet_bytes} - 18) > if test $mtu -lt $mtu_needed; then > # First construct the inner IPv6 packet. > - inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst} > + inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst} > inner_icmp6=8000000062f00001 > inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6}) > inner_packet=${inner_ip6}${inner_icmp6_and_payload} > - > + > # Then the outer. 
> outer_ip6=6000000000883afe${ipv6_rt}${ipv6_src} > outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6) > @@ -16366,6 +16412,53 @@ test_ip6_packet_larger() { > fi > } > > +test_ip6_packet_larger_ext() { > + local mtu=$1 > + > + local eth_src=00000012af11 > + local eth_dst=000020201213 > + > + local ipv6_src=20000000000000000000000000000004 > + local ipv6_dst=20000000000000000000000000000001 > + > + local payload=0000000000000000000000000000000000000000 > + local payload=${payload}0000000000000000000000000000000000000000 > + local payload=${payload}0000000000000000000000000000000000000000 > + local payload=${payload}0000000000000000000000000000000000000000 > + > + local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst} > + local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload} > + > + local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064 > + echo $ns > br-phys_n1.expected > + > + as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 > + as hv1 reset_pcap_file hv1-vif1 hv1/vif1 > + > + local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst} > + local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src} > + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na > + sleep 1 > + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet > + AT_CAPTURE_FILE([trace-$mtu]) > + > + # First construct the inner IPv6 packet. > + inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst} > + inner_icmp6=9000000062f00001 > + inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6}) > + inner_packet=${inner_ip6}${inner_icmp6_and_payload} > + > + # Then the outer. 
> + outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src} > + outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6) > + outer_packet=${outer_ip6}${outer_icmp6_and_payload} > + > + icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet} > + echo $icmp6_reply >> br-phys_n1.expected > + > + OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected]) > +} > + > wait_for_ports_up > ovn-nbctl --wait=hv sync > > @@ -16398,7 +16491,7 @@ for mtu in 100 500 118; do > OVS_WAIT_FOR_OUTPUT([ > as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu > AT_CAPTURE_FILE([br-int-flows-$mtu]) > - grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1 > + grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3 > ]) > > AS_BOX([testing mtu $mtu - IPv4]) > @@ -16408,6 +16501,23 @@ for mtu in 100 500 118; do > test_ip6_packet_larger $mtu > done > > +AS_BOX([testing mtu $mtu]) > +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100 > +ovn-sbctl dump-flows > ext-sbflows-100 > +AT_CAPTURE_FILE([ext-sbflows-$mtu]) > + > +OVS_WAIT_FOR_OUTPUT([ > + as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100 > + AT_CAPTURE_FILE([ext-br-int-flows-100]) > + grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3 > +]) > + > +AS_BOX([testing ext mtu 100 - IPv4]) > +test_ip_packet_larger_ext 100 > + > +AS_BOX([testing mtu 100 - IPv6]) > +test_ip6_packet_larger_ext 100 > + > ovn-nbctl lsp-del sw0-lr0 > > ovn-nbctl lr-del lr0 > @@ -16445,7 +16555,7 @@ for mtu in 100 500 118; do > OVS_WAIT_FOR_OUTPUT([ > as hv1 ovs-ofctl dump-flows br-int > br-int-gw-flows-$mtu > AT_CAPTURE_FILE([br-int-gw-flows-$mtu]) > - grep "check_pkt_larger($(expr $mtu + 18))" br-int-gw-flows-$mtu | wc -l], [0], [1 > + grep "check_pkt_larger($(expr $mtu + 18))" br-int-gw-flows-$mtu | wc -l], [0], [3 > ]) > > AS_BOX([testing gw mtu $mtu - IPv4]) > @@ -16455,6 +16565,23 @@ for mtu in 100 500 118; 
do > test_ip6_packet_larger $mtu > done > > +AS_BOX([testing gw mtu $mtu]) > +check ovn-nbctl --wait=hv set logical_router_port lr1-public options:gateway_mtu=100 > +ovn-sbctl dump-flows > ext-gw-sbflows-100 > +AT_CAPTURE_FILE([ext-gw-sbflows-$mtu]) > + > +OVS_WAIT_FOR_OUTPUT([ > + as hv1 ovs-ofctl dump-flows br-int > ext-br-int-gw-flows-100 > + AT_CAPTURE_FILE([ext-br-int-gw-flows-100]) > + grep "check_pkt_larger(118)" ext-br-int-gw-flows-100 | wc -l], [0], [3 > +]) > + > +AS_BOX([testing gw ext mtu 100 - IPv4]) > +test_ip_packet_larger_ext 100 > + > +AS_BOX([testing gw mtu 100 - IPv6]) > +test_ip6_packet_larger_ext 100 > + > OVN_CLEANUP([hv1]) > AT_CLEANUP > ]) > -- > 2.31.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 23367dbb0..a9b046898 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -9501,6 +9501,10 @@ build_adm_ctrl_flows_for_lrouter( } } +static void +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu, + struct ds *actions); + /* Logical router ingress Table 0: L2 Admission Control * This table drops packets that the router shouldn’t see at all based * on their Ethernet headers. @@ -9528,6 +9532,8 @@ build_adm_ctrl_flows_for_lrouter_port( * the pipeline. */ ds_clear(actions); + + build_check_pkt_len_action_string(op, NULL, actions); ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", op->lrp_networks.ea_s); @@ -10422,32 +10428,110 @@ build_arp_resolve_flows_for_lrouter_port( } +static void +build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows, + struct ds *match, struct ds *actions, + enum ovn_stage stage) +{ + if (op->lrp_networks.ipv4_addrs) { + ds_clear(match); + ds_put_format(match, + "inport == %s && ip4 && "REGBIT_PKT_LARGER + " && !"REGBIT_EGRESS_LOOPBACK, op->json_key); + + ds_clear(actions); + /* Set icmp4.frag_mtu to gw_mtu */ + ds_put_format(actions, + "icmp4_error {" + REGBIT_EGRESS_LOOPBACK" = 1; " + REGBIT_PKT_LARGER" = 0; " + "eth.dst = %s; " + "ip4.dst = ip4.src; " + "ip4.src = %s; " + "ip.ttl = 255; " + "icmp4.type = 3; /* Destination Unreachable. */ " + "icmp4.code = 4; /* Frag Needed and DF was Set. 
*/ " + "icmp4.frag_mtu = %d; " + "next(pipeline=ingress, table=%d); };", + op->lrp_networks.ea_s, + op->lrp_networks.ipv4_addrs[0].addr_s, + mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); + ovn_lflow_add_with_hint(lflows, op->od, stage, 150, + ds_cstr(match), ds_cstr(actions), + &op->nbrp->header_); + } + + if (op->lrp_networks.ipv6_addrs) { + ds_clear(match); + ds_put_format(match, "inport == %s && ip6 && "REGBIT_PKT_LARGER + " && !"REGBIT_EGRESS_LOOPBACK, op->json_key); + + ds_clear(actions); + /* Set icmp6.frag_mtu to gw_mtu */ + ds_put_format(actions, + "icmp6_error {" + REGBIT_EGRESS_LOOPBACK" = 1; " + REGBIT_PKT_LARGER" = 0; " + "eth.dst = %s; " + "ip6.dst = ip6.src; " + "ip6.src = %s; " + "ip.ttl = 255; " + "icmp6.type = 2; /* Packet Too Big. */ " + "icmp6.code = 0; " + "icmp6.frag_mtu = %d; " + "next(pipeline=ingress, table=%d); };", + op->lrp_networks.ea_s, + op->lrp_networks.ipv6_addrs[0].addr_s, + mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); + ovn_lflow_add_with_hint(lflows, op->od, stage, 150, + ds_cstr(match), ds_cstr(actions), + &op->nbrp->header_); + } +} + +static void +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu, + struct ds *actions) +{ + int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0); + + if (gw_mtu > 0) { + /* Add the flows only if gateway_mtu is configured. */ + ds_put_format(actions, + REGBIT_PKT_LARGER" = check_pkt_larger(%d); ", + gw_mtu + VLAN_ETH_HEADER_LEN); + } + if (pmtu) { + *pmtu = gw_mtu; + } +} + static void build_check_pkt_len_flows_for_lrp(struct ovn_port *op, struct hmap *lflows, struct hmap *ports, struct ds *match, struct ds *actions) { - int gw_mtu = 0; + int gw_mtu; - if (op->nbrp) { - gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0); - } - /* Add the flows only if gateway_mtu is configured. 
*/ + ds_clear(actions); + build_check_pkt_len_action_string(op, &gw_mtu, actions); if (gw_mtu <= 0) { return; } + ds_put_format(actions, "next;"); + ds_clear(match); ds_put_format(match, "outport == %s", op->json_key); - ds_clear(actions); - ds_put_format(actions, - REGBIT_PKT_LARGER" = check_pkt_larger(%d);" - " next;", gw_mtu + VLAN_ETH_HEADER_LEN); ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50, ds_cstr(match), ds_cstr(actions), &op->nbrp->header_); + /* ingress traffic */ + build_icmperr_pkt_big_flows(op, gw_mtu, lflows, match, actions, + S_ROUTER_IN_IP_INPUT); + for (size_t i = 0; i < op->od->nbr->n_ports; i++) { struct ovn_port *rp = ovn_port_find(ports, op->od->nbr->ports[i]->name); @@ -10455,63 +10539,9 @@ build_check_pkt_len_flows_for_lrp(struct ovn_port *op, continue; } - if (rp->lrp_networks.ipv4_addrs) { - ds_clear(match); - ds_put_format(match, "inport == %s && outport == %s" - " && ip4 && "REGBIT_PKT_LARGER, - rp->json_key, op->json_key); - - ds_clear(actions); - /* Set icmp4.frag_mtu to gw_mtu */ - ds_put_format(actions, - "icmp4_error {" - REGBIT_EGRESS_LOOPBACK" = 1; " - "eth.dst = %s; " - "ip4.dst = ip4.src; " - "ip4.src = %s; " - "ip.ttl = 255; " - "icmp4.type = 3; /* Destination Unreachable. */ " - "icmp4.code = 4; /* Frag Needed and DF was Set. 
*/ " - "icmp4.frag_mtu = %d; " - "next(pipeline=ingress, table=%d); };", - rp->lrp_networks.ea_s, - rp->lrp_networks.ipv4_addrs[0].addr_s, - gw_mtu, - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); - ovn_lflow_add_with_hint(lflows, op->od, - S_ROUTER_IN_LARGER_PKTS, 50, - ds_cstr(match), ds_cstr(actions), - &rp->nbrp->header_); - } - - if (rp->lrp_networks.ipv6_addrs) { - ds_clear(match); - ds_put_format(match, "inport == %s && outport == %s" - " && ip6 && "REGBIT_PKT_LARGER, - rp->json_key, op->json_key); - - ds_clear(actions); - /* Set icmp6.frag_mtu to gw_mtu */ - ds_put_format(actions, - "icmp6_error {" - REGBIT_EGRESS_LOOPBACK" = 1; " - "eth.dst = %s; " - "ip6.dst = ip6.src; " - "ip6.src = %s; " - "ip.ttl = 255; " - "icmp6.type = 2; /* Packet Too Big. */ " - "icmp6.code = 0; " - "icmp6.frag_mtu = %d; " - "next(pipeline=ingress, table=%d); };", - rp->lrp_networks.ea_s, - rp->lrp_networks.ipv6_addrs[0].addr_s, - gw_mtu, - ovn_stage_get_table(S_ROUTER_IN_ADMISSION)); - ovn_lflow_add_with_hint(lflows, op->od, - S_ROUTER_IN_LARGER_PKTS, 50, - ds_cstr(match), ds_cstr(actions), - &rp->nbrp->header_); - } + /* egress traffic */ + build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, match, actions, + S_ROUTER_IN_LARGER_PKTS); } } @@ -11579,8 +11609,10 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, * down in the pipeline. 
*/ ds_clear(actions); + + build_check_pkt_len_action_string(od->l3dgw_port, NULL, actions); ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", - od->l3dgw_port->lrp_networks.ea_s); + od->l3dgw_port->lrp_networks.ea_s); ds_clear(match); ds_put_format(match, diff --git a/tests/ovn.at b/tests/ovn.at index 5c3ed2633..cf3cbd4d3 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -16312,6 +16312,52 @@ test_ip_packet_larger() { fi } +test_ip_packet_larger_ext() { + local mtu=$1 + + # Send ip packet from sw0-port1 to outside + src_mac="00000012af11" # external mac + dst_mac="000020201213" # lr0-public mac + src_ip=`ip_to_hex 172 168 0 4` + dst_ip=`ip_to_hex 172 168 0 100` + # Set the packet length to 118. + pkt_len=0076 + packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf + orig_packet_l3=${src_ip}${dst_ip}0900000000000000 + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 + orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 + packet=${packet}${orig_packet_l3} + + gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064 + ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004 + + src_ip=`ip_to_hex 172 168 0 100` + dst_ip=`ip_to_hex 172 168 0 4` + # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload)) + reply_pkt_len=0092 + ip_csum=f397 + icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2 + icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu) + icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf + icmp_reply=${icmp_reply}${orig_packet_l3} + echo $icmp_reply > br-phys_n1.expected + + echo $gw_ip_garp >> br-phys_n1.expected + + as hv1 reset_pcap_file br-phys_n1 
hv1/br-phys_n1 + as hv1 reset_pcap_file hv1-vif1 hv1/vif1 + + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp + sleep 1 + # Send packet from sw0-port1 to outside + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet + + OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected]) +} + test_ip6_packet_larger() { local mtu=$1 @@ -16327,7 +16373,7 @@ test_ip6_packet_larger() { local payload=${payload}0000000000000000000000000000000000000000 local payload=${payload}0000000000000000000000000000000000000000 - local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst} + local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst} local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload} as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 @@ -16344,11 +16390,11 @@ test_ip6_packet_larger() { mtu_needed=$(expr ${packet_bytes} - 18) if test $mtu -lt $mtu_needed; then # First construct the inner IPv6 packet. - inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst} + inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst} inner_icmp6=8000000062f00001 inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6}) inner_packet=${inner_ip6}${inner_icmp6_and_payload} - + # Then the outer. 
outer_ip6=6000000000883afe${ipv6_rt}${ipv6_src} outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6) @@ -16366,6 +16412,53 @@ test_ip6_packet_larger() { fi } +test_ip6_packet_larger_ext() { + local mtu=$1 + + local eth_src=00000012af11 + local eth_dst=000020201213 + + local ipv6_src=20000000000000000000000000000004 + local ipv6_dst=20000000000000000000000000000001 + + local payload=0000000000000000000000000000000000000000 + local payload=${payload}0000000000000000000000000000000000000000 + local payload=${payload}0000000000000000000000000000000000000000 + local payload=${payload}0000000000000000000000000000000000000000 + + local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst} + local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload} + + local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064 + echo $ns > br-phys_n1.expected + + as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 + as hv1 reset_pcap_file hv1-vif1 hv1/vif1 + + local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst} + local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src} + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na + sleep 1 + check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet + AT_CAPTURE_FILE([trace-$mtu]) + + # First construct the inner IPv6 packet. + inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst} + inner_icmp6=9000000062f00001 + inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6}) + inner_packet=${inner_ip6}${inner_icmp6_and_payload} + + # Then the outer. 
+ outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src} + outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6) + outer_packet=${outer_ip6}${outer_icmp6_and_payload} + + icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet} + echo $icmp6_reply >> br-phys_n1.expected + + OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected]) +} + wait_for_ports_up ovn-nbctl --wait=hv sync @@ -16398,7 +16491,7 @@ for mtu in 100 500 118; do OVS_WAIT_FOR_OUTPUT([ as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu AT_CAPTURE_FILE([br-int-flows-$mtu]) - grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1 + grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3 ]) AS_BOX([testing mtu $mtu - IPv4]) @@ -16408,6 +16501,23 @@ for mtu in 100 500 118; do test_ip6_packet_larger $mtu done +AS_BOX([testing mtu $mtu]) +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100 +ovn-sbctl dump-flows > ext-sbflows-100 +AT_CAPTURE_FILE([ext-sbflows-$mtu]) + +OVS_WAIT_FOR_OUTPUT([ + as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100 + AT_CAPTURE_FILE([ext-br-int-flows-100]) + grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3 +]) + +AS_BOX([testing ext mtu 100 - IPv4]) +test_ip_packet_larger_ext 100 + +AS_BOX([testing mtu 100 - IPv6]) +test_ip6_packet_larger_ext 100 + ovn-nbctl lsp-del sw0-lr0 ovn-nbctl lr-del lr0 @@ -16445,7 +16555,7 @@ for mtu in 100 500 118; do OVS_WAIT_FOR_OUTPUT([ as hv1 ovs-ofctl dump-flows br-int > br-int-gw-flows-$mtu AT_CAPTURE_FILE([br-int-gw-flows-$mtu]) - grep "check_pkt_larger($(expr $mtu + 18))" br-int-gw-flows-$mtu | wc -l], [0], [1 + grep "check_pkt_larger($(expr $mtu + 18))" br-int-gw-flows-$mtu | wc -l], [0], [3 ]) AS_BOX([testing gw mtu $mtu - IPv4]) @@ -16455,6 +16565,23 @@ for mtu in 100 500 118; do test_ip6_packet_larger $mtu done +AS_BOX([testing gw mtu $mtu]) +check ovn-nbctl --wait=hv set 
logical_router_port lr1-public options:gateway_mtu=100 +ovn-sbctl dump-flows > ext-gw-sbflows-100 +AT_CAPTURE_FILE([ext-gw-sbflows-$mtu]) + +OVS_WAIT_FOR_OUTPUT([ + as hv1 ovs-ofctl dump-flows br-int > ext-br-int-gw-flows-100 + AT_CAPTURE_FILE([ext-br-int-gw-flows-100]) + grep "check_pkt_larger(118)" ext-br-int-gw-flows-100 | wc -l], [0], [3 +]) + +AS_BOX([testing gw ext mtu 100 - IPv4]) +test_ip_packet_larger_ext 100 + +AS_BOX([testing gw mtu 100 - IPv6]) +test_ip6_packet_larger_ext 100 + OVN_CLEANUP([hv1]) AT_CLEANUP ])
Introduce check_pkt_larger action for ingress traffic
entering the cluster from a distributed gw router port
or from a gw router. This patch enables pMTU discovery
for ingress traffic.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>

---
 northd/ovn-northd.c | 166 ++++++++++++++++++++++++++------------------
 tests/ovn.at        | 137 ++++++++++++++++++++++++++++++++++--
 2 files changed, 231 insertions(+), 72 deletions(-)