diff mbox series

[ovs-dev,v2,3/3] northd: add check_pkt_larger lflows for ingress traffic

Message ID 78e9f7299f47f2add61b6d083aa5bc1adc907fb6.1622154070.git.lorenzo.bianconi@redhat.com
State Changes Requested
Headers show
Series Introduce check_pkt_larger for ingress traffic | expand

Commit Message

Lorenzo Bianconi May 27, 2021, 10:26 p.m. UTC
Introduce check_pkt_larger action for ingress traffic
entering the cluster from a distributed gw router port
or from a gw router. This patch enables pMTU discovery
for ingress traffic.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 northd/ovn-northd.c | 166 ++++++++++++++++++-------------
 tests/ovn.at        | 234 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 326 insertions(+), 74 deletions(-)

Comments

Mark Michelson June 8, 2021, 4:46 p.m. UTC | #1
DDLog equivalent is missing from this patch.

See below for more comments in-line.

On 5/27/21 6:26 PM, Lorenzo Bianconi wrote:
> Introduce check_pkt_larger action for ingress traffic
> entering the cluster from a distributed gw router port
> or from a gw router. This patch enables pMTU discovery
> for ingress traffic.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>   northd/ovn-northd.c | 166 ++++++++++++++++++-------------
>   tests/ovn.at        | 234 ++++++++++++++++++++++++++++++++++++++++++--
>   2 files changed, 326 insertions(+), 74 deletions(-)
> 
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 2269f0185..77b482081 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -9492,6 +9492,10 @@ build_adm_ctrl_flows_for_lrouter(
>       }
>   }
>   
> +static void
> +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
> +                                  struct ds **actions);
> +
>   /* Logical router ingress Table 0: L2 Admission Control
>    * This table drops packets that the router shouldn’t see at all based
>    * on their Ethernet headers.
> @@ -9519,6 +9523,9 @@ build_adm_ctrl_flows_for_lrouter_port(
>            * the pipeline.
>            */
>           ds_clear(actions);
> +
> +        int gw_mtu;
> +        build_check_pkt_len_action_string(op, &gw_mtu, &actions);
>           ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
>                         op->lrp_networks.ea_s);
>   
> @@ -10413,32 +10420,108 @@ build_arp_resolve_flows_for_lrouter_port(
>   
>   }
>   
> +static void
> +build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows,
> +                            struct ds *match, struct ds *actions,
> +                            enum ovn_stage stage)
> +{
> +    if (op->lrp_networks.ipv4_addrs) {
> +        ds_clear(match);
> +        ds_put_format(match,
> +                      "inport == %s && ip4 && "REGBIT_PKT_LARGER
> +                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
> +
> +        ds_clear(actions);
> +        /* Set icmp4.frag_mtu to gw_mtu */
> +        ds_put_format(actions,
> +            "icmp4_error {"
> +            REGBIT_EGRESS_LOOPBACK" = 1; "
> +            REGBIT_PKT_LARGER" = 0; "
> +            "eth.dst = %s; "
> +            "ip4.dst = ip4.src; "
> +            "ip4.src = %s; "
> +            "ip.ttl = 255; "
> +            "icmp4.type = 3; /* Destination Unreachable. */ "
> +            "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> +            "icmp4.frag_mtu = %d; "
> +            "next(pipeline=ingress, table=%d); };",
> +            op->lrp_networks.ea_s,
> +            op->lrp_networks.ipv4_addrs[0].addr_s,
> +            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> +        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
> +                                ds_cstr(match), ds_cstr(actions),
> +                                &op->nbrp->header_);
> +    }
> +
> +    if (op->lrp_networks.ipv6_addrs) {
> +        ds_clear(match);
> +        ds_put_format(match, "inport == %s&& ip6 && "REGBIT_PKT_LARGER

Nit: Missing a space before "&&"

> +                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
> +
> +        ds_clear(actions);
> +        /* Set icmp6.frag_mtu to gw_mtu */
> +        ds_put_format(actions,
> +            "icmp6_error {"
> +            REGBIT_EGRESS_LOOPBACK" = 1; "
> +            REGBIT_PKT_LARGER" = 0; "
> +            "eth.dst = %s; "
> +            "ip6.dst = ip6.src; "
> +            "ip6.src = %s; "
> +            "ip.ttl = 255; "
> +            "icmp6.type = 2; /* Packet Too Big. */ "
> +            "icmp6.code = 0; "
> +            "icmp6.frag_mtu = %d; "
> +            "next(pipeline=ingress, table=%d); };",
> +            op->lrp_networks.ea_s,
> +            op->lrp_networks.ipv6_addrs[0].addr_s,
> +            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> +        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
> +                                ds_cstr(match), ds_cstr(actions),
> +                                &op->nbrp->header_);
> +    }
> +}
> +
> +static void
> +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
> +                                  struct ds **actions)

"actions" can be made a struct ds * instead of struct ds **.

> +{
> +    int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
> +
> +    if (gw_mtu > 0) {
> +        /* Add the flows only if gateway_mtu is configured. */
> +        ds_put_format(*actions,
> +                      REGBIT_PKT_LARGER" = check_pkt_larger(%d); ",
> +                      gw_mtu + VLAN_ETH_HEADER_LEN);
> +    }
> +    *pmtu = gw_mtu;

This function is called in three places, but only one uses the returned 
pmtu. To prevent those other two places from declaring a variable they 
won't use, you might want to make build_check_pkt_len_action_string() 
tolerant of having a NULL pmtu parameter passed in.

> +}
> +
>   static void
>   build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
>                                     struct hmap *lflows, struct hmap *ports,
>                                     struct ds *match, struct ds *actions)
>   {
> -    int gw_mtu = 0;
> +    int gw_mtu;
>   
> -    if (op->nbrp) {
> -         gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
> -    }
> -    /* Add the flows only if gateway_mtu is configured. */
> +    ds_clear(actions);
> +    build_check_pkt_len_action_string(op, &gw_mtu, &actions);
>       if (gw_mtu <= 0) {
>           return;
>       }
>   
> +    ds_put_format(actions, "next;");
> +
>       ds_clear(match);
>       ds_put_format(match, "outport == %s", op->json_key);
>   
> -    ds_clear(actions);
> -    ds_put_format(actions,
> -                  REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
> -                  " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
>       ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50,
>                               ds_cstr(match), ds_cstr(actions),
>                               &op->nbrp->header_);
>   
> +    /* ingress traffic */
> +    build_icmperr_pkt_big_flows(op, gw_mtu, lflows, match, actions,
> +                                S_ROUTER_IN_IP_INPUT);

If I understand what's going on here, you've added some 
check_pkt_larger() flows to S_ROUTER_IN_ADMISSION, and you've added the 
ICMP error flows to S_ROUTER_IN_IP_INPUT so that we can send ICMP errors 
at that point if the packet is too large.

If that is the case, then is there any point to keeping the 
S_ROUTER_IN_CHK_PKT_LEN and S_ROUTER_IN_LARGER_PKTS tables? Wouldn't we 
always reject the incoming packet before reaching those tables if the 
packet is too large?

> +
>       for (size_t i = 0; i < op->od->nbr->n_ports; i++) {
>           struct ovn_port *rp = ovn_port_find(ports,
>                                               op->od->nbr->ports[i]->name);
> @@ -10446,63 +10529,9 @@ build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
>               continue;
>           }
>   
> -        if (rp->lrp_networks.ipv4_addrs) {
> -            ds_clear(match);
> -            ds_put_format(match, "inport == %s && outport == %s"
> -                          " && ip4 && "REGBIT_PKT_LARGER,
> -                          rp->json_key, op->json_key);
> -
> -            ds_clear(actions);
> -            /* Set icmp4.frag_mtu to gw_mtu */
> -            ds_put_format(actions,
> -                "icmp4_error {"
> -                REGBIT_EGRESS_LOOPBACK" = 1; "
> -                "eth.dst = %s; "
> -                "ip4.dst = ip4.src; "
> -                "ip4.src = %s; "
> -                "ip.ttl = 255; "
> -                "icmp4.type = 3; /* Destination Unreachable. */ "
> -                "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> -                "icmp4.frag_mtu = %d; "
> -                "next(pipeline=ingress, table=%d); };",
> -                rp->lrp_networks.ea_s,
> -                rp->lrp_networks.ipv4_addrs[0].addr_s,
> -                gw_mtu,
> -                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> -            ovn_lflow_add_with_hint(lflows, op->od,
> -                                    S_ROUTER_IN_LARGER_PKTS, 50,
> -                                    ds_cstr(match), ds_cstr(actions),
> -                                    &rp->nbrp->header_);
> -        }
> -
> -        if (rp->lrp_networks.ipv6_addrs) {
> -            ds_clear(match);
> -            ds_put_format(match, "inport == %s && outport == %s"
> -                          " && ip6 && "REGBIT_PKT_LARGER,
> -                          rp->json_key, op->json_key);
> -
> -            ds_clear(actions);
> -            /* Set icmp6.frag_mtu to gw_mtu */
> -            ds_put_format(actions,
> -                "icmp6_error {"
> -                REGBIT_EGRESS_LOOPBACK" = 1; "
> -                "eth.dst = %s; "
> -                "ip6.dst = ip6.src; "
> -                "ip6.src = %s; "
> -                "ip.ttl = 255; "
> -                "icmp6.type = 2; /* Packet Too Big. */ "
> -                "icmp6.code = 0; "
> -                "icmp6.frag_mtu = %d; "
> -                "next(pipeline=ingress, table=%d); };",
> -                rp->lrp_networks.ea_s,
> -                rp->lrp_networks.ipv6_addrs[0].addr_s,
> -                gw_mtu,
> -                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> -            ovn_lflow_add_with_hint(lflows, op->od,
> -                                    S_ROUTER_IN_LARGER_PKTS, 50,
> -                                    ds_cstr(match), ds_cstr(actions),
> -                                    &rp->nbrp->header_);
> -        }
> +        /* egress traffic */
> +        build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, match, actions,
> +                                    S_ROUTER_IN_LARGER_PKTS);
>       }
>   }
>   
> @@ -11570,8 +11599,11 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
>           * down in the pipeline.
>           */
>           ds_clear(actions);
> +
> +        int gw_mtu;
> +        build_check_pkt_len_action_string(od->l3dgw_port, &gw_mtu, &actions);
>           ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
> -                    od->l3dgw_port->lrp_networks.ea_s);
> +                      od->l3dgw_port->lrp_networks.ea_s);
>   
>           ds_clear(match);
>           ds_put_format(match,
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 7ad0dcb54..39189219c 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -16312,6 +16312,52 @@ test_ip_packet_larger() {
>       fi
>   }
>   
> +test_ip_packet_larger_ext() {
> +    local mtu=$1
> +
> +    # Send ip packet from sw0-port1 to outside
> +    src_mac="00000012af11" # external mac
> +    dst_mac="000020201213" # lr0-public mac
> +    src_ip=`ip_to_hex 172 168 0 4`
> +    dst_ip=`ip_to_hex 172 168 0 100`
> +    # Set the packet length to 118.
> +    pkt_len=0076
> +    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
> +    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    packet=${packet}${orig_packet_l3}
> +
> +    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> +    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
> +
> +    src_ip=`ip_to_hex 172 168 0 100`
> +    dst_ip=`ip_to_hex 172 168 0 4`
> +    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
> +    reply_pkt_len=0092
> +    ip_csum=f397
> +    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
> +    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
> +    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
> +    icmp_reply=${icmp_reply}${orig_packet_l3}
> +    echo $icmp_reply > br-phys_n1.expected
> +
> +    echo $gw_ip_garp >> br-phys_n1.expected
> +
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
> +    sleep 1
> +    # Send packet from sw0-port1 to outside
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> +
> +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +}
> +
>   test_ip6_packet_larger() {
>       local mtu=$1
>   
> @@ -16327,7 +16373,7 @@ test_ip6_packet_larger() {
>       local payload=${payload}0000000000000000000000000000000000000000
>       local payload=${payload}0000000000000000000000000000000000000000
>   
> -    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
> +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
>       local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
>   
>       as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> @@ -16344,11 +16390,11 @@ test_ip6_packet_larger() {
>       mtu_needed=$(expr ${packet_bytes} - 18)
>       if test $mtu -lt $mtu_needed; then
>           # First construct the inner IPv6 packet.
> -        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> +        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
>           inner_icmp6=8000000062f00001
>           inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
>           inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> -
> +
>           # Then the outer.
>           outer_ip6=6000000000883afe${ipv6_rt}${ipv6_src}
>           outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> @@ -16366,6 +16412,53 @@ test_ip6_packet_larger() {
>       fi
>   }
>   
> +test_ip6_packet_larger_ext() {
> +    local mtu=$1
> +
> +    local eth_src=00000012af11
> +    local eth_dst=000020201213
> +
> +    local ipv6_src=20000000000000000000000000000004
> +    local ipv6_dst=20000000000000000000000000000001
> +
> +    local payload=0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +
> +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> +    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
> +
> +    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> +    echo $ns > br-phys_n1.expected
> +
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +
> +    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
> +    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
> +    sleep 1
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> +    AT_CAPTURE_FILE([trace-$mtu])
> +
> +    # First construct the inner IPv6 packet.
> +    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> +    inner_icmp6=9000000062f00001
> +    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> +    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> +
> +    # Then the outer.
> +    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
> +    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> +    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
> +
> +    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
> +    echo $icmp6_reply >> br-phys_n1.expected
> +
> +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +}
> +
>   wait_for_ports_up
>   ovn-nbctl --wait=hv sync
>   
> @@ -16398,7 +16491,7 @@ for mtu in 100 500 118; do
>       OVS_WAIT_FOR_OUTPUT([
>           as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
>           AT_CAPTURE_FILE([br-int-flows-$mtu])
> -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
> +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
>   ])
>   
>       AS_BOX([testing mtu $mtu - IPv4])
> @@ -16408,6 +16501,23 @@ for mtu in 100 500 118; do
>       test_ip6_packet_larger $mtu
>   done
>   
> +AS_BOX([testing mtu $mtu])
> +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
> +ovn-sbctl dump-flows > ext-sbflows-100
> +AT_CAPTURE_FILE([ext-sbflows-$mtu])
> +
> +OVS_WAIT_FOR_OUTPUT([
> +    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
> +    AT_CAPTURE_FILE([ext-br-int-flows-100])
> +    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
> +])
> +
> +AS_BOX([testing ext mtu 100 - IPv4])
> +test_ip_packet_larger_ext 100
> +
> +AS_BOX([testing mtu 100 - IPv6])
> +test_ip6_packet_larger_ext 100
> +
>   OVN_CLEANUP([hv1])
>   AT_CLEANUP
>   ])
> @@ -16550,6 +16660,52 @@ test_ip_packet_larger() {
>       fi
>   }
>   
> +test_ip_packet_larger_ext() {
> +    local mtu=$1
> +
> +    # Send ip packet from sw0-port1 to outside
> +    src_mac="00000012af11" # external mac
> +    dst_mac="000020201213" # lr0-public mac
> +    src_ip=`ip_to_hex 172 168 0 4`
> +    dst_ip=`ip_to_hex 172 168 0 100`
> +    # Set the packet length to 118.
> +    pkt_len=0076
> +    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
> +    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    packet=${packet}${orig_packet_l3}
> +
> +    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> +    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
> +
> +    src_ip=`ip_to_hex 172 168 0 100`
> +    dst_ip=`ip_to_hex 172 168 0 4`
> +    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
> +    reply_pkt_len=0092
> +    ip_csum=f397
> +    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
> +    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
> +    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
> +    icmp_reply=${icmp_reply}${orig_packet_l3}
> +    echo $icmp_reply > br-phys_n1.expected
> +
> +    echo $gw_ip_garp >> br-phys_n1.expected
> +
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
> +    sleep 1
> +    # Send packet from sw0-port1 to outside
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> +
> +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +}
> +
>   test_ip6_packet_larger() {
>       local mtu=$1
>   
> @@ -16565,7 +16721,7 @@ test_ip6_packet_larger() {
>       local payload=${payload}0000000000000000000000000000000000000000
>       local payload=${payload}0000000000000000000000000000000000000000
>   
> -    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
> +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
>       local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
>   
>       as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> @@ -16582,7 +16738,7 @@ test_ip6_packet_larger() {
>       mtu_needed=$(expr ${packet_bytes} - 18)
>       if test $mtu -lt $mtu_needed; then
>           # First construct the inner IPv6 packet.
> -        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> +        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
>           inner_icmp6=8000000062f00001
>           inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
>           inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> @@ -16604,6 +16760,53 @@ test_ip6_packet_larger() {
>       fi
>   }
>   
> +test_ip6_packet_larger_ext() {
> +    local mtu=$1
> +
> +    local eth_src=00000012af11
> +    local eth_dst=000020201213
> +
> +    local ipv6_src=20000000000000000000000000000004
> +    local ipv6_dst=20000000000000000000000000000001
> +
> +    local payload=0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +
> +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> +    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
> +
> +    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> +    echo $ns > br-phys_n1.expected
> +
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +
> +    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
> +    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
> +    sleep 1
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> +    AT_CAPTURE_FILE([trace-$mtu])
> +
> +    # First construct the inner IPv6 packet.
> +    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> +    inner_icmp6=9000000062f00001
> +    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> +    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> +
> +    # Then the outer.
> +    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
> +    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> +    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
> +
> +    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
> +    echo $icmp6_reply >> br-phys_n1.expected
> +
> +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +}
> +
>   wait_for_ports_up
>   ovn-nbctl --wait=hv sync
>   
> @@ -16636,7 +16839,7 @@ for mtu in 100 500 118; do
>       OVS_WAIT_FOR_OUTPUT([
>           as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
>           AT_CAPTURE_FILE([br-int-flows-$mtu])
> -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
> +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
>   ])
>   
>       AS_BOX([testing mtu $mtu - IPv4])
> @@ -16646,6 +16849,23 @@ for mtu in 100 500 118; do
>       test_ip6_packet_larger $mtu
>   done
>   
> +AS_BOX([testing mtu $mtu])
> +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
> +ovn-sbctl dump-flows > ext-sbflows-100
> +AT_CAPTURE_FILE([ext-sbflows-$mtu])
> +
> +OVS_WAIT_FOR_OUTPUT([
> +    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
> +    AT_CAPTURE_FILE([ext-br-int-flows-100])
> +    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
> +])
> +
> +AS_BOX([testing ext mtu 100 - IPv4])
> +test_ip_packet_larger_ext 100
> +
> +AS_BOX([testing mtu 100 - IPv6])
> +test_ip6_packet_larger_ext 100
> +
>   OVN_CLEANUP([hv1])
>   AT_CLEANUP
>   ])
> 
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Lorenzo Bianconi June 8, 2021, 5:04 p.m. UTC | #2
> DDLog equivalent is missing from this patch.
> 
> See below for more comments in-line.
> 
> On 5/27/21 6:26 PM, Lorenzo Bianconi wrote:
> > Introduce check_pkt_larger action for ingress traffic
> > entering the cluster from a distributed gw router port
> > or from a gw router. This patch enables pMTU discovery
> > for ingress traffic.
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> >   northd/ovn-northd.c | 166 ++++++++++++++++++-------------
> >   tests/ovn.at        | 234 ++++++++++++++++++++++++++++++++++++++++++--
> >   2 files changed, 326 insertions(+), 74 deletions(-)
> > 
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index 2269f0185..77b482081 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -9492,6 +9492,10 @@ build_adm_ctrl_flows_for_lrouter(
> >       }
> >   }
> > +static void
> > +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
> > +                                  struct ds **actions);
> > +
> >   /* Logical router ingress Table 0: L2 Admission Control
> >    * This table drops packets that the router shouldn’t see at all based
> >    * on their Ethernet headers.
> > @@ -9519,6 +9523,9 @@ build_adm_ctrl_flows_for_lrouter_port(
> >            * the pipeline.
> >            */
> >           ds_clear(actions);
> > +
> > +        int gw_mtu;
> > +        build_check_pkt_len_action_string(op, &gw_mtu, &actions);
> >           ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
> >                         op->lrp_networks.ea_s);
> > @@ -10413,32 +10420,108 @@ build_arp_resolve_flows_for_lrouter_port(
> >   }
> > +static void
> > +build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows,
> > +                            struct ds *match, struct ds *actions,
> > +                            enum ovn_stage stage)
> > +{
> > +    if (op->lrp_networks.ipv4_addrs) {
> > +        ds_clear(match);
> > +        ds_put_format(match,
> > +                      "inport == %s && ip4 && "REGBIT_PKT_LARGER
> > +                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
> > +
> > +        ds_clear(actions);
> > +        /* Set icmp4.frag_mtu to gw_mtu */
> > +        ds_put_format(actions,
> > +            "icmp4_error {"
> > +            REGBIT_EGRESS_LOOPBACK" = 1; "
> > +            REGBIT_PKT_LARGER" = 0; "
> > +            "eth.dst = %s; "
> > +            "ip4.dst = ip4.src; "
> > +            "ip4.src = %s; "
> > +            "ip.ttl = 255; "
> > +            "icmp4.type = 3; /* Destination Unreachable. */ "
> > +            "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> > +            "icmp4.frag_mtu = %d; "
> > +            "next(pipeline=ingress, table=%d); };",
> > +            op->lrp_networks.ea_s,
> > +            op->lrp_networks.ipv4_addrs[0].addr_s,
> > +            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> > +        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
> > +                                ds_cstr(match), ds_cstr(actions),
> > +                                &op->nbrp->header_);
> > +    }
> > +
> > +    if (op->lrp_networks.ipv6_addrs) {
> > +        ds_clear(match);
> > +        ds_put_format(match, "inport == %s&& ip6 && "REGBIT_PKT_LARGER
> 
> Nit: Missing a space before "&&"

ack, I will fix it in v3.

> 
> > +                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
> > +
> > +        ds_clear(actions);
> > +        /* Set icmp6.frag_mtu to gw_mtu */
> > +        ds_put_format(actions,
> > +            "icmp6_error {"
> > +            REGBIT_EGRESS_LOOPBACK" = 1; "
> > +            REGBIT_PKT_LARGER" = 0; "
> > +            "eth.dst = %s; "
> > +            "ip6.dst = ip6.src; "
> > +            "ip6.src = %s; "
> > +            "ip.ttl = 255; "
> > +            "icmp6.type = 2; /* Packet Too Big. */ "
> > +            "icmp6.code = 0; "
> > +            "icmp6.frag_mtu = %d; "
> > +            "next(pipeline=ingress, table=%d); };",
> > +            op->lrp_networks.ea_s,
> > +            op->lrp_networks.ipv6_addrs[0].addr_s,
> > +            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> > +        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
> > +                                ds_cstr(match), ds_cstr(actions),
> > +                                &op->nbrp->header_);
> > +    }
> > +}
> > +
> > +static void
> > +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
> > +                                  struct ds **actions)
> 
> "actions" can be made a struct ds * instead of struct ds **.

ack, I will fix it in v3.
> 
> > +{
> > +    int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
> > +
> > +    if (gw_mtu > 0) {
> > +        /* Add the flows only if gateway_mtu is configured. */
> > +        ds_put_format(*actions,
> > +                      REGBIT_PKT_LARGER" = check_pkt_larger(%d); ",
> > +                      gw_mtu + VLAN_ETH_HEADER_LEN);
> > +    }
> > +    *pmtu = gw_mtu;
> 
> This function is called in three places, but only one uses the returned
> pmtu. To prevent those other two places from declaring a variable they won't
> use, you might want to make build_check_pkt_len_action_string() tolerant of
> having a NULL pmtu parameter passed in.

ack, I will fix it in v3.

> 
> > +}
> > +
> >   static void
> >   build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
> >                                     struct hmap *lflows, struct hmap *ports,
> >                                     struct ds *match, struct ds *actions)
> >   {
> > -    int gw_mtu = 0;
> > +    int gw_mtu;
> > -    if (op->nbrp) {
> > -         gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
> > -    }
> > -    /* Add the flows only if gateway_mtu is configured. */
> > +    ds_clear(actions);
> > +    build_check_pkt_len_action_string(op, &gw_mtu, &actions);
> >       if (gw_mtu <= 0) {
> >           return;
> >       }
> > +    ds_put_format(actions, "next;");
> > +
> >       ds_clear(match);
> >       ds_put_format(match, "outport == %s", op->json_key);
> > -    ds_clear(actions);
> > -    ds_put_format(actions,
> > -                  REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
> > -                  " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
> >       ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50,
> >                               ds_cstr(match), ds_cstr(actions),
> >                               &op->nbrp->header_);
> > +    /* ingress traffic */
> > +    build_icmperr_pkt_big_flows(op, gw_mtu, lflows, match, actions,
> > +                                S_ROUTER_IN_IP_INPUT);
> 
> If I understand what's going on here, you've added some check_pkt_larger()
> flows to S_ROUTER_IN_ADMISSION, and you've added the ICMP error flows to
> S_ROUTER_IN_IP_INPUT so that we can send ICMP errors at that point if the
> packet is too large.

yes, this is to avoid the packet is snatted/dnatted

> 
> If that is the case, then is there any point to keeping the
> S_ROUTER_IN_CHK_PKT_LEN and S_ROUTER_IN_LARGER_PKTS tables? Wouldn't we
> always reject the incoming packet before reaching those tables if the packet
> is too large?

S_ROUTER_IN_CHK_PKT_LEN/S_ROUTER_IN_LARGER_PKTS tables are still used for
egress traffic, maybe we can try to move them above in the pipeline? I will
check.

Regards,
Lorenzo

> 
> > +
> >       for (size_t i = 0; i < op->od->nbr->n_ports; i++) {
> >           struct ovn_port *rp = ovn_port_find(ports,
> >                                               op->od->nbr->ports[i]->name);
> > @@ -10446,63 +10529,9 @@ build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
> >               continue;
> >           }
> > -        if (rp->lrp_networks.ipv4_addrs) {
> > -            ds_clear(match);
> > -            ds_put_format(match, "inport == %s && outport == %s"
> > -                          " && ip4 && "REGBIT_PKT_LARGER,
> > -                          rp->json_key, op->json_key);
> > -
> > -            ds_clear(actions);
> > -            /* Set icmp4.frag_mtu to gw_mtu */
> > -            ds_put_format(actions,
> > -                "icmp4_error {"
> > -                REGBIT_EGRESS_LOOPBACK" = 1; "
> > -                "eth.dst = %s; "
> > -                "ip4.dst = ip4.src; "
> > -                "ip4.src = %s; "
> > -                "ip.ttl = 255; "
> > -                "icmp4.type = 3; /* Destination Unreachable. */ "
> > -                "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> > -                "icmp4.frag_mtu = %d; "
> > -                "next(pipeline=ingress, table=%d); };",
> > -                rp->lrp_networks.ea_s,
> > -                rp->lrp_networks.ipv4_addrs[0].addr_s,
> > -                gw_mtu,
> > -                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> > -            ovn_lflow_add_with_hint(lflows, op->od,
> > -                                    S_ROUTER_IN_LARGER_PKTS, 50,
> > -                                    ds_cstr(match), ds_cstr(actions),
> > -                                    &rp->nbrp->header_);
> > -        }
> > -
> > -        if (rp->lrp_networks.ipv6_addrs) {
> > -            ds_clear(match);
> > -            ds_put_format(match, "inport == %s && outport == %s"
> > -                          " && ip6 && "REGBIT_PKT_LARGER,
> > -                          rp->json_key, op->json_key);
> > -
> > -            ds_clear(actions);
> > -            /* Set icmp6.frag_mtu to gw_mtu */
> > -            ds_put_format(actions,
> > -                "icmp6_error {"
> > -                REGBIT_EGRESS_LOOPBACK" = 1; "
> > -                "eth.dst = %s; "
> > -                "ip6.dst = ip6.src; "
> > -                "ip6.src = %s; "
> > -                "ip.ttl = 255; "
> > -                "icmp6.type = 2; /* Packet Too Big. */ "
> > -                "icmp6.code = 0; "
> > -                "icmp6.frag_mtu = %d; "
> > -                "next(pipeline=ingress, table=%d); };",
> > -                rp->lrp_networks.ea_s,
> > -                rp->lrp_networks.ipv6_addrs[0].addr_s,
> > -                gw_mtu,
> > -                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> > -            ovn_lflow_add_with_hint(lflows, op->od,
> > -                                    S_ROUTER_IN_LARGER_PKTS, 50,
> > -                                    ds_cstr(match), ds_cstr(actions),
> > -                                    &rp->nbrp->header_);
> > -        }
> > +        /* egress traffic */
> > +        build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, match, actions,
> > +                                    S_ROUTER_IN_LARGER_PKTS);
> >       }
> >   }
> > @@ -11570,8 +11599,11 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
> >           * down in the pipeline.
> >           */
> >           ds_clear(actions);
> > +
> > +        int gw_mtu;
> > +        build_check_pkt_len_action_string(od->l3dgw_port, &gw_mtu, &actions);
> >           ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
> > -                    od->l3dgw_port->lrp_networks.ea_s);
> > +                      od->l3dgw_port->lrp_networks.ea_s);
> >           ds_clear(match);
> >           ds_put_format(match,
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 7ad0dcb54..39189219c 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -16312,6 +16312,52 @@ test_ip_packet_larger() {
> >       fi
> >   }
> > +test_ip_packet_larger_ext() {
> > +    local mtu=$1
> > +
> > +    # Send ip packet from sw0-port1 to outside
> > +    src_mac="00000012af11" # external mac
> > +    dst_mac="000020201213" # lr0-public mac
> > +    src_ip=`ip_to_hex 172 168 0 4`
> > +    dst_ip=`ip_to_hex 172 168 0 100`
> > +    # Set the packet length to 118.
> > +    pkt_len=0076
> > +    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
> > +    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    packet=${packet}${orig_packet_l3}
> > +
> > +    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> > +    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
> > +
> > +    src_ip=`ip_to_hex 172 168 0 100`
> > +    dst_ip=`ip_to_hex 172 168 0 4`
> > +    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
> > +    reply_pkt_len=0092
> > +    ip_csum=f397
> > +    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
> > +    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
> > +    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
> > +    icmp_reply=${icmp_reply}${orig_packet_l3}
> > +    echo $icmp_reply > br-phys_n1.expected
> > +
> > +    echo $gw_ip_garp >> br-phys_n1.expected
> > +
> > +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> > +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> > +
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
> > +    sleep 1
> > +    # Send packet from sw0-port1 to outside
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> > +
> > +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> > +}
> > +
> >   test_ip6_packet_larger() {
> >       local mtu=$1
> > @@ -16327,7 +16373,7 @@ test_ip6_packet_larger() {
> >       local payload=${payload}0000000000000000000000000000000000000000
> >       local payload=${payload}0000000000000000000000000000000000000000
> > -    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
> > +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> >       local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
> >       as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> > @@ -16344,11 +16390,11 @@ test_ip6_packet_larger() {
> >       mtu_needed=$(expr ${packet_bytes} - 18)
> >       if test $mtu -lt $mtu_needed; then
> >           # First construct the inner IPv6 packet.
> > -        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> > +        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
> >           inner_icmp6=8000000062f00001
> >           inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> >           inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> > -
> > +
> >           # Then the outer.
> >           outer_ip6=6000000000883afe${ipv6_rt}${ipv6_src}
> >           outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> > @@ -16366,6 +16412,53 @@ test_ip6_packet_larger() {
> >       fi
> >   }
> > +test_ip6_packet_larger_ext() {
> > +    local mtu=$1
> > +
> > +    local eth_src=00000012af11
> > +    local eth_dst=000020201213
> > +
> > +    local ipv6_src=20000000000000000000000000000004
> > +    local ipv6_dst=20000000000000000000000000000001
> > +
> > +    local payload=0000000000000000000000000000000000000000
> > +    local payload=${payload}0000000000000000000000000000000000000000
> > +    local payload=${payload}0000000000000000000000000000000000000000
> > +    local payload=${payload}0000000000000000000000000000000000000000
> > +
> > +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> > +    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
> > +
> > +    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> > +    echo $ns > br-phys_n1.expected
> > +
> > +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> > +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> > +
> > +    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
> > +    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
> > +    sleep 1
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> > +    AT_CAPTURE_FILE([trace-$mtu])
> > +
> > +    # First construct the inner IPv6 packet.
> > +    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> > +    inner_icmp6=9000000062f00001
> > +    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> > +    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> > +
> > +    # Then the outer.
> > +    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
> > +    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> > +    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
> > +
> > +    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
> > +    echo $icmp6_reply >> br-phys_n1.expected
> > +
> > +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> > +}
> > +
> >   wait_for_ports_up
> >   ovn-nbctl --wait=hv sync
> > @@ -16398,7 +16491,7 @@ for mtu in 100 500 118; do
> >       OVS_WAIT_FOR_OUTPUT([
> >           as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
> >           AT_CAPTURE_FILE([br-int-flows-$mtu])
> > -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
> > +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
> >   ])
> >       AS_BOX([testing mtu $mtu - IPv4])
> > @@ -16408,6 +16501,23 @@ for mtu in 100 500 118; do
> >       test_ip6_packet_larger $mtu
> >   done
> > +AS_BOX([testing mtu $mtu])
> > +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
> > +ovn-sbctl dump-flows > ext-sbflows-100
> > +AT_CAPTURE_FILE([ext-sbflows-$mtu])
> > +
> > +OVS_WAIT_FOR_OUTPUT([
> > +    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
> > +    AT_CAPTURE_FILE([ext-br-int-flows-100])
> > +    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
> > +])
> > +
> > +AS_BOX([testing ext mtu 100 - IPv4])
> > +test_ip_packet_larger_ext 100
> > +
> > +AS_BOX([testing mtu 100 - IPv6])
> > +test_ip6_packet_larger_ext 100
> > +
> >   OVN_CLEANUP([hv1])
> >   AT_CLEANUP
> >   ])
> > @@ -16550,6 +16660,52 @@ test_ip_packet_larger() {
> >       fi
> >   }
> > +test_ip_packet_larger_ext() {
> > +    local mtu=$1
> > +
> > +    # Send ip packet from sw0-port1 to outside
> > +    src_mac="00000012af11" # external mac
> > +    dst_mac="000020201213" # lr0-public mac
> > +    src_ip=`ip_to_hex 172 168 0 4`
> > +    dst_ip=`ip_to_hex 172 168 0 100`
> > +    # Set the packet length to 118.
> > +    pkt_len=0076
> > +    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
> > +    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> > +    packet=${packet}${orig_packet_l3}
> > +
> > +    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> > +    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
> > +
> > +    src_ip=`ip_to_hex 172 168 0 100`
> > +    dst_ip=`ip_to_hex 172 168 0 4`
> > +    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
> > +    reply_pkt_len=0092
> > +    ip_csum=f397
> > +    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
> > +    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
> > +    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
> > +    icmp_reply=${icmp_reply}${orig_packet_l3}
> > +    echo $icmp_reply > br-phys_n1.expected
> > +
> > +    echo $gw_ip_garp >> br-phys_n1.expected
> > +
> > +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> > +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> > +
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
> > +    sleep 1
> > +    # Send packet from sw0-port1 to outside
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> > +
> > +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> > +}
> > +
> >   test_ip6_packet_larger() {
> >       local mtu=$1
> > @@ -16565,7 +16721,7 @@ test_ip6_packet_larger() {
> >       local payload=${payload}0000000000000000000000000000000000000000
> >       local payload=${payload}0000000000000000000000000000000000000000
> > -    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
> > +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> >       local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
> >       as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> > @@ -16582,7 +16738,7 @@ test_ip6_packet_larger() {
> >       mtu_needed=$(expr ${packet_bytes} - 18)
> >       if test $mtu -lt $mtu_needed; then
> >           # First construct the inner IPv6 packet.
> > -        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> > +        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
> >           inner_icmp6=8000000062f00001
> >           inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> >           inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> > @@ -16604,6 +16760,53 @@ test_ip6_packet_larger() {
> >       fi
> >   }
> > +test_ip6_packet_larger_ext() {
> > +    local mtu=$1
> > +
> > +    local eth_src=00000012af11
> > +    local eth_dst=000020201213
> > +
> > +    local ipv6_src=20000000000000000000000000000004
> > +    local ipv6_dst=20000000000000000000000000000001
> > +
> > +    local payload=0000000000000000000000000000000000000000
> > +    local payload=${payload}0000000000000000000000000000000000000000
> > +    local payload=${payload}0000000000000000000000000000000000000000
> > +    local payload=${payload}0000000000000000000000000000000000000000
> > +
> > +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> > +    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
> > +
> > +    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> > +    echo $ns > br-phys_n1.expected
> > +
> > +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> > +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> > +
> > +    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
> > +    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
> > +    sleep 1
> > +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> > +    AT_CAPTURE_FILE([trace-$mtu])
> > +
> > +    # First construct the inner IPv6 packet.
> > +    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> > +    inner_icmp6=9000000062f00001
> > +    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> > +    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> > +
> > +    # Then the outer.
> > +    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
> > +    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> > +    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
> > +
> > +    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
> > +    echo $icmp6_reply >> br-phys_n1.expected
> > +
> > +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> > +}
> > +
> >   wait_for_ports_up
> >   ovn-nbctl --wait=hv sync
> > @@ -16636,7 +16839,7 @@ for mtu in 100 500 118; do
> >       OVS_WAIT_FOR_OUTPUT([
> >           as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
> >           AT_CAPTURE_FILE([br-int-flows-$mtu])
> > -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
> > +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
> >   ])
> >       AS_BOX([testing mtu $mtu - IPv4])
> > @@ -16646,6 +16849,23 @@ for mtu in 100 500 118; do
> >       test_ip6_packet_larger $mtu
> >   done
> > +AS_BOX([testing mtu $mtu])
> > +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
> > +ovn-sbctl dump-flows > ext-sbflows-100
> > +AT_CAPTURE_FILE([ext-sbflows-$mtu])
> > +
> > +OVS_WAIT_FOR_OUTPUT([
> > +    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
> > +    AT_CAPTURE_FILE([ext-br-int-flows-100])
> > +    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
> > +])
> > +
> > +AS_BOX([testing ext mtu 100 - IPv4])
> > +test_ip_packet_larger_ext 100
> > +
> > +AS_BOX([testing mtu 100 - IPv6])
> > +test_ip6_packet_larger_ext 100
> > +
> >   OVN_CLEANUP([hv1])
> >   AT_CLEANUP
> >   ])
> > 
> > 
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > 
>
diff mbox series

Patch

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 2269f0185..77b482081 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -9492,6 +9492,10 @@  build_adm_ctrl_flows_for_lrouter(
     }
 }
 
+static void
+build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
+                                  struct ds **actions);
+
 /* Logical router ingress Table 0: L2 Admission Control
  * This table drops packets that the router shouldn’t see at all based
  * on their Ethernet headers.
@@ -9519,6 +9523,9 @@  build_adm_ctrl_flows_for_lrouter_port(
          * the pipeline.
          */
         ds_clear(actions);
+
+        int gw_mtu;
+        build_check_pkt_len_action_string(op, &gw_mtu, &actions);
         ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
                       op->lrp_networks.ea_s);
 
@@ -10413,32 +10420,108 @@  build_arp_resolve_flows_for_lrouter_port(
 
 }
 
+static void
+build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows,
+                            struct ds *match, struct ds *actions,
+                            enum ovn_stage stage)
+{
+    if (op->lrp_networks.ipv4_addrs) {
+        ds_clear(match);
+        ds_put_format(match,
+                      "inport == %s && ip4 && "REGBIT_PKT_LARGER
+                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
+
+        ds_clear(actions);
+        /* Set icmp4.frag_mtu to gw_mtu */
+        ds_put_format(actions,
+            "icmp4_error {"
+            REGBIT_EGRESS_LOOPBACK" = 1; "
+            REGBIT_PKT_LARGER" = 0; "
+            "eth.dst = %s; "
+            "ip4.dst = ip4.src; "
+            "ip4.src = %s; "
+            "ip.ttl = 255; "
+            "icmp4.type = 3; /* Destination Unreachable. */ "
+            "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+            "icmp4.frag_mtu = %d; "
+            "next(pipeline=ingress, table=%d); };",
+            op->lrp_networks.ea_s,
+            op->lrp_networks.ipv4_addrs[0].addr_s,
+            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
+                                ds_cstr(match), ds_cstr(actions),
+                                &op->nbrp->header_);
+    }
+
+    if (op->lrp_networks.ipv6_addrs) {
+        ds_clear(match);
+        ds_put_format(match, "inport == %s&& ip6 && "REGBIT_PKT_LARGER
+                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
+
+        ds_clear(actions);
+        /* Set icmp6.frag_mtu to gw_mtu */
+        ds_put_format(actions,
+            "icmp6_error {"
+            REGBIT_EGRESS_LOOPBACK" = 1; "
+            REGBIT_PKT_LARGER" = 0; "
+            "eth.dst = %s; "
+            "ip6.dst = ip6.src; "
+            "ip6.src = %s; "
+            "ip.ttl = 255; "
+            "icmp6.type = 2; /* Packet Too Big. */ "
+            "icmp6.code = 0; "
+            "icmp6.frag_mtu = %d; "
+            "next(pipeline=ingress, table=%d); };",
+            op->lrp_networks.ea_s,
+            op->lrp_networks.ipv6_addrs[0].addr_s,
+            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
+                                ds_cstr(match), ds_cstr(actions),
+                                &op->nbrp->header_);
+    }
+}
+
+static void
+build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
+                                  struct ds **actions)
+{
+    int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
+
+    if (gw_mtu > 0) {
+        /* Add the flows only if gateway_mtu is configured. */
+        ds_put_format(*actions,
+                      REGBIT_PKT_LARGER" = check_pkt_larger(%d); ",
+                      gw_mtu + VLAN_ETH_HEADER_LEN);
+    }
+    *pmtu = gw_mtu;
+}
+
 static void
 build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
                                   struct hmap *lflows, struct hmap *ports,
                                   struct ds *match, struct ds *actions)
 {
-    int gw_mtu = 0;
+    int gw_mtu;
 
-    if (op->nbrp) {
-         gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
-    }
-    /* Add the flows only if gateway_mtu is configured. */
+    ds_clear(actions);
+    build_check_pkt_len_action_string(op, &gw_mtu, &actions);
     if (gw_mtu <= 0) {
         return;
     }
 
+    ds_put_format(actions, "next;");
+
     ds_clear(match);
     ds_put_format(match, "outport == %s", op->json_key);
 
-    ds_clear(actions);
-    ds_put_format(actions,
-                  REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
-                  " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
     ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50,
                             ds_cstr(match), ds_cstr(actions),
                             &op->nbrp->header_);
 
+    /* ingress traffic */
+    build_icmperr_pkt_big_flows(op, gw_mtu, lflows, match, actions,
+                                S_ROUTER_IN_IP_INPUT);
+
     for (size_t i = 0; i < op->od->nbr->n_ports; i++) {
         struct ovn_port *rp = ovn_port_find(ports,
                                             op->od->nbr->ports[i]->name);
@@ -10446,63 +10529,9 @@  build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
             continue;
         }
 
-        if (rp->lrp_networks.ipv4_addrs) {
-            ds_clear(match);
-            ds_put_format(match, "inport == %s && outport == %s"
-                          " && ip4 && "REGBIT_PKT_LARGER,
-                          rp->json_key, op->json_key);
-
-            ds_clear(actions);
-            /* Set icmp4.frag_mtu to gw_mtu */
-            ds_put_format(actions,
-                "icmp4_error {"
-                REGBIT_EGRESS_LOOPBACK" = 1; "
-                "eth.dst = %s; "
-                "ip4.dst = ip4.src; "
-                "ip4.src = %s; "
-                "ip.ttl = 255; "
-                "icmp4.type = 3; /* Destination Unreachable. */ "
-                "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
-                "icmp4.frag_mtu = %d; "
-                "next(pipeline=ingress, table=%d); };",
-                rp->lrp_networks.ea_s,
-                rp->lrp_networks.ipv4_addrs[0].addr_s,
-                gw_mtu,
-                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
-            ovn_lflow_add_with_hint(lflows, op->od,
-                                    S_ROUTER_IN_LARGER_PKTS, 50,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &rp->nbrp->header_);
-        }
-
-        if (rp->lrp_networks.ipv6_addrs) {
-            ds_clear(match);
-            ds_put_format(match, "inport == %s && outport == %s"
-                          " && ip6 && "REGBIT_PKT_LARGER,
-                          rp->json_key, op->json_key);
-
-            ds_clear(actions);
-            /* Set icmp6.frag_mtu to gw_mtu */
-            ds_put_format(actions,
-                "icmp6_error {"
-                REGBIT_EGRESS_LOOPBACK" = 1; "
-                "eth.dst = %s; "
-                "ip6.dst = ip6.src; "
-                "ip6.src = %s; "
-                "ip.ttl = 255; "
-                "icmp6.type = 2; /* Packet Too Big. */ "
-                "icmp6.code = 0; "
-                "icmp6.frag_mtu = %d; "
-                "next(pipeline=ingress, table=%d); };",
-                rp->lrp_networks.ea_s,
-                rp->lrp_networks.ipv6_addrs[0].addr_s,
-                gw_mtu,
-                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
-            ovn_lflow_add_with_hint(lflows, op->od,
-                                    S_ROUTER_IN_LARGER_PKTS, 50,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &rp->nbrp->header_);
-        }
+        /* egress traffic */
+        build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, match, actions,
+                                    S_ROUTER_IN_LARGER_PKTS);
     }
 }
 
@@ -11570,8 +11599,11 @@  build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
         * down in the pipeline.
         */
         ds_clear(actions);
+
+        int gw_mtu;
+        build_check_pkt_len_action_string(od->l3dgw_port, &gw_mtu, &actions);
         ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
-                    od->l3dgw_port->lrp_networks.ea_s);
+                      od->l3dgw_port->lrp_networks.ea_s);
 
         ds_clear(match);
         ds_put_format(match,
diff --git a/tests/ovn.at b/tests/ovn.at
index 7ad0dcb54..39189219c 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -16312,6 +16312,52 @@  test_ip_packet_larger() {
     fi
 }
 
+test_ip_packet_larger_ext() {
+    local mtu=$1
+
+    # Send ip packet from sw0-port1 to outside
+    src_mac="00000012af11" # external mac
+    dst_mac="000020201213" # lr0-public mac
+    src_ip=`ip_to_hex 172 168 0 4`
+    dst_ip=`ip_to_hex 172 168 0 100`
+    # Set the packet length to 118.
+    pkt_len=0076
+    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
+    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    packet=${packet}${orig_packet_l3}
+
+    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
+    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
+
+    src_ip=`ip_to_hex 172 168 0 100`
+    dst_ip=`ip_to_hex 172 168 0 4`
+    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
+    reply_pkt_len=0092
+    ip_csum=f397
+    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
+    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
+    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
+    icmp_reply=${icmp_reply}${orig_packet_l3}
+    echo $icmp_reply > br-phys_n1.expected
+
+    echo $gw_ip_garp >> br-phys_n1.expected
+
+    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
+    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
+    sleep 1
+    # Send packet from sw0-port1 to outside
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
+
+    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
+}
+
 test_ip6_packet_larger() {
     local mtu=$1
 
@@ -16327,7 +16373,7 @@  test_ip6_packet_larger() {
     local payload=${payload}0000000000000000000000000000000000000000
     local payload=${payload}0000000000000000000000000000000000000000
 
-    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
+    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
     local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
 
     as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
@@ -16344,11 +16390,11 @@  test_ip6_packet_larger() {
     mtu_needed=$(expr ${packet_bytes} - 18)
     if test $mtu -lt $mtu_needed; then
         # First construct the inner IPv6 packet.
-        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
+        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
         inner_icmp6=8000000062f00001
         inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
         inner_packet=${inner_ip6}${inner_icmp6_and_payload}
-        
+
         # Then the outer.
         outer_ip6=6000000000883afe${ipv6_rt}${ipv6_src}
         outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
@@ -16366,6 +16412,53 @@  test_ip6_packet_larger() {
     fi
 }
 
+test_ip6_packet_larger_ext() {
+    local mtu=$1
+
+    local eth_src=00000012af11
+    local eth_dst=000020201213
+
+    local ipv6_src=20000000000000000000000000000004
+    local ipv6_dst=20000000000000000000000000000001
+
+    local payload=0000000000000000000000000000000000000000
+    local payload=${payload}0000000000000000000000000000000000000000
+    local payload=${payload}0000000000000000000000000000000000000000
+    local payload=${payload}0000000000000000000000000000000000000000
+
+    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
+    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
+
+    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
+    echo $ns > br-phys_n1.expected
+
+    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
+    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+
+    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
+    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
+    sleep 1
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
+    AT_CAPTURE_FILE([trace-$mtu])
+
+    # First construct the inner IPv6 packet.
+    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
+    inner_icmp6=9000000062f00001
+    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
+    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
+
+    # Then the outer.
+    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
+    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
+    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
+
+    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
+    echo $icmp6_reply >> br-phys_n1.expected
+
+    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
+}
+
 wait_for_ports_up
 ovn-nbctl --wait=hv sync
 
@@ -16398,7 +16491,7 @@  for mtu in 100 500 118; do
     OVS_WAIT_FOR_OUTPUT([
         as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
         AT_CAPTURE_FILE([br-int-flows-$mtu])
-        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
+        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
 ])
 
     AS_BOX([testing mtu $mtu - IPv4])
@@ -16408,6 +16501,23 @@  for mtu in 100 500 118; do
     test_ip6_packet_larger $mtu
 done
 
+AS_BOX([testing mtu $mtu])
+check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
+ovn-sbctl dump-flows > ext-sbflows-100
+AT_CAPTURE_FILE([ext-sbflows-$mtu])
+
+OVS_WAIT_FOR_OUTPUT([
+    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
+    AT_CAPTURE_FILE([ext-br-int-flows-100])
+    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
+])
+
+AS_BOX([testing ext mtu 100 - IPv4])
+test_ip_packet_larger_ext 100
+
+AS_BOX([testing mtu 100 - IPv6])
+test_ip6_packet_larger_ext 100
+
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
@@ -16550,6 +16660,52 @@  test_ip_packet_larger() {
     fi
 }
 
+test_ip_packet_larger_ext() {
+    local mtu=$1
+
+    # Send ip packet from sw0-port1 to outside
+    src_mac="00000012af11" # external mac
+    dst_mac="000020201213" # lr0-public mac
+    src_ip=`ip_to_hex 172 168 0 4`
+    dst_ip=`ip_to_hex 172 168 0 100`
+    # Set the packet length to 118.
+    pkt_len=0076
+    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
+    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
+    packet=${packet}${orig_packet_l3}
+
+    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
+    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
+
+    src_ip=`ip_to_hex 172 168 0 100`
+    dst_ip=`ip_to_hex 172 168 0 4`
+    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
+    reply_pkt_len=0092
+    ip_csum=f397
+    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
+    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
+    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
+    icmp_reply=${icmp_reply}${orig_packet_l3}
+    echo $icmp_reply > br-phys_n1.expected
+
+    echo $gw_ip_garp >> br-phys_n1.expected
+
+    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
+    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
+    sleep 1
+    # Send packet from sw0-port1 to outside
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
+
+    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
+}
+
 test_ip6_packet_larger() {
     local mtu=$1
 
@@ -16565,7 +16721,7 @@  test_ip6_packet_larger() {
     local payload=${payload}0000000000000000000000000000000000000000
     local payload=${payload}0000000000000000000000000000000000000000
 
-    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
+    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
     local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
 
     as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
@@ -16582,7 +16738,7 @@  test_ip6_packet_larger() {
     mtu_needed=$(expr ${packet_bytes} - 18)
     if test $mtu -lt $mtu_needed; then
         # First construct the inner IPv6 packet.
-        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
+        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
         inner_icmp6=8000000062f00001
         inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
         inner_packet=${inner_ip6}${inner_icmp6_and_payload}
@@ -16604,6 +16760,53 @@  test_ip6_packet_larger() {
     fi
 }
 
+test_ip6_packet_larger_ext() {
+    local mtu=$1
+
+    local eth_src=00000012af11
+    local eth_dst=000020201213
+
+    local ipv6_src=20000000000000000000000000000004
+    local ipv6_dst=20000000000000000000000000000001
+
+    local payload=0000000000000000000000000000000000000000
+    local payload=${payload}0000000000000000000000000000000000000000
+    local payload=${payload}0000000000000000000000000000000000000000
+    local payload=${payload}0000000000000000000000000000000000000000
+
+    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
+    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
+
+    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
+    echo $ns > br-phys_n1.expected
+
+    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
+    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+
+    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
+    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
+    sleep 1
+    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
+    AT_CAPTURE_FILE([trace-$mtu])
+
+    # First construct the inner IPv6 packet.
+    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
+    inner_icmp6=9000000062f00001
+    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
+    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
+
+    # Then the outer.
+    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
+    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
+    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
+
+    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
+    echo $icmp6_reply >> br-phys_n1.expected
+
+    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
+}
+
 wait_for_ports_up
 ovn-nbctl --wait=hv sync
 
@@ -16636,7 +16839,7 @@  for mtu in 100 500 118; do
     OVS_WAIT_FOR_OUTPUT([
         as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
         AT_CAPTURE_FILE([br-int-flows-$mtu])
-        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
+        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
 ])
 
     AS_BOX([testing mtu $mtu - IPv4])
@@ -16646,6 +16849,23 @@  for mtu in 100 500 118; do
     test_ip6_packet_larger $mtu
 done
 
+AS_BOX([testing mtu $mtu])
+check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
+ovn-sbctl dump-flows > ext-sbflows-100
+AT_CAPTURE_FILE([ext-sbflows-$mtu])
+
+OVS_WAIT_FOR_OUTPUT([
+    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
+    AT_CAPTURE_FILE([ext-br-int-flows-100])
+    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
+])
+
+AS_BOX([testing ext mtu 100 - IPv4])
+test_ip_packet_larger_ext 100
+
+AS_BOX([testing mtu 100 - IPv6])
+test_ip6_packet_larger_ext 100
+
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])