@@ -2002,7 +2002,86 @@ next;
</li>
</ul>
- <h3>Ingress Table 9: Gateway Redirect</h3>
+ <h3>Ingress Table 9: Check packet length</h3>
+
+ <p>
+ For distributed logical routers with distributed gateway port configured
+ with <code>options:gateway_mtu</code> to a valid integer value, this
+ table adds a priority-50 logical flow with the match
+ <code>ip4 && outport == <var>GW_PORT</var></code> where
+ <var>GW_PORT</var> is the distributed gateway router port and applies the
+ action <code>check_pkt_larger</code> and advances the packet to the
+ next table.
+ </p>
+
+ <pre>
+REGBIT_PKT_LARGER = check_pkt_larger(<var>L</var>); next;
+ </pre>
+
+ <p>
+ where <var>L</var> is the packet length to check for. If the packet
+ is larger than <var>L</var>, it stores 1 in the register bit
+ <code>REGBIT_PKT_LARGER</code>. The value of
+ <var>L</var> is taken from <ref column="options:gateway_mtu"
+ table="Logical_Router_Port" db="OVN_Northbound"/> column of
+ <ref table="Logical_Router_Port" db="OVN_Northbound"/> row.
+ </p>
+
+ <p>
+ This table adds one priority-0 fallback flow that matches all packets
+ and advances to the next table.
+ </p>
+
+ <h3>Ingress Table 10: Handle larger packets</h3>
+
+ <p>
+ For distributed logical routers with distributed gateway port configured
+ with <code>options:gateway_mtu</code> to a valid integer value, this
+ table adds the following priority-50 logical flow for each
+ logical router port with the match <code>ip4 &&
+ inport == <var>LRP</var> && outport == <var>GW_PORT</var>
+ && REGBIT_PKT_LARGER</code>, where <var>LRP</var> is the logical
+ router port and <var>GW_PORT</var> is the distributed gateway router port
+ and applies the following action
+ </p>
+
+ <pre>
+icmp4 {
+ icmp4.type = 3; /* Destination Unreachable. */
+ icmp4.code = 4; /* Frag Needed and DF was Set. */
+ icmp4.frag_mtu = <var>M</var>;
+ eth.dst = <var>E</var>;
+ ip4.dst = ip4.src;
+ ip4.src = <var>I</var>;
+ ip.ttl = 255;
+ REGBIT_EGRESS_LOOPBACK = 1;
+ next(pipeline=ingress, table=0);
+};
+ </pre>
+
+ <ul>
+ <li>
+ Where <var>M</var> is the fragment MTU whose value is taken from
+ <ref column="options:gateway_mtu" table="Logical_Router_Port"
+ db="OVN_Northbound"/> column of
+ <ref table="Logical_Router_Port" db="OVN_Northbound"/> row.
+ </li>
+
+ <li>
+ <var>E</var> is the Ethernet address of the logical router port.
+ </li>
+
+ <li>
+ <var>I</var> is the IPv4 address of the logical router port.
+ </li>
+ </ul>
+
+ <p>
+ This table adds one priority-0 fallback flow that matches all packets
+ and advances to the next table.
+ </p>
+
+ <h3>Ingress Table 11: Gateway Redirect</h3>
<p>
For distributed logical routers where one of the logical router
@@ -2059,7 +2138,7 @@ next;
</li>
</ul>
- <h3>Ingress Table 10: ARP Request</h3>
+ <h3>Ingress Table 12: ARP Request</h3>
<p>
In the common case where the Ethernet destination has been resolved, this
@@ -143,8 +143,10 @@ enum ovn_stage {
PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \
PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 8, "lr_in_arp_resolve") \
- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 9, "lr_in_gw_redirect") \
- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 10, "lr_in_arp_request") \
+ PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 9, "lr_in_chk_pkt_len") \
+ PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 10,"lr_in_larger_pkts") \
+ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 11, "lr_in_gw_redirect") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 12, "lr_in_arp_request") \
\
/* Logical router egress stages. */ \
PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
@@ -180,6 +182,8 @@ enum ovn_stage {
* logical router dropping packets with source IP address equals
* one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK "reg9[1]"
+/* Register to store the result of check_pkt_larger action. */
+#define REGBIT_PKT_LARGER "reg9[2]"
/* Returns an "enum ovn_stage" built from the arguments. */
static enum ovn_stage
@@ -6574,7 +6578,84 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
"get_nd(outport, xxreg0); next;");
}
- /* Logical router ingress table 9: Gateway redirect.
+ /* Local router ingress table 9: Check packet length.
+ *
+ * Any IPv4 packet with outport set to the distributed gateway
+ * router port, check the packet length and store the result in the
+ * 'REGBIT_PKT_LARGER' register bit.
+ *
+ * Local router ingress table 10: Handle larger packets.
+ *
+ * Any IPv4 packet with outport set to the distributed gateway
+ * router port and the 'REGBIT_PKT_LARGER' register bit is set,
+ * generate ICMPv4 packet with type 3 (Destination Unreachable) and
+ * code 4 (Fragmentation needed).
+ * */
+ HMAP_FOR_EACH (od, key_node, datapaths) {
+ if (!od->nbr) {
+ continue;
+ }
+
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
+ "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
+ "next;");
+
+ if (od->l3dgw_port && od->l3redirect_port) {
+ int gw_mtu = 0;
+ if (od->l3dgw_port->nbrp) {
+ gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
+ "gateway_mtu", 0);
+ }
+ /* Add the flows only if gateway_mtu is configured. */
+ if (gw_mtu <= 0) {
+ continue;
+ }
+
+ ds_clear(&match);
+ ds_put_format(&match, "outport == %s && ip4",
+ od->l3dgw_port->json_key);
+
+ ds_clear(&actions);
+ ds_put_format(&actions,
+ REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
+ " next;", gw_mtu);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
+ ds_cstr(&match), ds_cstr(&actions));
+
+ for (size_t i = 0; i < od->nbr->n_ports; i++) {
+ struct ovn_port *rp = ovn_port_find(ports,
+ od->nbr->ports[i]->name);
+ if (!rp || rp == od->l3dgw_port) {
+ continue;
+ }
+ ds_clear(&match);
+ ds_put_format(&match, "inport == %s && outport == %s && ip4 "
+ "&& "REGBIT_PKT_LARGER,
+ rp->json_key, od->l3dgw_port->json_key);
+
+ ds_clear(&actions);
+ ds_put_format(&actions,
+ "icmp4 {"
+ "icmp4.type = 3; /* Destination Unreachable. */ "
+ "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+ "icmp4.frag_mtu = %d; "
+ "eth.dst = %s; "
+ "ip4.dst = ip4.src; "
+ "ip4.src = %s; "
+ "ip.ttl = 255; "
+ REGBIT_EGRESS_LOOPBACK" = 1; "
+ "next(pipeline=ingress, table=0); };",
+ gw_mtu - 58, rp->lrp_networks.ea_s,
+ rp->lrp_networks.ipv4_addrs[0].addr_s);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 50,
+ ds_cstr(&match), ds_cstr(&actions));
+ }
+ }
+ }
+
+ /* Logical router ingress table 11: Gateway redirect.
*
* For traffic with outport equal to the l3dgw_port
* on a distributed router, this table redirects a subset
@@ -6614,7 +6695,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
}
- /* Local router ingress table 10: ARP request.
+ /* Local router ingress table 12: ARP request.
*
* In the common case where the Ethernet destination has been resolved,
* this table outputs the packet (priority 0). Otherwise, it composes
@@ -11707,6 +11707,7 @@ as hv2 start_daemon ovn-controller
OVN_CLEANUP([hv1],[hv2])
+
AT_CLEANUP
AT_SETUP([ovn -- ovn-nbctl duplicate addresses])
@@ -11765,6 +11766,170 @@ AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 aef0::1"])
AT_CLEANUP
+AT_SETUP([ovn -- router - check packet length - icmp defrag])
+AT_KEYWORDS([check packet length])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+ovn-nbctl ls-add sw0
+ovn-nbctl lsp-add sw0 sw0-port1
+ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.3"
+
+ovn-nbctl lr-add lr0
+ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24
+ovn-nbctl lsp-add sw0 sw0-lr0
+ovn-nbctl lsp-set-type sw0-lr0 router
+ovn-nbctl lsp-set-addresses sw0-lr0 router
+ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+ovn-nbctl ls-add public
+ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24
+ovn-nbctl lsp-add public public-lr0
+ovn-nbctl lsp-set-type public-lr0 router
+ovn-nbctl lsp-set-addresses public-lr0 router
+ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
+
+# localnet port
+ovn-nbctl lsp-add public ln-public
+ovn-nbctl lsp-set-type ln-public localnet
+ovn-nbctl lsp-set-addresses ln-public unknown
+ovn-nbctl lsp-set-options ln-public network_name=phys
+
+ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
+ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+ovs-vsctl -- add-port br-int hv1-vif1 -- \
+ set interface hv1-vif1 external-ids:iface-id=sw0-port1 \
+ options:tx_pcap=hv1/vif1-tx.pcap \
+ options:rxq_pcap=hv1/vif1-rx.pcap \
+ ofport-request=1
+
+reset_pcap_file() {
+ local iface=$1
+ local pcap_file=$2
+ ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
+ options:rxq_pcap=dummy-rx.pcap
+ rm -f ${pcap_file}*.pcap
+ ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
+ options:rxq_pcap=${pcap_file}-rx.pcap
+}
+
+ip_to_hex() {
+ printf "%02x%02x%02x%02x" "$@"
+}
+
+test_ip_packet_larger() {
+ local icmp_pmtu_reply_expected=$1
+
+ # Send ip packet from sw0-port1 to outside
+ src_mac="505400000001" # sw-port1 mac
+ dst_mac="00000000ff01" # sw0-lr0 mac (internal router leg)
+ src_ip=`ip_to_hex 10 0 0 3`
+ dst_ip=`ip_to_hex 172 168 0 3`
+ # Set the packet length to 100.
+ pkt_len=0064
+ packet=${dst_mac}${src_mac}08004500${pkt_len}0000000040010000
+ packet=${packet}${src_ip}${dst_ip}0304000000000000
+ packet=${packet}000000000000000000000000000000000000
+ packet=${packet}000000000000000000000000000000000000
+ packet=${packet}000000000000000000000000000000000000
+ packet=${packet}000000000000000000000000000000000000
+
+ # If icmp_pmtu_reply_expected is 0, it means the packet is lesser than
+ # the gateway mtu and should be delivered to the provider bridge via the
+ # localnet port.
+ # If icmp_pmtu_reply_expected is 1, it means the packet is larger than
+ # the gateway mtu and ovn-controller should drop the packet and instead
+ # generate ICMPv4 Destination Unreachable message with pmtu set to 100.
+ if test $icmp_pmtu_reply_expected = 0; then
+ # Packet to expect at br-phys.
+ src_mac="000020201213"
+ dst_mac="00000012af11"
+ src_ip=`ip_to_hex 10 0 0 3`
+ dst_ip=`ip_to_hex 172 168 0 3`
+ expected=${dst_mac}${src_mac}08004500${pkt_len}000000003f010100
+ expected=${expected}${src_ip}${dst_ip}0304000000000000
+ expected=${expected}000000000000000000000000000000000000
+ expected=${expected}000000000000000000000000000000000000
+ expected=${expected}000000000000000000000000000000000000
+ expected=${expected}000000000000000000000000000000000000
+ echo $expected > br_phys_n1.expected
+ else
+ mtu=$pkt_len
+ src_ip=`ip_to_hex 10 0 0 1`
+ dst_ip=`ip_to_hex 10 0 0 3`
+ orig_pkt_src_ip=`ip_to_hex 10 0 0 3`
+ orig_pkt_dst_ip=`ip_to_hex 172 168 0 3`
+ icmp_reply=${src_mac}${dst_mac}08004500003800004000fe0168c1
+ icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304bd7f0000${mtu}
+ icmp_reply=${icmp_reply}4500${pkt_len}000000003f010100
+ icmp_reply=${icmp_reply}${orig_pkt_src_ip}${orig_pkt_dst_ip}
+ icmp_reply=${icmp_reply}0304000000000000
+ echo $icmp_reply > hv1-vif1.expected
+ fi
+
+ as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
+ as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+
+ # Send packet from sw0-port1 to outside
+ as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
+
+ if test $icmp_pmtu_reply_expected = 0; then
+ OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br_phys_n1.expected])
+ $PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > packets
+ AT_CHECK([cat packets], [0], [])
+ else
+ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [hv1-vif1.expected])
+ $PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/br-phys_n1-tx.pcap > \
+ packets
+ AT_CHECK([cat packets], [0], [])
+ fi
+}
+
+ovn-nbctl show
+ovn-sbctl show
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int \
+| grep "check_pkt_larger" | wc -l], [0], [[0
+]])
+dp_uuid=$(ovn-sbctl find datapath_binding | grep sw0 -B2 | grep _uuid | \
+awk '{print $3}')
+ovn-sbctl create MAC_Binding ip=172.168.0.3 datapath=$dp_uuid \
+logical_port=lr0-public mac="00\:00\:00\:12\:af\:11"
+
+# Set the gateway mtu to 100. If the packet length is > 100, ovn-controller
+# should send icmp host not reachable with pmtu set to 100.
+ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
+as hv3 ovs-appctl netdev-dummy/receive hv3-vif1 $arp_reply
+OVS_WAIT_UNTIL([
+ test `as hv1 ovs-ofctl dump-flows br-int | grep "check_pkt_larger(100)" | \
+ wc -l` -eq 1
+])
+
+icmp_reply_expected=1
+test_ip_packet_larger $icmp_reply_expected
+
+# Set the gateway mtu to 500.
+ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=500
+as hv3 ovs-appctl netdev-dummy/receive hv3-vif1 $arp_reply
+OVS_WAIT_UNTIL([
+ test `as hv1 ovs-ofctl dump-flows br-int | grep "check_pkt_larger(500)" | \
+ wc -l` -eq 1
+])
+
+# Now the packet should be sent via the localnet port to br-phys.
+icmp_reply_expected=0
+test_ip_packet_larger $icmp_reply_expected
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+
AT_SETUP([ovn -- IP packet buffering])
AT_KEYWORDS([ip-buffering])
AT_SKIP_IF([test $HAVE_PYTHON = no])