diff mbox series

[ovs-dev,v4] OVN: add buffering support for ip packets

Message ID 2eea7b92e2f0d3df87f16040d28fb4cfb2756cee.1539867930.git.lorenzo.bianconi@redhat.com
State Accepted
Headers show
Series [ovs-dev,v4] OVN: add buffering support for ip packets | expand

Commit Message

Lorenzo Bianconi Oct. 18, 2018, 2:05 p.m. UTC
Add buffering support for IPv4/IPv6 packets that will be processed
by arp{}/nd_ns{} action when L2 address is not discovered yet since
otherwise the packet will be substituted with an ARP/Neighbor
Solicitation frame and this will result in the lost of the first
packet of the connection.
Moreover fix following automatic tests broken by ip-buffering support
since now original ip packets are transmitted by OVN logical
router:
- ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR
- ovn -- /32 router IP address

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
Changes since v3:
- fix 'ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR' test case if the process
  is pinned on a given cpu
- rebased on top of current ovs master branch

Changes since v2:
- use in6_addr instead of ovs_be128 as hash key
- fix spelling error
- merge patch 2/3 and 3/3 in 1/3

Changes since v1:
- add automatic test support for IPv4 and IPv6
- fix automatic tests broken by ip-buffering support
- reduce queue depth to 3 packets
- use binary form of the IP address as hash key
- in buffered_send_packets() routine signature use eth_addr instead of
  unsigned char pointer
- use unsigned int for {head/tail} queue index
---
 ovn/controller/pinctrl.c | 256 ++++++++++++++++++++++++++++-----
 tests/ovn.at             | 297 +++++++++++++++++++++++++++------------
 2 files changed, 429 insertions(+), 124 deletions(-)

Comments

Ben Pfaff Oct. 18, 2018, 3:51 p.m. UTC | #1
On Thu, Oct 18, 2018 at 04:05:56PM +0200, Lorenzo Bianconi wrote:
> Add buffering support for IPv4/IPv6 packets that will be processed
> by arp{}/nd_ns{} action when L2 address is not discovered yet since
> otherwise the packet will be substituted with an ARP/Neighbor
> Solicitation frame and this will result in the lost of the first
> packet of the connection.
> Moreover fix following automatic tests broken by ip-buffering support
> since now original ip packets are transmitted by OVN logical
> router:
> - ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR
> - ovn -- /32 router IP address
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>

Thanks, (re)applied to master.
diff mbox series

Patch

diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index a20f6b6ef..e3ee516e4 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -61,6 +61,9 @@  static struct rconn *swconn;
  * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
 static unsigned int conn_seq_no;
 
+static void init_buffered_packets_map(void);
+static void destroy_buffered_packets_map(void);
+
 static void pinctrl_handle_put_mac_binding(const struct flow *md,
                                            const struct flow *headers,
                                            bool is_arp);
@@ -96,6 +99,7 @@  static void pinctrl_handle_put_nd_ra_opts(
     struct ofputil_packet_in *pin, struct ofpbuf *userdata,
     struct ofpbuf *continuation);
 static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
+                                 struct dp_packet *pkt_in,
                                  const struct match *md,
                                  struct ofpbuf *userdata);
 static void init_ipv6_ras(void);
@@ -108,6 +112,7 @@  static void send_ipv6_ras(
 ;
 
 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
+COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
 
 void
 pinctrl_init(void)
@@ -117,6 +122,7 @@  pinctrl_init(void)
     init_put_mac_bindings();
     init_send_garps();
     init_ipv6_ras();
+    init_buffered_packets_map();
 }
 
 static ovs_be32
@@ -190,9 +196,180 @@  set_actions_and_enqueue_msg(const struct dp_packet *packet,
     ofpbuf_uninit(&ofpacts);
 }
 
+struct buffer_info {
+    struct ofpbuf ofpacts;
+    struct dp_packet *p;
+};
+
+#define BUFFER_QUEUE_DEPTH     4
+struct buffered_packets {
+    struct hmap_node hmap_node;
+
+    /* key */
+    struct in6_addr ip;
+
+    long long int timestamp;
+
+    struct buffer_info data[BUFFER_QUEUE_DEPTH];
+    uint32_t head, tail;
+};
+
+static struct hmap buffered_packets_map;
+
+static void
+init_buffered_packets_map(void)
+{
+    hmap_init(&buffered_packets_map);
+}
+
+static void
+destroy_buffered_packets(struct buffered_packets *bp)
+{
+    struct buffer_info *bi;
+
+    while (bp->head != bp->tail) {
+        bi = &bp->data[bp->head];
+        dp_packet_uninit(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    hmap_remove(&buffered_packets_map, &bp->hmap_node);
+    free(bp);
+}
+
+static void
+destroy_buffered_packets_map(void)
+{
+    struct buffered_packets *bp;
+    HMAP_FOR_EACH_POP (bp, hmap_node, &buffered_packets_map) {
+        destroy_buffered_packets(bp);
+    }
+    hmap_destroy(&buffered_packets_map);
+}
+
+static void
+buffered_push_packet(struct buffered_packets *bp,
+                     struct dp_packet *packet,
+                     const struct match *md)
+{
+    uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
+    struct buffer_info *bi = &bp->data[bp->tail];
+
+    ofpbuf_init(&bi->ofpacts, 4096);
+
+    reload_metadata(&bi->ofpacts, md);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
+
+    bi->p = packet;
+
+    if (next == bp->head) {
+        bi = &bp->data[bp->head];
+        dp_packet_uninit(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    bp->tail = next;
+}
+
+static void
+buffered_send_packets(struct buffered_packets *bp, struct eth_addr *addr)
+{
+    enum ofp_version version = rconn_get_version(swconn);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+
+    while (bp->head != bp->tail) {
+        struct buffer_info *bi = &bp->data[bp->head];
+        struct eth_header *eth = dp_packet_data(bi->p);
+
+        eth->eth_dst = *addr;
+        struct ofputil_packet_out po = {
+            .packet = dp_packet_data(bi->p),
+            .packet_len = dp_packet_size(bi->p),
+            .buffer_id = UINT32_MAX,
+            .ofpacts = bi->ofpacts.data,
+            .ofpacts_len = bi->ofpacts.size,
+        };
+        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+        queue_msg(ofputil_encode_packet_out(&po, proto));
+
+        ofpbuf_uninit(&bi->ofpacts);
+        dp_packet_uninit(bi->p);
+
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+}
+
+#define BUFFER_MAP_TIMEOUT   10000
 static void
-pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
-                   struct ofpbuf *userdata)
+buffered_packets_map_gc(void)
+{
+    struct buffered_packets *cur_qp, *next_qp;
+    long long int now = time_msec();
+
+    HMAP_FOR_EACH_SAFE (cur_qp, next_qp, hmap_node, &buffered_packets_map) {
+        if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
+            destroy_buffered_packets(cur_qp);
+        }
+    }
+}
+
+static struct buffered_packets *
+pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
+{
+    struct buffered_packets *qp;
+
+    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
+                             &buffered_packets_map) {
+        if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
+            return qp;
+        }
+    }
+    return NULL;
+}
+
+static int
+pinctrl_handle_buffered_packets(const struct flow *ip_flow,
+                                struct dp_packet *pkt_in,
+                                const struct match *md, bool is_arp)
+{
+    struct buffered_packets *bp;
+    struct dp_packet *clone;
+    struct in6_addr addr;
+
+    if (is_arp) {
+        addr = in6_addr_mapped_ipv4(ip_flow->nw_dst);
+    } else {
+        addr = ip_flow->ipv6_dst;
+    }
+
+    uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
+    bp = pinctrl_find_buffered_packets(&addr, hash);
+    if (!bp) {
+        if (hmap_count(&buffered_packets_map) >= 1000) {
+            COVERAGE_INC(pinctrl_drop_buffered_packets_map);
+            return -ENOMEM;
+        }
+
+        bp = xmalloc(sizeof *bp);
+        hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
+        bp->head = bp->tail = 0;
+        bp->ip = addr;
+    }
+    bp->timestamp = time_msec();
+    /* clone the packet to send it later with correct L2 address */
+    clone = dp_packet_clone_data(dp_packet_data(pkt_in),
+                                 dp_packet_size(pkt_in));
+    buffered_push_packet(bp, clone, md);
+
+    return 0;
+}
+
+static void
+pinctrl_handle_arp(const struct flow *ip_flow, struct dp_packet *pkt_in,
+                   const struct match *md, struct ofpbuf *userdata)
 {
     /* This action only works for IP packets, and the switch should only send
      * us IP packets this way, but check here just to be sure. */
@@ -203,6 +380,8 @@  pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
         return;
     }
 
+    pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, true);
+
     /* Compose an ARP packet. */
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
@@ -1162,7 +1341,7 @@  process_packet_in(const struct ofp_header *msg,
 
     switch (ntohl(ah->opcode)) {
     case ACTION_OPCODE_ARP:
-        pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
+        pinctrl_handle_arp(&headers, &packet, &pin.flow_metadata, &userdata);
         break;
 
     case ACTION_OPCODE_PUT_ARP:
@@ -1207,7 +1386,8 @@  process_packet_in(const struct ofp_header *msg,
         break;
 
     case ACTION_OPCODE_ND_NS:
-        pinctrl_handle_nd_ns(&headers, &pin.flow_metadata, &userdata);
+        pinctrl_handle_nd_ns(&headers, &packet, &pin.flow_metadata,
+                             &userdata);
         break;
 
     case ACTION_OPCODE_ICMP:
@@ -1310,6 +1490,7 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                   local_datapaths, active_tunnels);
     send_ipv6_ras(sbrec_port_binding_by_datapath,
                   sbrec_port_binding_by_name, local_datapaths);
+    buffered_packets_map_gc();
 }
 
 /* Table of ipv6_ra_state structures, keyed on logical port name */
@@ -1620,6 +1801,7 @@  pinctrl_destroy(void)
     destroy_put_mac_bindings();
     destroy_send_garps();
     destroy_ipv6_ras();
+    destroy_buffered_packets_map();
 }
 
 /* Implementation of the "put_arp" and "put_nd" OVN actions.  These
@@ -1642,7 +1824,7 @@  struct put_mac_binding {
     /* Key. */
     uint32_t dp_key;
     uint32_t port_key;
-    char ip_s[INET6_ADDRSTRLEN + 1];
+    struct in6_addr ip_key;
 
     /* Value. */
     struct eth_addr mac;
@@ -1666,13 +1848,13 @@  destroy_put_mac_bindings(void)
 
 static struct put_mac_binding *
 pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key,
-                             const char *ip_s, uint32_t hash)
+                             const struct in6_addr *ip_key, uint32_t hash)
 {
     struct put_mac_binding *pa;
     HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) {
         if (pa->dp_key == dp_key
             && pa->port_key == port_key
-            && !strcmp(pa->ip_s, ip_s)) {
+            && IN6_ARE_ADDR_EQUAL(&pa->ip_key, ip_key)) {
             return pa;
         }
     }
@@ -1685,18 +1867,19 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
 {
     uint32_t dp_key = ntohll(md->metadata);
     uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
-    char ip_s[INET6_ADDRSTRLEN];
+    struct buffered_packets *bp;
+    struct in6_addr ip_key;
 
     if (is_arp) {
-        ovs_be32 ip = htonl(md->regs[0]);
-        inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s));
+        ip_key = in6_addr_mapped_ipv4(htonl(md->regs[0]));
     } else {
         ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
-        inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s));
+        memcpy(&ip_key, &ip6, sizeof ip_key);
     }
-    uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
+    uint32_t hash = hash_bytes(&ip_key, sizeof ip_key,
+                               hash_2words(dp_key, port_key));
     struct put_mac_binding *pmb
-        = pinctrl_find_put_mac_binding(dp_key, port_key, ip_s, hash);
+        = pinctrl_find_put_mac_binding(dp_key, port_key, &ip_key, hash);
     if (!pmb) {
         if (hmap_count(&put_mac_bindings) >= 1000) {
             COVERAGE_INC(pinctrl_drop_put_mac_binding);
@@ -1707,10 +1890,17 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
         hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash);
         pmb->dp_key = dp_key;
         pmb->port_key = port_key;
-        ovs_strlcpy_arrays(pmb->ip_s, ip_s);
+        pmb->ip_key = ip_key;
     }
     pmb->timestamp = time_msec();
     pmb->mac = headers->dl_src;
+
+    /* send queued pkts */
+    uint32_t bhash = hash_bytes(&ip_key, sizeof ip_key, 0);
+    bp = pinctrl_find_buffered_packets(&ip_key, bhash);
+    if (bp) {
+        buffered_send_packets(bp, &pmb->mac);
+    }
 }
 
 static const struct sbrec_mac_binding *
@@ -1760,25 +1950,23 @@  run_put_mac_binding(struct ovsdb_idl_txn *ovnsb_idl_txn,
     snprintf(mac_string, sizeof mac_string,
              ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac));
 
-    /* Check for and update an existing IP-MAC binding for this logical
-     * port.
-     */
+    struct ds ip_s = DS_EMPTY_INITIALIZER;
+    ipv6_format_mapped(&pmb->ip_key, &ip_s);
+
+    /* Update or add an IP-MAC binding for this logical port. */
     const struct sbrec_mac_binding *b =
         mac_binding_lookup(sbrec_mac_binding_by_lport_ip, pb->logical_port,
-                           pmb->ip_s);
-    if (b) {
-        if (strcmp(b->mac, mac_string)) {
-            sbrec_mac_binding_set_mac(b, mac_string);
-        }
-        return;
-    }
-
-    /* Add new IP-MAC binding for this logical port. */
-    b = sbrec_mac_binding_insert(ovnsb_idl_txn);
-    sbrec_mac_binding_set_logical_port(b, pb->logical_port);
-    sbrec_mac_binding_set_ip(b, pmb->ip_s);
-    sbrec_mac_binding_set_mac(b, mac_string);
-    sbrec_mac_binding_set_datapath(b, pb->datapath);
+                           ds_cstr(&ip_s));
+    if (!b) {
+        b = sbrec_mac_binding_insert(ovnsb_idl_txn);
+        sbrec_mac_binding_set_logical_port(b, pb->logical_port);
+        sbrec_mac_binding_set_ip(b, ds_cstr(&ip_s));
+        sbrec_mac_binding_set_mac(b, mac_string);
+        sbrec_mac_binding_set_datapath(b, pb->datapath);
+    } else if (strcmp(b->mac, mac_string)) {
+        sbrec_mac_binding_set_mac(b, mac_string);
+    }
+    ds_destroy(&ip_s);
 }
 
 static void
@@ -2409,8 +2597,8 @@  pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md,
 }
 
 static void
-pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
-                     struct ofpbuf *userdata)
+pinctrl_handle_nd_ns(const struct flow *ip_flow, struct dp_packet *pkt_in,
+                     const struct match *md, struct ofpbuf *userdata)
 {
     /* This action only works for IPv6 packets. */
     if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) {
@@ -2419,6 +2607,8 @@  pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
         return;
     }
 
+    pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, false);
+
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
     dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
diff --git a/tests/ovn.at b/tests/ovn.at
index 86d339f92..8825beca3 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -2545,6 +2545,45 @@  test_ip() {
     done
 }
 
+# test_arp INPORT SHA SPA TPA [REPLY_HA]
+#
+# Causes a packet to be received on INPORT.  The packet is an ARP
+# request with SHA, SPA, and TPA as specified.  If REPLY_HA is provided, then
+# it should be the hardware address of the target to expect to receive in an
+# ARP reply; otherwise no reply is expected.
+#
+# INPORT is an logical switch port number, e.g. 11 for vif11.
+# SHA and REPLY_HA are each 12 hex digits.
+# SPA and TPA are each 8 hex digits.
+test_arp() {
+    local inport=$1 sha=$2 spa=$3 tpa=$4 reply_ha=$5
+    local request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
+    hv=hv`vif_to_hv $inport`
+    as $hv ovs-appctl netdev-dummy/receive vif$inport $request
+    as $hv ovs-appctl ofproto/trace br-int in_port=$inport $request
+
+    # Expect to receive the broadcast ARP on the other logical switch ports if
+    # IP address is not configured to the switch patch port.
+    local i=`vif_to_ls $inport`
+    local j k
+    for j in 1 2 3; do
+        for k in 1 2 3; do
+            # 192.168.33.254 is configured to the switch patch port for lrp33,
+            # so no ARP flooding expected for it.
+            if test $i$j$k != $inport && test $tpa != `ip_to_hex 192 168 33 254`; then
+                echo $request >> $i$j$k.expected
+            fi
+        done
+    done
+
+    # Expect to receive the reply, if any.
+    if test X$reply_ha != X; then
+        lrp=`vif_to_lrp $inport`
+        local reply=${sha}00000000ff${lrp}08060001080006040002${reply_ha}${tpa}${sha}${spa}
+        echo $reply >> $inport.expected
+    fi
+}
+
 as hv1 ovs-vsctl --columns=name,ofport list interface
 as hv1 ovn-sbctl list port_binding
 as hv1 ovn-sbctl list datapath_binding
@@ -2587,10 +2626,13 @@  for is in 1 2 3; do
   done
 done
 
+: > mac_bindings.expected
+
 # 3. Send an IP packet from every logical port to every other subnet,
 #    to an IP address that does not have a static IP-MAC binding.
 #    This should generate a broadcast ARP request for the destination
 #    IP address in the destination subnet.
+#    Moreover generate an ARP reply for each of the IP addresses ARPed
 for is in 1 2 3; do
   for js in 1 2 3; do
     for ks in 1 2 3; do
@@ -2623,51 +2665,29 @@  for is in 1 2 3; do
               echo $arp >> $id$jd2$kd.expected
             done
           done
+          if test $(vif_to_hv ${is}${js}${ks}) = $(vif_to_hv ${id}${jd}1); then
+              hmac=8000000000$o4
+              rmac=00000000ff$id$jd
+              echo ${hmac}${rmac}08004500001c00000000"3f1101"00${sip}${dip}0035111100080000 >> ${id}11.expected
+          fi
+
+          host_mac=8000000000$o4
+          lrmac=00000000ff$id$jd
+
+          arp_reply=${lrmac}${host_mac}08060001080006040002${host_mac}${dip}${lrmac}${lrip}
+
+          hv=hv`vif_to_hv ${id}${jd}1`
+          as $hv ovs-appctl netdev-dummy/receive vif${id}${jd}1 $arp_reply
+
+          host_ip_pretty=192.168.$id$jd.$o4
+          host_mac_pretty=80:00:00:00:00:$o4
+          echo lrp$id$jd,$host_ip_pretty,$host_mac_pretty >> mac_bindings.expected
         done
       done
     done
   done
 done
 
-# test_arp INPORT SHA SPA TPA [REPLY_HA]
-#
-# Causes a packet to be received on INPORT.  The packet is an ARP
-# request with SHA, SPA, and TPA as specified.  If REPLY_HA is provided, then
-# it should be the hardware address of the target to expect to receive in an
-# ARP reply; otherwise no reply is expected.
-#
-# INPORT is an logical switch port number, e.g. 11 for vif11.
-# SHA and REPLY_HA are each 12 hex digits.
-# SPA and TPA are each 8 hex digits.
-test_arp() {
-    local inport=$1 sha=$2 spa=$3 tpa=$4 reply_ha=$5
-    local request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
-    hv=hv`vif_to_hv $inport`
-    as $hv ovs-appctl netdev-dummy/receive vif$inport $request
-    as $hv ovs-appctl ofproto/trace br-int in_port=$inport $request
-
-    # Expect to receive the broadcast ARP on the other logical switch ports if
-    # IP address is not configured to the switch patch port.
-    local i=`vif_to_ls $inport`
-    local j k
-    for j in 1 2 3; do
-        for k in 1 2 3; do
-            # 192.168.33.254 is configured to the switch patch port for lrp33,
-            # so no ARP flooding expected for it.
-            if test $i$j$k != $inport && test $tpa != `ip_to_hex 192 168 33 254`; then
-                echo $request >> $i$j$k.expected
-            fi
-        done
-    done
-
-    # Expect to receive the reply, if any.
-    if test X$reply_ha != X; then
-        lrp=`vif_to_lrp $inport`
-        local reply=${sha}00000000ff${lrp}08060001080006040002${reply_ha}${tpa}${sha}${spa}
-        echo $reply >> $inport.expected
-    fi
-}
-
 # Test router replies to ARP requests from all source ports:
 #
 # 4. Router replies to query for its MAC address from port's own IP address.
@@ -2678,7 +2698,6 @@  test_arp() {
 # 6. No reply to query for IP address other than router IP.
 #
 # 7. No reply to query from another subnet.
-: > mac_bindings.expected
 for i in 1 2 3; do
   for j in 1 2 3; do
     for k in 1 2 3; do
@@ -2722,60 +2741,12 @@  for i in 1 2 3; do
   done
 done
 
-# Allow some time for packet forwarding.
-# XXX This can be improved.
-sleep 1
-
-# 8. Generate an ARP reply for each of the IP addresses ARPed for
-#    earlier as #3.
-#
-#    Here, the $s is the VIF that originated the ARP request and $d is
-#    the VIF that sends the ARP reply, which is somewhat backward but
-#    it means that $s and $d are the same as #3.
-for is in 1 2 3; do
-  for js in 1 2 3; do
-    for ks in 1 2 3; do
-      s=$is$js$ks
-      for id in 1 2 3; do
-        for jd in 1 2 3; do
-          if test $is$js = $id$jd; then
-            continue
-          fi
-
-          kd=1
-          d=$id$jd$kd
-
-          o4=`expr $is '*' 9 + $js '*' 3 + $ks + 10`
-          host_ip=`ip_to_hex 192 168 $id$jd $o4`
-          host_mac=8000000000$o4
-
-          lrmac=00000000ff$id$jd
-          lrip=`ip_to_hex 192 168 $id$jd 254`
-
-          arp=${lrmac}${host_mac}08060001080006040002${host_mac}${host_ip}${lrmac}${lrip}
-
-          echo
-          echo
-          echo
-          hv=hv`vif_to_hv $d`
-          as $hv ovs-appctl netdev-dummy/receive vif$d $arp
-          #as $hv ovs-appctl ofproto/trace br-int in_port=$d $arp
-          #as $hv ovs-ofctl dump-flows br-int table=19
-
-          host_ip_pretty=192.168.$id$jd.$o4
-          host_mac_pretty=80:00:00:00:00:$o4
-          echo lrp$id$jd,$host_ip_pretty,$host_mac_pretty >> mac_bindings.expected
-        done
-      done
-    done
-  done
-done
 
 # Allow some time for packet forwarding.
 # XXX This can be improved.
 sleep 1
 
-# 9. Send an IP packet from every logical port to every other subnet.  These
+# 8. Send an IP packet from every logical port to every other subnet.  These
 #    are the same packets already sent as #3, but now the destinations' IP-MAC
 #    bindings have been discovered via ARP, so instead of provoking an ARP
 #    request, these packets now get routed to their destinations (which don't
@@ -8912,8 +8883,6 @@  packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111
 as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
 OVS_WAIT_UNTIL([test `ovn-sbctl find mac_binding ip="10.0.0.2" | wc -l` -gt 0])
 ovn-nbctl --wait=hv sync
-# Send the second packet to reach the destination.
-as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
 
 # Packet to Expect at 'alice1'
 src_mac="000000010204"
@@ -11322,3 +11291,149 @@  AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 192.168.0.3"])
 AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 aef0::1"])
 
 AT_CLEANUP
+
+AT_SETUP([ovn -- IP packet buffering])
+AT_KEYWORDS([ip-buffering])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Logical network:
+# One LR lr0 that has switches sw0 (192.168.1.0/24) and
+# sw1 (172.16.1.0/24) connected to it.
+#
+# Physical network:
+# Tw0 hypervisors hv[12].
+# hv1 hosts vif sw0-p0.
+# hv1 hosts vif sw1-p0.
+
+send_icmp_packet() {
+    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 ip_chksum=$7 data=$8
+    shift 8
+
+    local ip_ttl=ff
+    local ip_len=001c
+    local packet=${eth_dst}${eth_src}08004500${ip_len}00004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}${data}
+    as hv$hv ovs-appctl netdev-dummy/receive hv$hv-vif$inport $packet
+}
+
+send_icmp6_packet() {
+    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ipv6_router=$7 exp_icmp_chksum=$8
+    shift 8
+
+    local ip6_hdr=6000000000083aff${ipv6_src}${ipv6_dst}
+    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000dcb662f00001
+
+    as hv$hv ovs-appctl netdev-dummy/receive hv$hv-vif$inport $packet
+}
+
+get_arp_req() {
+    local eth_src=$1 spa=$2 tpa=$3
+    local request=ffffffffffff${eth_src}08060001080006040001${eth_src}${spa}000000000000${tpa}
+    echo $request
+}
+
+send_arp_reply() {
+    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
+    local request=${eth_dst}${eth_src}08060001080006040002${eth_src}${spa}${eth_dst}${tpa}
+    as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request
+}
+
+send_na() {
+    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 src_ip=$5 dst_ip=$6
+    local ip6_hdr=6000000000203aff${src_ip}${dst_ip}
+    local request=${eth_dst}${eth_src}86dd${ip6_hdr}8800d78440000000${src_ip}0201${eth_src}
+
+    as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request
+}
+
+get_nd() {
+    local eth_src=$1 src_ip=$2 dst_ip=$3 ta=$4
+    local ip6_hdr=6000000000203aff${src_ip}${dst_ip}
+    request=3333ff000010${eth_src}86dd${ip6_hdr}8700357600000000${ta}0101${eth_src}
+
+    echo $request
+}
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=sw0-p0 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=sw1-p0 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap \
+    ofport-request=1
+
+ovn-nbctl create Logical_Router name=lr0 options:chassis=hv1
+ovn-nbctl ls-add sw0
+ovn-nbctl ls-add sw1
+
+ovn-nbctl lrp-add lr0 sw0 00:00:01:01:02:03 192.168.1.1/24 2001::1/64
+ovn-nbctl lsp-add sw0 rp-sw0 -- set Logical_Switch_Port rp-sw0 \
+    type=router options:router-port=sw0 \
+    -- lsp-set-addresses rp-sw0 router
+
+ovn-nbctl lrp-add lr0 sw1 00:00:02:01:02:03 172.16.1.1/24 2002::1/64
+ovn-nbctl lsp-add sw1 rp-sw1 -- set Logical_Switch_Port rp-sw1 \
+    type=router options:router-port=sw1 \
+    -- lsp-set-addresses rp-sw1 router
+
+ovn-nbctl lsp-add sw0 sw0-p0 \
+    -- lsp-set-addresses sw0-p0 "f0:00:00:01:02:03 192.168.1.2 2001::2"
+
+ovn-nbctl lsp-add sw1 sw1-p0 \
+    -- lsp-set-addresses sw1-p0 unknown
+
+OVN_POPULATE_ARP
+ovn-nbctl --wait=hv sync
+
+ip_to_hex() {
+    printf "%02x%02x%02x%02x" "$@"
+}
+
+src_mac=f00000010203
+src_ip=$(ip_to_hex 192 168 1 2)
+src_ip6=20010000000000000000000000000002
+
+router_mac0=000001010203
+router_mac1=000002010203
+router_ip=$(ip_to_hex 172 16 1 1)
+router_ip6=20020000000000000000000000000001
+
+dst_mac=001122334455
+dst_ip=$(ip_to_hex 172 16 1 10)
+dst_ip6=20020000000000000000000000000010
+
+data=0800bee4391a0001
+
+send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0000 $data
+send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip
+echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected
+echo "${dst_mac}${router_mac1}08004500001c00004000fe010100${src_ip}${dst_ip}${data}" >> expected
+
+OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])
+
+nd_ip=ff0200000000000000000001ff000010
+ip6_hdr=6000000000083afe${src_ip6}${dst_ip6}
+
+send_icmp6_packet 1 1 $src_mac $router_mac0 $src_ip6 $dst_ip6
+echo $(get_nd $router_mac1 $src_ip6 $nd_ip $dst_ip6) >> expected
+echo "${dst_mac}${router_mac1}86dd${ip6_hdr}8000dcb662f00001" >> expected
+send_na 2 1 $dst_mac $router_mac1 $dst_ip6 $router_ip6
+
+OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])
+
+OVN_CLEANUP([hv1],[hv2])
+AT_CLEANUP