[ovs-dev,v2,1/3] OVN: add buffering support for ipv4 packets
diff mbox series

Message ID d12abe06bae7aea0159048ddbe80a5a349a8c76d.1538487934.git.lorenzo.bianconi@redhat.com
State Changes Requested
Headers show
Series
  • add buffering support for IP packets
Related show

Commit Message

Lorenzo Bianconi Oct. 2, 2018, 1:59 p.m. UTC
Add buffering support for IPv4 packets that are processed by the
arp {} action while the L2 address has not been discovered yet.
Otherwise the packet is replaced with an ARP frame, which results
in the loss of the first packet of the connection.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 ovn/controller/pinctrl.c | 200 ++++++++++++++++++++++++++++++++++++++-
 tests/ovn.at             | 107 +++++++++++++++++++++
 2 files changed, 304 insertions(+), 3 deletions(-)

Patch
diff mbox series

diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 8ae4c9e52..5f15e1696 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -61,6 +61,9 @@  static struct rconn *swconn;
  * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
 static unsigned int conn_seq_no;
 
+static void init_buffered_packets_map(void);
+static void destroy_buffered_packets_map(void);
+
 static void pinctrl_handle_put_mac_binding(const struct flow *md,
                                            const struct flow *headers,
                                            bool is_arp);
@@ -108,6 +111,7 @@  static void send_ipv6_ras(
 ;
 
 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
+COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
 
 void
 pinctrl_init(void)
@@ -117,6 +121,7 @@  pinctrl_init(void)
     init_put_mac_bindings();
     init_send_garps();
     init_ipv6_ras();
+    init_buffered_packets_map();
 }
 
 static ovs_be32
@@ -190,9 +195,181 @@  set_actions_and_enqueue_msg(const struct dp_packet *packet,
     ofpbuf_uninit(&ofpacts);
 }
 
+struct buffer_info {
+    struct ofpbuf ofpacts;      /* Actions sent along with the packet. */
+    struct dp_packet *p;        /* The buffered packet itself. */
+};
+
+#define BUFFER_QUEUE_DEPTH     4
+struct buffered_packets {
+    struct hmap_node hmap_node; /* In 'buffered_packets_map'. */
+
+    /* Key: destination IP address (IPv4 is stored IPv4-mapped). */
+    ovs_be128 ip;
+
+    long long int timestamp;    /* time_msec() of the latest enqueue. */
+
+    struct buffer_info data[BUFFER_QUEUE_DEPTH];
+    uint32_t head, tail;        /* Ring indexes; equal means empty. */
+};
+
+static struct hmap buffered_packets_map;
+
+static void
+init_buffered_packets_map(void)
+{
+    hmap_init(&buffered_packets_map);   /* Called by pinctrl_init(). */
+}
+
+/* Frees every packet still queued in 'bp', unlinks 'bp' from
+ * 'buffered_packets_map' and frees 'bp' itself. */
+static void
+destroy_buffered_packets(struct buffered_packets *bp)
+{
+    while (bp->head != bp->tail) {
+        struct buffer_info *bi = &bp->data[bp->head];
+        /* 'p' is a heap clone, so free the struct as well as its data. */
+        dp_packet_delete(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    hmap_remove(&buffered_packets_map, &bp->hmap_node);
+    free(bp);
+}
+
+static void
+destroy_buffered_packets_map(void)
+{
+    struct buffered_packets *bp, *next;
+    HMAP_FOR_EACH_SAFE (bp, next, hmap_node, &buffered_packets_map) {
+        destroy_buffered_packets(bp); /* Also unlinks 'bp' from the map. */
+    }
+    hmap_destroy(&buffered_packets_map);
+}
+
+/* Queues 'packet', which 'bp' owns from now on, with actions built from 'md'
+ * plus a resubmit.  A full ring drops its oldest entry to make room. */
+static void
+buffered_push_packet(struct buffered_packets *bp,
+                     struct dp_packet *packet,
+                     const struct match *md)
+{
+    uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
+    struct buffer_info *bi = &bp->data[bp->tail];
+
+    ofpbuf_init(&bi->ofpacts, 4096);
+    reload_metadata(&bi->ofpacts, md);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
+    bi->p = packet;
+
+    if (next == bp->head) {
+        /* Ring full: evict the oldest entry, freeing its heap packet. */
+        bi = &bp->data[bp->head];
+        dp_packet_delete(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    bp->tail = next;
+}
+
+/* Encodes every packet queued in 'bp' as a packet-out with its Ethernet
+ * destination set to '*addr', passes it to queue_msg() and empties 'bp'. */
+static void
+buffered_send_packets(struct buffered_packets *bp, struct eth_addr *addr)
+{
+    enum ofp_version version = rconn_get_version(swconn);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+
+    while (bp->head != bp->tail) {
+        struct buffer_info *bi = &bp->data[bp->head];
+        struct eth_header *eth = dp_packet_data(bi->p);
+        eth->eth_dst = *addr;
+        struct ofputil_packet_out po = {
+            .packet = dp_packet_data(bi->p),
+            .packet_len = dp_packet_size(bi->p),
+            .buffer_id = UINT32_MAX,
+            .ofpacts = bi->ofpacts.data,
+            .ofpacts_len = bi->ofpacts.size,
+        };
+        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+        queue_msg(ofputil_encode_packet_out(&po, proto));
+        /* The packet-out is encoded; free the actions and the heap clone. */
+        ofpbuf_uninit(&bi->ofpacts);
+        dp_packet_delete(bi->p);
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+}
+
+#define BUFFER_MAP_TIMEOUT   10000 /* Compared against time_msec(). */
+static void
+buffered_packets_map_gc(void)
+{
+    struct buffered_packets *cur_qp, *next_qp;
+    long long int now = time_msec();
+    /* Destroy queues whose timestamp is older than the timeout. */
+    HMAP_FOR_EACH_SAFE (cur_qp, next_qp, hmap_node, &buffered_packets_map) {
+        if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
+            destroy_buffered_packets(cur_qp);
+        }
+    }
+}
+
+static struct buffered_packets *
+pinctrl_find_buffered_packets(const ovs_be128 *ip, uint32_t hash)
+{
+    struct buffered_packets *qp;
+    /* Entries sharing 'hash' may differ in address, so compare the key. */
+    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
+                             &buffered_packets_map) {
+        if (!memcmp(&qp->ip, ip, sizeof(ovs_be128))) {
+            return qp;
+        }
+    }
+    return NULL;    /* No queue exists for 'ip'. */
+}
+
+/* Clones 'pkt_in' onto the queue for its destination IP, creating the queue
+ * if needed.  Returns 0, or -ENOMEM if a queue is needed but 1000 exist. */
+static int
+pinctrl_handle_bufferd_packets(const struct flow *ip_flow,
+                               struct dp_packet *pkt_in,
+                               const struct match *md, bool is_arp)
+{
+    struct buffered_packets *bp;
+    struct dp_packet *clone;
+    struct in6_addr addr;
+
+    if (is_arp) {
+        addr = in6_addr_mapped_ipv4(ip_flow->nw_dst);
+    } else {
+        addr = ip_flow->ipv6_dst;
+    }
+    ovs_be128 ip = get_32aligned_be128((const ovs_32aligned_be128 *)&addr);
+    uint32_t hash = hash_bytes(&ip, sizeof(ovs_be128), 0);
+    bp = pinctrl_find_buffered_packets(&ip, hash);
+    if (!bp) {
+        if (hmap_count(&buffered_packets_map) >= 1000) {
+            COVERAGE_INC(pinctrl_drop_buffered_packets_map);
+            return -ENOMEM;
+        }
+        /* No queue for this address yet: insert a new, empty one. */
+        bp = xmalloc(sizeof *bp);
+        hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
+        bp->head = bp->tail = 0;
+        bp->ip = ip;
+    }
+    bp->timestamp = time_msec();
+    /* Clone the packet so it can be sent once the L2 address is known. */
+    clone = dp_packet_clone_data(dp_packet_data(pkt_in),
+                                 dp_packet_size(pkt_in));
+    buffered_push_packet(bp, clone, md);
+    return 0;
+}
+
 static void
-pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
-                   struct ofpbuf *userdata)
+pinctrl_handle_arp(const struct flow *ip_flow, struct dp_packet *pkt_in,
+                   const struct match *md, struct ofpbuf *userdata)
 {
     /* This action only works for IP packets, and the switch should only send
      * us IP packets this way, but check here just to be sure. */
@@ -203,6 +380,8 @@  pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
         return;
     }
 
+    pinctrl_handle_bufferd_packets(ip_flow, pkt_in, md, true);
+
     /* Compose an ARP packet. */
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
@@ -1152,7 +1331,7 @@  process_packet_in(const struct ofp_header *msg,
 
     switch (ntohl(ah->opcode)) {
     case ACTION_OPCODE_ARP:
-        pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
+        pinctrl_handle_arp(&headers, &packet, &pin.flow_metadata, &userdata);
         break;
 
     case ACTION_OPCODE_PUT_ARP:
@@ -1300,6 +1479,7 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                   local_datapaths, active_tunnels);
     send_ipv6_ras(sbrec_port_binding_by_datapath,
                   sbrec_port_binding_by_name, local_datapaths);
+    buffered_packets_map_gc();
 }
 
 /* Table of ipv6_ra_state structures, keyed on logical port name */
@@ -1610,6 +1790,7 @@  pinctrl_destroy(void)
     destroy_put_mac_bindings();
     destroy_send_garps();
     destroy_ipv6_ras();
+    destroy_buffered_packets_map();
 }
 
 /* Implementation of the "put_arp" and "put_nd" OVN actions.  These
@@ -1676,13 +1857,19 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
     uint32_t dp_key = ntohll(md->metadata);
     uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
     char ip_s[INET6_ADDRSTRLEN];
+    struct buffered_packets *bp;
+    ovs_be128 ip_key;
 
     if (is_arp) {
         ovs_be32 ip = htonl(md->regs[0]);
         inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s));
+
+        struct in6_addr addr = in6_addr_mapped_ipv4(ip);
+        ip_key = get_32aligned_be128((const ovs_32aligned_be128 *)&addr);
     } else {
         ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
         inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s));
+        ip_key = ip6;
     }
     uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
     struct put_mac_binding *pmb
@@ -1701,6 +1888,13 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
     }
     pmb->timestamp = time_msec();
     pmb->mac = headers->dl_src;
+
+    /* send queued pkts */
+    uint32_t bhash = hash_bytes(&ip_key, sizeof(ovs_be128), 0);
+    bp = pinctrl_find_buffered_packets(&ip_key, bhash);
+    if (bp) {
+        buffered_send_packets(bp, &pmb->mac);
+    }
 }
 
 static const struct sbrec_mac_binding *
diff --git a/tests/ovn.at b/tests/ovn.at
index 769e09f81..23c2cabaf 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -11253,3 +11253,110 @@  AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 192.168.0.3"])
 AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 aef0::1"])
 
 AT_CLEANUP
+
+AT_SETUP([ovn -- IP packet buffering])
+AT_KEYWORDS([ip-buffering])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Logical network:
+# One LR lr0 that has switches sw0 (192.168.1.0/24) and
+# sw1 (172.16.1.0/24) connected to it.
+#
+# Physical network:
+# Two hypervisors hv[12].
+# hv1 hosts vif sw0-p0.
+# hv2 hosts vif sw1-p0.
+
+send_icmp_packet() {
+    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 ip_chksum=$7 data=$8
+    shift 8
+
+    local ip_ttl=ff
+    local ip_len=001c
+    local packet=${eth_dst}${eth_src}08004500${ip_len}00004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}${data}
+    as hv$hv ovs-appctl netdev-dummy/receive hv$hv-vif$inport $packet
+}
+
+get_arp_req() {
+    local eth_src=$1 spa=$2 tpa=$3
+    local request=ffffffffffff${eth_src}08060001080006040001${eth_src}${spa}000000000000${tpa}
+    echo $request
+}
+
+send_arp_reply() {
+    local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
+    local request=${eth_dst}${eth_src}08060001080006040002${eth_src}${spa}${eth_dst}${tpa}
+    as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request
+}
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=sw0-p0 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=sw1-p0 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap \
+    ofport-request=1
+
+ovn-nbctl create Logical_Router name=lr0 options:chassis=hv1
+ovn-nbctl ls-add sw0
+ovn-nbctl ls-add sw1
+
+ovn-nbctl lrp-add lr0 sw0 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lsp-add sw0 rp-sw0 -- set Logical_Switch_Port rp-sw0 \
+    type=router options:router-port=sw0 \
+    -- lsp-set-addresses rp-sw0 router
+
+ovn-nbctl lrp-add lr0 sw1 00:00:02:01:02:03 172.16.1.1/24
+ovn-nbctl lsp-add sw1 rp-sw1 -- set Logical_Switch_Port rp-sw1 \
+    type=router options:router-port=sw1 \
+    -- lsp-set-addresses rp-sw1 router
+
+ovn-nbctl lsp-add sw0 sw0-p0 \
+    -- lsp-set-addresses sw0-p0 "f0:00:00:01:02:03 192.168.1.2"
+
+ovn-nbctl lsp-add sw1 sw1-p0 \
+    -- lsp-set-addresses sw1-p0 unknown
+
+OVN_POPULATE_ARP
+ovn-nbctl --wait=hv sync
+
+ip_to_hex() {
+    printf "%02x%02x%02x%02x" "$@"
+}
+
+src_mac=f00000010203
+src_ip=$(ip_to_hex 192 168 1 2)
+
+router_mac0=000001010203
+router_mac1=000002010203
+router_ip=$(ip_to_hex 172 16 1 1)
+
+dst_mac=001122334455
+dst_ip=$(ip_to_hex 172 16 1 10)
+
+data=0800bee4391a0001
+
+send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0000 $data
+send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip
+echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected
+echo "${dst_mac}${router_mac1}08004500001c00004000fe010100${src_ip}${dst_ip}${data}" >> expected
+
+OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])
+
+OVN_CLEANUP([hv1],[hv2])
+AT_CLEANUP