[ovs-dev,1/2] OVN: add buffering support for ipv4 packets

Message ID 5f76714a1a81a862f4b656b965de62b35c5184ad.1536937902.git.lorenzo.bianconi@redhat.com
State New
Headers show
Series
  • add buffering support for IP packets
Related show

Commit Message

Lorenzo Bianconi Sept. 14, 2018, 3:19 p.m.
Add buffering support for IPv4 packets that will be processed
by arp {} action when L2 address is not discovered yet since
otherwise the packet will be substituted with an ARP frame and
this will result in the lost of the first packet of the connection

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 ovn/controller/pinctrl.c | 180 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 175 insertions(+), 5 deletions(-)

Patch

diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 0164696cc..3f8c0ac4e 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -61,6 +61,9 @@  static struct rconn *swconn;
  * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
 static unsigned int conn_seq_no;
 
+static void init_buffered_packets_map(void);
+static void destroy_buffered_packets_map(void);
+
 static void pinctrl_handle_put_mac_binding(const struct flow *md,
                                            const struct flow *headers,
                                            bool is_arp);
@@ -108,6 +111,7 @@  static void send_ipv6_ras(
 ;
 
 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
+COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
 
 void
 pinctrl_init(void)
@@ -117,6 +121,7 @@  pinctrl_init(void)
     init_put_mac_bindings();
     init_send_garps();
     init_ipv6_ras();
+    init_buffered_packets_map();
 }
 
 static ovs_be32
@@ -190,10 +195,147 @@  set_actions_and_enqueue_msg(const struct dp_packet *packet,
     ofpbuf_uninit(&ofpacts);
 }
 
+struct buffer_info {
+    struct ofpbuf ofpacts;
+    struct dp_packet *p;
+};
+
+#define BUFFER_QUEUE_DEPTH     64
+struct buffered_packets {
+    struct hmap_node hmap_node;
+
+    /* key */
+    char ip[INET6_ADDRSTRLEN];
+
+    long long int timestamp;
+
+    struct buffer_info data[BUFFER_QUEUE_DEPTH];
+    int head, tail;
+};
+
+static struct hmap buffered_packets_map;
+
+static void
+init_buffered_packets_map(void)
+{
+    hmap_init(&buffered_packets_map);
+}
+
+static void
+destroy_buffered_packets(struct buffered_packets *bp)
+{
+    struct buffer_info *bi;
+
+    while (bp->head != bp->tail) {
+        bi = &bp->data[bp->head];
+        dp_packet_uninit(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    hmap_remove(&buffered_packets_map, &bp->hmap_node);
+    free(bp);
+}
+
+static void
+destroy_buffered_packets_map(void)
+{
+    struct buffered_packets *bp;
+    HMAP_FOR_EACH_POP (bp, hmap_node, &buffered_packets_map) {
+        destroy_buffered_packets(bp);
+    }
+    hmap_destroy(&buffered_packets_map);
+}
+
+static void
+buffered_push_packet(struct buffered_packets *bp,
+                     struct dp_packet *packet,
+                     const struct match *md)
+{
+    struct buffer_info *bi = &bp->data[bp->tail];
+    int next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
+
+    ofpbuf_init(&bi->ofpacts, 4096);
+
+    reload_metadata(&bi->ofpacts, md);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
+
+    bi->p = packet;
+
+    if (next == bp->head) {
+        bi = &bp->data[bp->head];
+        dp_packet_uninit(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    bp->tail = next;
+}
+
 static void
-pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
-                   struct ofpbuf *userdata)
+buffered_send_packets(struct buffered_packets *bp, unsigned char *addr)
 {
+    enum ofp_version version = rconn_get_version(swconn);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+
+    while (bp->head != bp->tail) {
+        struct buffer_info *bi = &bp->data[bp->head];
+        memcpy(dp_packet_data(bi->p), addr, 6);
+
+        struct ofputil_packet_out po = {
+            .packet = dp_packet_data(bi->p),
+            .packet_len = dp_packet_size(bi->p),
+            .buffer_id = UINT32_MAX,
+            .ofpacts = bi->ofpacts.data,
+            .ofpacts_len = bi->ofpacts.size,
+        };
+        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+        queue_msg(ofputil_encode_packet_out(&po, proto));
+
+        ofpbuf_uninit(&bi->ofpacts);
+        dp_packet_uninit(bi->p);
+
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+}
+
+#define BUFFER_MAP_TIMEOUT   30000
+static void
+buffered_packets_map_gc(void)
+{
+    struct buffered_packets *cur_qp, *next_qp;
+    long long int now = time_msec();
+
+    HMAP_FOR_EACH_SAFE (cur_qp, next_qp, hmap_node, &buffered_packets_map) {
+        if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
+            destroy_buffered_packets(cur_qp);
+        }
+    }
+}
+
+static struct buffered_packets *
+pinctrl_find_buffered_packets(const char *ip, uint32_t hash)
+{
+    struct buffered_packets *qp;
+
+    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
+                             &buffered_packets_map) {
+        if (!strcmp(qp->ip, ip)) {
+            return qp;
+        }
+    }
+    return NULL;
+}
+
+static void
+pinctrl_handle_arp(const struct flow *ip_flow, struct dp_packet *pkt_in,
+                   const struct match *md, struct ofpbuf *userdata)
+{
+    struct dp_packet *clone, packet;
+    uint64_t packet_stub[128 / 8];
+    char ip[INET6_ADDRSTRLEN];
+
     /* This action only works for IP packets, and the switch should only send
      * us IP packets this way, but check here just to be sure. */
     if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
@@ -203,9 +345,28 @@  pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
         return;
     }
 
+    inet_ntop(AF_INET, &ip_flow->nw_dst, ip, sizeof(ip));
+    uint32_t hash = hash_string(ip, 0);
+    struct buffered_packets *bp = pinctrl_find_buffered_packets(ip, hash);
+    if (!bp) {
+        if (hmap_count(&buffered_packets_map) >= 1000) {
+            COVERAGE_INC(pinctrl_drop_buffered_packets_map);
+            goto send_arp;
+        }
+
+        bp = xmalloc(sizeof *bp);
+        hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
+        ovs_strlcpy_arrays(bp->ip, ip);
+        bp->head = bp->tail = 0;
+    }
+    bp->timestamp = time_msec();
+    /* clone the packet to send it later with correct L2 address */
+    clone = dp_packet_clone_data(dp_packet_data(pkt_in),
+                                 dp_packet_size(pkt_in));
+    buffered_push_packet(bp, clone, md);
+
+send_arp:
     /* Compose an ARP packet. */
-    uint64_t packet_stub[128 / 8];
-    struct dp_packet packet;
     dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
     compose_arp__(&packet);
 
@@ -1152,7 +1313,7 @@  process_packet_in(const struct ofp_header *msg,
 
     switch (ntohl(ah->opcode)) {
     case ACTION_OPCODE_ARP:
-        pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
+        pinctrl_handle_arp(&headers, &packet, &pin.flow_metadata, &userdata);
         break;
 
     case ACTION_OPCODE_PUT_ARP:
@@ -1300,6 +1461,7 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                   local_datapaths, active_tunnels);
     send_ipv6_ras(sbrec_port_binding_by_datapath,
                   sbrec_port_binding_by_name, local_datapaths);
+    buffered_packets_map_gc();
 }
 
 /* Table of ipv6_ra_state structures, keyed on logical port name */
@@ -1610,6 +1772,7 @@  pinctrl_destroy(void)
     destroy_put_mac_bindings();
     destroy_send_garps();
     destroy_ipv6_ras();
+    destroy_buffered_packets_map();
 }
 
 /* Implementation of the "put_arp" and "put_nd" OVN actions.  These
@@ -1676,6 +1839,7 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
     uint32_t dp_key = ntohll(md->metadata);
     uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
     char ip_s[INET6_ADDRSTRLEN];
+    struct buffered_packets *bp;
 
     if (is_arp) {
         ovs_be32 ip = htonl(md->regs[0]);
@@ -1701,6 +1865,12 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
     }
     pmb->timestamp = time_msec();
     pmb->mac = headers->dl_src;
+
+    /* send queued pkts */
+    bp = pinctrl_find_buffered_packets(ip_s, hash_string(ip_s, 0));
+    if (bp) {
+        buffered_send_packets(bp, pmb->mac.ea);
+    }
 }
 
 static const struct sbrec_mac_binding *