
[ovs-dev,11/11] ovs/dp-cls: inserting rule to HW from offloading thread context

Message ID 5803cabfcceac168c93c203296651ee3b8b29f06.1499257385.git.shacharbe@mellanox.com
State Deferred

Commit Message

Shachar Beiser July 5, 2017, 12:27 p.m. UTC
The offloading dp classifier thread calls the DPDK rte_flow_create API
and inserts the rule into the HW classifier.
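
For reference, the hardware rule built by this patch follows the standard
DPDK rte_flow pattern: a list of match items plus a MARK + QUEUE action
list.  A minimal standalone sketch (illustrative only; port_id, flow_tag
and rxq_id are placeholders, not identifiers from this patch):

    struct rte_flow_attr attr = { .ingress = 1 };
    struct rte_flow_action_mark mark = { .id = flow_tag };
    struct rte_flow_action_queue queue = { .index = rxq_id };
    struct rte_flow_item_eth eth_spec, eth_mask;
    struct rte_flow_error error;

    memset(&eth_spec, 0, sizeof eth_spec);
    memset(&eth_mask, 0, sizeof eth_mask);

    struct rte_flow_item items[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH,
          .spec = &eth_spec, .mask = &eth_mask },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
    struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };

    if (!rte_flow_validate(port_id, &attr, items, actions, &error)) {
        /* The returned handle must be saved and later passed to
         * rte_flow_destroy(). */
        struct rte_flow *flow = rte_flow_create(port_id, &attr, items,
                                                actions, &error);
    }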

Signed-off-by: Shachar Beiser <shacharbe@mellanox.com>

Conflicts:
	lib/dpif-netdev.c
	lib/dpif-netdev.h
---
 lib/dpif-netdev.c |  24 +++
 lib/hw-pipeline.c | 495 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 lib/netdev-dpdk.c |  27 +++
 lib/netdev-dpdk.h |   8 +
 4 files changed, 536 insertions(+), 18 deletions(-)

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 45066d4..6e5ec38 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -51,6 +51,7 @@ 
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
+#include "netdev-vport-private.h"
 #include "netlink.h"
 #include "odp-execute.h"
 #include "odp-util.h"
@@ -1573,6 +1574,29 @@  dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread *pmd,
     return cls;
 }
 
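+/* Returns 1 if 'port_no' is a tunnel vport whose carrier is up, 0 if it is
+ * not, and -1 if there is no such port. */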
+int
+dpif_netdev_vport_is_tunnel(struct dp_netdev *dp, odp_port_t port_no)
+{
+    struct dp_netdev_port *port = dp_netdev_lookup_port(dp, port_no);
+
+    if (port == NULL) {
+        VLOG_ERR("no port found: %"PRIu32, odp_to_u32(port_no));
+        return -1;
+    }
+
+    if (is_vport_class(netdev_get_class(port->netdev))) {
+        struct netdev_vport *vport = netdev_vport_cast(port->netdev);
+
+        if (vport != NULL && vport->carrier_status) {
+            return 1;
+        }
+        return 0;
+    }
+    return 0;
+}
+
 static void
 dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
                           struct dp_netdev_flow *flow)
diff --git a/lib/hw-pipeline.c b/lib/hw-pipeline.c
index e2d040e..e7104af 100644
--- a/lib/hw-pipeline.c
+++ b/lib/hw-pipeline.c
@@ -43,6 +43,10 @@ 
 
 VLOG_DEFINE_THIS_MODULE(hw_pipeline);
 
+struct set_rte_action {
+    void (*set)(struct rte_flow_action *, uint32_t data, size_t *);
+};
+
 static struct dp_netdev_flow *hw_pipeline_read_flow(flow_tag_pool *p,
                                                     uint32_t flow_tag);
 static int hw_pipeline_send_insert_flow(struct dp_netdev *dp,
@@ -60,6 +64,32 @@  bool hw_pipeline_ft_pool_is_valid(flow_tag_pool *p);
 
 static int hw_pipeline_remove_flow(struct dp_netdev *dp,
                                    msg_hw_flow *ptr_rule);
+
+static int hw_pipeline_create_external_rule(struct dp_netdev *dp,
+                                            msg_sw_flow *ptr_rule,
+                                            struct set_rte_action *action,
+                                            size_t action_num,
+                                            struct rte_flow **hw_flow_h);
+
+static void hw_pipeline_item_array_build(struct set_rte_item *item_any_flow,
+                                         struct flow *mask,
+                                         size_t *buf_size,
+                                         size_t *item_num);
+
+static inline struct rte_flow *hw_pipeline_rte_flow_create(
+        struct dp_netdev *dp,
+        struct flow *flow,
+        struct flow *mask,
+        odp_port_t in_port,
+        struct set_rte_item item_op[],
+        struct set_rte_action action_op[],
+        uint32_t action_data[],
+        size_t item_op_size,
+        size_t action_op_size,
+        size_t buf_size,
+        size_t table_id);
+
+
 // Internal functions Flow Tags Pool
 
 uint32_t hw_pipeline_ft_pool_init(flow_tag_pool *p,uint32_t pool_size);
@@ -444,6 +474,391 @@  hw_pipeline_get_packet_md(struct netdev *netdev,
         netdev->netdev_class->get_pipeline(netdev, packet, ppl_md);
     }
 }
+
+
+enum {
+    ITEM_SET_MASK,
+    ITEM_SET_SPEC
+};
+
+static inline void
+rte_item_set_eth(struct flow *flow,
+                 struct rte_flow_item *item,
+                 size_t *offset,
+                 int mode)
+{
+    struct rte_flow_item_eth *eth;
+
+    switch (mode) {
+    case ITEM_SET_MASK:
+        eth = (struct rte_flow_item_eth *)item->mask;
+        break;
+    case ITEM_SET_SPEC:
+        eth = (struct rte_flow_item_eth *)item->spec;
+        break;
+    default:
+        return;
+    }
+    item->type = RTE_FLOW_ITEM_TYPE_ETH;
+    *offset += sizeof(struct rte_flow_item_eth);
+
+    memcpy(&eth->dst, &flow->dl_dst.ea[0], sizeof(eth->dst));
+    memcpy(&eth->src, &flow->dl_src.ea[0], sizeof(eth->src));
+}
+
+static inline void
+rte_item_set_eth_vlan(struct flow *flow,
+                      struct rte_flow_item *item,
+                      size_t *offset,
+                      int mode)
+{
+    struct rte_flow_item_vlan *vlan;
+
+    switch (mode) {
+    case ITEM_SET_MASK:
+        vlan = (struct rte_flow_item_vlan *)item->mask;
+        break;
+    case ITEM_SET_SPEC:
+        vlan = (struct rte_flow_item_vlan *)item->spec;
+        break;
+    default:
+        return;
+    }
+    item->type = RTE_FLOW_ITEM_TYPE_VLAN;
+    *offset += sizeof(*vlan);
+    vlan->tci = flow->vlans[0].tci;
+    vlan->tpid = flow->vlans[0].tpid;
+}
+
+
+static inline void
+rte_item_set_ip(struct flow *flow,
+                struct rte_flow_item *item,
+                size_t *offset,
+                int mode)
+{
+    struct rte_flow_item_ipv4 *ip;
+
+    switch (mode) {
+    case ITEM_SET_MASK:
+        ip = (struct rte_flow_item_ipv4 *)item->mask;
+        break;
+    case ITEM_SET_SPEC:
+        ip = (struct rte_flow_item_ipv4 *)item->spec;
+        break;
+    default:
+        return;
+    }
+    item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+    *offset += sizeof(*ip);
+
+    ip->hdr.src_addr = flow->nw_src;
+    ip->hdr.dst_addr = flow->nw_dst;
+
+    VLOG_INFO("%s - src ip: %d.%d.%d.%d dst ip: %d.%d.%d.%d\n",
+          __func__,
+          (ip->hdr.src_addr >> 0) & 0xff,
+          (ip->hdr.src_addr >> 8) & 0xff,
+          (ip->hdr.src_addr >> 16) & 0xff,
+          (ip->hdr.src_addr >> 24) & 0xff,
+          (ip->hdr.dst_addr >> 0) & 0xff,
+          (ip->hdr.dst_addr >> 8) & 0xff,
+          (ip->hdr.dst_addr >> 16) & 0xff,
+          (ip->hdr.dst_addr >> 24) & 0xff);
+}
+
+static inline void
+rte_item_set_udp(struct flow *flow,
+                 struct rte_flow_item *item,
+                 size_t *offset,
+                 int mode)
+{
+    struct rte_flow_item_udp *udp;
+
+    switch (mode) {
+    case ITEM_SET_MASK:
+        udp = (struct rte_flow_item_udp *)item->mask;
+        break;
+    case ITEM_SET_SPEC:
+        udp = (struct rte_flow_item_udp *)item->spec;
+        break;
+    default:
+        return;
+    }
+
+    item->type = RTE_FLOW_ITEM_TYPE_UDP;
+    *offset += sizeof(struct rte_flow_item_udp);
+
+    udp->hdr.dst_port = flow->tp_dst;
+    udp->hdr.src_port = flow->tp_src;
+}
+
+static inline void
+rte_item_set_end(struct flow *flow OVS_UNUSED,
+                 struct rte_flow_item *item,
+                 size_t *offset OVS_UNUSED,
+                 int mode OVS_UNUSED)
+{
+    item->type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+static inline void
+rte_action_set_mark(struct rte_flow_action *action,
+                    uint32_t data,
+                    size_t *offset)
+{
+    struct rte_flow_action_mark *mark =
+                (struct rte_flow_action_mark *)action->conf;
+    action->type = RTE_FLOW_ACTION_TYPE_MARK;
+    *offset += sizeof(*mark);
+    mark->id = data;
+}
+
+static inline void
+rte_action_set_queue(struct rte_flow_action *action,
+                     uint32_t data,
+                     size_t *offset)
+{
+    struct rte_flow_action_queue *queue =
+            (struct rte_flow_action_queue *)action->conf;
+    action->type = RTE_FLOW_ACTION_TYPE_QUEUE;
+    *offset += sizeof(*queue);
+
+    queue->index = data;
+}
+
+static inline void
+rte_action_set_end(struct rte_flow_action *action,
+                   uint32_t data OVS_UNUSED,
+                   size_t *offset OVS_UNUSED)
+{
+    action->type = RTE_FLOW_ACTION_TYPE_END;
+}
+
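+/* Action template shared by all offloaded flows: mark the packet with its
+ * flow tag, steer it to the originating rx queue, then end the list. */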
+struct set_rte_action action_mark_flow[] = {
+    { .set = rte_action_set_mark },
+    { .set = rte_action_set_queue },
+    { .set = rte_action_set_end },
+};
+
+static inline int hw_pipeline_rte_flow_create_and_save(
+        odp_port_t in_port,
+        struct dp_netdev *dp,
+        struct flow *flow,
+        struct flow *mask,
+        struct set_rte_item item_op[],
+        struct set_rte_action action_op[],
+        uint32_t action_data[],
+        size_t item_op_size,
+        size_t action_op_size,
+        size_t buf_size,
+        size_t table_id,
+        struct rte_flow **hw_flow_h)
+{
+    *hw_flow_h = hw_pipeline_rte_flow_create(dp, flow, mask, in_port, item_op,
+            action_op, action_data, item_op_size, action_op_size,
+            buf_size, table_id);
+    if (OVS_UNLIKELY(*hw_flow_h == NULL)) {
+        VLOG_ERR("Cannot insert rule to HW");
+        return -1;
+    }
+    return 0;
+}
+
+static inline struct rte_flow *hw_pipeline_rte_flow_create(
+        struct dp_netdev *dp,
+        struct flow *flow,
+        struct flow *mask,
+        odp_port_t in_port,
+        struct set_rte_item item_op[],
+        struct set_rte_action action_op[],
+        uint32_t action_data[],
+        size_t item_op_size,
+        size_t action_op_size,
+        size_t buf_size,
+        size_t table_id)
+{
+    struct rte_flow_attr attr = {.ingress = 1};
+    struct rte_flow_item item[item_op_size];
+    struct rte_flow_action action[action_op_size];
+    struct rte_flow_error error = {0};
+    struct rte_flow *hw_flow_ptr;
+    uint8_t buf[buf_size];
+    struct dp_netdev_port *dp_port;
+    size_t offset = 0;
+    size_t i;
+    int ret;
+
+    memset(item, 0, sizeof(item[0]) * item_op_size);
+    memset(action, 0, sizeof(action[0]) * action_op_size);
+    memset(buf, 0, sizeof(buf[0]) * buf_size);
+
+    attr.priority = table_id;
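+    /* Each item's spec and mask are carved consecutively out of buf[]. */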
+    for (i = 0; i < item_op_size; i++) {
+        item[i].spec = &buf[offset];
+        item_op[i].set(flow, &item[i], &offset, ITEM_SET_SPEC);
+        item[i].mask = &buf[offset];
+        item_op[i].set(mask, &item[i], &offset, ITEM_SET_MASK);
+    }
+
+    for (i = 0; i < action_op_size; i++) {
+        action[i].conf = buf + offset;
+        action_op[i].set(&action[i], action_data[i], &offset);
+    }
+
+    ret = get_port_by_number(dp, in_port, &dp_port);
+    if (OVS_UNLIKELY(ret)) {
+        VLOG_INFO("Can't get port");
+        return NULL;
+    }
+
+    hw_flow_ptr = netdev_dpdk_rte_flow_validate(dp_port->netdev, &attr, item,
+            action, &error);
+    if (OVS_UNLIKELY(hw_flow_ptr == NULL)) {
+        VLOG_INFO("Can't insert (%s)\n", error.message);
+        return NULL;
+    }
+
+    return hw_flow_ptr;
+}
+
+static void hw_pipeline_item_array_build(struct set_rte_item *item_any_flow,
+                                         struct flow *mask,
+                                         size_t *buf_size,
+                                         size_t *item_num)
+{
+    int ii = 0;
+    struct eth_addr eth_mac;
+
+    *buf_size = 0;
+    memset(&eth_mac, 0, sizeof(struct eth_addr));
+
+    VLOG_INFO("dl_dst : %x %x %x %x %x %x\n",mask->dl_dst.ea[0],
+            mask->dl_dst.ea[1],mask->dl_dst.ea[2],
+            mask->dl_dst.ea[3],mask->dl_dst.ea[4],
+            mask->dl_dst.ea[5]);
+    VLOG_INFO("dl_src : %x %x %x %x %x %x\n",mask->dl_src.ea[0],
+            mask->dl_src.ea[1],mask->dl_src.ea[2],
+            mask->dl_src.ea[3],mask->dl_src.ea[4],
+            mask->dl_src.ea[5]);
+
+    if (memcmp(&mask->dl_dst, &eth_mac, sizeof(struct eth_addr)) != 0
+        || memcmp(&mask->dl_src, &eth_mac, sizeof(struct eth_addr)) != 0) {
+        VLOG_INFO("rte_item_eth");
+        item_any_flow[ii++].set = rte_item_set_eth;
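+        /* Buffer space is reserved twice: spec copy plus mask copy. */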
+        *buf_size += sizeof(struct rte_flow_item_eth);
+        *buf_size += sizeof(struct rte_flow_item_eth);
+        if (mask->nw_src != 0 || mask->nw_dst != 0) {
+            VLOG_INFO("rte_item_ip");
+            item_any_flow[ii++].set = rte_item_set_ip;
+            *buf_size += sizeof(struct rte_flow_item_ipv4);
+            *buf_size += sizeof(struct rte_flow_item_ipv4);
+            if (mask->tp_dst != 0 || mask->tp_src != 0) {
+                item_any_flow[ii++].set = rte_item_set_udp;
+                *buf_size += sizeof(struct rte_flow_item_udp);
+                *buf_size += sizeof(struct rte_flow_item_udp);
+            }
+        }
+    }
+
+    item_any_flow[ii].set = rte_item_set_end;
+    *item_num = ii + 1;
+    return;
+}
+
+
+static inline void hw_pipeline_prepare_action(uint32_t flow_tag, int rxqid,
+        size_t *buf_size, uint32_t *action_data)
+{
+    *buf_size += sizeof(struct rte_flow_action_mark) +
+                 sizeof(struct rte_flow_action_queue);
+    /* actions order:
+     * Flow Tag mark
+     * queue
+     * end
+     */
+    action_data[0] = flow_tag;
+    action_data[1] = rxqid;
+    action_data[2] = 0;
+    return;
+}
+
+/*
+ * This function handles every case except VxLAN.
+ *
+ * The flow attribute group is set to 0.
+ * The flow items are built flexibly, driven by the wildcard mask.
+ * The flow action is a flow-tag mark plus a destination queue.
+ * The flow tag is unique and taken from a pool of tags.
+ * It is saved for lookup later on in the processing phase.
+ *
+ */
+
+static int hw_pipeline_create_external_rule(struct dp_netdev *dp,
+                                            msg_sw_flow *ptr_rule,
+                                            struct set_rte_action *action,
+                                            size_t action_num,
+                                            struct rte_flow **hw_flow_h)
+{
+    struct flow *sw_flow = (struct flow *)&ptr_rule->sw_flow.flow;
+    struct flow *hw_flow = sw_flow;
+    uint32_t flow_tag = ptr_rule->sw_flow.cr.flow_tag;
+    struct flow *wildcard_mask = &ptr_rule->sw_flow_mask;
+    size_t item_num = 0;
+    size_t buf_size = 0;
+    struct set_rte_item item_any_flow[] = {
+                { .set = NULL },
+                { .set = NULL },
+                { .set = NULL },
+                { .set = NULL },
+                { .set = NULL },
+    };
+    uint32_t action_data[action_num];
+    int ret = 0;
+
+    hw_pipeline_item_array_build(item_any_flow, wildcard_mask, &buf_size,
+                                 &item_num);
+
+    hw_pipeline_prepare_action(flow_tag, ptr_rule->rxqid, &buf_size, action_data);
+
+    ret = hw_pipeline_rte_flow_create_and_save(hw_flow->in_port.odp_port, dp,
+                                               hw_flow, wildcard_mask,
+                                               item_any_flow,
+                                               action, action_data,
+                                               item_num, action_num,
+                                               buf_size, 0, hw_flow_h);
+    if (OVS_UNLIKELY(ret == -1)) {
+        VLOG_ERR("Rule with flow_tag can not be inserted : %x  \n",flow_tag);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int hw_pipeline_send_remove_flow(struct dp_netdev *dp, uint32_t flow_tag,
+        ovs_u128 *ufidp)
+{
+    msg_queue_elem rule;
+
+    rule.data.rm_flow.in_port =
+        dp->ft_pool.ft_data[flow_tag].sw_flow->flow.in_port.odp_port;
+    rule.data.rm_flow.flow_tag = flow_tag;
+    memcpy(&rule.data.rm_flow.ufid, ufidp, sizeof(ovs_u128));
+    rule.mode = HW_PIPELINE_REMOVE_RULE;
+
+    if (OVS_UNLIKELY(
+            !hw_pipeline_msg_queue_enqueue(&dp->message_queue, &rule))) {
+        VLOG_INFO("queue overflow");
+        return -1;
+    }
+
+    return 0;
+}
+
 static struct dp_netdev_flow *hw_pipeline_read_flow(flow_tag_pool *p,
         uint32_t handle)
 {
@@ -477,6 +892,68 @@  static int hw_pipeline_send_insert_flow(struct dp_netdev *dp,
     return 0;
 }
 
+static inline int
+hw_pipeline_insert_flow(struct dp_netdev *dp, msg_sw_flow *ptr_rule)
+{
+    bool drop_action = false;
+    bool found_tun_pop = false;
+    struct rte_flow *hw_flow_h = NULL;
+    int ret = -1;
+
+    /* Program the NICs. */
+    dpif_netdev_find_action_active(&ptr_rule->sw_flow, &drop_action,
+                                   &found_tun_pop);
+
+    if (drop_action) {
+        if ((dpif_netdev_vport_is_tunnel(dp, ptr_rule->in_port) ||
+             ptr_rule->sw_flow.flow.nw_proto == GRE_PROTOCOL)
+            && found_tun_pop) {
+            VLOG_INFO("Internal table, drop rule");
+        } else {
+            VLOG_INFO("External table, drop rule");
+        }
+        return ret;
+    }
+
+    if (dpdk_netdev_is_dpdk_port(dp, ptr_rule->in_port)) {
+        if ((dpif_netdev_vport_is_tunnel(dp, ptr_rule->in_port) ||
+             ptr_rule->sw_flow.flow.nw_proto == GRE_PROTOCOL)
+            && found_tun_pop) {
+            VLOG_INFO("External table, tunneling rule");
+        } else {
+            ret = hw_pipeline_create_external_rule(dp, ptr_rule,
+                    action_mark_flow, ARRAY_SIZE(action_mark_flow),
+                    &hw_flow_h);
+            dp->ft_pool.ft_data[ptr_rule->sw_flow.cr.flow_tag].hw_flow_h =
+                    hw_flow_h;
+        }
+    } else {
+        /* The internal header of a tunnel. */
+        if ((dpif_netdev_vport_is_tunnel(dp, ptr_rule->in_port) ||
+             ptr_rule->sw_flow.flow.nw_proto == GRE_PROTOCOL)
+            && found_tun_pop) {
+            /* Nested tunnel. */
+            VLOG_INFO("Internal table, tunneling rule");
+        } else {
+            /* No offloading for internal ports which are not tunnels:
+             * return the tag to the pool and return.
+             */
+            VLOG_INFO("free flow_tag:%x", ptr_rule->sw_flow.cr.flow_tag);
+            if (OVS_UNLIKELY(!hw_pipeline_ft_pool_free(&dp->ft_pool,
+                    ptr_rule->sw_flow.cr.flow_tag))) {
+                VLOG_ERR("tag is out of range");
+                return ret;
+            }
+            return 0;
+        }
+    }
+
+    if (OVS_UNLIKELY(ret == -1)) {
+        VLOG_ERR("create_rule failed to insert rule");
+    }
+    return ret;
+}
+
 static int hw_pipeline_remove_flow(struct dp_netdev *dp,
                                    msg_hw_flow *ptr_rule)
 {
@@ -568,24 +1045,6 @@  int hw_pipeline_uninit(struct dp_netdev *dp)
     return 0;
 }
 
-static int hw_pipeline_send_remove_flow(struct dp_netdev *dp,uint32_t flow_tag,
-        ovs_u128 *ufidp)
-{
-    msg_queue_elem rule;
-
-    rule.data.rm_flow.in_port=
-        dp->ft_pool.ft_data[flow_tag].sw_flow->flow.in_port.odp_port;
-    rule.data.rm_flow.flow_tag = flow_tag;
-    memcpy(&rule.data.rm_flow.ufid,ufidp,sizeof(ovs_u128));
-    rule.mode = HW_PIPELINE_REMOVE_RULE;
-    if (OVS_UNLIKELY(
-            !hw_pipeline_msg_queue_enqueue(&dp->message_queue,&rule))) {
-        VLOG_INFO("queue overflow");
-        return -1;
-    }
-    return 0;
-}
-
 bool hw_pipeline_dpcls_lookup(struct dp_netdev *dp,
                               struct pipeline_md *md_tags,
                               const size_t cnt,
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 53f49ad..8f281ca 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -3261,6 +3261,33 @@  unlock:
     return err;
 }
 
+
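+/* Validates 'item' and 'action' against the port's rte_flow capabilities
+ * and, on success, programs the flow into the NIC.  Returns the created
+ * flow handle, or NULL on failure. */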
+struct rte_flow *
+netdev_dpdk_rte_flow_validate(struct netdev *netdev,
+                              struct rte_flow_attr *attr,
+                              struct rte_flow_item *item,
+                              struct rte_flow_action *action,
+                              struct rte_flow_error *error)
+{
+    int ret;
+
+    ret = rte_flow_validate(netdev_dpdk_cast(netdev)->port_id,
+                            attr, item, action, error);
+
+    if (!ret) {
+        struct rte_flow *flow;
+        /* The caller must save the returned handle and destroy it when
+         * the flow is removed. */
+        flow = rte_flow_create(netdev_dpdk_cast(netdev)->port_id,
+                               attr, item, action, error);
+        return flow;
+    }
+    return NULL;
+}
+
 #define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT,    \
                           SET_CONFIG, SET_TX_MULTIQ, SEND,    \
                           GET_CARRIER, GET_STATS,             \
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index a630da3..2c3a686 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -29,9 +29,17 @@  struct rte_flow_action;
 struct rte_flow_error;
 
 #ifdef DPDK_NETDEV
+#include <rte_flow.h>
 
+int dpdk_netdev_is_dpdk_port(struct dp_netdev *dp, odp_port_t in_port);
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
+struct rte_flow *
+netdev_dpdk_rte_flow_validate(struct netdev *netdev,
+                              struct rte_flow_attr *attr,
+                              struct rte_flow_item *item,
+                              struct rte_flow_action *action,
+                              struct rte_flow_error *error);
 void
 netdev_dpdk_get_pipeline(__attribute__ ((unused))const struct netdev *netdev,
                          struct dp_packet *packet,