diff mbox series

[ovs-dev,48/62] dpif-netdev: Add NAT offloading init version

Message ID 20201228092520.11807-49-taoyunxiang@cmss.chinamobile.com
State Not Applicable
Headers show
Series DPDK Offload API to test | expand

Commit Message

Tao YunXiang Dec. 28, 2020, 9:25 a.m. UTC
From: Taoyunxiang <taoyunxiang@cmss.chinamobile.com>

Code Source From: Self Code
Description:
    The open-source code does not support conntrack and NAT offloading via
rte_flow. This change adds an initial version to support it.

Jira:  #[Optional]
市场项目编号(名称):[Optional]
---
 lib/conntrack.c           |  44 +++++++++++++++-
 lib/dpif-netdev.c         | 127 ++++++++++++++--------------------------------
 lib/netdev-offload-dpdk.c |  51 ++++++++++++++++---
 lib/netdev-offload.h      |   5 ++
 lib/odp-execute.c         |   2 -
 lib/odp-execute.h         |   5 +-
 lib/packets.h             |   8 +++
 7 files changed, 142 insertions(+), 100 deletions(-)
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 5318df0..adb4dbb 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -736,6 +736,12 @@  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
             struct ip_header *nh = dp_packet_l3(pkt);
             packet_set_ipv4_addr(pkt, &nh->ip_src,
                                  conn->rev_key.dst.addr.ipv4);
+            /*
+            if (pkt->md.skb_priority & (1 << NAT_MOD_SIP)) {
+            } else {
+                pkt->md.skb_priority |= 1 << NAT_MOD_SIP;
+            }
+             */
         } else {
             struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
             packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -751,6 +757,12 @@  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
             struct ip_header *nh = dp_packet_l3(pkt);
             packet_set_ipv4_addr(pkt, &nh->ip_dst,
                                  conn->rev_key.src.addr.ipv4);
+            /*
+            if (pkt->md.skb_priority & (1 << NAT_MOD_DIP)) {
+            } else {
+                pkt->md.skb_priority |= 1 << NAT_MOD_DIP;
+            }
+             */
         } else {
             struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
             packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -835,9 +847,17 @@  reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
         if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
             packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
                                  conn->key.src.addr.ipv4);
+            if (pkt->md.skb_priority & (1 << NAT_MOD_SIP)) {
+            } else {
+                pkt->md.skb_priority |= 1 << NAT_MOD_SIP;
+            }
         } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
             packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
                                  conn->key.dst.addr.ipv4);
+            if (pkt->md.skb_priority & (1 << NAT_MOD_DIP)) {
+            } else {
+                pkt->md.skb_priority |= 1 << NAT_MOD_DIP;
+            }
         }
 
         reverse_pat_packet(pkt, conn);
@@ -884,7 +904,10 @@  un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
             struct ip_header *nh = dp_packet_l3(pkt);
             packet_set_ipv4_addr(pkt, &nh->ip_dst,
                                  conn->key.src.addr.ipv4);
-            pkt->md.skb_priority |= 1 << SET_ACTION_SET ;
+            if (pkt->md.skb_priority & (1 << NAT_MOD_DIP)) {
+            } else {
+                pkt->md.skb_priority |= 1 << NAT_MOD_DIP;
+            }
         } else {
             struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
             packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -903,7 +926,10 @@  un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
             struct ip_header *nh = dp_packet_l3(pkt);
             packet_set_ipv4_addr(pkt, &nh->ip_src,
                                  conn->key.dst.addr.ipv4);
-            pkt->md.skb_priority |= 1 << SET_ACTION_SET ;
+            if (pkt->md.skb_priority & (1 << NAT_MOD_SIP)) {
+            } else {
+                pkt->md.skb_priority |= 1 << NAT_MOD_SIP;
+            }
         } else {
             struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
             packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -1409,6 +1435,20 @@  conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
         struct conn *conn = packet->md.conn;
+        if (packet->md.ori_nw_src){
+        } else {
+            struct ip_header *nh = dp_packet_l3(packet);
+            packet->md.ori_nw_src =get_16aligned_be32(&nh->ip_src);
+        }
+        if (packet->md.ori_nw_dst){
+        } else {
+            struct ip_header *nh = dp_packet_l3(packet);
+            packet->md.ori_nw_dst =get_16aligned_be32(&nh->ip_dst);
+        }
+        /* Clear the NAT_MOD_* flags to 0; the un_nat action sets them to
+         * a non-zero value when it rewrites an address.
+         */
+        packet->md.skb_priority = 0;
         if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) {
             write_ct_md(packet, zone, NULL, NULL, NULL);
         } else if (conn && conn->key.zone == zone && !force
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e48423f..a40441f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -420,6 +420,8 @@  struct dp_flow_offload_item {
     uint8_t mod_flag; /* from packet->md.mod_flag */
     bool   should_jump;
     struct nat_action_info_t nat_action;
+    ovs_be32 ori_nw_src;
+    ovs_be32 ori_nw_dst;
 
     struct ovs_list node;
 };
@@ -2481,6 +2483,8 @@  dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
     info.dpif_type_str = dpif_type_str;
     info.mod_flag = offload->mod_flag;
     info.nat_action = offload->nat_action;
+    info.ori_nw_src = offload->ori_nw_src;
+    info.ori_nw_dst = offload->ori_nw_dst;
     info.ct_enable = false;
     info.group_id = 0;
 
@@ -2598,30 +2602,27 @@  parse_netdev_flow_put(struct match *match, const struct nlattr *actions,
                       size_t actions_len,  struct nat_action_info_t *nat_action
                       )
 {
-    bool action_has_recirc = false;
-    bool action_has_set = false;
-    bool action_has_dnat = false;
     bool action_has_null_nat = false;
     bool action_has_ct = false;
     bool action_has_ct_nat = false;
     bool ret = false;
     struct nlattr *nla;
     size_t left;
-    uint32_t mod_flag = match->flow.skb_priority;
 
     /* filter non IP pkt out */
     if ((match->flow.dl_type != htons(ETH_TYPE_IP)) &&
         (match->flow.dl_type != htons(ETH_TYPE_IPV6))) {
         goto out;
     }
+    /* recirc_id == 0: need to translate. */
+    if (match->flow.recirc_id ==0){
+        ret = true;
+        goto out;
+    }
+
     /*parse actions to decide flags */
     NL_ATTR_FOR_EACH_UNSAFE (nla, left, actions, actions_len) {
-        if (nl_attr_type(nla) == OVS_ACTION_ATTR_RECIRC) {
-            action_has_recirc = true;
-        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET ||
-                   nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
-            action_has_set = true;
-        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
+        if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
             const struct nlattr *b;
             unsigned int ct_left;
 
@@ -2653,8 +2654,6 @@  parse_netdev_flow_put(struct match *match, const struct nlattr *actions,
                             nat_action->nat_action |=
                                 ((sub_type_nest == OVS_NAT_ATTR_SRC)
                                     ? NAT_ACTION_SRC : NAT_ACTION_DST);
-                            action_has_dnat = (sub_type_nest == OVS_NAT_ATTR_DST)?
-                                               true : action_has_dnat;
                             action_has_null_nat = false;
                             break;
                         case OVS_NAT_ATTR_IP_MIN:
@@ -2712,84 +2711,24 @@  parse_netdev_flow_put(struct match *match, const struct nlattr *actions,
         }
     } /*FOR_EACH_UNSAFE (nla, left, actions, actions_len) */
 
-    /* mod_flag has been set,means it's loopbacked pkt with new match
+     /* recirc_id is not 0,means it's loopbacked pkt
      * we should try to offload jump action and group id
-     * in this condition: there are three scenarios:
-     * 1. care ct_state && ct_state = est  (should offload)
-     * 2. care ct_state && ct_state != est (not offload)
-     * 3. don't care ct_state (should try offload)
-     * For 1, the rte_flow ct action will be set
-     * For 3, the rte_flow ct action won't be set
-     */
-    if (mod_flag) {
-        if (match->wc.masks.ct_state &&
-            (match->wc.masks.ct_state & match->flow.ct_state & CS_TRACKED) &&
-            !(match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) &&
-            !(action_has_ct_nat && !action_has_null_nat) ) {
-            /* for nat(src= or nat(dst= , we should try to offload no matter
-             * what ct_state is
-             */
-            ret = false;
-        } else {
-            ret = true;
-        }
-        goto out;
-    }
-
-    /* no mod_flag means: 1. no set action at all
-     *                    2. first translated pkt with set action
-     *                    3. first translated pkt with reply dir nat action
-     *                    4. first translated pkt with req dir dnat action
-     *                    5. others
-     * For 1 , only established pkt should try to offload
-     * For 2 and 3 , still established pkt should be offload
-     * For 4 , we don't care ct_state,and try to offload
-     * so for 1 - 3: we cannot offload non-est pkt
-     * for 4, we try to offload non-est pkt
-     * for 5, no need to offload
+     * in this case, there are two scenarios:
+     * 1. NAT action with src/dst info: offload regardless of ct_state.
+     * 2. Others: offload only when ct_state is established.
      */
-
-    /* scenario:4 */
-    if (action_has_dnat) {
+    if (action_has_ct_nat && !action_has_null_nat) {
+        /* for nat(src= or nat(dst= , we should try to offload no matter
+         * what ct_state is
+         */
         ret = true;
         goto out;
-    }
-    if (match->wc.masks.ct_state &&
+    } else if (match->wc.masks.ct_state &&
         (match->wc.masks.ct_state & match->flow.ct_state & CS_TRACKED) &&
-        !(match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) ){
-        goto out;
-    }
-
-    /* scenario 3: if in reply direction's nat and also +est ,
-     * we should try to offload it
-     */
-    if (match->wc.masks.ct_state &&
-        action_has_ct_nat &&
-        action_has_null_nat &&
-        (match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) &&
-        (match->wc.masks.ct_state & match->flow.ct_state & CS_REPLY_DIR) ){
+        (match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) ){
         ret = true;
         goto out;
     }
-    /* scenario:1 */
-    /* no mod_flag and no recirc flow: we should try to offload */
-    if (action_has_recirc==false) {
-        ret = true;
-        goto out;
-    }
-    /* scenario:2 */
-    /* no mod_flag and recirc and set and +est,means it's first loopback pkt
-     * we should try to offload jump action and "no" group id
-     */
-    if (action_has_set==true) {
-        if (match->wc.masks.ct_state &&
-            (match->wc.masks.ct_state & match->flow.ct_state & CS_TRACKED) &&
-            (match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED)) {
-            ret = true;
-            goto out;
-        }
-    }
-
 
 out:
     /* scenario:5 */
@@ -2841,6 +2780,13 @@  queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
     offload->actions_len = actions_len;
     offload->mod_flag = match->flow.skb_priority;
     offload->nat_action = nat_action;
+    if (offload->mod_flag & (1<<NAT_MOD_SIP) ) {
+        offload->ori_nw_src = match->flow.ct_nw_src;
+    }
+    if (offload->mod_flag & (1<<NAT_MOD_DIP) ) {
+        offload->ori_nw_dst = match->flow.ct_nw_dst;
+    }
+
 
     dp_netdev_append_flow_offload(offload);
 }
@@ -7077,6 +7023,16 @@  handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
         ovs_mutex_lock(&pmd->flow_mutex);
         netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
         if (OVS_LIKELY(!netdev_flow)) {
+            /* If pkt has been modified before, we should store
+             * original mac/ip information into match
+             */
+            if (match.flow.skb_priority & (1<<NAT_MOD_SIP) ) {
+              match.flow.ct_nw_src = packet->md.ori_nw_src;
+            }
+            if (match.flow.skb_priority & (1<<NAT_MOD_DIP) ) {
+              match.flow.ct_nw_dst = packet->md.ori_nw_dst;
+            }
+
             netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
                                              add_actions->data,
                                              add_actions->size);
@@ -7707,13 +7663,6 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
                         nat_action_info.nat_action |=
                             ((sub_type_nest == OVS_NAT_ATTR_SRC)
                                 ? NAT_ACTION_SRC : NAT_ACTION_DST);
-                        /*For DNAT, we should also set mod_flag */
-                        if (sub_type_nest == OVS_NAT_ATTR_DST) {
-                            struct dp_packet *packet;
-                            DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
-                                packet->md.skb_priority |= 1 << SET_ACTION_SET ;
-                            }
-                        }
                         break;
                     case OVS_NAT_ATTR_IP_MIN:
                         memcpy(&nat_action_info.min_addr,
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index f04f61f..59ecf88 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -32,6 +32,7 @@ 
 #include <netinet/icmp6.h>
 #include "id-pool.h"
 #include "odp-util.h"
+#include "odp-execute.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
@@ -858,10 +859,7 @@  parse_flow_match(struct flow_patterns *patterns,
         return -1;
     }
 
-    /* Group id */
-    if (info->mod_flag) {
-        info->group_id = match->flow.recirc_id;
-    }
+    info->group_id = match->flow.recirc_id;
     /* Eth */
     if (match->flow.packet_type == htonl(PT_ETH)) {
         struct rte_flow_item_eth *spec, *mask;
@@ -932,8 +930,19 @@  parse_flow_match(struct flow_patterns *patterns,
         spec->hdr.type_of_service = match->flow.nw_tos;
         spec->hdr.time_to_live    = match->flow.nw_ttl;
         spec->hdr.next_proto_id   = match->flow.nw_proto;
-        spec->hdr.src_addr        = match->flow.nw_src;
-        spec->hdr.dst_addr        = match->flow.nw_dst;
+        /* If IP has been modified, we should translate origin IP */
+        if (info->mod_flag & (1<<NAT_MOD_SIP) ) {
+            spec->hdr.src_addr        = match->flow.ct_nw_src;
+            info->mod_nw_src = match->flow.nw_src;
+        } else {
+            spec->hdr.src_addr        = match->flow.nw_src;
+        }
+        if (info->mod_flag & (1<<NAT_MOD_DIP) ) {
+            spec->hdr.dst_addr        = match->flow.ct_nw_dst;
+            info->mod_nw_dst = match->flow.nw_dst;
+        } else {
+            spec->hdr.dst_addr        = match->flow.nw_dst;
+        }
 
         mask->hdr.type_of_service = match->wc.masks.nw_tos;
         mask->hdr.time_to_live    = match->wc.masks.nw_ttl;
@@ -1415,6 +1424,29 @@  add_ipv4_nat_action(struct flow_actions *actions,
     return 0;
 }
 
+static int
+add_ipv4_un_nat_action(struct flow_actions *actions,
+                       struct offload_info *info)
+{
+    /* Only setting a fixed SIP/DIP is supported; ranges are not. */
+    if (info->mod_flag & (1<<NAT_MOD_SIP) ) {
+        __be32 ipv4_src = info->mod_nw_src;
+
+        if (add_set_flow_action__(actions, &ipv4_src, NULL, sizeof(ipv4_src),
+                                  RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC) ) {
+            return -1;
+        }
+    }
+    if (info->mod_flag & (1<<NAT_MOD_DIP) ) {
+        __be32 ipv4_dst = info->mod_nw_dst;
+
+        if (add_set_flow_action__(actions, &ipv4_dst, NULL, sizeof(ipv4_dst),
+                                  RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) ) {
+            return -1;
+        }
+    }
+    return 0;
+}
 /* Maximum number of items in struct rte_flow_action_vxlan_encap.
  * ETH / IPv4(6) / UDP / VXLAN / END
  */
@@ -1790,7 +1822,14 @@  parse_flow_actions(struct netdev *netdev,
         if(add_ipv4_nat_action(actions,&(info->nat_action)) ) {
             return -1;
         }
+    } else if (info->mod_flag & (1<<NAT_MOD_SIP) ||
+               info->mod_flag & (1<<NAT_MOD_DIP) ){
+    /* translate un_nat IP mod action to rte_flow set action */
+        if(add_ipv4_un_nat_action(actions,info)) {
+            return -1;
+        }
     }
+
     add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
     return 0;
 }
diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
index 7c39706..f2c4fbc 100644
--- a/lib/netdev-offload.h
+++ b/lib/netdev-offload.h
@@ -80,6 +80,11 @@  struct offload_info {
 
     uint8_t mod_flag; /* from packet->md.mod_flag */
     bool    ct_enable; /* if true, we should translate to rte_flow_action_ct */
+    ovs_be32 ori_nw_src;
+    ovs_be32 ori_nw_dst;
+    ovs_be32 mod_nw_src;
+    ovs_be32 mod_nw_dst;
+
     uint32_t group_id;
     struct nat_action_info_t nat_action;
 };
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 559f260..42d3335 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -955,14 +955,12 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_SET:
             DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                 odp_execute_set_action(packet, nl_attr_get(a));
-                packet->md.skb_priority |= 1 << SET_ACTION_SET ;
             }
             break;
 
         case OVS_ACTION_ATTR_SET_MASKED:
             DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
                 odp_execute_masked_set_action(packet, nl_attr_get(a));
-                packet->md.skb_priority |= 1 << SET_ACTION_SET ;
             }
             break;
 
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 74e308c..5235fa1 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -30,7 +30,10 @@  struct dp_packet_batch;
 
 enum {
     RECIRC_ACTION_SET,
-    SET_ACTION_SET,
+    NAT_MOD_SIP,
+    NAT_MOD_DIP,
+    NAT_MOD_SPORT,
+    NAT_MOD_DPORT,
 };
 
 typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
diff --git a/lib/packets.h b/lib/packets.h
index 84c554e..25e0325 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -122,6 +122,10 @@  PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
         struct ovs_key_ct_tuple_ipv4 ipv4;
         struct ovs_key_ct_tuple_ipv6 ipv6;   /* Used only if                */
     } ct_orig_tuple;                         /* 'ct_orig_tuple_ipv6' is set */
+    ovs_be32 ori_nw_src;
+    ovs_be32 ori_nw_dst;
+    ovs_be16 ori_tp_sport;
+    ovs_be16 ori_tp_dport;
 );
 
 PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline2,
@@ -171,6 +175,10 @@  pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
     md->tunnel.ipv6_dst = in6addr_any;
     md->in_port.odp_port = port;
     md->conn = NULL;
+    md->ori_nw_src = 0;
+    md->ori_nw_dst = 0;
+    md->ori_tp_sport = 0;
+    md->ori_tp_dport = 0;
 }
 
 /* This function prefetches the cachelines touched by pkt_metadata_init()