@@ -736,6 +736,12 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
struct ip_header *nh = dp_packet_l3(pkt);
packet_set_ipv4_addr(pkt, &nh->ip_src,
conn->rev_key.dst.addr.ipv4);
+ /*
+ if (pkt->md.skb_priority & (1 << NAT_MOD_SIP)) {
+ } else {
+ pkt->md.skb_priority |= 1 << NAT_MOD_SIP;
+ }
+ */
} else {
struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -751,6 +757,12 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
struct ip_header *nh = dp_packet_l3(pkt);
packet_set_ipv4_addr(pkt, &nh->ip_dst,
conn->rev_key.src.addr.ipv4);
+ /*
+ if (pkt->md.skb_priority & (1 << NAT_MOD_DIP)) {
+ } else {
+ pkt->md.skb_priority |= 1 << NAT_MOD_DIP;
+ }
+ */
} else {
struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -835,9 +847,17 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
conn->key.src.addr.ipv4);
+ if (pkt->md.skb_priority & (1 << NAT_MOD_SIP)) {
+ } else {
+ pkt->md.skb_priority |= 1 << NAT_MOD_SIP;
+ }
} else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
conn->key.dst.addr.ipv4);
+ if (pkt->md.skb_priority & (1 << NAT_MOD_DIP)) {
+ } else {
+ pkt->md.skb_priority |= 1 << NAT_MOD_DIP;
+ }
}
reverse_pat_packet(pkt, conn);
@@ -884,7 +904,10 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
struct ip_header *nh = dp_packet_l3(pkt);
packet_set_ipv4_addr(pkt, &nh->ip_dst,
conn->key.src.addr.ipv4);
- pkt->md.skb_priority |= 1 << SET_ACTION_SET ;
+ if (pkt->md.skb_priority & (1 << NAT_MOD_DIP)) {
+ } else {
+ pkt->md.skb_priority |= 1 << NAT_MOD_DIP;
+ }
} else {
struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -903,7 +926,10 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
struct ip_header *nh = dp_packet_l3(pkt);
packet_set_ipv4_addr(pkt, &nh->ip_src,
conn->key.dst.addr.ipv4);
- pkt->md.skb_priority |= 1 << SET_ACTION_SET ;
+ if (pkt->md.skb_priority & (1 << NAT_MOD_SIP)) {
+ } else {
+ pkt->md.skb_priority |= 1 << NAT_MOD_SIP;
+ }
} else {
struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
packet_set_ipv6_addr(pkt, conn->key.nw_proto,
@@ -1409,6 +1435,20 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
struct conn *conn = packet->md.conn;
+ if (packet->md.ori_nw_src){
+ } else {
+ struct ip_header *nh = dp_packet_l3(packet);
+ packet->md.ori_nw_src =get_16aligned_be32(&nh->ip_src);
+ }
+ if (packet->md.ori_nw_dst){
+ } else {
+ struct ip_header *nh = dp_packet_l3(packet);
+ packet->md.ori_nw_dst =get_16aligned_be32(&nh->ip_dst);
+ }
+ /* clear NAT_MOD_ Flag to 0, when doing un_nat action, it should
+ * be set to non-zero value
+ */
+ packet->md.skb_priority = 0;
if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) {
write_ct_md(packet, zone, NULL, NULL, NULL);
} else if (conn && conn->key.zone == zone && !force
@@ -420,6 +420,8 @@ struct dp_flow_offload_item {
uint8_t mod_flag; /* from packet->md.mod_flag */
bool should_jump;
struct nat_action_info_t nat_action;
+ ovs_be32 ori_nw_src;
+ ovs_be32 ori_nw_dst;
struct ovs_list node;
};
@@ -2481,6 +2483,8 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
info.dpif_type_str = dpif_type_str;
info.mod_flag = offload->mod_flag;
info.nat_action = offload->nat_action;
+ info.ori_nw_src = offload->ori_nw_src;
+ info.ori_nw_dst = offload->ori_nw_dst;
info.ct_enable = false;
info.group_id = 0;
@@ -2598,30 +2602,27 @@ parse_netdev_flow_put(struct match *match, const struct nlattr *actions,
size_t actions_len, struct nat_action_info_t *nat_action
)
{
- bool action_has_recirc = false;
- bool action_has_set = false;
- bool action_has_dnat = false;
bool action_has_null_nat = false;
bool action_has_ct = false;
bool action_has_ct_nat = false;
bool ret = false;
struct nlattr *nla;
size_t left;
- uint32_t mod_flag = match->flow.skb_priority;
/* filter non IP pkt out */
if ((match->flow.dl_type != htons(ETH_TYPE_IP)) &&
(match->flow.dl_type != htons(ETH_TYPE_IPV6))) {
goto out;
}
+ /* recirc_id =0 , need to translate*/
+ if (match->flow.recirc_id ==0){
+ ret = true;
+ goto out;
+ }
+
/*parse actions to decide flags */
NL_ATTR_FOR_EACH_UNSAFE (nla, left, actions, actions_len) {
- if (nl_attr_type(nla) == OVS_ACTION_ATTR_RECIRC) {
- action_has_recirc = true;
- } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET ||
- nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
- action_has_set = true;
- } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
+ if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
const struct nlattr *b;
unsigned int ct_left;
@@ -2653,8 +2654,6 @@ parse_netdev_flow_put(struct match *match, const struct nlattr *actions,
nat_action->nat_action |=
((sub_type_nest == OVS_NAT_ATTR_SRC)
? NAT_ACTION_SRC : NAT_ACTION_DST);
- action_has_dnat = (sub_type_nest == OVS_NAT_ATTR_DST)?
- true : action_has_dnat;
action_has_null_nat = false;
break;
case OVS_NAT_ATTR_IP_MIN:
@@ -2712,84 +2711,24 @@ parse_netdev_flow_put(struct match *match, const struct nlattr *actions,
}
} /*FOR_EACH_UNSAFE (nla, left, actions, actions_len) */
- /* mod_flag has been set,means it's loopbacked pkt with new match
+ /* recirc_id is not 0,means it's loopbacked pkt
* we should try to offload jump action and group id
- * in this condition: there are three scenarios:
- * 1. care ct_state && ct_state = est (should offload)
- * 2. care ct_state && ct_state != est (not offload)
- * 3. don't care ct_state (should try offload)
- * For 1, the rte_flow ct action will be set
- * For 3, the rte_flow ct action won't be set
- */
- if (mod_flag) {
- if (match->wc.masks.ct_state &&
- (match->wc.masks.ct_state & match->flow.ct_state & CS_TRACKED) &&
- !(match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) &&
- !(action_has_ct_nat && !action_has_null_nat) ) {
- /* for nat(src= or nat(dst= , we should try to offload no matter
- * what ct_state is
- */
- ret = false;
- } else {
- ret = true;
- }
- goto out;
- }
-
- /* no mod_flag means: 1. no set action at all
- * 2. first translated pkt with set action
- * 3. first translated pkt with reply dir nat action
- * 4. first translated pkt with req dir dnat action
- * 5. others
- * For 1 , only established pkt should try to offload
- * For 2 and 3 , still established pkt should be offload
- * For 4 , we don't care ct_state,and try to offload
- * so for 1 - 3: we cannot offload non-est pkt
- * for 4, we try to offload non-est pkt
- * for 5, no need to offload
+ * in this condition: there are two scenarios:
+ * 1. nat action with src/dst info: not care ct_state should offload
+ * 2. others:only ct_state = est should offload
*/
-
- /* scenario:4 */
- if (action_has_dnat) {
+ if (action_has_ct_nat && !action_has_null_nat) {
+ /* for nat(src= or nat(dst= , we should try to offload no matter
+ * what ct_state is
+ */
ret = true;
goto out;
- }
- if (match->wc.masks.ct_state &&
+ } else if (match->wc.masks.ct_state &&
(match->wc.masks.ct_state & match->flow.ct_state & CS_TRACKED) &&
- !(match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) ){
- goto out;
- }
-
- /* scenario 3: if in reply direction's nat and also +est ,
- * we should try to offload it
- */
- if (match->wc.masks.ct_state &&
- action_has_ct_nat &&
- action_has_null_nat &&
- (match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) &&
- (match->wc.masks.ct_state & match->flow.ct_state & CS_REPLY_DIR) ){
+ (match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED) ){
ret = true;
goto out;
}
- /* scenario:1 */
- /* no mod_flag and no recirc flow: we should try to offload */
- if (action_has_recirc==false) {
- ret = true;
- goto out;
- }
- /* scenario:2 */
- /* no mod_flag and recirc and set and +est,means it's first loopback pkt
- * we should try to offload jump action and "no" group id
- */
- if (action_has_set==true) {
- if (match->wc.masks.ct_state &&
- (match->wc.masks.ct_state & match->flow.ct_state & CS_TRACKED) &&
- (match->wc.masks.ct_state & match->flow.ct_state & CS_ESTABLISHED)) {
- ret = true;
- goto out;
- }
- }
-
out:
/* scenario:5 */
@@ -2841,6 +2780,13 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
offload->actions_len = actions_len;
offload->mod_flag = match->flow.skb_priority;
offload->nat_action = nat_action;
+ if (offload->mod_flag & (1<<NAT_MOD_SIP) ) {
+ offload->ori_nw_src = match->flow.ct_nw_src;
+ }
+ if (offload->mod_flag & (1<<NAT_MOD_DIP) ) {
+ offload->ori_nw_dst = match->flow.ct_nw_dst;
+ }
+
dp_netdev_append_flow_offload(offload);
}
@@ -7077,6 +7023,16 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
ovs_mutex_lock(&pmd->flow_mutex);
netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
if (OVS_LIKELY(!netdev_flow)) {
+ /* If pkt has been modified before, we should store
+ * original mac/ip information into match
+ */
+ if (match.flow.skb_priority & (1<<NAT_MOD_SIP) ) {
+ match.flow.ct_nw_src = packet->md.ori_nw_src;
+ }
+ if (match.flow.skb_priority & (1<<NAT_MOD_DIP) ) {
+ match.flow.ct_nw_dst = packet->md.ori_nw_dst;
+ }
+
netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
add_actions->data,
add_actions->size);
@@ -7707,13 +7663,6 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
nat_action_info.nat_action |=
((sub_type_nest == OVS_NAT_ATTR_SRC)
? NAT_ACTION_SRC : NAT_ACTION_DST);
- /*For DNAT, we should also set mod_flag */
- if (sub_type_nest == OVS_NAT_ATTR_DST) {
- struct dp_packet *packet;
- DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
- packet->md.skb_priority |= 1 << SET_ACTION_SET ;
- }
- }
break;
case OVS_NAT_ATTR_IP_MIN:
memcpy(&nat_action_info.min_addr,
@@ -32,6 +32,7 @@
#include <netinet/icmp6.h>
#include "id-pool.h"
#include "odp-util.h"
+#include "odp-execute.h"
VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
@@ -858,10 +859,7 @@ parse_flow_match(struct flow_patterns *patterns,
return -1;
}
- /* Group id */
- if (info->mod_flag) {
- info->group_id = match->flow.recirc_id;
- }
+ info->group_id = match->flow.recirc_id;
/* Eth */
if (match->flow.packet_type == htonl(PT_ETH)) {
struct rte_flow_item_eth *spec, *mask;
@@ -932,8 +930,19 @@ parse_flow_match(struct flow_patterns *patterns,
spec->hdr.type_of_service = match->flow.nw_tos;
spec->hdr.time_to_live = match->flow.nw_ttl;
spec->hdr.next_proto_id = match->flow.nw_proto;
- spec->hdr.src_addr = match->flow.nw_src;
- spec->hdr.dst_addr = match->flow.nw_dst;
+ /* If IP has been modified, we should translate origin IP */
+ if (info->mod_flag & (1<<NAT_MOD_SIP) ) {
+ spec->hdr.src_addr = match->flow.ct_nw_src;
+ info->mod_nw_src = match->flow.nw_src;
+ } else {
+ spec->hdr.src_addr = match->flow.nw_src;
+ }
+ if (info->mod_flag & (1<<NAT_MOD_DIP) ) {
+ spec->hdr.dst_addr = match->flow.ct_nw_dst;
+ info->mod_nw_dst = match->flow.nw_dst;
+ } else {
+ spec->hdr.dst_addr = match->flow.nw_dst;
+ }
mask->hdr.type_of_service = match->wc.masks.nw_tos;
mask->hdr.time_to_live = match->wc.masks.nw_ttl;
@@ -1415,6 +1424,29 @@ add_ipv4_nat_action(struct flow_actions *actions,
return 0;
}
+static int
+add_ipv4_un_nat_action(struct flow_actions *actions,
+ struct offload_info *info)
+{
+ /* One rte_flow SET_IPV4_* per NAT_MOD_* bit; fixed address only, no range. */
+ if (info->mod_flag & (1<<NAT_MOD_SIP) ) {
+ __be32 ipv4_src = info->mod_nw_src;
+
+ if (add_set_flow_action__(actions, &ipv4_src, NULL, sizeof(ipv4_src),
+ RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC) ) {
+ return -1;
+ }
+ }
+ if (info->mod_flag & (1<<NAT_MOD_DIP) ) {
+ __be32 ipv4_dst = info->mod_nw_dst;
+
+ if (add_set_flow_action__(actions, &ipv4_dst, NULL, sizeof(ipv4_dst),
+ RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) ) {
+ return -1;
+ }
+ }
+ return 0;
+}
/* Maximum number of items in struct rte_flow_action_vxlan_encap.
* ETH / IPv4(6) / UDP / VXLAN / END
*/
@@ -1790,7 +1822,14 @@ parse_flow_actions(struct netdev *netdev,
if(add_ipv4_nat_action(actions,&(info->nat_action)) ) {
return -1;
}
+ } else if (info->mod_flag & (1<<NAT_MOD_SIP) ||
+ info->mod_flag & (1<<NAT_MOD_DIP) ){
+ /* translate un_nat IP mod action to rte_flow set action */
+ if(add_ipv4_un_nat_action(actions,info)) {
+ return -1;
+ }
}
+
add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
return 0;
}
@@ -80,6 +80,11 @@ struct offload_info {
uint8_t mod_flag; /* from packet->md.mod_flag */
bool ct_enable; /* if true, we should translate to rte_flow_action_ct */
+ ovs_be32 ori_nw_src;
+ ovs_be32 ori_nw_dst;
+ ovs_be32 mod_nw_src;
+ ovs_be32 mod_nw_dst;
+
uint32_t group_id;
struct nat_action_info_t nat_action;
};
@@ -955,14 +955,12 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
case OVS_ACTION_ATTR_SET:
DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
odp_execute_set_action(packet, nl_attr_get(a));
- packet->md.skb_priority |= 1 << SET_ACTION_SET ;
}
break;
case OVS_ACTION_ATTR_SET_MASKED:
DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
odp_execute_masked_set_action(packet, nl_attr_get(a));
- packet->md.skb_priority |= 1 << SET_ACTION_SET ;
}
break;
@@ -30,7 +30,10 @@ struct dp_packet_batch;
enum {
RECIRC_ACTION_SET,
- SET_ACTION_SET,
+ NAT_MOD_SIP, /* Bit index in md.skb_priority: NAT rewrote IPv4 src. */
+ NAT_MOD_DIP, /* Bit index in md.skb_priority: NAT rewrote IPv4 dst. */
+ NAT_MOD_SPORT, /* Presumably L4 source port rewritten; TODO confirm. */
+ NAT_MOD_DPORT, /* Presumably L4 dest port rewritten; TODO confirm. */
};
typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
@@ -122,6 +122,10 @@ PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
struct ovs_key_ct_tuple_ipv4 ipv4;
struct ovs_key_ct_tuple_ipv6 ipv6; /* Used only if */
} ct_orig_tuple; /* 'ct_orig_tuple_ipv6' is set */
+ ovs_be32 ori_nw_src;
+ ovs_be32 ori_nw_dst;
+ ovs_be16 ori_tp_sport;
+ ovs_be16 ori_tp_dport;
);
PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline2,
@@ -171,6 +175,10 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
md->tunnel.ipv6_dst = in6addr_any;
md->in_port.odp_port = port;
md->conn = NULL;
+ md->ori_nw_src = 0;
+ md->ori_nw_dst = 0;
+ md->ori_tp_sport = 0;
+ md->ori_tp_dport = 0;
}
/* This function prefetches the cachelines touched by pkt_metadata_init()
From: Taoyunxiang <taoyunxiang@cmss.chinamobile.com> Code Source From: Self Code Description: The open-source code does not support conntrack and NAT offloading via rte_flow. This change adds an initial version to support it. Jira: #[Optional] 市场项目编号(名称):[Optional] --- lib/conntrack.c | 44 +++++++++++++++- lib/dpif-netdev.c | 127 ++++++++++++++-------------------------------- lib/netdev-offload-dpdk.c | 51 ++++++++++++++++--- lib/netdev-offload.h | 5 ++ lib/odp-execute.c | 2 - lib/odp-execute.h | 5 +- lib/packets.h | 8 +++ 7 files changed, 142 insertions(+), 100 deletions(-)