[ovs-dev,RFC] Pipeline packet processing in OVS using FVL flow director.

Message ID	1480616636-40703-2-git-send-email-sugesh.chandran@intel.com
State	Not Applicable
Delegated to:	Daniele Di Proietto
Headers	show Return-Path: <ovs-dev-bounces@openvswitch.org> From: Sugesh Chandran <sugesh.chandran@intel.com> To: dev@openvswitch.org, ronye@mellanox.com, ktraynor@redhat.com, flavio@flaviof.com Date: Thu, 1 Dec 2016 18:23:56 +0000 Message-Id: <1480616636-40703-2-git-send-email-sugesh.chandran@intel.com> In-Reply-To: <1480616636-40703-1-git-send-email-sugesh.chandran@intel.com> References: <1480616636-40703-1-git-send-email-sugesh.chandran@intel.com> Subject: [ovs-dev] [RFC PATCH] Pipeline packet processing in OVS using FVL flow director. Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org

diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h index df80dfe..3639fc0 100644 --- a/include/openvswitch/flow.h +++ b/include/openvswitch/flow.h @@ -23,7 +23,7 @@ /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 36 +#define FLOW_WC_SEQ 37 /* Number of Open vSwitch extension 32-bit registers. */ #define FLOW_N_REGS 16 @@ -99,6 +99,9 @@ struct flow { uint32_t conj_id; /* Conjunction ID. */ ofp_port_t actset_output; /* Output port in action set. */ + uint16_t pipeline_id; + uint16_t pipeline_state; + uint8_t pad0[4]; /* Pad to make pipeline 64 bit */ /* L2, Order the same as in the Ethernet header! (64-bit aligned) */ struct eth_addr dl_dst; /* Ethernet destination address. */ struct eth_addr dl_src; /* Ethernet source address. */ @@ -135,8 +138,8 @@ BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t) - == sizeof(struct flow_tnl) + 248 - && FLOW_WC_SEQ == 36); + == sizeof(struct flow_tnl) + 256 + && FLOW_WC_SEQ == 37); /* Incremental points at which flow classification may be performed in * segments. diff --git a/include/openvswitch/packets.h b/include/openvswitch/packets.h index 5d97309..26fbc87 100644 --- a/include/openvswitch/packets.h +++ b/include/openvswitch/packets.h @@ -19,6 +19,13 @@ #include <netinet/in.h> #include "openvswitch/tun-metadata.h" +/* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port + * numbers and other times datapath (dpif) port numbers. This union allows + * access to both. */ +union flow_in_port { + odp_port_t odp_port; + ofp_port_t ofp_port; +}; /* Tunnel information used in flow key and metadata. 
*/ struct flow_tnl { @@ -53,12 +60,4 @@ struct flow_tnl { #define FLOW_TNL_F_MASK ((1 << 4) - 1) -/* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port - * numbers and other times datapath (dpif) port numbers. This union allows - * access to both. */ -union flow_in_port { - odp_port_t odp_port; - ofp_port_t ofp_port; -}; - #endif /* packets.h */ diff --git a/lib/automake.mk b/lib/automake.mk index 81d5097..3dc0204 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -370,8 +370,12 @@ endif if DPDK_NETDEV lib_libopenvswitch_la_SOURCES += \ - lib/dpdk.c \ - lib/netdev-dpdk.c + lib/dpdk-i40e-ofld.c \ + lib/dpdk-i40e-ofld.h \ + lib/dpdk.c \ + lib/netdev-dpdk.c \ + lib/hw-pipeline.c \ + lib/hw-pipeline.h \ else lib_libopenvswitch_la_SOURCES += \ lib/dpdk-stub.c diff --git a/lib/dpdk-i40e-ofld.c b/lib/dpdk-i40e-ofld.c new file mode 100644 index 0000000..35afae7 --- /dev/null +++ b/lib/dpdk-i40e-ofld.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2016 Intel Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <config.h> + +#include "dpdk-i40e-ofld.h" +#include "errno.h" +#include "ovs-thread.h" +#include "openvswitch/vlog.h" +#include "netdev-provider.h" + +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE +VLOG_DEFINE_THIS_MODULE(dpdk_hw_ofld); + +#define VXLAN_DST_PORT 4789 +#define VXLAN_HLEN 50 +#define MAX_FDIR_RULES 8000 + +static uint32_t total_fdir_ids; +static struct ovs_mutex hw_ofld_mutex = OVS_MUTEX_INITIALIZER; + +/* + * Returns '0' if FDIR IDs reaches max limit. Only 8000 entries are + * supported in FVL. + */ +static inline uint32_t +i40e_fdir_entry_cnt_inc(void) +{ + if (total_fdir_ids < MAX_FDIR_RULES) { + ovs_mutex_lock(&hw_ofld_mutex); + total_fdir_ids++; + ovs_mutex_unlock(&hw_ofld_mutex); + return (total_fdir_ids); + } + return 0; +} + +static inline void +i40e_fdir_entry_cnt_decr(void) +{ + ovs_mutex_lock(&hw_ofld_mutex); + total_fdir_ids ? total_fdir_ids-- : 0; + ovs_mutex_unlock(&hw_ofld_mutex); +} + +/* + * Release the hardware offloading functionality from the dpdk-port. 
+ */ +int +dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port) +{ + ovs_mutex_lock(&hw_ofld_mutex); + //set_i40e_ofld_flag(dpdk_port, 0); + ovs_mutex_unlock(&hw_ofld_mutex); + return 0; +} + +int +dpdk_eth_dev_hw_ofld_init(struct netdev_dpdk *dev, + int n_rxq, int n_txq, + struct rte_eth_conf *port_conf) +{ + int err = 0; + struct rte_eth_dev_info info; + uint16_t vendor_id, device_id; + + rte_eth_dev_info_get(get_dpdk_port_id(dev), &info); + vendor_id = info.pci_dev->id.vendor_id; + device_id = info.pci_dev->id.device_id; + /* Configure vxlan offload only if it is an FVL NIC */ + if (vendor_id != PCI_VENDOR_ID_INTEL || device_id != + I40E_DEV_ID_SFP_XL710) { + //ovs_mutex_lock(&hw_ofld_mutex); + //set_i40e_ofld_flag(dev, 0); + //ovs_mutex_unlock(&hw_ofld_mutex); + VLOG_INFO("Failed to configure NIC, unsupported NIC"); + err = rte_eth_dev_configure(get_dpdk_port_id(dev), + n_rxq, n_txq, port_conf); + return err; + } + //ovs_mutex_lock(&hw_ofld_mutex); + //set_i40e_ofld_flag(dev, 1); + //ovs_mutex_unlock(&hw_ofld_mutex); + /* Configure FVL FDIR VxLAN tunnel handling */ + port_conf->fdir_conf.mode = RTE_FDIR_MODE_PERFECT; + port_conf->fdir_conf.status = RTE_FDIR_REPORT_STATUS_ALWAYS; + port_conf->fdir_conf.flex_conf.nb_payloads = 1; + port_conf->fdir_conf.flex_conf.flex_set[0].type = RTE_ETH_L4_PAYLOAD; + /* Need to initialize all the 16 flex bytes, no matter + * what we are really using, possibly a DPDK bug?? 
*/ + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[0] = 0; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[1] = 1; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[2] = 2; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[3] = 3; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[4] = 4; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[5] = 5; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[6] = 6; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[7] = 7; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[8] = 8; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[9] = 9; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[10] = 10; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[11] = 11; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[12] = 12; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[13] = 13; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[14] = 14; + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[15] = 15; + err = rte_eth_dev_configure(get_dpdk_port_id(dev), + n_rxq, n_txq, port_conf); + if (err) { + VLOG_ERR("Failed to configure DPDK port with hardware offload"); + return err; + } + /*Clean all FDIR entries if any */ + err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(dev), + RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_FLUSH, NULL); + VLOG_INFO("Configured port with FDIR , %d", err); + return err; +} + +/* + * Install rules for VxLAN packets in hardware + */ +int +set_up_hw_offload_port_rule(struct netdev *netdev__, + const struct flow *flow, + /*const uint32_t hw_flow_id,*/ + const bool is_add_rule) +{ + int err = 0; + uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN] = { 0 }; + uint32_t *vni; + enum rte_filter_op filter_op; + struct rte_eth_fdir_filter entry = { 0 }; + struct netdev_dpdk *netdev; + + netdev = netdev_dpdk_cast(netdev__); + /*if (is_i40e_ofld_enable(netdev)) {*/ + entry.soft_id = (flow->tunnel.tun_id >>32); + if (!entry.soft_id) { + 
VLOG_DBG("Invalid flow ID, Cant install rule in the NIC for " + "hardware offload"); + err = ECANCELED; + return err; + } + /* Install rules in NIC only for VxLAN flows */ + if (ntohs(flow->tp_dst) != VXLAN_DST_PORT) { + return 0; + } + + entry.input.flow_ext.vlan_tci = 0; //! ignored by i40e fdir + entry.input.flow_ext.is_vf = 0; + entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP; + entry.input.flow.udp4_flow.ip.src_ip = flow->nw_src; + entry.input.flow.udp4_flow.ip.dst_ip = flow->nw_dst; + entry.input.flow.udp4_flow.ip.tos = flow->nw_tos; + entry.input.flow.udp4_flow.ip.ttl = flow->nw_ttl; + entry.input.flow.udp4_flow.ip.proto = 17; /* UDP */ + + entry.input.flow.udp4_flow.dst_port = flow->tp_dst; + entry.input.flow.udp4_flow.src_port = flow->tp_src; + vni = (uint32_t *)&flexbytes[4]; + //*vni = flow->tunnel.tun_id << 8; + *vni = flow->tunnel.tun_id; + memcpy(entry.input.flow_ext.flexbytes, flexbytes, + RTE_ETH_FDIR_MAX_FLEXLEN); + + entry.action.behavior = RTE_ETH_FDIR_PASSTHRU; + //entry.action.report_status = RTE_ETH_FDIR_REPORT_ID_FLEX_4; + entry.action.report_status = RTE_ETH_FDIR_REPORT_ID; + entry.action.rx_queue = 0; + entry.action.flex_off = 0; /* use 0 by default */ + filter_op = is_add_rule ? RTE_ETH_FILTER_ADD : + RTE_ETH_FILTER_DELETE; + err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(netdev), + RTE_ETH_FILTER_FDIR, filter_op, &entry); + + /* + * XXX : Delayed the max limit check for flow director entries after + * the configuration. Anyway the rte_eth_dev_filter_ctrl will fail if + * max limit reaches. This can be used for tracking. 
+ */ + if (is_add_rule) { + if (!i40e_fdir_entry_cnt_inc()) { + VLOG_DBG("Cant configure rule on NIC, Flow director " + "entries hits max limit"); + } + } + else { + i40e_fdir_entry_cnt_decr(); + } + if (err < 0) { + VLOG_ERR("flow director programming error in NIC: (%d)\n", err); + return err; + } + //} + return err; +} + +/*static void +process_i40e_ofld_tnl_pkts(struct + dp_netdev_pmd_thread *pmd, struct dp_packet + **in_packets, uint32_t cnt) +{ + int i, hw_pkt_cnt = 0, norm_pkt_cnt = 0; + const struct dp_netdev_flow *flow; + struct rte_mbuf *mbuf; + + for (i = 0; i < cnt; i++) { + mbuf = (struct rte_mbuf *)in_packets[i]; + if (mbuf->ol_flags & PKT_RX_FDIR_ID) { + flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf, + mbuf->hash.fdir.hi); + if (!flow) {*/ + /* Bogus flow in hw, cannot find it in OVS EMC */ +/* mbuf->ol_flags &= ~PKT_RX_FDIR_ID; + continue; + } + dp_packet_reset_packet(in_packets[i], VXLAN_HLEN); + mbuf->ol_flags |= PKT_RX_RSS_HASH; + mbuf->hash.rss = hash_finish(mbuf->hash.rss, 1); + } + } +}*/ + +/* + * Process the packets based on hardware offload configuration + */ +/*void +hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd, + struct netdev_rxq *netdev_rxq, + struct dp_packet_batch *_packets, + odp_port_t port_no) +{ + int hw_pkt_cnt; + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_rxq->netdev); + struct dp_packet **packets = _packets->packets; + int cnt = _packets->count; + + if (is_i40e_ofld_enable(netdev)) { + process_i40e_ofld_tnl_pkts(pmd, packets, cnt); + } + dp_netdev_input(pmd, _packets, port_no); +}*/ +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE diff --git a/lib/dpdk-i40e-ofld.h b/lib/dpdk-i40e-ofld.h new file mode 100644 index 0000000..fe8102f --- /dev/null +++ b/lib/dpdk-i40e-ofld.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 Intel Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DPDK_I40E_OFLD_H_ +#define DPDK_I40E_OFLD_H_ + +#include <config.h> + +#include "dp-packet.h" +#include "netdev.h" +#include "rte_pci_dev_ids.h" +#include "rte_ethdev.h" + +/* + * Macro to enable/disable HW OFFLOAD feature for DPDK. + * 1 :- Enable HW_OFFLOAD support in OVS + * 0 :- Disable HW_OFFLOAD support in OVS + */ +#define DPDK_I40E_TNL_OFFLOAD_ENABLE 1 +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE + +struct netdev_dpdk; +struct dp_netdev_pmd_thread; +struct dp_netdev_flow; + +#define I40E_DEV_ID_SFP_XL710 0x1572 +struct netdev_dpdk *netdev_dpdk_cast(const struct netdev *netdev); +extern inline int get_dpdk_port_id(struct netdev_dpdk *dpdk_port); +int dpdk_eth_dev_hw_ofld_init(struct netdev_dpdk *dev, int n_rxq, int n_txq, + struct rte_eth_conf *port_conf); +int dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port); +int set_up_hw_offload_port_rule(struct netdev *netdev__, + const struct flow *flow, + /* const uint32_t hw_flow_id, */ + const bool is_add_rule); +const struct dp_netdev_flow *lookup_hw_offload_flow_for_fdirid( + const struct dp_netdev_pmd_thread *pmd, + struct rte_mbuf *mbuf, uint32_t flow_id); + +static inline uint32_t +get_fdir_flow_id(struct dp_packet *packet) +{ + struct rte_mbuf *mbuf; + uint32_t flow_id =0; + mbuf = (struct rte_mbuf *)packet; + flow_id = mbuf->hash.fdir.hi; + mbuf->hash.fdir.hi = 0; + return flow_id; +} + +static inline void +reset_fdir_flow_id(struct dp_packet *packet) +{ + struct rte_mbuf *mbuf; + mbuf = (struct rte_mbuf *)packet; + mbuf->hash.fdir.hi = 0; +} +#endif 
//DPDK_I40E_TNL_OFFLOAD_ENABLE +#endif /* DPDK_I40E_OFLD_H_ */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index eb9f764..10d49b7 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -76,6 +76,8 @@ #include "tnl-ports.h" #include "unixctl.h" #include "util.h" +#include "hw-pipeline.h" +#include "netdev-provider.h" VLOG_DEFINE_THIS_MODULE(dpif_netdev); @@ -378,6 +380,58 @@ struct dp_netdev_flow { /* 'cr' must be the last member. */ }; +struct pipeline_flow { + struct dp_netdev_flow flow; + struct netdev_flow_key key; +}; +struct pipeline_flow_queue { + /* XXX: it is necessary to have a queue per pipeline in the future. + * For now it is only for one pipeline. + */ + struct pipeline_flow ppl_flow[MAX_PIPELINE_FLOW]; + int front; + int back; +}; +static struct pipeline_flow_queue flow_queue = { 0 }; + +static void enqueue_flow(struct dp_netdev_flow *flow, struct netdev_flow_key *key) +{ + /* Enqueue the flow into pipeline queue. Don't care about the overwrite case. + * This has to be atomic, but anyway the flow insert is handled by PMD itself. + * So no chance of preemption. 
+ */ + struct pipeline_flow *ppl_flow = &flow_queue.ppl_flow[flow_queue.front]; + memcpy(&ppl_flow->flow, flow, sizeof ppl_flow->flow); + memcpy(&ppl_flow->key, key, sizeof ppl_flow->key); + flow_queue.front++; + if (flow_queue.front >= MAX_PIPELINE_FLOW) { + /* Reset the index for read from start */ + flow_queue.front =0; + } +} + +static struct pipeline_flow *dequeue_flow(void) +{ + struct pipeline_flow *ppl_flow = &flow_queue.ppl_flow[flow_queue.back]; + if(!ppl_flow->flow.pmd_id) { + VLOG_DBG("The queue is empty, cannot read"); + return NULL; + } + flow_queue.back++; + if (flow_queue.back >= MAX_PIPELINE_FLOW) { + /* Reset the back pointer for the proper read */ + flow_queue.back = 0; + } + return ppl_flow; +} + +static inline void del_pipeline_flow_in_q(struct pipeline_flow *ppl_flow) +{ + if(ppl_flow){ + memset(ppl_flow, 0, sizeof *ppl_flow); + } +} + static void dp_netdev_flow_unref(struct dp_netdev_flow *); static bool dp_netdev_flow_ref(struct dp_netdev_flow *); static int dpif_netdev_flow_from_nlattrs(const struct nlattr *, uint32_t, @@ -549,7 +603,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, size_t actions_len, long long now); static void dp_netdev_input(struct dp_netdev_pmd_thread *, - struct dp_packet_batch *, odp_port_t port_no); + struct dp_packet_batch *, struct dp_netdev_port *port); static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, struct dp_packet_batch *); @@ -603,6 +657,21 @@ static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd, static inline bool emc_entry_alive(struct emc_entry *ce); static void emc_clear_entry(struct emc_entry *ce); +static inline void +emc_insert(struct dp_netdev_pmd_thread *pmd, const struct netdev_flow_key *key, + struct dp_netdev_flow *flow); +static inline void +emc_hw_insert(struct dp_netdev_pmd_thread *pmd, const struct netdev_flow_key *key, + struct dp_netdev_flow *flow); +/* + * XXX :: Do not change the order of following flow_insert function 
set. Its depends on + * the order of enum pipeline_id. + */ +static pipeline_dp_flow_insert ppl_dp_flow_insert[] = { + emc_insert, + emc_hw_insert //HW_OFFLOAD_PIPE_LINE +}; + static void emc_cache_init(struct emc_cache *flow_cache) { @@ -1951,11 +2020,12 @@ emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow, } static inline void -emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key, +emc_insert(struct dp_netdev_pmd_thread *pmd, const struct netdev_flow_key *key, struct dp_netdev_flow *flow) { struct emc_entry *to_be_replaced = NULL; struct emc_entry *current_entry; + struct emc_cache *cache = &pmd->flow_cache; EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) { if (netdev_flow_key_equal(&current_entry->key, key)) { @@ -1979,6 +2049,100 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key, emc_change_entry(to_be_replaced, flow, key); } +/* Flow rule insertion for hardware offload */ +static inline void +emc_hw_insert(struct dp_netdev_pmd_thread *pmd, const struct netdev_flow_key *key, + struct dp_netdev_flow *flow) +{ + struct emc_cache *cache = &pmd->flow_cache; + struct flow *sw_flow = &flow->flow; + struct flow_tnl *tnl_md = &sw_flow->tunnel; + emc_insert(pmd, key, flow); + /* Set up the hardware flow to insert into hardware */ + if(tnl_md->ip_dst) { + struct flow in_flow = { 0 }; + struct netdev_flow_key in_key = { 0 }; + /* + * just enqueue the flow, need it later at the time of outer flow insertion. 
+ */ + enqueue_flow(flow, key); + + /* Insert the inner flow for pipeline */ + in_flow.dl_dst = sw_flow->dl_dst; + in_flow.dl_src = sw_flow->dl_src; + in_flow.pipeline_id = HW_OFFLOAD_PIPELINE; + in_flow.pipeline_state = PIPELINE_ACTIVE; + in_flow.in_port = sw_flow->in_port; + in_key.len = 0; + in_key.hash = key->hash; + miniflow_map_init(&in_key.mf, &in_flow); + miniflow_init(&in_key.mf, &in_flow); + memcpy(&flow->flow, &in_flow, sizeof(in_flow)); + emc_insert(pmd, &in_key, flow); + } + else { + /* Insert the outer flow using the flow data that stored last time. */ + struct pipeline_flow *ppl_flow; + struct flow *old_flow; + struct flow out_flow = { 0 }; + struct flow hw_flow = *sw_flow; + struct netdev_flow_key out_key = { 0 }; + ppl_flow = dequeue_flow(); + old_flow = &ppl_flow->flow.flow; + if(!old_flow) { + VLOG_DBG("NULL OLD FLOW, cannot do much"); + goto out; + } + if (old_flow->tunnel.ip_dst != sw_flow->nw_dst || + old_flow->tunnel.ip_src != sw_flow->nw_src) { + /* Looks like the tunnel is missing in queue */ + VLOG_DBG("Cannot find the tunnel information in the queue, Cannot insert " + "hardware rule"); + goto out; + } + + /* Insert outer flow now */ + out_flow.pipeline_id = HW_OFFLOAD_PIPELINE; + out_flow.pipeline_state = PIPELINE_ACTIVE; + out_flow.in_port = sw_flow->in_port; + out_flow.dl_dst = sw_flow->dl_dst; + out_flow.dl_src = sw_flow->dl_src; + out_key.len = 0; + out_key.hash = key->hash; + miniflow_map_init(&out_key.mf, &out_flow); + miniflow_init(&out_key.mf, &out_flow); + memcpy(&flow->flow, &out_flow, sizeof(out_flow)); + emc_insert(pmd, &out_key, flow); + { + struct dp_netdev_port *dp_port; + uint32_t err; + /* Program the NICs */ + hw_flow.tunnel.tun_id = old_flow->tunnel.tun_id; + err = get_port_by_number(pmd->dp, hw_flow.in_port.odp_port, &dp_port); + if (err) { + VLOG_ERR("Cannot get the port information, Failed to configure " + "hardware offload"); + goto out; + } + set_up_hw_offload_port_rule(dp_port->netdev, &hw_flow, 1); + } +out: + 
del_pipeline_flow_in_q(ppl_flow); + } +} + +/* Flow rule insertion into the emc + * Decides what function is going to insert the rule. + */ +static inline void +dp_emc_flow_insert(struct dp_packet *packet, struct dp_netdev_pmd_thread *pmd, + const struct netdev_flow_key *key, struct dp_netdev_flow *flow) +{ + struct pipeline_md *ppl_md; + ppl_md = &packet->md.ppl_md; + ppl_dp_flow_insert[ppl_md->id](pmd, key, flow); +} + static inline struct dp_netdev_flow * emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key) { @@ -2653,7 +2817,7 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) break; case DPIF_OP_FLOW_DEL: - op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del); + //op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del); break; case DPIF_OP_EXECUTE: @@ -2858,7 +3022,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, *recirc_depth_get() = 0; cycles_count_start(pmd); - dp_netdev_input(pmd, &batch, port->port_no); + dp_netdev_input(pmd, &batch, port); cycles_count_end(pmd, PMD_CYCLES_PROCESSING); } else if (error != EAGAIN && error != EOPNOTSUPP) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -3928,14 +4092,26 @@ static inline size_t emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets_, struct netdev_flow_key *keys, struct packet_batch_per_flow batches[], size_t *n_batches, - bool md_is_valid, odp_port_t port_no) + bool md_is_valid, struct dp_netdev_port *port) { struct emc_cache *flow_cache = &pmd->flow_cache; struct netdev_flow_key *key = &keys[0]; size_t i, n_missed = 0, n_dropped = 0; struct dp_packet **packets = packets_->packets; int cnt = packets_->count; - + struct pipeline_md *ppl_md; + odp_port_t port_no = 0; + struct netdev *netdev =NULL; + inline void + (* get_packet_pipeline_ptr)(struct netdev *netdev, struct dp_packet *packet, + struct pipeline_md *ppl_md); + + get_packet_pipeline_ptr = &get_packet_pipeline_no_op; + if(port) { + port_no = 
port->port_no; + netdev = port->netdev; + get_packet_pipeline_ptr = &get_packet_pipeline; + } for (i = 0; i < cnt; i++) { struct dp_netdev_flow *flow; struct dp_packet *packet = packets[i]; @@ -3955,7 +4131,10 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets if (!md_is_valid) { pkt_metadata_init(&packet->md, port_no); } - miniflow_extract(packet, &key->mf); + /* Call the miniflow extract for the specific pipeline */ + ppl_md = &packet->md.ppl_md; + (* get_packet_pipeline_ptr)(netdev, packet, ppl_md); + ppl_mf_extract[ppl_md->id](packet, &key->mf); key->len = 0; /* Not computed yet. */ key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf); @@ -3988,9 +4167,19 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet, struct ofpbuf *add_actions; struct dp_packet_batch b; struct match match; + struct pipeline_md *ppl_md = &packet->md.ppl_md; ovs_u128 ufid; int error; + if (ppl_md->id && ppl_md->state) { + /* Upcall for a different active pipeline than software pipeline is not + * allowed. 
+ */ + VLOG_INFO("Cannot make upcall on packet from pipeline %d", ppl_md->id); + dp_packet_delete(packet); + (*lost_cnt)++; + return; + } match.tun_md.valid = false; miniflow_expand(&key->mf, &match.flow); @@ -4042,8 +4231,8 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet, add_actions->size); } ovs_mutex_unlock(&pmd->flow_mutex); - - emc_insert(&pmd->flow_cache, key, netdev_flow); + dp_emc_flow_insert(packet, pmd, key, netdev_flow); + //emc_insert(&pmd->flow_cache, key, netdev_flow); } } @@ -4066,7 +4255,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, struct dpcls *cls; struct dpcls_rule *rules[PKT_ARRAY_SIZE]; struct dp_netdev *dp = pmd->dp; - struct emc_cache *flow_cache = &pmd->flow_cache; + //struct emc_cache *flow_cache = &pmd->flow_cache; int miss_cnt = 0, lost_cnt = 0; int lookup_cnt = 0, add_lookup_cnt; bool any_miss; @@ -4138,7 +4327,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, flow = dp_netdev_flow_cast(rules[i]); - emc_insert(flow_cache, &keys[i], flow); + dp_emc_flow_insert(packet, pmd, &keys[i], flow); + //emc_insert(flow_cache, &keys[i], flow); dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); } @@ -4157,7 +4347,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, static void dp_netdev_input__(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, - bool md_is_valid, odp_port_t port_no) + bool md_is_valid, struct dp_netdev_port *port) { int cnt = packets->count; #if !defined(__CHECKER__) && !defined(_WIN32) @@ -4174,7 +4364,7 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd, n_batches = 0; newcnt = emc_processing(pmd, packets, keys, batches, &n_batches, - md_is_valid, port_no); + md_is_valid, port); if (OVS_UNLIKELY(newcnt)) { packets->count = newcnt; /* Get ingress port from first packet's metadata. 
*/ @@ -4194,16 +4384,16 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd, static void dp_netdev_input(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, - odp_port_t port_no) + struct dp_netdev_port *port) { - dp_netdev_input__(pmd, packets, false, port_no); + dp_netdev_input__(pmd, packets, false, port); } static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets) { - dp_netdev_input__(pmd, packets, true, 0); + dp_netdev_input__(pmd, packets, true, NULL); } struct dp_netdev_execute_aux { diff --git a/lib/flow.c b/lib/flow.c index f4ac8b3..a3d0725 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -40,6 +40,8 @@ #include "random.h" #include "unaligned.h" #include "util.h" +#include "hw-pipeline.h" +#include "dpdk-i40e-ofld.h" COVERAGE_DEFINE(flow_extract); COVERAGE_DEFINE(miniflow_malloc); @@ -125,7 +127,7 @@ struct mf_ctx { * away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are * defined as macros. */ -#if (FLOW_WC_SEQ != 36) +#if (FLOW_WC_SEQ != 37) #define MINIFLOW_ASSERT(X) ovs_assert(X) BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " "assertions enabled. 
Consider updating FLOW_WC_SEQ after " @@ -547,6 +549,56 @@ flow_extract(struct dp_packet *packet, struct flow *flow) miniflow_expand(&m.mf, flow); } +void +hw_fvl_mf_extract(struct dp_packet *packet, struct miniflow *dst) +{ + uint64_t hw_flow_id; + uint64_t *values = miniflow_values(dst); + const struct pkt_metadata *md = &packet->md; + const struct pipeline_md *ppl_md = &md->ppl_md; + struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values, + values + FLOW_U64S }; + miniflow_push_uint32(mf, dp_hash, md->dp_hash); + miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port)); + //miniflow_push_uint16(mf, pipeline_id, HW_OFFLOAD_PIPELINE); + //miniflow_push_uint16(mf, pipeline_state, PIPELINE_ACTIVE); + miniflow_push_uint32(mf, pipeline_id, + (HW_OFFLOAD_PIPELINE<<16 | PIPELINE_ACTIVE)); + miniflow_pad_to_64(mf, pipeline_state); + + /*hw_flow_id = get_fdir_flow_id(packet); + if(!hw_flow_id) {*/ + const void *data = dp_packet_data(packet); + miniflow_push_macs(mf, dl_dst, data); + /*}*/ + dst->map = mf.map; +} + +void +hw_vlan_mf_extract(struct dp_packet *packet, struct miniflow *dst) +{ + ovs_be16 vlan_tci; + ovs_be16 dl_type; + const struct pkt_metadata *md = &packet->md; + const void *data = dp_packet_data(packet); + size_t size = dp_packet_size(packet); + uint64_t *values = miniflow_values(dst); + struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values, + values + FLOW_U64S }; + miniflow_push_uint32(mf, dp_hash, md->dp_hash); + miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port)); + + miniflow_push_uint16(mf, pipeline_id, HW_OFFLOAD_PIPELINE); + miniflow_push_uint16(mf, pipeline_state, PIPELINE_ACTIVE); + miniflow_pad_to_64(mf, pipeline_state); + miniflow_push_macs(mf, dl_dst, data); + vlan_tci = parse_vlan(&data, &size); + dl_type = parse_ethertype(&data, &size); + miniflow_push_be16(mf, dl_type, dl_type); + miniflow_push_be16(mf, vlan_tci, vlan_tci); + dst->map = mf.map; +} + /* Caller is responsible for initializing 'dst' with enough 
storage for * FLOW_U64S * 8 bytes. */ void @@ -869,7 +921,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata) { int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); match_init_catchall(flow_metadata); if (flow->tunnel.tun_id != htonll(0)) { @@ -1275,7 +1327,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc, memset(&wc->masks, 0x0, sizeof wc->masks); /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); if (flow_tnl_dst_is_set(&flow->tunnel)) { if (flow->tunnel.flags & FLOW_TNL_F_KEY) { @@ -1319,6 +1371,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc, WC_MASK_FIELD(wc, ct_label); WC_MASK_FIELD(wc, recirc_id); WC_MASK_FIELD(wc, dp_hash); + //WC_MASK_FIELD(wc, pipeline_id); WC_MASK_FIELD(wc, in_port); /* actset_output wildcarded. */ @@ -1393,7 +1446,7 @@ void flow_wc_map(const struct flow *flow, struct flowmap *map) { /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); flowmap_init(map); @@ -1416,6 +1469,7 @@ flow_wc_map(const struct flow *flow, struct flowmap *map) FLOWMAP_SET(map, recirc_id); FLOWMAP_SET(map, dp_hash); FLOWMAP_SET(map, in_port); + //FLOWMAP_SET(map, pipeline_id); FLOWMAP_SET(map, dl_dst); FLOWMAP_SET(map, dl_src); FLOWMAP_SET(map, dl_type); @@ -1477,12 +1531,13 @@ void flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc) { /* Update this function whenever struct flow changes. 
*/ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata); memset(&wc->masks.regs, 0, sizeof wc->masks.regs); wc->masks.actset_output = 0; wc->masks.conj_id = 0; + wc->masks.pipeline_id = 0; } /* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or @@ -1621,7 +1676,7 @@ flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx, uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); uint32_t hash = basis; if (flow) { @@ -1668,7 +1723,7 @@ ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst); uint32_t flow_hash_5tuple(const struct flow *flow, uint32_t basis) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); uint32_t hash = basis; if (flow) { @@ -2135,7 +2190,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type, flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label)); /* Clear all L3 and L4 fields and dp_hash. */ - BUILD_ASSERT(FLOW_WC_SEQ == 36); + BUILD_ASSERT(FLOW_WC_SEQ == 37); memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); flow->dp_hash = 0; diff --git a/lib/hw-pipeline.c b/lib/hw-pipeline.c new file mode 100644 index 0000000..378f45f --- /dev/null +++ b/lib/hw-pipeline.c @@ -0,0 +1,75 @@ +/* + * hw-pipeline.c + * + * Created on: 13 Oct 2016 + * Author: sugeshch + */ +#include <config.h> +#include "hw-pipeline.h" +#include "openvswitch/vlog.h" +#include "netdev-provider.h" +#include "dp-packet.h" +VLOG_DEFINE_THIS_MODULE(hw_pipeline); + +/* + * XXX: Do not change the order of following list of miniflow extract functions. + * The index is being mapped to the pipeline_id. The changes in the order must + * update the enum pipeline_id. 
+ */ +pipeline_mf_extract ppl_mf_extract[] = { + miniflow_extract, + hw_ofld_tunnel_mf_extract //HW_OFFLOAD_PIPELINE +}; + +static inline void +init_packet_pipeline_md(struct pkt_metadata *md, enum pipeline_id ppl_id, + bool status) +{ + md->ppl_md.id = ppl_id; + md->ppl_md.state = status; +} + + +inline void +get_packet_pipeline_no_op(struct netdev *netdev, struct dp_packet *packet, + struct pipeline_md *ppl_md) +{ + /* Does nothing. Simple no-op */ +} + +inline void +get_packet_pipeline(struct netdev *netdev, struct dp_packet *packet, + struct pipeline_md *ppl_md) +{ + if(netdev->netdev_class->get_pipeline) { + /* The port has a specific pipeline function to determine the pipeline and status */ + netdev->netdev_class->get_pipeline(netdev, packet, ppl_md); + } +} + +/* + * hardware offload miniflow extract function + */ +void +hw_ofld_tunnel_mf_extract(struct dp_packet *packet, struct miniflow *mf) +{ + // first check if packet has the pipeline_id set, if yes, check if the pipeline is active, + //if yes, then do the specific miniflow extract rather than the default and return. + // Packet received on the physical port doesn't have the pipeline. so call the port's pipeline define function. the function takes the packet as input. + // In DPDK this function must be defined. packet metadata has to carry the + //pipeline id and status for processing. + // Default should be the software miniflow extract. 
+ struct pipeline_md *ppl_md; + ppl_md = &packet->md.ppl_md; + if ((!ppl_md->id || ppl_md->state == PIPELINE_INACTIVE)) { + /* if the pipeline id is not set, then do the default miniflow extract + * Also when pipeline state is inactive, do the default miniflow extract + */ + /* Default miniflow extract */ + miniflow_extract(packet, mf); + return; + } + //hw_vlan_mf_extract(packet, mf); + hw_fvl_mf_extract(packet, mf); +} + diff --git a/lib/hw-pipeline.h b/lib/hw-pipeline.h new file mode 100644 index 0000000..3690d99 --- /dev/null +++ b/lib/hw-pipeline.h @@ -0,0 +1,52 @@ +/* + * hw-pipeline.h + * + * Created on: 13 Oct 2016 + * Author: sugeshch + */ + +#ifndef LIB_HW_PIPELINE_H_ +#define LIB_HW_PIPELINE_H_ + +#include "flow.h" + +enum pipeline_id { + DEFAULT_SW_PIPELINE = 0, + HW_OFFLOAD_PIPELINE +}; + +enum pipeline_state { + PIPELINE_INACTIVE = 0, + PIPELINE_ACTIVE +}; + +void hw_ofld_tunnel_mf_extract(struct dp_packet *packet, struct miniflow *mf); + +/**** FORWARD References ****/ +struct netdev; +struct dp_packet; +struct emc_cache; +struct netdev_flow_key; +struct dp_netdev_flow; +struct pipeline_flow_batch; +/***************************/ + +#define MAX_PIPELINE_FLOW 5 + +void +get_packet_pipeline_no_op(struct netdev *netdev, struct dp_packet *packet, + struct pipeline_md *ppl_md); +void get_packet_pipeline(struct netdev *netdev, struct dp_packet *packet, + struct pipeline_md *ppl_md); +/* + * List of extract functions corresponding to the pipeline_id. Index of function is directly + * mapped to the pipeline_id enum. 
+ */ +typedef void (*pipeline_mf_extract)(struct dp_packet *packet, struct miniflow *mf); +extern pipeline_mf_extract ppl_mf_extract[]; + +typedef void (*pipeline_dp_flow_insert)(struct dp_netdev_pmd_thread *pmd, + const struct netdev_flow_key *key, + struct dp_netdev_flow *flow); + +#endif /* LIB_HW_PIPELINE_H_ */ diff --git a/lib/match.c b/lib/match.c index 3fcaec5..57529f5 100644 --- a/lib/match.c +++ b/lib/match.c @@ -1075,7 +1075,7 @@ match_format(const struct match *match, struct ds *s, int priority) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "%spriority=%s%d,", diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 75a330b..626009c 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -1488,6 +1488,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off, CONSTRUCT, \ netdev_bsd_destruct, \ netdev_bsd_dealloc, \ + NULL, /* get_pipeline */ \ NULL, /* get_config */ \ NULL, /* set_config */ \ NULL, /* get_tunnel_config */ \ diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ab8c34f..b15c9f2 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -58,6 +58,8 @@ #include "unaligned.h" #include "timeval.h" #include "unixctl.h" +#include "hw-pipeline.h" +#include "dpdk-i40e-ofld.h" VLOG_DEFINE_THIS_MODULE(netdev_dpdk); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); @@ -146,7 +148,7 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) #define VHOST_ENQ_RETRY_NUM 8 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? 
PATH_MAX : IFNAMSIZ) -static const struct rte_eth_conf port_conf = { +static struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, .split_hdr_size = 0, @@ -407,6 +409,11 @@ is_dpdk_class(const struct netdev_class *class) return class->construct == netdev_dpdk_construct; } +inline int get_dpdk_port_id(struct netdev_dpdk *dpdk_port) +{ + return dpdk_port->port_id; +} + /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically * aligned at 1k or less. If a declared mbuf size is not a multiple of this * value, insufficient buffers are allocated to accomodate the packet in its @@ -644,7 +651,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq) VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq); } - diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf); + //diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf); + diag = dpdk_eth_dev_hw_ofld_init(dev, n_rxq, n_txq, &conf); if (diag) { VLOG_WARN("Interface %s eth_dev setup error %s\n", dev->up.name, rte_strerror(-diag)); @@ -760,7 +768,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) return 0; } -static struct netdev_dpdk * +struct netdev_dpdk * netdev_dpdk_cast(const struct netdev *netdev) { return CONTAINER_OF(netdev, struct netdev_dpdk, up); @@ -1046,6 +1054,24 @@ netdev_dpdk_dealloc(struct netdev *netdev) rte_free(dev); } +static void +netdev_dpdk_get_pipeline(const struct netdev *netdev, struct dp_packet *packet, + void *pipeline_res) +{ + struct pipeline_md *ppl_md = pipeline_res; + struct rte_mbuf *mbuf; + /* TODO :: Have to look at the packet as well to decide the pipeline status. For now, let's + * hardcode it. + */ + ppl_md->id = HW_OFFLOAD_PIPELINE; + + /* DPDK pipeline is defined by the ol_flags in the packet. + */ + mbuf = (struct rte_mbuf *)packet; + ppl_md->state = (mbuf->ol_flags & PKT_RX_FDIR_ID)? 
PIPELINE_ACTIVE : + PIPELINE_INACTIVE; +} + static int netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) { @@ -1466,7 +1492,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch) } batch->count = nb_rx; - return 0; } @@ -3058,6 +3083,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) CONSTRUCT, \ DESTRUCT, \ netdev_dpdk_dealloc, \ + netdev_dpdk_get_pipeline, \ netdev_dpdk_get_config, \ SET_CONFIG, \ NULL, /* get_tunnel_config */ \ diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index dec1a8e..3b07697 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1328,6 +1328,7 @@ netdev_dummy_update_flags(struct netdev *netdev_, netdev_dummy_construct, \ netdev_dummy_destruct, \ netdev_dummy_dealloc, \ + NULL, /* get_pipeline */ \ netdev_dummy_get_config, \ netdev_dummy_set_config, \ NULL, /* get_tunnel_config */ \ diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index a5a9ec1..8e5d0a8 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2775,6 +2775,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off, CONSTRUCT, \ netdev_linux_destruct, \ netdev_linux_dealloc, \ + NULL, /* get_pipeline */ \ NULL, /* get_config */ \ NULL, /* set_config */ \ NULL, /* get_tunnel_config */ \ diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index ce2582f..6be89c7 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -44,6 +44,7 @@ #include "unaligned.h" #include "unixctl.h" #include "openvswitch/vlog.h" +#include "hw-pipeline.h" VLOG_DEFINE_THIS_MODULE(native_tnl); static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5); @@ -507,6 +508,47 @@ err: return NULL; } +struct dp_packet * +hw_ofld_netdev_vxlan_pop_header(struct dp_packet *packet) +{ + struct rte_mbuf *mbuf; + struct pipeline_md ppl_md = (packet->md.ppl_md); + unsigned int hlen; + if(!ppl_md.id || ppl_md.state == PIPELINE_INACTIVE) { + struct dp_packet *ret_pkt; + /* Call the default pop, 
but retain the pipeline, in_port, hash + * for future use + */ + ret_pkt = netdev_vxlan_pop_header(packet); + ret_pkt->md.ppl_md = ppl_md; + //ret_pkt->md.tunnel.in_port = tnl_port; + return ret_pkt; + } + /* + * XXX:: A fair assumption that the packets in this pipeline are + * ETH -->IP -->UDP-->VXLAN + */ + hlen = sizeof(struct eth_header) + IP_HEADER_LEN; + dp_packet_reset_packet(packet, hlen + VXLAN_HLEN); + mbuf = (struct rte_mbuf *)packet; + mbuf->ol_flags &= ~PKT_RX_FDIR_ID; + return packet; +} + +/* VxLAN pop operation needed to be pipelined. */ +typedef struct dp_packet * (*pipeline_vxlan_pop_hdr_fn)(struct dp_packet *packet); +pipeline_vxlan_pop_hdr_fn pipeline_vxlan_pop_header[] = { + netdev_vxlan_pop_header, + hw_ofld_netdev_vxlan_pop_header +}; + +struct dp_packet * +pipeline_netdev_vxlan_pop_header(struct dp_packet *packet) +{ + struct pipeline_md *ppl_md = &(packet->md.ppl_md); + return pipeline_vxlan_pop_header[ppl_md->id](packet); +} + int netdev_vxlan_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data, diff --git a/lib/netdev-native-tnl.h b/lib/netdev-native-tnl.h index a912ce9..4ad5c12 100644 --- a/lib/netdev-native-tnl.h +++ b/lib/netdev-native-tnl.h @@ -56,7 +56,7 @@ netdev_vxlan_build_header(const struct netdev *netdev, const struct netdev_tnl_build_header_params *params); struct dp_packet * -netdev_vxlan_pop_header(struct dp_packet *packet); +pipeline_netdev_vxlan_pop_header(struct dp_packet *packet); static inline bool netdev_tnl_is_header_ipv6(const void *header) diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index c8507a5..7ef4824 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -73,6 +73,7 @@ struct netdev { int n_txq; int n_rxq; int ref_cnt; /* Times this devices was opened. */ + uint32_t pipeline_id; /* The id of pipeline the port associated with */ struct shash_node *node; /* Pointer to element in global map. 
*/ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */ }; @@ -267,6 +268,11 @@ struct netdev_class { void (*destruct)(struct netdev *); void (*dealloc)(struct netdev *); + /* Get the pipeline information for the netdev. This will return the pipe_line id and + * status of pipeline for packet processing. + */ + void (*get_pipeline)(const struct netdev *netdev, struct dp_packet *packet, + void *pipeline_res); /* Fetches the device 'netdev''s configuration, storing it in 'args'. * The caller owns 'args' and pre-initializes it to an empty smap. * diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 02a246a..5d0f755 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -755,6 +755,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats) netdev_vport_construct, \ netdev_vport_destruct, \ netdev_vport_dealloc, \ + NULL, /* get_pipeline */ \ GET_CONFIG, \ SET_CONFIG, \ GET_TUNNEL_CONFIG, \ @@ -836,7 +837,7 @@ netdev_vport_tunnel_register(void) netdev_gre_pop_header), TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header, netdev_tnl_push_udp_header, - netdev_vxlan_pop_header), + pipeline_netdev_vxlan_pop_header), TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL), TUNNEL_CLASS("stt", "stt_sys", NULL, NULL, NULL), }; diff --git a/lib/nx-match.c b/lib/nx-match.c index 9201aae..da2919f 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -930,7 +930,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); /* Metadata. */ if (match->wc.masks.dp_hash) { diff --git a/lib/odp-util.h b/lib/odp-util.h index ccdbf8e..0f17175 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -141,7 +141,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * add another field and forget to adjust this value. 
*/ #define ODPUTIL_FLOW_KEY_BYTES 640 -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. An array of "struct nlattr" might not, in theory, be sufficiently diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 0445968..6d2722e 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -101,7 +101,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); diff --git a/lib/packets.h b/lib/packets.h index 21bd35c..52a8214 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -90,6 +90,12 @@ flow_tnl_equal(const struct flow_tnl *a, const struct flow_tnl *b) return a_size == flow_tnl_size(b) && !memcmp(a, b, a_size); } +/* Refer to hw-pipeline.h for more details. */ +struct pipeline_md { + uint16_t id; //enum pipeline_id + uint16_t state; //enum pipeline_state +}; + /* Datapath packet metadata */ struct pkt_metadata { uint32_t recirc_id; /* Recirculation id carried with the @@ -104,6 +110,8 @@ struct pkt_metadata { uint32_t ct_mark; /* Connection mark. */ ovs_u128 ct_label; /* Connection label. */ union flow_in_port in_port; /* Input port. */ + struct pipeline_md ppl_md; + uint8_t pad[4]; /* Pad for the pipeline metadata. */ struct flow_tnl tunnel; /* Encapsulating tunnel parameters. Note that * if 'ip_dst' == 0, the rest of the fields may * be uninitialized. */ diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h index 3bca817..f622278 100644 --- a/ofproto/ofproto-dpif-rid.h +++ b/ofproto/ofproto-dpif-rid.h @@ -99,7 +99,7 @@ struct rule; /* Metadata for restoring pipeline context after recirculation. Helpers * are inlined below to keep them together with the definition for easier * updates. 
*/ -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); struct frozen_metadata { /* Metadata in struct flow. */ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index f6391ed..daf56b6 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -2862,7 +2862,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37); memset(&flow_tnl, 0, sizeof flow_tnl); if (!xport) {

[ovs-dev,RFC] Pipeline packet processing in OVS using FVL flow director.

Commit Message

Patch