[ovs-dev,V3,05/14] netdev-offload-dpdk: Implement HW miss packet recover for vport

Message ID 20210302112536.32680-6-elibr@nvidia.com
State Changes Requested
Series Netdev vxlan-decap offload

Commit Message

Eli Britstein March 2, 2021, 11:25 a.m. UTC
A miss in virtual port offloads means the flow with tnl_pop was
offloaded, but not the following one. Recover the state and continue
with SW processing.
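
For illustration (hypothetical datapath flows, abbreviated; not part of the
patch):

    flow #1: in_port(phy0),...,udp(dst=4789), actions:tnl_pop(vxlan_sys_4789)
    flow #2: tunnel(...),in_port(vxlan_sys_4789),..., actions:output(vm1)

If flow #1 is offloaded but flow #2 is not, the packet reaches SW after HW
has already matched flow #1 (and possibly popped the outer header).
Recovering means rebuilding that intermediate state (tunnel metadata and
the vport as in_port) so SW processing can continue from flow #2.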

Signed-off-by: Eli Britstein <elibr@nvidia.com>
Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
---
 lib/netdev-offload-dpdk.c | 139 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

Comments

Ivan Malov March 15, 2021, 9:54 p.m. UTC | #1
Hi Eli,

On 02/03/2021 14:25, Eli Britstein wrote:
 > +        /* For tunnel recovery (RTE_FLOW_RESTORE_INFO_TUNNEL), it is possible
 > +         * for the packet to still be encapsulated, or not
 > +         * (RTE_FLOW_RESTORE_INFO_ENCAPSULATED).
 > +         * If the flag is on, the packet is still encapsulated, and we do
 > +         * the pop in SW.
 > +         * If it is off, the packet was already decapsulated by HW, and
 > +         * the tunnel info is provided in the tunnel struct. In that case we
 > +         * copy it to the OVS metadata.
 > +         */
 > +        if (rte_restore_info.flags & RTE_FLOW_RESTORE_INFO_ENCAPSULATED) {

The highlighted code is located *after* the
netdev_dpdk_rte_flow_get_restore_info() invocation. The comment says
that the packet may *still* be encapsulated. Is the packet in fact
allowed to have an encapsulation header in it even *before*
netdev_dpdk_rte_flow_get_restore_info()? I.e. right from the point where
the packet is read from an ethdev's Rx queue. Is that legitimate?

What concerns me is that in patch [06/14] TCP flags get parsed before 
the miss recovery:
 > +    /* Restore the packet if HW processing was terminated before completion. */
 > +    *tcp_flags = parse_tcp_flags(packet);
 > +    p = pmd_send_port_cache_lookup(pmd, port_no);
 > +    if (OVS_LIKELY(p)) {
 > +        int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);

So, I wonder, if the packet is indeed allowed to have an encapsulation 
header at that point, can parse_tcp_flags() tolerate that? Will the 
flags lookup still be fine?

(I apologise for asking too many questions).
Eli Britstein March 16, 2021, 7:23 a.m. UTC | #2
On 3/15/2021 11:54 PM, Ivan Malov wrote:
>
> Hi Eli,
>
> On 02/03/2021 14:25, Eli Britstein wrote:
> > +        /* For tunnel recovery (RTE_FLOW_RESTORE_INFO_TUNNEL), it is possible
> > +         * for the packet to still be encapsulated, or not
> > +         * (RTE_FLOW_RESTORE_INFO_ENCAPSULATED).
> > +         * If the flag is on, the packet is still encapsulated, and we do
> > +         * the pop in SW.
> > +         * If it is off, the packet was already decapsulated by HW, and
> > +         * the tunnel info is provided in the tunnel struct. In that case we
> > +         * copy it to the OVS metadata.
> > +         */
> > +        if (rte_restore_info.flags & RTE_FLOW_RESTORE_INFO_ENCAPSULATED) {
>
> The highlighted code is located *after* the
> netdev_dpdk_rte_flow_get_restore_info() invocation. The comment says
> that the packet may *still* be encapsulated. Is the packet in fact
> allowed to have an encapsulation header in it even *before*
> netdev_dpdk_rte_flow_get_restore_info()? I.e. right from the point where
> the packet is read from an ethdev's Rx queue. Is that legitimate?

netdev_dpdk_rte_flow_get_restore_info() does not change the packet; it 
only provides the restore info.

Whether a packet with tunnel info (RTE_FLOW_RESTORE_INFO_TUNNEL) is 
still encapsulated or not (RTE_FLOW_RESTORE_INFO_ENCAPSULATED) depends 
on the PMD.

Thus, yes, it is legitimate.
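
For reference, a minimal sketch of how a caller must treat this, 
independent of the PMD (names are from DPDK's rte_flow.h; not OVS code, 
and error handling is elided):

    struct rte_flow_restore_info info;
    struct rte_flow_error error;

    if (rte_flow_get_restore_info(port_id, mbuf, &info, &error)) {
        return; /* No HW state to restore; the packet is untouched. */
    }
    if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
        if (info.flags & RTE_FLOW_RESTORE_INFO_ENCAPSULATED) {
            /* The outer header is still on the packet: SW must pop it. */
        } else {
            /* HW already decapsulated: the outer header survives only in
             * info.tunnel and must be copied to SW metadata. */
        }
    }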

>
> What concerns me is that in patch [06/14] TCP flags get parsed before
> the miss recovery:
> > +    /* Restore the packet if HW processing was terminated before completion. */
> > +    *tcp_flags = parse_tcp_flags(packet);
> > +    p = pmd_send_port_cache_lookup(pmd, port_no);
> > +    if (OVS_LIKELY(p)) {
> > +        int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
>
> So, I wonder, if the packet is indeed allowed to have an encapsulation
> header at that point, can parse_tcp_flags() tolerate that? Will the
> flags lookup still be fine?
Thanks. I'll fix in v4.
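
Roughly, the ordering I have in mind for v4 (a sketch only, reusing the 
names from the quoted snippet; the exact error handling may differ):

    p = pmd_send_port_cache_lookup(pmd, port_no);
    if (OVS_LIKELY(p)) {
        int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);

        if (err) {
            /* The packet may have been freed during recovery. */
            return err;
        }
    }
    /* Parse only after recovery, so we never parse an encapsulated packet. */
    *tcp_flags = parse_tcp_flags(packet);
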
>
> (I apologise for asking too many questions).
NP. Thanks for reviewing.
>
> -- 
> Ivan M

Patch

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index f2413f5be..356a77188 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1588,6 +1588,144 @@  netdev_offload_dpdk_flow_flush(struct netdev *netdev)
     return 0;
 }
 
+struct get_vport_netdev_aux {
+    struct rte_flow_tunnel *tunnel;
+    odp_port_t *odp_port;
+    struct netdev *vport;
+};
+
+static bool
+get_vxlan_netdev_cb(struct netdev *netdev,
+                    odp_port_t odp_port,
+                    void *aux_)
+{
+    const struct netdev_tunnel_config *tnl_cfg;
+    struct get_vport_netdev_aux *aux = aux_;
+
+    if (strcmp(netdev_get_type(netdev), "vxlan")) {
+        return false;
+    }
+
+    tnl_cfg = netdev_get_tunnel_config(netdev);
+    if (!tnl_cfg) {
+        VLOG_ERR_RL(&rl, "Cannot get a tunnel config for netdev %s",
+                    netdev_get_name(netdev));
+        return false;
+    }
+
+    if (tnl_cfg->dst_port == aux->tunnel->tp_dst) {
+        /* Found the netdev. Store the results and stop the traversal. */
+        aux->vport = netdev_ref(netdev);
+        *aux->odp_port = odp_port;
+        return true;
+    }
+
+    return false;
+}
+
+static struct netdev *
+get_vxlan_netdev(const char *dpif_type,
+                 struct rte_flow_tunnel *tunnel,
+                 odp_port_t *odp_port)
+{
+    struct get_vport_netdev_aux aux = {
+        .tunnel = tunnel,
+        .odp_port = odp_port,
+        .vport = NULL,
+    };
+
+    netdev_ports_traverse(dpif_type, get_vxlan_netdev_cb, &aux);
+    return aux.vport;
+}
+
+static struct netdev *
+get_vport_netdev(const char *dpif_type,
+                 struct rte_flow_tunnel *tunnel,
+                 odp_port_t *odp_port)
+{
+    if (tunnel->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+        return get_vxlan_netdev(dpif_type, tunnel, odp_port);
+    }
+
+    OVS_NOT_REACHED();
+}
+
+static int
+netdev_offload_dpdk_hw_miss_packet_recover(struct netdev *netdev,
+                                           struct dp_packet *packet)
+{
+    struct rte_flow_restore_info rte_restore_info;
+    struct rte_flow_tunnel *rte_tnl;
+    struct rte_flow_error error;
+    struct netdev *vport_netdev;
+    struct pkt_metadata *md;
+    struct flow_tnl *md_tnl;
+    odp_port_t vport_odp;
+
+    if (netdev_dpdk_rte_flow_get_restore_info(netdev, packet,
+                                              &rte_restore_info, &error)) {
+        /* This function is called for every packet, and in most cases there
+         * will be no restore info from the HW, so an error is expected.
+         */
+        (void) error;
+        return 0;
+    }
+
+    rte_tnl = &rte_restore_info.tunnel;
+    if (rte_restore_info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) {
+        vport_netdev = get_vport_netdev(netdev->dpif_type, rte_tnl,
+                                        &vport_odp);
+        md = &packet->md;
+        /* For tunnel recovery (RTE_FLOW_RESTORE_INFO_TUNNEL), it is possible
+         * for the packet to still be encapsulated, or not
+         * (RTE_FLOW_RESTORE_INFO_ENCAPSULATED).
+         * If the flag is on, the packet is still encapsulated, and we do
+         * the pop in SW.
+         * If it is off, the packet was already decapsulated by HW, and
+         * the tunnel info is provided in the tunnel struct. In that case we
+         * copy it to the OVS metadata.
+         */
+        if (rte_restore_info.flags & RTE_FLOW_RESTORE_INFO_ENCAPSULATED) {
+            if (!vport_netdev || !vport_netdev->netdev_class ||
+                !vport_netdev->netdev_class->pop_header) {
+                VLOG_ERR("vport netdev=%s with no pop_header method",
+                         vport_netdev ? netdev_get_name(vport_netdev) : "");
+                netdev_close(vport_netdev);
+                return 0;
+            }
+            if (!vport_netdev->netdev_class->pop_header(packet)) {
+                /* If there is an error with popping the header, the packet
+                 * is freed. In that case SW processing must not proceed. */
+                netdev_close(vport_netdev);
+                return -1;
+            }
+        } else {
+            md_tnl = &md->tunnel;
+            if (rte_tnl->is_ipv6) {
+                memcpy(&md_tnl->ipv6_src, &rte_tnl->ipv6.src_addr,
+                       sizeof md_tnl->ipv6_src);
+                memcpy(&md_tnl->ipv6_dst, &rte_tnl->ipv6.dst_addr,
+                       sizeof md_tnl->ipv6_dst);
+            } else {
+                md_tnl->ip_src = rte_tnl->ipv4.src_addr;
+                md_tnl->ip_dst = rte_tnl->ipv4.dst_addr;
+            }
+            md_tnl->tun_id = htonll(rte_tnl->tun_id);
+            md_tnl->flags = rte_tnl->tun_flags;
+            md_tnl->ip_tos = rte_tnl->tos;
+            md_tnl->ip_ttl = rte_tnl->ttl;
+            md_tnl->tp_src = rte_tnl->tp_src;
+        }
+        if (vport_netdev) {
+            md->in_port.odp_port = vport_odp;
+            netdev_close(vport_netdev);
+        }
+    }
+    dp_packet_reset_offload(packet);
+
+    return 0;
+}
+
 const struct netdev_flow_api netdev_offload_dpdk = {
     .type = "dpdk_flow_api",
     .flow_put = netdev_offload_dpdk_flow_put,
@@ -1595,4 +1733,5 @@  const struct netdev_flow_api netdev_offload_dpdk = {
     .init_flow_api = netdev_offload_dpdk_init_flow_api,
     .flow_get = netdev_offload_dpdk_flow_get,
     .flow_flush = netdev_offload_dpdk_flow_flush,
+    .hw_miss_packet_recover = netdev_offload_dpdk_hw_miss_packet_recover,
 };
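
For context (not part of this patch): the new callback is reached through 
the generic netdev_hw_miss_packet_recover() wrapper added earlier in the 
series. Assuming it follows the usual flow-API dispatch pattern of 
lib/netdev-offload.c, that wrapper looks roughly like:

    int
    netdev_hw_miss_packet_recover(struct netdev *netdev,
                                  struct dp_packet *packet)
    {
        const struct netdev_flow_api *flow_api =
            ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);

        return flow_api && flow_api->hw_miss_packet_recover
               ? flow_api->hw_miss_packet_recover(netdev, packet)
               : EOPNOTSUPP;
    }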