
[ovs-dev,v3,3/3] netdev-offload-dpdk: use flow transfer proxy mechanism

Message ID 20220720121823.2497727-4-ivan.malov@oktetlabs.ru
State Superseded
Series Rework the usage of DPDK transfer flow offloads

Checks

Context Check Description
ovsrobot/intel-ovs-compilation fail test: fail
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Ivan Malov July 20, 2022, 12:18 p.m. UTC
Manage "transfer" flows via the corresponding mechanism.
Doing so requires that the traffic source be specified
explicitly, via the corresponding pattern item.

Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
 lib/netdev-dpdk.c         | 99 ++++++++++++++++++++++++++++++++-------
 lib/netdev-dpdk.h         |  4 +-
 lib/netdev-offload-dpdk.c | 61 ++++++++++++++++++++----
 3 files changed, 135 insertions(+), 29 deletions(-)

Comments

Eli Britstein July 20, 2022, 1:11 p.m. UTC | #1
>-----Original Message-----
>From: Ivan Malov <ivan.malov@oktetlabs.ru>
>Sent: Wednesday, July 20, 2022 3:18 PM
>To: dev@openvswitch.org
>Cc: Eli Britstein <elibr@nvidia.com>; Stephen Hemminger
><stephen@networkplumber.org>; Ilya Maximets <i.maximets@ovn.org>; Ori
>Kam <orika@nvidia.com>; Maxime Coquelin
><maxime.coquelin@redhat.com>; David Marchand
><david.marchand@redhat.com>; Andrew Rybchenko
><andrew.rybchenko@oktetlabs.ru>
>Subject: [PATCH v3 3/3] netdev-offload-dpdk: use flow transfer proxy
>mechanism
>
>
>
>Manage "transfer" flows via the corresponding mechanism.
>Doing so requires that the traffic source be specified explicitly, via the
>corresponding pattern item.
>
>Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
>Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>---
> lib/netdev-dpdk.c         | 99 ++++++++++++++++++++++++++++++++-------
> lib/netdev-dpdk.h         |  4 +-
> lib/netdev-offload-dpdk.c | 61 ++++++++++++++++++++----
> 3 files changed, 135 insertions(+), 29 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>45e5d26d2..01fb40255 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>
> struct netdev_dpdk {
>     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>+        dpdk_port_t flow_transfer_proxy_port_id;
>         dpdk_port_t port_id;
>
>         /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1130,8
>+1131,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>-
> #ifdef ALLOW_EXPERIMENTAL_API
This ifdef appears all over the patches, even in places where the code is harmless without it. It makes the code less readable and might cause future cherry-picking issues. Please try to limit it to the places where it is strictly required.
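For instance, in dpdk_eth_dev_init() the fallback assignment could be made unconditional, so that only the experimental call itself stays under the guard and the #else branch disappears. A rough sketch of the idea (not a drop-in replacement):

    /* Default that compiles with or without ALLOW_EXPERIMENTAL_API. */
    dev->flow_transfer_proxy_port_id = dev->port_id;

#ifdef ALLOW_EXPERIMENTAL_API
    if (rte_flow_pick_transfer_proxy(dev->port_id,
                                     &dev->flow_transfer_proxy_port_id,
                                     NULL) != 0) {
        /* The PMD does not indicate the proxy port; keep the default. */
        dev->flow_transfer_proxy_port_id = dev->port_id;
    }
#endif /* ALLOW_EXPERIMENTAL_API */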
>+    int ret;
>+
>     /*
>      * Full tunnel offload requires that tunnel ID metadata be
>      * delivered with "miss" packets from the hardware to the @@ -1141,6
>+1143,27 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>      * Request delivery of such metadata.
>      */
>     dpdk_eth_dev_init_rx_metadata(dev);
>+
>+    /*
>+     * Managing "transfer" flows requires that the user communicate them
>+     * via a port which has the privilege to control the embedded switch.
>+     * For some vendors, all ports in a given switching domain have
>+     * this privilege. For other vendors, it's only one port.
>+     *
>+     * Get the proxy port ID and remember it for later use.
>+     */
>+    ret = rte_flow_pick_transfer_proxy(dev->port_id,
>+                                       &dev->flow_transfer_proxy_port_id, NULL);
>+    if (ret != 0) {
>+        /*
>+         * The PMD does not indicate the proxy port.
>+         * Assume the proxy is unneeded.
>+         */
>+        dev->flow_transfer_proxy_port_id = dev->port_id;
>+    }
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>+    /* No API to get transfer proxy; assume the proxy is unneeded. */
>+    dev->flow_transfer_proxy_port_id = dev->port_id;
> #endif /* ALLOW_EXPERIMENTAL_API */
>
>     rte_eth_dev_info_get(dev->port_id, &info); @@ -3762,8 +3785,12 @@
>netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>                    const char *argv[], void *aux OVS_UNUSED)  {
>     struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    struct netdev_dpdk *dev_self = NULL; #endif /*
>+ALLOW_EXPERIMENTAL_API */
>     struct rte_eth_dev_info dev_info;
>     dpdk_port_t sibling_port_id;
>+    struct netdev_dpdk *dev;
>     dpdk_port_t port_id;
>     bool used = false;
>     char *response;
>@@ -3781,8 +3808,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int
>argc OVS_UNUSED,
>                   argv[1]);
>
>     RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>-        struct netdev_dpdk *dev;
>-
>         LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>             if (dev->port_id != sibling_port_id) {
>                 continue;
>@@ -3802,6 +3827,27 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>int argc OVS_UNUSED,
>     }
>     ds_destroy(&used_interfaces);
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /*
>+     * The device being detached may happen to be a flow proxy port
>+     * for another device (still attached). If so, do not allow to
>+     * detach. Devices dependent on this one must be detached first.
I don't think it is acceptable to deny detaching the port, or to enforce such an ordering. For example, ports are detached on shutdown in an unknown order.
Suppose A is the proxy port for ports B and C. When port A is about to be detached, flush the offloads of ports B and C (in addition to flushing the offloads of port A). Then it is safe to detach it.
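Roughly along these lines in netdev_dpdk_detach() (flush_port_offloads() is only a placeholder here, not an existing helper):

    /* Before detaching 'port_id', make sure no other attached device
     * still relies on it as its flow transfer proxy. */
    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
        if (dev->port_id != port_id
            && dev->flow_transfer_proxy_port_id == port_id) {
            /* Placeholder: flush all offloaded flows of 'dev'. */
            flush_port_offloads(dev);
            /* Let the dependent device fall back to proxying itself. */
            dev->flow_transfer_proxy_port_id = dev->port_id;
        }
    }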
>+     */
>+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>+        if (dev->port_id == port_id) {
>+            dev_self = dev;
>+        } else if (dev->flow_transfer_proxy_port_id == port_id) {
>+            response = xasprintf("Device '%s' can not be detached (flow proxy)",
>+                                 argv[1]);
>+            goto error;
>+        }
>+    }
>+
>+    /* Indicate that the device being detached no longer needs a flow proxy.
>*/
>+    if (dev_self != NULL)
>+        dev_self->flow_transfer_proxy_port_id = port_id; #endif /*
>+ALLOW_EXPERIMENTAL_API */
>+
>     rte_eth_dev_info_get(port_id, &dev_info);
>     rte_eth_dev_close(port_id);
>     if (rte_dev_remove(dev_info.device) < 0) { @@ -5167,7 +5213,8 @@
>unlock:
> }
>
> int
>-netdev_dpdk_get_port_id(struct netdev *netdev)
>+netdev_dpdk_get_port_id(struct netdev *netdev,
>+                        bool flow_transfer_proxy)
It might be better to have a separate API to get the proxy port ID and use it only where needed.
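Something like this, for example (the name is only a suggestion, and the early-out guard is assumed to mirror the existing netdev_dpdk_get_port_id()):

int
netdev_dpdk_get_flow_transfer_proxy_port_id(struct netdev *netdev)
{
    struct netdev_dpdk *dev;
    int ret = -1;

    if (!is_dpdk_class(netdev->netdev_class)) {
        /* Same class check as the existing getter (assumed). */
        goto out;
    }

    dev = netdev_dpdk_cast(netdev);
    ovs_mutex_lock(&dev->mutex);
    ret = dev->flow_transfer_proxy_port_id;
    ovs_mutex_unlock(&dev->mutex);
out:
    return ret;
}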
> {
>     struct netdev_dpdk *dev;
>     int ret = -1;
>@@ -5178,7 +5225,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = dev->port_id;
>+    ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id :
>+ dev->port_id;
>     ovs_mutex_unlock(&dev->mutex);
> out:
>     return ret;
>@@ -5214,13 +5261,15 @@ out:
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>-                             struct rte_flow *rte_flow,
>+                             bool transfer, struct rte_flow *rte_flow,
>                              struct rte_flow_error *error)  {
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>     int ret;
>
>-    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
>+    ret = rte_flow_destroy(transfer ?
>+                           dev->flow_transfer_proxy_port_id : dev->port_id,
>+                           rte_flow, error);
>     return ret;
> }
>
>@@ -5234,7 +5283,19 @@ netdev_dpdk_rte_flow_create(struct netdev
>*netdev,
>     struct rte_flow *flow;
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>-    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    if (!attr->transfer) {
>+        /*
>+         * The 1st item in any pattern is a traffic source one.
>+         * It is unnecessary in the case of non-transfer rules.
>+         */
>+        ++(items);
>+    }
>+#endif /* ALLOW_EXPERIMENTAL_API */
>+
>+    flow = rte_flow_create(attr->transfer ?
>+                           dev->flow_transfer_proxy_port_id : dev->port_id,
>+                           attr, items, actions, error);
>     return flow;
> }
>
>@@ -5262,7 +5323,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev
>*netdev,
>     }
>
>     dev = netdev_dpdk_cast(netdev);
>-    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
>+    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
>+                         actions, query, error);
>     return ret;
> }
>
>@@ -5284,8 +5346,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
>-                                    num_of_actions, error);
>+    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id,
>tunnel,
>+                                    actions, num_of_actions, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5306,8 +5368,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
>-                                error);
>+    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
>+                                items, num_of_items, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5328,7 +5390,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
>+    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
>+                                    m, info, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5349,8 +5412,8 @@
>netdev_dpdk_rte_flow_tunnel_action_decap_release(
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
>-                                               num_of_actions, error);
>+    ret = rte_flow_tunnel_action_decap_release(dev-
>>flow_transfer_proxy_port_id,
>+                                               actions, num_of_actions,
>+ error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>@@ -5370,8 +5433,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct
>netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
>-    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
>-                                       error);
>+    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
>+                                       items, num_of_items, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 699be3fb4..6f18a73b2
>100644
>--- a/lib/netdev-dpdk.h
>+++ b/lib/netdev-dpdk.h
>@@ -35,7 +35,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev *);
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>-                             struct rte_flow *rte_flow,
>+                             bool transfer, struct rte_flow *rte_flow,
>                              struct rte_flow_error *error);  struct rte_flow *
>netdev_dpdk_rte_flow_create(struct netdev *netdev, @@ -49,7 +49,7 @@
>netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>                                  struct rte_flow_query_count *query,
>                                  struct rte_flow_error *error);  int -
>netdev_dpdk_get_port_id(struct netdev *netdev);
>+netdev_dpdk_get_port_id(struct netdev *netdev, bool
>+flow_transfer_proxy);
>
> #ifdef ALLOW_EXPERIMENTAL_API
>
>diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index
>9cd5a0159..9320a1d0b 100644
>--- a/lib/netdev-offload-dpdk.c
>+++ b/lib/netdev-offload-dpdk.c
>@@ -353,8 +353,23 @@ dump_flow_pattern(struct ds *s,
>
>     if (item->type == RTE_FLOW_ITEM_TYPE_END) {
>         ds_put_cstr(s, "end ");
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
>+        const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
>+        const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
>+
>+        ds_put_cstr(s, "represented_port ");
>+
>+        DUMP_PATTERN_ITEM(ethdev_mask->port_id, false, "ethdev_port_id",
>+                          "%"PRIu16, ethdev_spec->port_id,
>+                          ethdev_mask->port_id, 0);
>+    } else if (flow_patterns->tnl_pmd_items_cnt &&
>+               pattern_index < 1 /* REPRESENTED_PORT */ +
>+                               flow_patterns->tnl_pmd_items_cnt) {
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>     } else if (flow_patterns->tnl_pmd_items_cnt &&
>                pattern_index < flow_patterns->tnl_pmd_items_cnt) {
>+#endif /* ALLOW_EXPERIMENTAL_API */
>         return;
>     } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
>         const struct rte_flow_item_eth *eth_spec = item->spec; @@ -927,7
>+942,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>             extra_str = ds_cstr(&s_extra);
>             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d
>%s",
>                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
>-                        netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>+                        netdev_dpdk_get_port_id(netdev, attr->transfer),
>+                        ds_cstr(&s));
>         }
>     } else {
>         enum vlog_level level = VLL_WARN; @@ -942,7 +958,8 @@
>netdev_offload_dpdk_flow_create(struct netdev *netdev,
>             extra_str = ds_cstr(&s_extra);
>             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>                     netdev_get_name(netdev), extra_str,
>-                    netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>+                    netdev_dpdk_get_port_id(netdev, attr->transfer),
>+                    ds_cstr(&s));
>         }
>     }
>     ds_destroy(&s);
>@@ -1035,6 +1052,12 @@ free_flow_patterns(struct flow_patterns
>*patterns)
>     struct rte_flow_error error;
>     int i;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /* REPRESENTED_PORT */
>+    free(CONST_CAST(void *, patterns->items[0].spec));
>+    free(CONST_CAST(void *, patterns->items[0].mask));
>+#endif /* ALLOW_EXPERIMENTAL_API */
Hard-coding index [0] for this match is problematic. You could either malloc a regular item for it (and free it here), or check the item type inside the 0..n-1 loop, as in the sketch below.
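A sketch of the type-keyed variant, replacing only the two hard-coded frees (the PMD-owned tunnel items are assumed to keep being released via rte_flow_tunnel_item_release() exactly as today):

    for (i = 0; i < patterns->cnt; i++) {
        const struct rte_flow_item *item = &patterns->items[i];

        if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
            /* Locally allocated traffic source item: plain free(). */
            free(CONST_CAST(void *, item->spec));
            free(CONST_CAST(void *, item->mask));
        }
    }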
>+
>     if (patterns->tnl_pmd_items) {
>         struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
>         uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt; @@ -
>1049,7 +1072,12 @@ free_flow_patterns(struct flow_patterns *patterns)
>         }
>     }
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
>+         i < patterns->cnt; i++) {
>+#else /* ! ALLOW_EXPERIMENTAL_API */
>     for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
>+#endif /* ALLOW_EXPERIMENTAL_API */
>         if (patterns->items[i].spec) {
>             free(CONST_CAST(void *, patterns->items[i].spec));
>         }
>@@ -1115,13 +1143,13 @@ vport_to_rte_tunnel(struct netdev *vport,
>         tunnel->tp_dst = tnl_cfg->dst_port;
>         if (!VLOG_DROP_DBG(&rl)) {
>             ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>-                          netdev_dpdk_get_port_id(netdev));
>+                          netdev_dpdk_get_port_id(netdev, true));
>         }
>     } else if (!strcmp(netdev_get_type(vport), "gre")) {
>         tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
>         if (!VLOG_DROP_DBG(&rl)) {
>             ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
>-                          netdev_dpdk_get_port_id(netdev));
>+                          netdev_dpdk_get_port_id(netdev, true));
>         }
>     } else {
>         VLOG_DBG_RL(&rl, "vport type '%s' is not supported", @@ -1383,10
>+1411,23 @@ parse_flow_match(struct netdev *netdev,
>                  struct flow_patterns *patterns,
>                  struct match *match)
> {
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    struct netdev *physdev = netdev_ports_get(orig_in_port, netdev-
>>dpif_type);
>+    struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
>+    struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof
>+*ethdev_mask); #endif /* ALLOW_EXPERIMENTAL_API */
>     struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
>     struct flow *consumed_masks;
>     uint8_t proto = 0;
>
>+#ifdef ALLOW_EXPERIMENTAL_API
>+    /* Add an explicit traffic source item to the beginning of the pattern. */
>+    ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
>+    *ethdev_mask = rte_flow_item_ethdev_mask;
>+    add_flow_pattern(patterns,
>RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
>+                     ethdev_spec, ethdev_mask, NULL); #endif /*
>+ALLOW_EXPERIMENTAL_API */
>+
>     consumed_masks = &match->wc.masks;
>
>     if (!flow_tnl_dst_is_set(&match->flow.tunnel)) { @@ -1784,7 +1825,7 @@
>add_represented_port_action(struct flow_actions *actions,
>     struct rte_flow_action_ethdev *ethdev;
>     int outdev_id;
>
>-    outdev_id = netdev_dpdk_get_port_id(outdev);
>+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
>     if (outdev_id < 0) {
>         return -1;
>     }
>@@ -1801,7 +1842,7 @@ add_port_id_action(struct flow_actions *actions,
>     struct rte_flow_action_port_id *port_id;
>     int outdev_id;
>
>-    outdev_id = netdev_dpdk_get_port_id(outdev);
>+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
>     if (outdev_id < 0) {
>         return -1;
>     }
>@@ -2333,6 +2374,7 @@ netdev_offload_dpdk_flow_destroy(struct
>ufid_to_rte_flow_data *rte_flow_data)
>     struct netdev *physdev;
>     struct netdev *netdev;
>     ovs_u128 *ufid;
>+    bool transfer;
>     int ret;
>
>     ovs_mutex_lock(&rte_flow_data->lock);
>@@ -2344,12 +2386,13 @@ netdev_offload_dpdk_flow_destroy(struct
>ufid_to_rte_flow_data *rte_flow_data)
>
>     rte_flow_data->dead = true;
>
>+    transfer = rte_flow_data->actions_offloaded;
>     rte_flow = rte_flow_data->rte_flow;
>     physdev = rte_flow_data->physdev;
>     netdev = rte_flow_data->netdev;
>     ufid = &rte_flow_data->ufid;
>
>-    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
>+    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow,
>+ &error);
>
>     if (ret == 0) {
>         struct netdev_offload_dpdk_data *data; @@ -2364,12 +2407,12 @@
>netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data
>*rte_flow_data)
>                     " flow destroy %d ufid " UUID_FMT,
>                     netdev_get_name(netdev), netdev_get_name(physdev),
>                     (intptr_t) rte_flow,
>-                    netdev_dpdk_get_port_id(physdev),
>+                    netdev_dpdk_get_port_id(physdev, transfer),
>                     UUID_ARGS((struct uuid *) ufid));
>     } else {
>         VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
>                  netdev_get_name(netdev), netdev_get_name(physdev),
>-                 netdev_dpdk_get_port_id(physdev),
>+                 netdev_dpdk_get_port_id(physdev, transfer),
>                  UUID_ARGS((struct uuid *) ufid));
>     }
>
>--
>2.30.2
Ivan Malov July 20, 2022, 1:53 p.m. UTC | #2
Hi Eli,

Thanks a lot for a prompt review. PSB.

On Wed, 20 Jul 2022, Eli Britstein wrote:

>
>
>> -----Original Message-----
>> From: Ivan Malov <ivan.malov@oktetlabs.ru>
>> Sent: Wednesday, July 20, 2022 3:18 PM
>> To: dev@openvswitch.org
>> Cc: Eli Britstein <elibr@nvidia.com>; Stephen Hemminger
>> <stephen@networkplumber.org>; Ilya Maximets <i.maximets@ovn.org>; Ori
>> Kam <orika@nvidia.com>; Maxime Coquelin
>> <maxime.coquelin@redhat.com>; David Marchand
>> <david.marchand@redhat.com>; Andrew Rybchenko
>> <andrew.rybchenko@oktetlabs.ru>
>> Subject: [PATCH v3 3/3] netdev-offload-dpdk: use flow transfer proxy
>> mechanism
>>
>>
>>
>> Manage "transfer" flows via the corresponding mechanism.
>> Doing so requires that the traffic source be specified explicitly, via the
>> corresponding pattern item.
>>
>> Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
>> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> ---
>> lib/netdev-dpdk.c         | 99 ++++++++++++++++++++++++++++++++-------
>> lib/netdev-dpdk.h         |  4 +-
>> lib/netdev-offload-dpdk.c | 61 ++++++++++++++++++++----
>> 3 files changed, 135 insertions(+), 29 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>> 45e5d26d2..01fb40255 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
>>
>> struct netdev_dpdk {
>>     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>> +        dpdk_port_t flow_transfer_proxy_port_id;
>>         dpdk_port_t port_id;
>>
>>         /* If true, device was attached by rte_eth_dev_attach(). */ @@ -1130,8
>> +1131,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>>                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>>                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>> -
>> #ifdef ALLOW_EXPERIMENTAL_API
> This ifdef appears all over the patches, even in places where the code is harmless without it. It makes the code less readable and might cause future cherry-picking issues. Please try to limit it to the places where it is strictly required.

I see your point. But I'm a rookie here in OvS, and from the existing
code it's not clear which places can safely do without the ifdefs and
which cannot. May I kindly ask you to point out at least one example
in my patch, that is, a place where it would be safe to remove the
checks?

>> +    int ret;
>> +
>>     /*
>>      * Full tunnel offload requires that tunnel ID metadata be
>>      * delivered with "miss" packets from the hardware to the @@ -1141,6
>> +1143,27 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>      * Request delivery of such metadata.
>>      */
>>     dpdk_eth_dev_init_rx_metadata(dev);
>> +
>> +    /*
>> +     * Managing "transfer" flows requires that the user communicate them
>> +     * via a port which has the privilege to control the embedded switch.
>> +     * For some vendors, all ports in a given switching domain have
>> +     * this privilege. For other vendors, it's only one port.
>> +     *
>> +     * Get the proxy port ID and remember it for later use.
>> +     */
>> +    ret = rte_flow_pick_transfer_proxy(dev->port_id,
>> +                                       &dev->flow_transfer_proxy_port_id, NULL);
>> +    if (ret != 0) {
>> +        /*
>> +         * The PMD does not indicate the proxy port.
>> +         * Assume the proxy is unneeded.
>> +         */
>> +        dev->flow_transfer_proxy_port_id = dev->port_id;
>> +    }
>> +#else /* ! ALLOW_EXPERIMENTAL_API */
>> +    /* No API to get transfer proxy; assume the proxy is unneeded. */
>> +    dev->flow_transfer_proxy_port_id = dev->port_id;
>> #endif /* ALLOW_EXPERIMENTAL_API */
>>
>>     rte_eth_dev_info_get(dev->port_id, &info); @@ -3762,8 +3785,12 @@
>> netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>                    const char *argv[], void *aux OVS_UNUSED)  {
>>     struct ds used_interfaces = DS_EMPTY_INITIALIZER;
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    struct netdev_dpdk *dev_self = NULL; #endif /*
>> +ALLOW_EXPERIMENTAL_API */
>>     struct rte_eth_dev_info dev_info;
>>     dpdk_port_t sibling_port_id;
>> +    struct netdev_dpdk *dev;
>>     dpdk_port_t port_id;
>>     bool used = false;
>>     char *response;
>> @@ -3781,8 +3808,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int
>> argc OVS_UNUSED,
>>                   argv[1]);
>>
>>     RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
>> -        struct netdev_dpdk *dev;
>> -
>>         LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>>             if (dev->port_id != sibling_port_id) {
>>                 continue;
>> @@ -3802,6 +3827,27 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>> int argc OVS_UNUSED,
>>     }
>>     ds_destroy(&used_interfaces);
>>
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    /*
>> +     * The device being detached may happen to be a flow proxy port
>> +     * for another device (still attached). If so, do not allow to
>> +     * detach. Devices dependent on this one must be detached first.
> I don't think it is acceptable to deny detaching the port, or to enforce such an ordering. For example, ports are detached on shutdown in an unknown order.

I see your point. However, as far as I can tell, this specific
function is not the one that unplugs devices on the "global" OvS
shutdown. Rather, it is the one invoked when the user asks to
unplug a specific device. So, from my point of view, it is OK
to deny detaching the port and inform the user of the reason.

> Suppose A is the proxy port for ports B and C. When port A is about to be detached, flush the offloads of ports B and C (in addition to flushing the offloads of port A). Then it is safe to detach it.

Perhaps, yes: even if the user asks to unplug a specific device (see
my explanation above), it would be nicer to flush the dependent ports
than to reject the call. Got it. But do we have in-house APIs to flush
all offloads on a specific port in OvS? Even if we do, I'm afraid they
do not specifically flush transfer-related flows.

>> +     */
>> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>> +        if (dev->port_id == port_id) {
>> +            dev_self = dev;
>> +        } else if (dev->flow_transfer_proxy_port_id == port_id) {
>> +            response = xasprintf("Device '%s' can not be detached (flow proxy)",
>> +                                 argv[1]);
>> +            goto error;
>> +        }
>> +    }
>> +
>> +    /* Indicate that the device being detached no longer needs a flow proxy.
>> */
>> +    if (dev_self != NULL)
>> +        dev_self->flow_transfer_proxy_port_id = port_id; #endif /*
>> +ALLOW_EXPERIMENTAL_API */
>> +
>>     rte_eth_dev_info_get(port_id, &dev_info);
>>     rte_eth_dev_close(port_id);
>>     if (rte_dev_remove(dev_info.device) < 0) { @@ -5167,7 +5213,8 @@
>> unlock:
>> }
>>
>> int
>> -netdev_dpdk_get_port_id(struct netdev *netdev)
>> +netdev_dpdk_get_port_id(struct netdev *netdev,
>> +                        bool flow_transfer_proxy)
> It might be better to have a separate API to get the proxy port ID and use it only where needed.

No strong opinion. Maybe the single API approach produces less code.
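
For comparison, a call site under the two options might look roughly
like this (the dedicated getter name is hypothetical):

    /* Single getter with a flag: */
    port = netdev_dpdk_get_port_id(netdev, attr->transfer);

    /* Dedicated getter for the proxy: */
    port = attr->transfer
           ? netdev_dpdk_get_flow_transfer_proxy_port_id(netdev)
           : netdev_dpdk_get_port_id(netdev);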

>> {
>>     struct netdev_dpdk *dev;
>>     int ret = -1;
>> @@ -5178,7 +5225,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
>>
>>     dev = netdev_dpdk_cast(netdev);
>>     ovs_mutex_lock(&dev->mutex);
>> -    ret = dev->port_id;
>> +    ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id :
>> + dev->port_id;
>>     ovs_mutex_unlock(&dev->mutex);
>> out:
>>     return ret;
>> @@ -5214,13 +5261,15 @@ out:
>>
>> int
>> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>> -                             struct rte_flow *rte_flow,
>> +                             bool transfer, struct rte_flow *rte_flow,
>>                              struct rte_flow_error *error)  {
>>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>     int ret;
>>
>> -    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
>> +    ret = rte_flow_destroy(transfer ?
>> +                           dev->flow_transfer_proxy_port_id : dev->port_id,
>> +                           rte_flow, error);
>>     return ret;
>> }
>>
>> @@ -5234,7 +5283,19 @@ netdev_dpdk_rte_flow_create(struct netdev
>> *netdev,
>>     struct rte_flow *flow;
>>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> -    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    if (!attr->transfer) {
>> +        /*
>> +         * The 1st item in any pattern is a traffic source one.
>> +         * It is unnecessary in the case of non-transfer rules.
>> +         */
>> +        ++(items);
>> +    }
>> +#endif /* ALLOW_EXPERIMENTAL_API */
>> +
>> +    flow = rte_flow_create(attr->transfer ?
>> +                           dev->flow_transfer_proxy_port_id : dev->port_id,
>> +                           attr, items, actions, error);
>>     return flow;
>> }
>>
>> @@ -5262,7 +5323,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev
>> *netdev,
>>     }
>>
>>     dev = netdev_dpdk_cast(netdev);
>> -    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
>> +    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
>> +                         actions, query, error);
>>     return ret;
>> }
>>
>> @@ -5284,8 +5346,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct
>> netdev *netdev,
>>
>>     dev = netdev_dpdk_cast(netdev);
>>     ovs_mutex_lock(&dev->mutex);
>> -    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
>> -                                    num_of_actions, error);
>> +    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id,
>> tunnel,
>> +                                    actions, num_of_actions, error);
>>     ovs_mutex_unlock(&dev->mutex);
>>     return ret;
>> }
>> @@ -5306,8 +5368,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct
>> netdev *netdev,
>>
>>     dev = netdev_dpdk_cast(netdev);
>>     ovs_mutex_lock(&dev->mutex);
>> -    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
>> -                                error);
>> +    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
>> +                                items, num_of_items, error);
>>     ovs_mutex_unlock(&dev->mutex);
>>     return ret;
>> }
>> @@ -5328,7 +5390,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct
>> netdev *netdev,
>>
>>     dev = netdev_dpdk_cast(netdev);
>>     ovs_mutex_lock(&dev->mutex);
>> -    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
>> +    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
>> +                                    m, info, error);
>>     ovs_mutex_unlock(&dev->mutex);
>>     return ret;
>> }
>> @@ -5349,8 +5412,8 @@
>> netdev_dpdk_rte_flow_tunnel_action_decap_release(
>>
>>     dev = netdev_dpdk_cast(netdev);
>>     ovs_mutex_lock(&dev->mutex);
>> -    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
>> -                                               num_of_actions, error);
>> +    ret = rte_flow_tunnel_action_decap_release(dev-
>>> flow_transfer_proxy_port_id,
>> +                                               actions, num_of_actions,
>> + error);
>>     ovs_mutex_unlock(&dev->mutex);
>>     return ret;
>> }
>> @@ -5370,8 +5433,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct
>> netdev *netdev,
>>
>>     dev = netdev_dpdk_cast(netdev);
>>     ovs_mutex_lock(&dev->mutex);
>> -    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
>> -                                       error);
>> +    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
>> +                                       items, num_of_items, error);
>>     ovs_mutex_unlock(&dev->mutex);
>>     return ret;
>> }
>> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 699be3fb4..6f18a73b2
>> 100644
>> --- a/lib/netdev-dpdk.h
>> +++ b/lib/netdev-dpdk.h
>> @@ -35,7 +35,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev *);
>>
>> int
>> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>> -                             struct rte_flow *rte_flow,
>> +                             bool transfer, struct rte_flow *rte_flow,
>>                              struct rte_flow_error *error);  struct rte_flow *
>> netdev_dpdk_rte_flow_create(struct netdev *netdev, @@ -49,7 +49,7 @@
>> netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>>                                  struct rte_flow_query_count *query,
>>                                  struct rte_flow_error *error);  int -
>> netdev_dpdk_get_port_id(struct netdev *netdev);
>> +netdev_dpdk_get_port_id(struct netdev *netdev, bool
>> +flow_transfer_proxy);
>>
>> #ifdef ALLOW_EXPERIMENTAL_API
>>
>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index
>> 9cd5a0159..9320a1d0b 100644
>> --- a/lib/netdev-offload-dpdk.c
>> +++ b/lib/netdev-offload-dpdk.c
>> @@ -353,8 +353,23 @@ dump_flow_pattern(struct ds *s,
>>
>>     if (item->type == RTE_FLOW_ITEM_TYPE_END) {
>>         ds_put_cstr(s, "end ");
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
>> +        const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
>> +        const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
>> +
>> +        ds_put_cstr(s, "represented_port ");
>> +
>> +        DUMP_PATTERN_ITEM(ethdev_mask->port_id, false, "ethdev_port_id",
>> +                          "%"PRIu16, ethdev_spec->port_id,
>> +                          ethdev_mask->port_id, 0);
>> +    } else if (flow_patterns->tnl_pmd_items_cnt &&
>> +               pattern_index < 1 /* REPRESENTED_PORT */ +
>> +                               flow_patterns->tnl_pmd_items_cnt) {
>> +#else /* ! ALLOW_EXPERIMENTAL_API */
>>     } else if (flow_patterns->tnl_pmd_items_cnt &&
>>                pattern_index < flow_patterns->tnl_pmd_items_cnt) {
>> +#endif /* ALLOW_EXPERIMENTAL_API */
>>         return;
>>     } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
>>         const struct rte_flow_item_eth *eth_spec = item->spec; @@ -927,7
>> +942,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>             extra_str = ds_cstr(&s_extra);
>>             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d
>> %s",
>>                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
>> -                        netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>> +                        netdev_dpdk_get_port_id(netdev, attr->transfer),
>> +                        ds_cstr(&s));
>>         }
>>     } else {
>>         enum vlog_level level = VLL_WARN; @@ -942,7 +958,8 @@
>> netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>             extra_str = ds_cstr(&s_extra);
>>             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>>                     netdev_get_name(netdev), extra_str,
>> -                    netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>> +                    netdev_dpdk_get_port_id(netdev, attr->transfer),
>> +                    ds_cstr(&s));
>>         }
>>     }
>>     ds_destroy(&s);
>> @@ -1035,6 +1052,12 @@ free_flow_patterns(struct flow_patterns
>> *patterns)
>>     struct rte_flow_error error;
>>     int i;
>>
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    /* REPRESENTED_PORT */
>> +    free(CONST_CAST(void *, patterns->items[0].spec));
>> +    free(CONST_CAST(void *, patterns->items[0].mask)); #endif /*
> Hard-coding index [0] for this match is problematic. You could either malloc a regular item for it (and free it here), or check the item type inside the 0..n-1 loop.

I don't believe that using [0] is too problematic, especially
given the accompanying comments. Also, wouldn't the extra type
checks in the 0..n-1 loop affect the insertion rate?

Perhaps it pays to keep things simple in this particular case.

>> +ALLOW_EXPERIMENTAL_API */
>> +
>>     if (patterns->tnl_pmd_items) {
>>         struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
>>         uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt; @@ -
>> 1049,7 +1072,12 @@ free_flow_patterns(struct flow_patterns *patterns)
>>         }
>>     }
>>
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
>> +         i < patterns->cnt; i++) {
>> +#else /* ! ALLOW_EXPERIMENTAL_API */
>>     for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
>> +#endif /* ALLOW_EXPERIMENTAL_API */
>>         if (patterns->items[i].spec) {
>>             free(CONST_CAST(void *, patterns->items[i].spec));
>>         }
>> @@ -1115,13 +1143,13 @@ vport_to_rte_tunnel(struct netdev *vport,
>>         tunnel->tp_dst = tnl_cfg->dst_port;
>>         if (!VLOG_DROP_DBG(&rl)) {
>>             ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>> -                          netdev_dpdk_get_port_id(netdev));
>> +                          netdev_dpdk_get_port_id(netdev, true));
>>         }
>>     } else if (!strcmp(netdev_get_type(vport), "gre")) {
>>         tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
>>         if (!VLOG_DROP_DBG(&rl)) {
>>             ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
>> -                          netdev_dpdk_get_port_id(netdev));
>> +                          netdev_dpdk_get_port_id(netdev, true));
>>         }
>>     } else {
>>         VLOG_DBG_RL(&rl, "vport type '%s' is not supported", @@ -1383,10
>> +1411,23 @@ parse_flow_match(struct netdev *netdev,
>>                  struct flow_patterns *patterns,
>>                  struct match *match)
>> {
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    struct netdev *physdev = netdev_ports_get(orig_in_port, netdev-
>>> dpif_type);
>> +    struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
>> +    struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof
>> +*ethdev_mask); #endif /* ALLOW_EXPERIMENTAL_API */
>>     struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
>>     struct flow *consumed_masks;
>>     uint8_t proto = 0;
>>
>> +#ifdef ALLOW_EXPERIMENTAL_API
>> +    /* Add an explicit traffic source item to the beginning of the pattern. */
>> +    ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
>> +    *ethdev_mask = rte_flow_item_ethdev_mask;
>> +    add_flow_pattern(patterns,
>> RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
>> +                     ethdev_spec, ethdev_mask, NULL); #endif /*
>> +ALLOW_EXPERIMENTAL_API */
>> +
>>     consumed_masks = &match->wc.masks;
>>
>>     if (!flow_tnl_dst_is_set(&match->flow.tunnel)) { @@ -1784,7 +1825,7 @@
>> add_represented_port_action(struct flow_actions *actions,
>>     struct rte_flow_action_ethdev *ethdev;
>>     int outdev_id;
>>
>> -    outdev_id = netdev_dpdk_get_port_id(outdev);
>> +    outdev_id = netdev_dpdk_get_port_id(outdev, false);
>>     if (outdev_id < 0) {
>>         return -1;
>>     }
>> @@ -1801,7 +1842,7 @@ add_port_id_action(struct flow_actions *actions,
>>     struct rte_flow_action_port_id *port_id;
>>     int outdev_id;
>>
>> -    outdev_id = netdev_dpdk_get_port_id(outdev);
>> +    outdev_id = netdev_dpdk_get_port_id(outdev, false);
>>     if (outdev_id < 0) {
>>         return -1;
>>     }
>> @@ -2333,6 +2374,7 @@ netdev_offload_dpdk_flow_destroy(struct
>> ufid_to_rte_flow_data *rte_flow_data)
>>     struct netdev *physdev;
>>     struct netdev *netdev;
>>     ovs_u128 *ufid;
>> +    bool transfer;
>>     int ret;
>>
>>     ovs_mutex_lock(&rte_flow_data->lock);
>> @@ -2344,12 +2386,13 @@ netdev_offload_dpdk_flow_destroy(struct
>> ufid_to_rte_flow_data *rte_flow_data)
>>
>>     rte_flow_data->dead = true;
>>
>> +    transfer = rte_flow_data->actions_offloaded;
>>     rte_flow = rte_flow_data->rte_flow;
>>     physdev = rte_flow_data->physdev;
>>     netdev = rte_flow_data->netdev;
>>     ufid = &rte_flow_data->ufid;
>>
>> -    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
>> +    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow,
>> + &error);
>>
>>     if (ret == 0) {
>>         struct netdev_offload_dpdk_data *data; @@ -2364,12 +2407,12 @@
>> netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data
>> *rte_flow_data)
>>                     " flow destroy %d ufid " UUID_FMT,
>>                     netdev_get_name(netdev), netdev_get_name(physdev),
>>                     (intptr_t) rte_flow,
>> -                    netdev_dpdk_get_port_id(physdev),
>> +                    netdev_dpdk_get_port_id(physdev, transfer),
>>                     UUID_ARGS((struct uuid *) ufid));
>>     } else {
>>         VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
>>                  netdev_get_name(netdev), netdev_get_name(physdev),
>> -                 netdev_dpdk_get_port_id(physdev),
>> +                 netdev_dpdk_get_port_id(physdev, transfer),
>>                  UUID_ARGS((struct uuid *) ufid));
>>     }
>>
>> --
>> 2.30.2
>
>

--
Best regards,
Ivan M
0-day Robot July 20, 2022, 6:32 p.m. UTC | #3
Bleep bloop.  Greetings Ivan Malov, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 80 characters long (recommended limit is 79)
#55 FILE: lib/netdev-dpdk.c:1207:
                                       &dev->flow_transfer_proxy_port_id, NULL);

WARNING: Line is 80 characters long (recommended limit is 79)
#105 FILE: lib/netdev-dpdk.c:3891:
            response = xasprintf("Device '%s' can not be detached (flow proxy)",

ERROR: Inappropriate bracing around statement
#112 FILE: lib/netdev-dpdk.c:3898:
    if (dev_self != NULL)

WARNING: Line is 80 characters long (recommended limit is 79)
#134 FILE: lib/netdev-dpdk.c:5282:
    ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : dev->port_id;

WARNING: Line is 80 characters long (recommended limit is 79)
#225 FILE: lib/netdev-dpdk.c:5469:
    ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,

Lines checked: 436, Warnings: 4, Errors: 1
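
For reference, the bracing error points at the single-statement "if" in
netdev_dpdk_detach(); OVS style expects braces even around one-line
bodies, roughly:

    if (dev_self != NULL) {
        dev_self->flow_transfer_proxy_port_id = port_id;
    }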


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 45e5d26d2..01fb40255 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -420,6 +420,7 @@  enum dpdk_hw_ol_features {
 
 struct netdev_dpdk {
     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+        dpdk_port_t flow_transfer_proxy_port_id;
         dpdk_port_t port_id;
 
         /* If true, device was attached by rte_eth_dev_attach(). */
@@ -1130,8 +1131,9 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
-
 #ifdef ALLOW_EXPERIMENTAL_API
+    int ret;
+
     /*
      * Full tunnel offload requires that tunnel ID metadata be
      * delivered with "miss" packets from the hardware to the
@@ -1141,6 +1143,27 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
      * Request delivery of such metadata.
      */
     dpdk_eth_dev_init_rx_metadata(dev);
+
+    /*
+     * Managing "transfer" flows requires that the user communicate them
+     * via a port which has the privilege to control the embedded switch.
+     * For some vendors, all ports in a given switching domain have
+     * this privilege. For other vendors, it's only one port.
+     *
+     * Get the proxy port ID and remember it for later use.
+     */
+    ret = rte_flow_pick_transfer_proxy(dev->port_id,
+                                       &dev->flow_transfer_proxy_port_id, NULL);
+    if (ret != 0) {
+        /*
+         * The PMD does not indicate the proxy port.
+         * Assume the proxy is unneeded.
+         */
+        dev->flow_transfer_proxy_port_id = dev->port_id;
+    }
+#else /* ! ALLOW_EXPERIMENTAL_API */
+    /* No API to get transfer proxy; assume the proxy is unneeded. */
+    dev->flow_transfer_proxy_port_id = dev->port_id;
 #endif /* ALLOW_EXPERIMENTAL_API */
 
     rte_eth_dev_info_get(dev->port_id, &info);
@@ -3762,8 +3785,12 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
                    const char *argv[], void *aux OVS_UNUSED)
 {
     struct ds used_interfaces = DS_EMPTY_INITIALIZER;
+#ifdef ALLOW_EXPERIMENTAL_API
+    struct netdev_dpdk *dev_self = NULL;
+#endif /* ALLOW_EXPERIMENTAL_API */
     struct rte_eth_dev_info dev_info;
     dpdk_port_t sibling_port_id;
+    struct netdev_dpdk *dev;
     dpdk_port_t port_id;
     bool used = false;
     char *response;
@@ -3781,8 +3808,6 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
                   argv[1]);
 
     RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
-        struct netdev_dpdk *dev;
-
         LIST_FOR_EACH (dev, list_node, &dpdk_list) {
             if (dev->port_id != sibling_port_id) {
                 continue;
@@ -3802,6 +3827,27 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
     }
     ds_destroy(&used_interfaces);
 
+#ifdef ALLOW_EXPERIMENTAL_API
+    /*
+     * The device being detached may happen to be a flow proxy port
+     * for another device (still attached). If so, do not allow to
+     * detach. Devices dependent on this one must be detached first.
+     */
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (dev->port_id == port_id) {
+            dev_self = dev;
+        } else if (dev->flow_transfer_proxy_port_id == port_id) {
+            response = xasprintf("Device '%s' can not be detached (flow proxy)",
+                                 argv[1]);
+            goto error;
+        }
+    }
+
+    /* Indicate that the device being detached no longer needs a flow proxy. */
+    if (dev_self != NULL)
+        dev_self->flow_transfer_proxy_port_id = port_id;
+#endif /* ALLOW_EXPERIMENTAL_API */
+
     rte_eth_dev_info_get(port_id, &dev_info);
     rte_eth_dev_close(port_id);
     if (rte_dev_remove(dev_info.device) < 0) {
@@ -5167,7 +5213,8 @@  unlock:
 }
 
 int
-netdev_dpdk_get_port_id(struct netdev *netdev)
+netdev_dpdk_get_port_id(struct netdev *netdev,
+                        bool flow_transfer_proxy)
 {
     struct netdev_dpdk *dev;
     int ret = -1;
@@ -5178,7 +5225,7 @@  netdev_dpdk_get_port_id(struct netdev *netdev)
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = dev->port_id;
+    ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : dev->port_id;
     ovs_mutex_unlock(&dev->mutex);
 out:
     return ret;
@@ -5214,13 +5261,15 @@  out:
 
 int
 netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
-                             struct rte_flow *rte_flow,
+                             bool transfer, struct rte_flow *rte_flow,
                              struct rte_flow_error *error)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     int ret;
 
-    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+    ret = rte_flow_destroy(transfer ?
+                           dev->flow_transfer_proxy_port_id : dev->port_id,
+                           rte_flow, error);
     return ret;
 }
 
@@ -5234,7 +5283,19 @@  netdev_dpdk_rte_flow_create(struct netdev *netdev,
     struct rte_flow *flow;
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
+#ifdef ALLOW_EXPERIMENTAL_API
+    if (!attr->transfer) {
+        /*
+         * The 1st item in any pattern is a traffic source one.
+         * It is unnecessary in the case of non-transfer rules.
+         */
+        ++(items);
+    }
+#endif /* ALLOW_EXPERIMENTAL_API */
+
+    flow = rte_flow_create(attr->transfer ?
+                           dev->flow_transfer_proxy_port_id : dev->port_id,
+                           attr, items, actions, error);
     return flow;
 }
 
@@ -5262,7 +5323,8 @@  netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
     }
 
     dev = netdev_dpdk_cast(netdev);
-    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
+    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
+                         actions, query, error);
     return ret;
 }
 
@@ -5284,8 +5346,8 @@  netdev_dpdk_rte_flow_tunnel_decap_set(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
-                                    num_of_actions, error);
+    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id, tunnel,
+                                    actions, num_of_actions, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5306,8 +5368,8 @@  netdev_dpdk_rte_flow_tunnel_match(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
-                                error);
+    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
+                                items, num_of_items, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5328,7 +5390,8 @@  netdev_dpdk_rte_flow_get_restore_info(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
+    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
+                                    m, info, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5349,8 +5412,8 @@  netdev_dpdk_rte_flow_tunnel_action_decap_release(
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
-                                               num_of_actions, error);
+    ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,
+                                               actions, num_of_actions, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5370,8 +5433,8 @@  netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
-                                       error);
+    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
+                                       items, num_of_items, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 699be3fb4..6f18a73b2 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -35,7 +35,7 @@  bool netdev_dpdk_flow_api_supported(struct netdev *);
 
 int
 netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
-                             struct rte_flow *rte_flow,
+                             bool transfer, struct rte_flow *rte_flow,
                              struct rte_flow_error *error);
 struct rte_flow *
 netdev_dpdk_rte_flow_create(struct netdev *netdev,
@@ -49,7 +49,7 @@  netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
                                  struct rte_flow_query_count *query,
                                  struct rte_flow_error *error);
 int
-netdev_dpdk_get_port_id(struct netdev *netdev);
+netdev_dpdk_get_port_id(struct netdev *netdev, bool flow_transfer_proxy);
 
 #ifdef ALLOW_EXPERIMENTAL_API
 
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 9cd5a0159..9320a1d0b 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -353,8 +353,23 @@  dump_flow_pattern(struct ds *s,
 
     if (item->type == RTE_FLOW_ITEM_TYPE_END) {
         ds_put_cstr(s, "end ");
+#ifdef ALLOW_EXPERIMENTAL_API
+    } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
+        const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
+        const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
+
+        ds_put_cstr(s, "represented_port ");
+
+        DUMP_PATTERN_ITEM(ethdev_mask->port_id, false, "ethdev_port_id",
+                          "%"PRIu16, ethdev_spec->port_id,
+                          ethdev_mask->port_id, 0);
+    } else if (flow_patterns->tnl_pmd_items_cnt &&
+               pattern_index < 1 /* REPRESENTED_PORT */ +
+                               flow_patterns->tnl_pmd_items_cnt) {
+#else /* ! ALLOW_EXPERIMENTAL_API */
     } else if (flow_patterns->tnl_pmd_items_cnt &&
                pattern_index < flow_patterns->tnl_pmd_items_cnt) {
+#endif /* ALLOW_EXPERIMENTAL_API */
         return;
     } else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
         const struct rte_flow_item_eth *eth_spec = item->spec;
@@ -927,7 +942,8 @@  netdev_offload_dpdk_flow_create(struct netdev *netdev,
             extra_str = ds_cstr(&s_extra);
             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
-                        netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
+                        netdev_dpdk_get_port_id(netdev, attr->transfer),
+                        ds_cstr(&s));
         }
     } else {
         enum vlog_level level = VLL_WARN;
@@ -942,7 +958,8 @@  netdev_offload_dpdk_flow_create(struct netdev *netdev,
             extra_str = ds_cstr(&s_extra);
             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
                     netdev_get_name(netdev), extra_str,
-                    netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
+                    netdev_dpdk_get_port_id(netdev, attr->transfer),
+                    ds_cstr(&s));
         }
     }
     ds_destroy(&s);
@@ -1035,6 +1052,12 @@  free_flow_patterns(struct flow_patterns *patterns)
     struct rte_flow_error error;
     int i;
 
+#ifdef ALLOW_EXPERIMENTAL_API
+    /* REPRESENTED_PORT */
+    free(CONST_CAST(void *, patterns->items[0].spec));
+    free(CONST_CAST(void *, patterns->items[0].mask));
+#endif /* ALLOW_EXPERIMENTAL_API */
+
     if (patterns->tnl_pmd_items) {
         struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
         uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt;
@@ -1049,7 +1072,12 @@  free_flow_patterns(struct flow_patterns *patterns)
         }
     }
 
+#ifdef ALLOW_EXPERIMENTAL_API
+    for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
+         i < patterns->cnt; i++) {
+#else /* ! ALLOW_EXPERIMENTAL_API */
     for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
+#endif /* ALLOW_EXPERIMENTAL_API */
         if (patterns->items[i].spec) {
             free(CONST_CAST(void *, patterns->items[i].spec));
         }
@@ -1115,13 +1143,13 @@  vport_to_rte_tunnel(struct netdev *vport,
         tunnel->tp_dst = tnl_cfg->dst_port;
         if (!VLOG_DROP_DBG(&rl)) {
             ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
-                          netdev_dpdk_get_port_id(netdev));
+                          netdev_dpdk_get_port_id(netdev, true));
         }
     } else if (!strcmp(netdev_get_type(vport), "gre")) {
         tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
         if (!VLOG_DROP_DBG(&rl)) {
             ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
-                          netdev_dpdk_get_port_id(netdev));
+                          netdev_dpdk_get_port_id(netdev, true));
         }
     } else {
         VLOG_DBG_RL(&rl, "vport type '%s' is not supported",
@@ -1383,10 +1411,23 @@  parse_flow_match(struct netdev *netdev,
                  struct flow_patterns *patterns,
                  struct match *match)
 {
+#ifdef ALLOW_EXPERIMENTAL_API
+    struct netdev *physdev = netdev_ports_get(orig_in_port, netdev->dpif_type);
+    struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
+    struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof *ethdev_mask);
+#endif /* ALLOW_EXPERIMENTAL_API */
     struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
     struct flow *consumed_masks;
     uint8_t proto = 0;
 
+#ifdef ALLOW_EXPERIMENTAL_API
+    /* Add an explicit traffic source item to the beginning of the pattern. */
+    ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
+    *ethdev_mask = rte_flow_item_ethdev_mask;
+    add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
+                     ethdev_spec, ethdev_mask, NULL);
+#endif /* ALLOW_EXPERIMENTAL_API */
+
     consumed_masks = &match->wc.masks;
 
     if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
@@ -1784,7 +1825,7 @@  add_represented_port_action(struct flow_actions *actions,
     struct rte_flow_action_ethdev *ethdev;
     int outdev_id;
 
-    outdev_id = netdev_dpdk_get_port_id(outdev);
+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
     if (outdev_id < 0) {
         return -1;
     }
@@ -1801,7 +1842,7 @@  add_port_id_action(struct flow_actions *actions,
     struct rte_flow_action_port_id *port_id;
     int outdev_id;
 
-    outdev_id = netdev_dpdk_get_port_id(outdev);
+    outdev_id = netdev_dpdk_get_port_id(outdev, false);
     if (outdev_id < 0) {
         return -1;
     }
@@ -2333,6 +2374,7 @@  netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
     struct netdev *physdev;
     struct netdev *netdev;
     ovs_u128 *ufid;
+    bool transfer;
     int ret;
 
     ovs_mutex_lock(&rte_flow_data->lock);
@@ -2344,12 +2386,13 @@  netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
 
     rte_flow_data->dead = true;
 
+    transfer = rte_flow_data->actions_offloaded;
     rte_flow = rte_flow_data->rte_flow;
     physdev = rte_flow_data->physdev;
     netdev = rte_flow_data->netdev;
     ufid = &rte_flow_data->ufid;
 
-    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
+    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow, &error);
 
     if (ret == 0) {
         struct netdev_offload_dpdk_data *data;
@@ -2364,12 +2407,12 @@  netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
                     " flow destroy %d ufid " UUID_FMT,
                     netdev_get_name(netdev), netdev_get_name(physdev),
                     (intptr_t) rte_flow,
-                    netdev_dpdk_get_port_id(physdev),
+                    netdev_dpdk_get_port_id(physdev, transfer),
                     UUID_ARGS((struct uuid *) ufid));
     } else {
         VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
                  netdev_get_name(netdev), netdev_get_name(physdev),
-                 netdev_dpdk_get_port_id(physdev),
+                 netdev_dpdk_get_port_id(physdev, transfer),
                  UUID_ARGS((struct uuid *) ufid));
     }