diff mbox series

[ovs-dev,dpdk-latest,v4,1/5] netdev-dpdk: use flow transfer proxy

Message ID 20230426140510.949421-2-simon.horman@corigine.com
State Changes Requested
Headers show
Series Add support for DPDK meter HW offload | expand

Checks

Context Check Description
ovsrobot/apply-robot fail apply and check: fail
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Simon Horman April 26, 2023, 2:05 p.m. UTC
From: Peng Zhang <peng.zhang@corigine.com>

Manage "transfer" flows via the corresponding mechanism.
Doing so requires that the traffic source be specified
explicitly, via the corresponding pattern item.

Signed-off-by: Peng Zhang <peng.zhang@corigine.com>
Signed-off-by: Jin Liu <jin.liu@corigine.com>
Co-authored-by: Jin Liu <jin.liu@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
---
 lib/netdev-dpdk.c         | 97 +++++++++++++++++++++++++++++++++------
 lib/netdev-dpdk.h         |  3 ++
 lib/netdev-offload-dpdk.c | 16 ++++---
 3 files changed, 96 insertions(+), 20 deletions(-)

Comments

0-day Robot April 26, 2023, 2:16 p.m. UTC | #1
References:  <20230426140510.949421-2-simon.horman@corigine.com>
 

Bleep bloop.  Greetings Simon Horman, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 netdev-dpdk: use flow transfer proxy
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Patch skipped due to previous failure.

Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Ivan Malov May 3, 2023, 2 p.m. UTC | #2
Hello Simon,

This patch has me intrigued. By the looks of it, it bears uncanny
resemblance to patch [1] by another author. Is your patch based
on patch [1]? If yes, could you please comment on the following:

1) Your patch does not seem to reference the original author.
    Why is it so? Is there a problem, colleagues?

2) Your patch does not seem to address review feedback [2].
    There's a problem that has been indicated by Eli,
    regarding flow flush. Doesn't it still stand?

Interested to hear your input on this. Thank you.

[1] 
https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402152.html

[2] 
https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.html

On Wed, 26 Apr 2023, Simon Horman wrote:

> From: Peng Zhang <peng.zhang at corigine.com>
>
> Manage "transfer" flows via the corresponding mechanism.
> Doing so requires that the traffic source be specified
> explicitly, via the corresponding pattern item.
>
> Signed-off-by: Peng Zhang <peng.zhang at corigine.com>
> Signed-off-by: Jin Liu <jin.liu at corigine.com>
> Co-authored-by: Jin Liu <jin.liu at corigine.com>
> Signed-off-by: Simon Horman <simon.horman at corigine.com>
> ---
> lib/netdev-dpdk.c         | 97 +++++++++++++++++++++++++++++++++------
> lib/netdev-dpdk.h         |  3 ++
> lib/netdev-offload-dpdk.c | 16 ++++---
> 3 files changed, 96 insertions(+), 20 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index fff57f78279a..278d6afc08af 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -437,6 +437,7 @@ enum dpdk_hw_ol_features {
> struct netdev_dpdk {
>     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>         dpdk_port_t port_id;
> +        dpdk_port_t flow_transfer_proxy_port_id;
>
>         /* If true, device was attached by rte_eth_dev_attach(). */
>         bool attached;
> @@ -1155,6 +1156,24 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
> +    int ret;
> +
> +    /* Managing "transfer" flows requires that the user communicate them
> +     * via a port which has the privilege to control the embedded switch.
> +     * For some vendors, all ports in a given switching domain have
> +     * this privilege. For other vendors, it's only one port.
> +     *
> +     * Get the proxy port ID and remember it for later use.
> +     */
> +    ret = rte_flow_pick_transfer_proxy(dev->port_id,
> +                                       &dev->flow_transfer_proxy_port_id,
> +                                       NULL);
> +    if (ret != 0) {
> +        /* The PMD does not indicate the proxy port.
> +         * Assume the proxy is unneeded.
> +         */
> +        dev->flow_transfer_proxy_port_id = dev->port_id;
> +    }
>
>     rte_eth_dev_info_get(dev->port_id, &info);
>
> @@ -3767,8 +3786,10 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>                    const char *argv[], void *aux OVS_UNUSED)
> {
>     struct ds used_interfaces = DS_EMPTY_INITIALIZER;
> +    struct rte_flow_error ferror;
>     struct rte_eth_dev_info dev_info;
>     dpdk_port_t sibling_port_id;
> +    struct netdev_dpdk *dev;
>     dpdk_port_t port_id;
>     bool used = false;
>     char *response;
> @@ -3786,8 +3807,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>                   argv[1]);
>
>     RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
> -        struct netdev_dpdk *dev;
> -
>         LIST_FOR_EACH (dev, list_node, &dpdk_list) {
>             if (dev->port_id != sibling_port_id) {
>                 continue;
> @@ -3807,6 +3826,22 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>     }
>     ds_destroy(&used_interfaces);
>
> +    /* The device being detached may happen to be a flow proxy port
> +     * for another device (still attached). Update the flow proxy port id.
> +     */
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +        if (dev->port_id != port_id &&
> +            dev->flow_transfer_proxy_port_id == port_id) {
> +            if (rte_flow_flush(dev->flow_transfer_proxy_port_id, &ferror)) {
> +                response = xasprintf("Flush port failed, Device '%s' can not"
> +                                     "be detached (flow proxy)", argv[1]);
> +                goto error;
> +            }
> +            break;
> +        }
> +    }
> +
> +
>     rte_eth_dev_info_get(port_id, &dev_info);
>     rte_eth_dev_close(port_id);
>     if (rte_dev_remove(dev_info.device) < 0) {
> @@ -3817,6 +3852,16 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
>     response = xasprintf("All devices shared with device '%s' "
>                          "have been detached", argv[1]);
>
> +    /* The device being detached may happen to be a flow proxy port.
> +     * After the flow proxy port was detached, the related ports
> +     * will reconfigure the device and update the proxy_port_id.
> +     */
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +         if (dev->flow_transfer_proxy_port_id == port_id) {
> +                netdev_request_reconfigure(&dev->up);
> +        }
> +    }
> +
>     ovs_mutex_unlock(&dpdk_mutex);
>     unixctl_command_reply(conn, response);
>     free(response);
> @@ -5192,6 +5237,23 @@ out:
>     return ret;
> }
>
> +int
> +netdev_dpdk_get_prox_port_id(struct netdev *netdev)
> +{
> +    struct netdev_dpdk *dev;
> +    int ret;
> +
> +    if (!is_dpdk_class(netdev->netdev_class)) {
> +        return -1;
> +    }
> +
> +    dev = netdev_dpdk_cast(netdev);
> +    ovs_mutex_lock(&dev->mutex);
> +    ret = dev->flow_transfer_proxy_port_id;
> +    ovs_mutex_unlock(&dev->mutex);
> +    return ret;
> +}
> +
> bool
> netdev_dpdk_flow_api_supported(struct netdev *netdev)
> {
> @@ -5222,13 +5284,15 @@ out:
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> +                             bool transfer,
>                              struct rte_flow *rte_flow,
>                              struct rte_flow_error *error)
> {
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>     int ret;
>
> -    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
> +    ret = rte_flow_destroy(transfer ? dev->flow_transfer_proxy_port_id :
> +                           dev->port_id, rte_flow, error);
>     return ret;
> }
>
> @@ -5242,7 +5306,9 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
>     struct rte_flow *flow;
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> -    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
> +    flow = rte_flow_create(attr->transfer ?
> +                           dev->flow_transfer_proxy_port_id : dev->port_id,
> +                           attr, items, actions, error);
>     return flow;
> }
>
> @@ -5270,7 +5336,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>     }
>
>     dev = netdev_dpdk_cast(netdev);
> -    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
> +    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
> +                         actions, query, error);
>     return ret;
> }
>
> @@ -5292,8 +5359,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
> -    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
> -                                    num_of_actions, error);
> +    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id, tunnel,
> +                                    actions, num_of_actions, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
> @@ -5314,8 +5381,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
> -    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
> -                                error);
> +    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
> +                                items, num_of_items, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
> @@ -5336,7 +5403,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
> -    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
> +    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id, m, info,
> +                                    error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
> @@ -5357,8 +5425,9 @@ netdev_dpdk_rte_flow_tunnel_action_decap_release(
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
> -    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
> -                                               num_of_actions, error);
> +    ret = rte_flow_tunnel_action_decap_release(
> +                                            dev->flow_transfer_proxy_port_id,
> +                                            actions, num_of_actions, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
> @@ -5378,8 +5447,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev,
>
>     dev = netdev_dpdk_cast(netdev);
>     ovs_mutex_lock(&dev->mutex);
> -    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
> -                                       error);
> +    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id, items,
> +                                       num_of_items, error);
>     ovs_mutex_unlock(&dev->mutex);
>     return ret;
> }
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index 5cd95d00f5a5..5ba59a82e8a7 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -36,6 +36,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev *);
>
> int
> netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> +                             bool transfer,
>                              struct rte_flow *rte_flow,
>                              struct rte_flow_error *error);
> struct rte_flow *
> @@ -51,6 +52,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>                                  struct rte_flow_error *error);
> int
> netdev_dpdk_get_port_id(struct netdev *netdev);
> +int
> +netdev_dpdk_get_prox_port_id(struct netdev *netdev);
>
> #ifdef ALLOW_EXPERIMENTAL_API
>
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 38f00fd309e6..9b594ac1ef90 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -920,6 +920,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>             extra_str = ds_cstr(&s_extra);
>             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
>                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
> +                        attr->transfer ? netdev_dpdk_get_prox_port_id(netdev) :
>                         netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>         }
>     } else {
> @@ -935,6 +936,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>             extra_str = ds_cstr(&s_extra);
>             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>                     netdev_get_name(netdev), extra_str,
> +                    attr->transfer ? netdev_dpdk_get_prox_port_id(netdev) :
>                     netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
>         }
>     }
> @@ -1114,13 +1116,13 @@ vport_to_rte_tunnel(struct netdev *vport,
>         tunnel->tp_dst = tnl_cfg->dst_port;
>         if (!VLOG_DROP_DBG(&rl)) {
>             ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
> -                          netdev_dpdk_get_port_id(netdev));
> +                          netdev_dpdk_get_prox_port_id(netdev));
>         }
>     } else if (!strcmp(netdev_get_type(vport), "gre")) {
>         tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
>         if (!VLOG_DROP_DBG(&rl)) {
>             ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
> -                          netdev_dpdk_get_port_id(netdev));
> +                          netdev_dpdk_get_prox_port_id(netdev));
>         }
>     } else {
>         VLOG_DBG_RL(&rl, "vport type '%s' is not supported",
> @@ -2242,7 +2244,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
>                             struct nlattr *nl_actions,
>                             size_t actions_len)
> {
> -    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
> +    const struct rte_flow_attr flow_attr = { .transfer = 1 };
>     struct flow_actions actions = {
>         .actions = NULL,
>         .cnt = 0,
> @@ -2319,6 +2321,7 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
>     struct netdev *physdev;
>     struct netdev *netdev;
>     ovs_u128 *ufid;
> +    bool transfer;
>     int ret;
>
>     ovs_mutex_lock(&rte_flow_data->lock);
> @@ -2330,12 +2333,13 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
>
>     rte_flow_data->dead = true;
>
> +    transfer = rte_flow_data->actions_offloaded;
>     rte_flow = rte_flow_data->rte_flow;
>     physdev = rte_flow_data->physdev;
>     netdev = rte_flow_data->netdev;
>     ufid = &rte_flow_data->ufid;
>
> -    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
> +    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow, &error);
>
>     if (ret == 0) {
>         struct netdev_offload_dpdk_data *data;
> @@ -2350,12 +2354,12 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
>                     " flow destroy %d ufid " UUID_FMT,
>                     netdev_get_name(netdev), netdev_get_name(physdev),
>                     (intptr_t) rte_flow,
> -                    netdev_dpdk_get_port_id(physdev),
> +                    netdev_dpdk_get_prox_port_id(physdev),
>                     UUID_ARGS((struct uuid *) ufid));
>     } else {
>         VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
>                  netdev_get_name(netdev), netdev_get_name(physdev),
> -                 netdev_dpdk_get_port_id(physdev),
> +                 netdev_dpdk_get_prox_port_id(physdev),
>                  UUID_ARGS((struct uuid *) ufid));
>     }
>
> -- 
> 2.30.2
>
Ilya Maximets May 5, 2023, 7:06 p.m. UTC | #3
On 5/3/23 16:00, Ivan Malov wrote:
> Hello Simon,
> 
> This patch has me intrigued. By the looks of it, it bears uncanny
> resemblance to patch [1] by another author. Is your patch based
> on patch [1]? If yes, could you please comment on the following:
> 
> 1) Your patch does not seem to reference the original author.
>    Why is it so? Is there a problem, colleagues?

When re-using someone else's work, please, retain the original
authorship.  I see there are changes made to the patch, but it's
the same as the original in many parts.  Since you made changes,
you should add yourself as co-authors.  If you feel that changes
made are more significant than the original ptch, then you may
swap the authorship, but you should add the original author to
the list of co-authors anyway.

> 
> 2) Your patch does not seem to address review feedback [2].
>    There's a problem that has been indicated by Eli,
>    regarding flow flush. Doesn't it still stand?

In this version the rte_flow_flush() call is added instead of
failing the detach.  However,

a. the flush operation should have already been executed from
   the higher layer from do_del_port() in dpif-netdev.  So,
   it should not be needed.

b. The problem doesn't apper to be addressed, because related
   ports will not get their flows flushed.

Best regards, Ilya Maximets.

> 
> Interested to hear your input on this. Thank you.
> 
> [1] https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402152.html
> 
> [2] https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.html
Nole Zhang May 8, 2023, 3:29 a.m. UTC | #4
> -----Original Message-----
> From: Ilya Maximets <i.maximets@ovn.org>
> Sent: Saturday, May 6, 2023 3:07 AM
> To: Ivan Malov <ivan.malov@arknetworks.am>; Simon Horman
> <simon.horman@corigine.com>
> Cc: i.maximets@ovn.org; dev@openvswitch.org; Eli Britstein
> <elibr@nvidia.com>; Kevin Liu <jin.liu@corigine.com>; Chaoyong He
> <chaoyong.he@corigine.com>; oss-drivers <oss-drivers@corigine.com>; Nole
> Zhang <peng.zhang@nephogine.com>
> Subject: Re: [ovs-dev] [PATCH dpdk-latest v4 1/5] netdev-dpdk: use flow
> transfer proxy
> 
> On 5/3/23 16:00, Ivan Malov wrote:
> > Hello Simon,
> >
> > This patch has me intrigued. By the looks of it, it bears uncanny
> > resemblance to patch [1] by another author. Is your patch based on
> > patch [1]? If yes, could you please comment on the following:
> >
> > 1) Your patch does not seem to reference the original author.
> >    Why is it so? Is there a problem, colleagues?
> 
> When re-using someone else's work, please, retain the original authorship.  I
> see there are changes made to the patch, but it's the same as the original in
> many parts.  Since you made changes, you should add yourself as co-authors.
> If you feel that changes made are more significant than the original ptch,
> then you may swap the authorship, but you should add the original author to
> the list of co-authors anyway.

@Ivan Malov sorry again.
It is my fault, I'm very sorry for these.
Next version, I will add signed-by.
> 
> >
> > 2) Your patch does not seem to address review feedback [2].
> >    There's a problem that has been indicated by Eli,
> >    regarding flow flush. Doesn't it still stand?
> 
> In this version the rte_flow_flush() call is added instead of failing the detach.
> However,
> 
> a. the flush operation should have already been executed from
>    the higher layer from do_del_port() in dpif-netdev.  So,
>    it should not be needed.
> 
> b. The problem doesn't apper to be addressed, because related
>    ports will not get their flows flushed.

Ok, I think you are right, do you think if I use the  rte_flow_flush() for related ports is OK?

> 
> Best regards, Ilya Maximets.
> 
> >
> > Interested to hear your input on this. Thank you.
> >
> > [1]
> > https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402152.ht
> > ml
> >
> > [2]
> > https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.ht
> > ml
Simon Horman May 8, 2023, 11:40 a.m. UTC | #5
On Sat, May 06, 2023 at 05:26:17AM +0200, Nole Zhang wrote:
> 
> 
> > -----Original Message-----
> > From: Ilya Maximets <i.maximets@ovn.org>
> > Sent: Saturday, May 6, 2023 3:07 AM
> > To: Ivan Malov <ivan.malov@arknetworks.am>; Simon Horman
> > <simon.horman@corigine.com>
> > Cc: i.maximets@ovn.org; dev@openvswitch.org; Eli Britstein
> > <elibr@nvidia.com>; Kevin Liu <jin.liu@corigine.com>; Chaoyong He
> > <chaoyong.he@corigine.com>; oss-drivers <oss-drivers@corigine.com>; Nole
> > Zhang <peng.zhang@nephogine.com>
> > Subject: Re: [ovs-dev] [PATCH dpdk-latest v4 1/5] netdev-dpdk: use flow
> > transfer proxy
> > 
> > On 5/3/23 16:00, Ivan Malov wrote:
> > > Hello Simon,
> > >
> > > This patch has me intrigued. By the looks of it, it bears uncanny
> > > resemblance to patch [1] by another author. Is your patch based on
> > > patch [1]? If yes, could you please comment on the following:
> > >
> > > 1) Your patch does not seem to reference the original author.
> > >    Why is it so? Is there a problem, colleagues?
> > 
> > When re-using someone else's work, please, retain the original authorship.  I
> > see there are changes made to the patch, but it's the same as the original in
> > many parts.  Since you made changes, you should add yourself as co-authors.
> > If you feel that changes made are more significant than the original ptch,
> > then you may swap the authorship, but you should add the original author to
> > the list of co-authors anyway.
> 
> @Ivan Malov
> It is my fault, I'm very sorry for these.
>  Next version, I will add signed-by.

Yes, likewise, very sorry about this.
Ivan Malov May 8, 2023, 4:09 p.m. UTC | #6
Hi Nole, Simon, Ilya,

On Mon, 8 May 2023, Nole Zhang wrote:

>
>
>> -----Original Message-----
>> From: Ilya Maximets <i.maximets@ovn.org>
>> Sent: Saturday, May 6, 2023 3:07 AM
>> To: Ivan Malov <ivan.malov@arknetworks.am>; Simon Horman
>> <simon.horman@corigine.com>
>> Cc: i.maximets@ovn.org; dev@openvswitch.org; Eli Britstein
>> <elibr@nvidia.com>; Kevin Liu <jin.liu@corigine.com>; Chaoyong He
>> <chaoyong.he@corigine.com>; oss-drivers <oss-drivers@corigine.com>; Nole
>> Zhang <peng.zhang@nephogine.com>
>> Subject: Re: [ovs-dev] [PATCH dpdk-latest v4 1/5] netdev-dpdk: use flow
>> transfer proxy
>>
>> On 5/3/23 16:00, Ivan Malov wrote:
>>> Hello Simon,
>>>
>>> This patch has me intrigued. By the looks of it, it bears uncanny
>>> resemblance to patch [1] by another author. Is your patch based on
>>> patch [1]? If yes, could you please comment on the following:
>>>
>>> 1) Your patch does not seem to reference the original author.
>>>    Why is it so? Is there a problem, colleagues?
>>
>> When re-using someone else's work, please, retain the original authorship.  I
>> see there are changes made to the patch, but it's the same as the original in
>> many parts.  Since you made changes, you should add yourself as co-authors.
>> If you feel that changes made are more significant than the original ptch,
>> then you may swap the authorship, but you should add the original author to
>> the list of co-authors anyway.

Thanks for the clarification Ilya.

>
> @Ivan Malov sorry again.
> It is my fault, I'm very sorry for these.
> Next version, I will add signed-by.

Glad we see eye to eye about this and can understand
each other after all.

>>
>>>
>>> 2) Your patch does not seem to address review feedback [2].
>>>    There's a problem that has been indicated by Eli,
>>>    regarding flow flush. Doesn't it still stand?
>>
>> In this version the rte_flow_flush() call is added instead of failing the detach.
>> However,
>>
>> a. the flush operation should have already been executed from
>>    the higher layer from do_del_port() in dpif-netdev.  So,
>>    it should not be needed.
>>
>> b. The problem doesn't apper to be addressed, because related
>>    ports will not get their flows flushed.
>
> Ok, I think you are right, do you think if I use the  rte_flow_flush() for related ports is OK?

Please find Eli's review of the original patch where he explains
how it should be done:

https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.html

Thank you.

>
>>
>> Best regards, Ilya Maximets.
>>
>>>
>>> Interested to hear your input on this. Thank you.
>>>
>>> [1]
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402152.ht
>>> ml
>>>
>>> [2]
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.ht
>>> ml
>
Ilya Maximets May 9, 2023, 11:54 a.m. UTC | #7
On 5/8/23 18:09, Ivan Malov wrote:
> Hi Nole, Simon, Ilya,
> 
> On Mon, 8 May 2023, Nole Zhang wrote:
> 
>>
>>
>>> -----Original Message-----
>>> From: Ilya Maximets <i.maximets@ovn.org>
>>> Sent: Saturday, May 6, 2023 3:07 AM
>>> To: Ivan Malov <ivan.malov@arknetworks.am>; Simon Horman
>>> <simon.horman@corigine.com>
>>> Cc: i.maximets@ovn.org; dev@openvswitch.org; Eli Britstein
>>> <elibr@nvidia.com>; Kevin Liu <jin.liu@corigine.com>; Chaoyong He
>>> <chaoyong.he@corigine.com>; oss-drivers <oss-drivers@corigine.com>; Nole
>>> Zhang <peng.zhang@nephogine.com>
>>> Subject: Re: [ovs-dev] [PATCH dpdk-latest v4 1/5] netdev-dpdk: use flow
>>> transfer proxy
>>>
>>> On 5/3/23 16:00, Ivan Malov wrote:
>>>> Hello Simon,
>>>>
>>>> This patch has me intrigued. By the looks of it, it bears uncanny
>>>> resemblance to patch [1] by another author. Is your patch based on
>>>> patch [1]? If yes, could you please comment on the following:
>>>>
>>>> 1) Your patch does not seem to reference the original author.
>>>>    Why is it so? Is there a problem, colleagues?
>>>
>>> When re-using someone else's work, please, retain the original authorship.  I
>>> see there are changes made to the patch, but it's the same as the original in
>>> many parts.  Since you made changes, you should add yourself as co-authors.
>>> If you feel that changes made are more significant than the original ptch,
>>> then you may swap the authorship, but you should add the original author to
>>> the list of co-authors anyway.
> 
> Thanks for the clarification Ilya.
> 
>>
>> @Ivan Malov sorry again.
>> It is my fault, I'm very sorry for these.
>> Next version, I will add signed-by.
> 
> Glad we see eye to eye about this and can understand
> each other after all.
> 
>>>
>>>>
>>>> 2) Your patch does not seem to address review feedback [2].
>>>>    There's a problem that has been indicated by Eli,
>>>>    regarding flow flush. Doesn't it still stand?
>>>
>>> In this version the rte_flow_flush() call is added instead of failing the detach.
>>> However,
>>>
>>> a. the flush operation should have already been executed from
>>>    the higher layer from do_del_port() in dpif-netdev.  So,
>>>    it should not be needed.
>>>
>>> b. The problem doesn't apper to be addressed, because related
>>>    ports will not get their flows flushed.
>>
>> Ok, I think you are right, do you think if I use the  rte_flow_flush() for related ports is OK?
> 
> Please find Eli's review of the original patch where he explains
> how it should be done:
> 
> https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.html

Yep.  The flushing should come from the netdev-offload layer or higher.
We shouldn't blindly call rte_flow_flush() as that may leave resources
in the netdev-offload layer.

OTOH, I'm not really sure if it's safe to flush even from the netdev-offload
layer, because we can have a concurrent flow_put operation executed from a
different offload thread.  And last I checked (a year ago), rte_flow API is
not thread safe, even if documentation says so.  We might need to pause all
offload threads while the flush is in progress.

And we're potentially leaving flow marks associated in dpif-netdev that
might prevent flows from from being re-offloaded.

Best regards, Ilya Maximets.

> 
> Thank you.
> 
>>
>>>
>>> Best regards, Ilya Maximets.
>>>
>>>>
>>>> Interested to hear your input on this. Thank you.
>>>>
>>>> [1]
>>>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402152.ht
>>>> ml
>>>>
>>>> [2]
>>>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-February/402172.ht
>>>> ml
>>
diff mbox series

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index fff57f78279a..278d6afc08af 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -437,6 +437,7 @@  enum dpdk_hw_ol_features {
 struct netdev_dpdk {
     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
         dpdk_port_t port_id;
+        dpdk_port_t flow_transfer_proxy_port_id;
 
         /* If true, device was attached by rte_eth_dev_attach(). */
         bool attached;
@@ -1155,6 +1156,24 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+    int ret;
+
+    /* Managing "transfer" flows requires that the user communicate them
+     * via a port which has the privilege to control the embedded switch.
+     * For some vendors, all ports in a given switching domain have
+     * this privilege. For other vendors, it's only one port.
+     *
+     * Get the proxy port ID and remember it for later use.
+     */
+    ret = rte_flow_pick_transfer_proxy(dev->port_id,
+                                       &dev->flow_transfer_proxy_port_id,
+                                       NULL);
+    if (ret != 0) {
+        /* The PMD does not indicate the proxy port.
+         * Assume the proxy is unneeded.
+         */
+        dev->flow_transfer_proxy_port_id = dev->port_id;
+    }
 
     rte_eth_dev_info_get(dev->port_id, &info);
 
@@ -3767,8 +3786,10 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
                    const char *argv[], void *aux OVS_UNUSED)
 {
     struct ds used_interfaces = DS_EMPTY_INITIALIZER;
+    struct rte_flow_error ferror;
     struct rte_eth_dev_info dev_info;
     dpdk_port_t sibling_port_id;
+    struct netdev_dpdk *dev;
     dpdk_port_t port_id;
     bool used = false;
     char *response;
@@ -3786,8 +3807,6 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
                   argv[1]);
 
     RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
-        struct netdev_dpdk *dev;
-
         LIST_FOR_EACH (dev, list_node, &dpdk_list) {
             if (dev->port_id != sibling_port_id) {
                 continue;
@@ -3807,6 +3826,22 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
     }
     ds_destroy(&used_interfaces);
 
+    /* The device being detached may happen to be a flow proxy port
+     * for another device (still attached). Update the flow proxy port id.
+     */
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (dev->port_id != port_id &&
+            dev->flow_transfer_proxy_port_id == port_id) {
+            if (rte_flow_flush(dev->flow_transfer_proxy_port_id, &ferror)) {
+                response = xasprintf("Flush port failed, Device '%s' can not"
+                                     "be detached (flow proxy)", argv[1]);
+                goto error;
+            }
+            break;
+        }
+    }
+
+
     rte_eth_dev_info_get(port_id, &dev_info);
     rte_eth_dev_close(port_id);
     if (rte_dev_remove(dev_info.device) < 0) {
@@ -3817,6 +3852,16 @@  netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
     response = xasprintf("All devices shared with device '%s' "
                          "have been detached", argv[1]);
 
+    /* The device being detached may happen to be a flow proxy port.
+     * After the flow proxy port was detached, the related ports
+     * will reconfigure the device and update the proxy_port_id.
+     */
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+         if (dev->flow_transfer_proxy_port_id == port_id) {
+                netdev_request_reconfigure(&dev->up);
+        }
+    }
+
     ovs_mutex_unlock(&dpdk_mutex);
     unixctl_command_reply(conn, response);
     free(response);
@@ -5192,6 +5237,23 @@  out:
     return ret;
 }
 
+int
+netdev_dpdk_get_prox_port_id(struct netdev *netdev)
+{
+    struct netdev_dpdk *dev;
+    int ret;
+
+    if (!is_dpdk_class(netdev->netdev_class)) {
+        return -1;
+    }
+
+    dev = netdev_dpdk_cast(netdev);
+    ovs_mutex_lock(&dev->mutex);
+    ret = dev->flow_transfer_proxy_port_id;
+    ovs_mutex_unlock(&dev->mutex);
+    return ret;
+}
+
 bool
 netdev_dpdk_flow_api_supported(struct netdev *netdev)
 {
@@ -5222,13 +5284,15 @@  out:
 
 int
 netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
+                             bool transfer,
                              struct rte_flow *rte_flow,
                              struct rte_flow_error *error)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     int ret;
 
-    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+    ret = rte_flow_destroy(transfer ? dev->flow_transfer_proxy_port_id :
+                           dev->port_id, rte_flow, error);
     return ret;
 }
 
@@ -5242,7 +5306,9 @@  netdev_dpdk_rte_flow_create(struct netdev *netdev,
     struct rte_flow *flow;
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
+    flow = rte_flow_create(attr->transfer ?
+                           dev->flow_transfer_proxy_port_id : dev->port_id,
+                           attr, items, actions, error);
     return flow;
 }
 
@@ -5270,7 +5336,8 @@  netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
     }
 
     dev = netdev_dpdk_cast(netdev);
-    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
+    ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
+                         actions, query, error);
     return ret;
 }
 
@@ -5292,8 +5359,8 @@  netdev_dpdk_rte_flow_tunnel_decap_set(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
-                                    num_of_actions, error);
+    ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id, tunnel,
+                                    actions, num_of_actions, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5314,8 +5381,8 @@  netdev_dpdk_rte_flow_tunnel_match(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
-                                error);
+    ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
+                                items, num_of_items, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5336,7 +5403,8 @@  netdev_dpdk_rte_flow_get_restore_info(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
+    ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id, m, info,
+                                    error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5357,8 +5425,9 @@  netdev_dpdk_rte_flow_tunnel_action_decap_release(
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
-                                               num_of_actions, error);
+    ret = rte_flow_tunnel_action_decap_release(
+                                            dev->flow_transfer_proxy_port_id,
+                                            actions, num_of_actions, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
@@ -5378,8 +5447,8 @@  netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev,
 
     dev = netdev_dpdk_cast(netdev);
     ovs_mutex_lock(&dev->mutex);
-    ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
-                                       error);
+    ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id, items,
+                                       num_of_items, error);
     ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 5cd95d00f5a5..5ba59a82e8a7 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -36,6 +36,7 @@  bool netdev_dpdk_flow_api_supported(struct netdev *);
 
 int
 netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
+                             bool transfer,
                              struct rte_flow *rte_flow,
                              struct rte_flow_error *error);
 struct rte_flow *
@@ -51,6 +52,8 @@  netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
                                  struct rte_flow_error *error);
 int
 netdev_dpdk_get_port_id(struct netdev *netdev);
+int
+netdev_dpdk_get_prox_port_id(struct netdev *netdev);
 
 #ifdef ALLOW_EXPERIMENTAL_API
 
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 38f00fd309e6..9b594ac1ef90 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -920,6 +920,7 @@  netdev_offload_dpdk_flow_create(struct netdev *netdev,
             extra_str = ds_cstr(&s_extra);
             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
+                        attr->transfer ? netdev_dpdk_get_prox_port_id(netdev) :
                         netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
         }
     } else {
@@ -935,6 +936,7 @@  netdev_offload_dpdk_flow_create(struct netdev *netdev,
             extra_str = ds_cstr(&s_extra);
             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
                     netdev_get_name(netdev), extra_str,
+                    attr->transfer ? netdev_dpdk_get_prox_port_id(netdev) :
                     netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
         }
     }
@@ -1114,13 +1116,13 @@  vport_to_rte_tunnel(struct netdev *vport,
         tunnel->tp_dst = tnl_cfg->dst_port;
         if (!VLOG_DROP_DBG(&rl)) {
             ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
-                          netdev_dpdk_get_port_id(netdev));
+                          netdev_dpdk_get_prox_port_id(netdev));
         }
     } else if (!strcmp(netdev_get_type(vport), "gre")) {
         tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
         if (!VLOG_DROP_DBG(&rl)) {
             ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
-                          netdev_dpdk_get_port_id(netdev));
+                          netdev_dpdk_get_prox_port_id(netdev));
         }
     } else {
         VLOG_DBG_RL(&rl, "vport type '%s' is not supported",
@@ -2242,7 +2244,7 @@  netdev_offload_dpdk_actions(struct netdev *netdev,
                             struct nlattr *nl_actions,
                             size_t actions_len)
 {
-    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
+    const struct rte_flow_attr flow_attr = { .transfer = 1 };
     struct flow_actions actions = {
         .actions = NULL,
         .cnt = 0,
@@ -2319,6 +2321,7 @@  netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
     struct netdev *physdev;
     struct netdev *netdev;
     ovs_u128 *ufid;
+    bool transfer;
     int ret;
 
     ovs_mutex_lock(&rte_flow_data->lock);
@@ -2330,12 +2333,13 @@  netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
 
     rte_flow_data->dead = true;
 
+    transfer = rte_flow_data->actions_offloaded;
     rte_flow = rte_flow_data->rte_flow;
     physdev = rte_flow_data->physdev;
     netdev = rte_flow_data->netdev;
     ufid = &rte_flow_data->ufid;
 
-    ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
+    ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow, &error);
 
     if (ret == 0) {
         struct netdev_offload_dpdk_data *data;
@@ -2350,12 +2354,12 @@  netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
                     " flow destroy %d ufid " UUID_FMT,
                     netdev_get_name(netdev), netdev_get_name(physdev),
                     (intptr_t) rte_flow,
-                    netdev_dpdk_get_port_id(physdev),
+                    netdev_dpdk_get_prox_port_id(physdev),
                     UUID_ARGS((struct uuid *) ufid));
     } else {
         VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
                  netdev_get_name(netdev), netdev_get_name(physdev),
-                 netdev_dpdk_get_port_id(physdev),
+                 netdev_dpdk_get_prox_port_id(physdev),
                  UUID_ARGS((struct uuid *) ufid));
     }