diff mbox series

[ovs-dev,15/20] netdev-offload-dpdk-flow: Support offload of output action

Message ID 20191120152826.25074-16-elibr@mellanox.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series netdev datapath actions offload | expand

Commit Message

Eli Britstein Nov. 20, 2019, 3:28 p.m. UTC
Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
---
 NEWS                           |  2 +
 lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 85 insertions(+), 4 deletions(-)

Comments

0-day Robot Nov. 20, 2019, 4:34 p.m. UTC | #1
Bleep bloop.  Greetings Eli Britstein, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
Failed to merge in the changes.
Patch failed at 0001 netdev-offload-dpdk-flow: Support offload of output action
The copy of the patch that failed is found in:
   /var/lib/jenkins/jobs/upstream_build_from_pw/workspace/.git/rebase-apply/patch
When you have resolved this problem, run "git am --resolved".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Ilya Maximets Nov. 22, 2019, 4:19 p.m. UTC | #2
On 20.11.2019 16:28, Eli Britstein wrote:
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> ---
>  NEWS                           |  2 +
>  lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 85 insertions(+), 4 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 88b818948..ca9c2b230 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,6 +10,8 @@ Post-v2.12.0
>         if supported by libbpf.
>       * Add option to enable, disable and query TCP sequence checking in
>         conntrack.
> +   - DPDK:
> +     * Add hardware offload support for output actions.
>  
>  v2.12.0 - 03 Sep 2019
>  ---------------------
> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
> index dbbc45e99..6e7efb315 100644
> --- a/lib/netdev-offload-dpdk-flow.c
> +++ b/lib/netdev-offload-dpdk-flow.c
> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>          } else {
>              ds_put_cstr(s, "  RSS = null\n");
>          }
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
> +        const struct rte_flow_action_count *count = actions->conf;
> +
> +        ds_put_cstr(s, "rte flow count action:\n");
> +        if (count) {
> +            ds_put_format(s,
> +                          "  Count: shared=%d, id=%d\n",
> +                          count->shared, count->id);
> +        } else {
> +            ds_put_cstr(s, "  Count = null\n");
> +        }
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
> +        const struct rte_flow_action_port_id *port_id = actions->conf;
> +
> +        ds_put_cstr(s, "rte flow port-id action:\n");
> +        if (port_id) {
> +            ds_put_format(s,
> +                          "  Port-id: original=%d, id=%d\n",
> +                          port_id->original, port_id->id);
> +        } else {
> +            ds_put_cstr(s, "  Port-id = null\n");
> +        }
>      } else {
>          ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>      }
> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>      return 0;
>  }
>  
> +static void
> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
> +{
> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
> +
> +    count->shared = 0;
> +    /* Each flow has a single count action, so no need of id */
> +    count->id = 0;
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
> +}
> +
> +static void
> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
> +                                    struct netdev *outdev)
> +{
> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
> +
> +    port_id->id = netdev_dpdk_get_port_id(outdev);
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
> +}
> +
> +static int
> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
> +                                   const struct nlattr *nla,
> +                                   struct offload_info *info)
> +{
> +    struct netdev *outdev;
> +    odp_port_t port;
> +
> +    port = nl_attr_get_odp_port(nla);
> +    outdev = netdev_ports_get(port, info->dpif_class);
> +    if (outdev == NULL) {
> +        VLOG_DBG_RL(&error_rl,
> +                    "Cannot find netdev for odp port %d", port);
> +        return -1;
> +    }
> +    if (!netdev_dpdk_flow_api_supported(outdev)) {

This doesn't look correct.  I mean, maybe we need to check if port is
really the port on a same piece of hardware.  Will the flow setup fail
in this case?  Will code fallback to the MARK+RSS?

> +        VLOG_DBG_RL(&error_rl,
> +                    "Output to %s cannot be offloaded",
> +                    netdev_get_name(outdev));
> +        return -1;
> +    }
> +
> +    netdev_dpdk_flow_add_count_action(actions);
> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
> +    netdev_close(outdev);
> +
> +    return 0;
> +}
> +
>  int
>  netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>                               struct nlattr *nl_actions,
>                               size_t nl_actions_len,
> -                             struct offload_info *info OVS_UNUSED)
> +                             struct offload_info *info)
>  {
>      struct nlattr *nla;
>      size_t left;
>  
>      NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> -        VLOG_DBG_RL(&error_rl,
> -                    "Unsupported action type %d", nl_attr_type(nla));
> -        return -1;
> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
> +            left <= NLA_ALIGN(nla->nla_len)) {
> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
> +                return -1;
> +            }
> +        } else {
> +            VLOG_DBG_RL(&error_rl,
> +                        "Unsupported action type %d", nl_attr_type(nla));
> +            return -1;
> +        }
>      }
>  
>      add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>
Eli Britstein Nov. 24, 2019, 1:22 p.m. UTC | #3
On 11/22/2019 6:19 PM, Ilya Maximets wrote:
> On 20.11.2019 16:28, Eli Britstein wrote:
>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>> ---
>>   NEWS                           |  2 +
>>   lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>   2 files changed, 85 insertions(+), 4 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 88b818948..ca9c2b230 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>          if supported by libbpf.
>>        * Add option to enable, disable and query TCP sequence checking in
>>          conntrack.
>> +   - DPDK:
>> +     * Add hardware offload support for output actions.
>>   
>>   v2.12.0 - 03 Sep 2019
>>   ---------------------
>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>> index dbbc45e99..6e7efb315 100644
>> --- a/lib/netdev-offload-dpdk-flow.c
>> +++ b/lib/netdev-offload-dpdk-flow.c
>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>           } else {
>>               ds_put_cstr(s, "  RSS = null\n");
>>           }
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>> +        const struct rte_flow_action_count *count = actions->conf;
>> +
>> +        ds_put_cstr(s, "rte flow count action:\n");
>> +        if (count) {
>> +            ds_put_format(s,
>> +                          "  Count: shared=%d, id=%d\n",
>> +                          count->shared, count->id);
>> +        } else {
>> +            ds_put_cstr(s, "  Count = null\n");
>> +        }
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>> +
>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>> +        if (port_id) {
>> +            ds_put_format(s,
>> +                          "  Port-id: original=%d, id=%d\n",
>> +                          port_id->original, port_id->id);
>> +        } else {
>> +            ds_put_cstr(s, "  Port-id = null\n");
>> +        }
>>       } else {
>>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>       }
>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>       return 0;
>>   }
>>   
>> +static void
>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>> +{
>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>> +
>> +    count->shared = 0;
>> +    /* Each flow has a single count action, so no need of id */
>> +    count->id = 0;
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>> +}
>> +
>> +static void
>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>> +                                    struct netdev *outdev)
>> +{
>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>> +
>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>> +}
>> +
>> +static int
>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>> +                                   const struct nlattr *nla,
>> +                                   struct offload_info *info)
>> +{
>> +    struct netdev *outdev;
>> +    odp_port_t port;
>> +
>> +    port = nl_attr_get_odp_port(nla);
>> +    outdev = netdev_ports_get(port, info->dpif_class);
>> +    if (outdev == NULL) {
>> +        VLOG_DBG_RL(&error_rl,
>> +                    "Cannot find netdev for odp port %d", port);
>> +        return -1;
>> +    }
>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
> This doesn't look correct.  I mean, maybe we need to check if port is
> really the port on a same piece of hardware.  Will the flow setup fail
> in this case?  Will code fallback to the MARK+RSS?

We cannot check if the port is on the same HW, and it is also wrong from 
the application point of view. If the operation cannot be supported, it 
is in the driver (DPDK) scope to fail it.

You are right about the fallback. I'll fix it in v2.

>
>> +        VLOG_DBG_RL(&error_rl,
>> +                    "Output to %s cannot be offloaded",
>> +                    netdev_get_name(outdev));
>> +        return -1;
>> +    }
>> +
>> +    netdev_dpdk_flow_add_count_action(actions);
>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>> +    netdev_close(outdev);
>> +
>> +    return 0;
>> +}
>> +
>>   int
>>   netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>                                struct nlattr *nl_actions,
>>                                size_t nl_actions_len,
>> -                             struct offload_info *info OVS_UNUSED)
>> +                             struct offload_info *info)
>>   {
>>       struct nlattr *nla;
>>       size_t left;
>>   
>>       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>> -        VLOG_DBG_RL(&error_rl,
>> -                    "Unsupported action type %d", nl_attr_type(nla));
>> -        return -1;
>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>> +            left <= NLA_ALIGN(nla->nla_len)) {
>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>> +                return -1;
>> +            }
>> +        } else {
>> +            VLOG_DBG_RL(&error_rl,
>> +                        "Unsupported action type %d", nl_attr_type(nla));
>> +            return -1;
>> +        }
>>       }
>>   
>>       add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>
Ilya Maximets Nov. 30, 2019, 11:59 a.m. UTC | #4
On 24.11.2019 14:22, Eli Britstein wrote:
> 
> On 11/22/2019 6:19 PM, Ilya Maximets wrote:
>> On 20.11.2019 16:28, Eli Britstein wrote:
>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>> ---
>>>   NEWS                           |  2 +
>>>   lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>   2 files changed, 85 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/NEWS b/NEWS
>>> index 88b818948..ca9c2b230 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>          if supported by libbpf.
>>>        * Add option to enable, disable and query TCP sequence checking in
>>>          conntrack.
>>> +   - DPDK:
>>> +     * Add hardware offload support for output actions.
>>>   
>>>   v2.12.0 - 03 Sep 2019
>>>   ---------------------
>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>> index dbbc45e99..6e7efb315 100644
>>> --- a/lib/netdev-offload-dpdk-flow.c
>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>           } else {
>>>               ds_put_cstr(s, "  RSS = null\n");
>>>           }
>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>> +        const struct rte_flow_action_count *count = actions->conf;
>>> +
>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>> +        if (count) {
>>> +            ds_put_format(s,
>>> +                          "  Count: shared=%d, id=%d\n",
>>> +                          count->shared, count->id);
>>> +        } else {
>>> +            ds_put_cstr(s, "  Count = null\n");
>>> +        }
>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>> +
>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>> +        if (port_id) {
>>> +            ds_put_format(s,
>>> +                          "  Port-id: original=%d, id=%d\n",
>>> +                          port_id->original, port_id->id);
>>> +        } else {
>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>> +        }
>>>       } else {
>>>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>       }
>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>       return 0;
>>>   }
>>>   
>>> +static void
>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>> +{
>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>> +
>>> +    count->shared = 0;
>>> +    /* Each flow has a single count action, so no need of id */
>>> +    count->id = 0;
>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>> +}
>>> +
>>> +static void
>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>> +                                    struct netdev *outdev)
>>> +{
>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>> +
>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>> +}
>>> +
>>> +static int
>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>> +                                   const struct nlattr *nla,
>>> +                                   struct offload_info *info)
>>> +{
>>> +    struct netdev *outdev;
>>> +    odp_port_t port;
>>> +
>>> +    port = nl_attr_get_odp_port(nla);
>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>> +    if (outdev == NULL) {
>>> +        VLOG_DBG_RL(&error_rl,
>>> +                    "Cannot find netdev for odp port %d", port);
>>> +        return -1;
>>> +    }
>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>> This doesn't look correct.  I mean, maybe we need to check if port is
>> really the port on a same piece of hardware.  Will the flow setup fail
>> in this case?  Will code fallback to the MARK+RSS?
> 
> We cannot check if the port is on the same HW, and it is also wrong from 
> the application point of view. If the operation cannot be supported, it 
> is in the driver (DPDK) scope to fail it.

BTW, function netdev_dpdk_flow_api_supported() is not intended to be called
in the offloading process.  It's only for initialization phase.  You can see
the "/* TODO: Check if we able to offload some minimal flow. */" in the code
and that might be destructive and unwanted for offloading process.

You, probably, wanted something like this instead (not tested):

diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
index 4e1c4251d..92876f3a3 100644
--- a/lib/netdev-offload-provider.h
+++ b/lib/netdev-offload-provider.h
@@ -90,6 +90,9 @@ struct netdev_flow_api {
 int netdev_register_flow_api_provider(const struct netdev_flow_api *);
 int netdev_unregister_flow_api_provider(const char *type);
 
+bool netdev_flow_api_equals(const struct netdev *,
+                            const struct netdev_flow_api *);
+
 #ifdef __linux__
 extern const struct netdev_flow_api netdev_offload_tc;
 #endif
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index ae01acda6..1970b1bae 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type)
     return error;
 }
 
+bool
+netdev_flow_api_equals(const struct netdev *netdev,
+                       const struct netdev_flow_api *flow_api)
+{
+    const struct netdev_flow_api *netdev_flow_api =
+        ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
+
+    return netdev_flow_api == flow_api;
+}
+
 static int
 netdev_assign_flow_api(struct netdev *netdev)
 {
---

BTW2, We, probably, need to call rte_flow_validate() somewhere.

Best regards, Ilya Maximets.
Eli Britstein Dec. 1, 2019, 8:29 a.m. UTC | #5
On 11/30/2019 1:59 PM, Ilya Maximets wrote:
> On 24.11.2019 14:22, Eli Britstein wrote:
>> On 11/22/2019 6:19 PM, Ilya Maximets wrote:
>>> On 20.11.2019 16:28, Eli Britstein wrote:
>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>>> ---
>>>>    NEWS                           |  2 +
>>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/NEWS b/NEWS
>>>> index 88b818948..ca9c2b230 100644
>>>> --- a/NEWS
>>>> +++ b/NEWS
>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>           if supported by libbpf.
>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>           conntrack.
>>>> +   - DPDK:
>>>> +     * Add hardware offload support for output actions.
>>>>    
>>>>    v2.12.0 - 03 Sep 2019
>>>>    ---------------------
>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>>> index dbbc45e99..6e7efb315 100644
>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>>            } else {
>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>            }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>> +        if (count) {
>>>> +            ds_put_format(s,
>>>> +                          "  Count: shared=%d, id=%d\n",
>>>> +                          count->shared, count->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>> +        }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>> +        if (port_id) {
>>>> +            ds_put_format(s,
>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>> +                          port_id->original, port_id->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>> +        }
>>>>        } else {
>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>        }
>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>>        return 0;
>>>>    }
>>>>    
>>>> +static void
>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>> +{
>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>> +
>>>> +    count->shared = 0;
>>>> +    /* Each flow has a single count action, so no need of id */
>>>> +    count->id = 0;
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>> +}
>>>> +
>>>> +static void
>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>> +                                    struct netdev *outdev)
>>>> +{
>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>> +
>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>> +}
>>>> +
>>>> +static int
>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>> +                                   const struct nlattr *nla,
>>>> +                                   struct offload_info *info)
>>>> +{
>>>> +    struct netdev *outdev;
>>>> +    odp_port_t port;
>>>> +
>>>> +    port = nl_attr_get_odp_port(nla);
>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>> +    if (outdev == NULL) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Cannot find netdev for odp port %d", port);
>>>> +        return -1;
>>>> +    }
>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>> This doesn't look correct.  I mean, maybe we need to check if port is
>>> really the port on a same piece of hardware.  Will the flow setup fail
>>> in this case?  Will code fallback to the MARK+RSS?
>> We cannot check if the port is on the same HW, and it is also wrong from
>> the application point of view. If the operation cannot be supported, it
>> is in the driver (DPDK) scope to fail it.
> BTW, function netdev_dpdk_flow_api_supported() is not intended to be called
> in the offloading process.  It's only for initialization phase.  You can see
> the "/* TODO: Check if we able to offload some minimal flow. */" in the code
> and that might be destructive and unwanted for offloading process.

Actually, there is no need to call it at all, as we get here only if we 
found the netdev by netdev_ports_get, which means we found a device that 
can have offloads.

I enhanced for error handling of netdev_dpdk_get_port_id instead of 
introducing such new code, that I think not required.

>
> You, probably, wanted something like this instead (not tested):
>
> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
> index 4e1c4251d..92876f3a3 100644
> --- a/lib/netdev-offload-provider.h
> +++ b/lib/netdev-offload-provider.h
> @@ -90,6 +90,9 @@ struct netdev_flow_api {
>   int netdev_register_flow_api_provider(const struct netdev_flow_api *);
>   int netdev_unregister_flow_api_provider(const char *type);
>   
> +bool netdev_flow_api_equals(const struct netdev *,
> +                            const struct netdev_flow_api *);
> +
>   #ifdef __linux__
>   extern const struct netdev_flow_api netdev_offload_tc;
>   #endif
> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
> index ae01acda6..1970b1bae 100644
> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type)
>       return error;
>   }
>   
> +bool
> +netdev_flow_api_equals(const struct netdev *netdev,
> +                       const struct netdev_flow_api *flow_api)
> +{
> +    const struct netdev_flow_api *netdev_flow_api =
> +        ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
> +
> +    return netdev_flow_api == flow_api;
> +}
> +
>   static int
>   netdev_assign_flow_api(struct netdev *netdev)
>   {
> ---
>
> BTW2, We, probably, need to call rte_flow_validate() somewhere.
what for? if we fail by rte_flow_create, we will handle the error. the 
validate may be used in the future to query driver capabilities, but 
this is something much more complex and not related to this patch-set.
>
> Best regards, Ilya Maximets.
Ilya Maximets Dec. 2, 2019, 2:25 p.m. UTC | #6
On 01.12.2019 9:29, Eli Britstein wrote:
> 
> On 11/30/2019 1:59 PM, Ilya Maximets wrote:
>> On 24.11.2019 14:22, Eli Britstein wrote:
>>> On 11/22/2019 6:19 PM, Ilya Maximets wrote:
>>>> On 20.11.2019 16:28, Eli Britstein wrote:
>>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>>>> ---
>>>>>    NEWS                           |  2 +
>>>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/NEWS b/NEWS
>>>>> index 88b818948..ca9c2b230 100644
>>>>> --- a/NEWS
>>>>> +++ b/NEWS
>>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>>           if supported by libbpf.
>>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>>           conntrack.
>>>>> +   - DPDK:
>>>>> +     * Add hardware offload support for output actions.
>>>>>    
>>>>>    v2.12.0 - 03 Sep 2019
>>>>>    ---------------------
>>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>>>> index dbbc45e99..6e7efb315 100644
>>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>>>            } else {
>>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>>            }
>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>>> +
>>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>>> +        if (count) {
>>>>> +            ds_put_format(s,
>>>>> +                          "  Count: shared=%d, id=%d\n",
>>>>> +                          count->shared, count->id);
>>>>> +        } else {
>>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>>> +        }
>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>>> +
>>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>>> +        if (port_id) {
>>>>> +            ds_put_format(s,
>>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>>> +                          port_id->original, port_id->id);
>>>>> +        } else {
>>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>>> +        }
>>>>>        } else {
>>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>>        }
>>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>>>        return 0;
>>>>>    }
>>>>>    
>>>>> +static void
>>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>>> +{
>>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>>> +
>>>>> +    count->shared = 0;
>>>>> +    /* Each flow has a single count action, so no need of id */
>>>>> +    count->id = 0;
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>>> +                                    struct netdev *outdev)
>>>>> +{
>>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>>> +
>>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>>> +}
>>>>> +
>>>>> +static int
>>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>>> +                                   const struct nlattr *nla,
>>>>> +                                   struct offload_info *info)
>>>>> +{
>>>>> +    struct netdev *outdev;
>>>>> +    odp_port_t port;
>>>>> +
>>>>> +    port = nl_attr_get_odp_port(nla);
>>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>>> +    if (outdev == NULL) {
>>>>> +        VLOG_DBG_RL(&error_rl,
>>>>> +                    "Cannot find netdev for odp port %d", port);
>>>>> +        return -1;
>>>>> +    }
>>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>> This doesn't look correct.  I mean, maybe we need to check if port is
>>>> really the port on a same piece of hardware.  Will the flow setup fail
>>>> in this case?  Will code fallback to the MARK+RSS?
>>> We cannot check if the port is on the same HW, and it is also wrong from
>>> the application point of view. If the operation cannot be supported, it
>>> is in the driver (DPDK) scope to fail it.
>> BTW, function netdev_dpdk_flow_api_supported() is not intended to be called
>> in the offloading process.  It's only for initialization phase.  You can see
>> the "/* TODO: Check if we able to offload some minimal flow. */" in the code
>> and that might be destructive and unwanted for offloading process.
> 
> Actually, there is no need to call it at all, as we get here only if we 
> found the netdev by netdev_ports_get, which means we found a device that 
> can have offloads.

'port_to_netdev' hash map contains all the netdevs created by the ofproto
regardless of the flow API initialization.  This is done to allow delayed
initialization if hw offloading was enabled in runtime.  'netdev_ports_get'
just traverses above hash map.  So, the flow API could be not initialized.

> 
> I enhanced for error handling of netdev_dpdk_get_port_id instead of 
> introducing such new code, that I think not required.
> 
>>
>> You, probably, wanted something like this instead (not tested):
>>
>> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
>> index 4e1c4251d..92876f3a3 100644
>> --- a/lib/netdev-offload-provider.h
>> +++ b/lib/netdev-offload-provider.h
>> @@ -90,6 +90,9 @@ struct netdev_flow_api {
>>   int netdev_register_flow_api_provider(const struct netdev_flow_api *);
>>   int netdev_unregister_flow_api_provider(const char *type);
>>   
>> +bool netdev_flow_api_equals(const struct netdev *,
>> +                            const struct netdev_flow_api *);
>> +
>>   #ifdef __linux__
>>   extern const struct netdev_flow_api netdev_offload_tc;
>>   #endif
>> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
>> index ae01acda6..1970b1bae 100644
>> --- a/lib/netdev-offload.c
>> +++ b/lib/netdev-offload.c
>> @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type)
>>       return error;
>>   }
>>   
>> +bool
>> +netdev_flow_api_equals(const struct netdev *netdev,
>> +                       const struct netdev_flow_api *flow_api)
>> +{
>> +    const struct netdev_flow_api *netdev_flow_api =
>> +        ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
>> +
>> +    return netdev_flow_api == flow_api;
>> +}
>> +
>>   static int
>>   netdev_assign_flow_api(struct netdev *netdev)
>>   {
>> ---
>>
>> BTW2, We, probably, need to call rte_flow_validate() somewhere.
> what for? if we fail by rte_flow_create, we will handle the error. the 
> validate may be used in the future to query driver capabilities, but 
> this is something much more complex and not related to this patch-set.

OK.
Eli Britstein Dec. 3, 2019, 1:03 p.m. UTC | #7
On 12/2/2019 4:25 PM, Ilya Maximets wrote:
> On 01.12.2019 9:29, Eli Britstein wrote:
>> On 11/30/2019 1:59 PM, Ilya Maximets wrote:
>>> On 24.11.2019 14:22, Eli Britstein wrote:
>>>> On 11/22/2019 6:19 PM, Ilya Maximets wrote:
>>>>> On 20.11.2019 16:28, Eli Britstein wrote:
>>>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>>>>> ---
>>>>>>     NEWS                           |  2 +
>>>>>>     lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>>>>     2 files changed, 85 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/NEWS b/NEWS
>>>>>> index 88b818948..ca9c2b230 100644
>>>>>> --- a/NEWS
>>>>>> +++ b/NEWS
>>>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>>>            if supported by libbpf.
>>>>>>          * Add option to enable, disable and query TCP sequence checking in
>>>>>>            conntrack.
>>>>>> +   - DPDK:
>>>>>> +     * Add hardware offload support for output actions.
>>>>>>     
>>>>>>     v2.12.0 - 03 Sep 2019
>>>>>>     ---------------------
>>>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>>>>> index dbbc45e99..6e7efb315 100644
>>>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>>>>             } else {
>>>>>>                 ds_put_cstr(s, "  RSS = null\n");
>>>>>>             }
>>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>>>> +
>>>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>>>> +        if (count) {
>>>>>> +            ds_put_format(s,
>>>>>> +                          "  Count: shared=%d, id=%d\n",
>>>>>> +                          count->shared, count->id);
>>>>>> +        } else {
>>>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>>>> +        }
>>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>>>> +
>>>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>>>> +        if (port_id) {
>>>>>> +            ds_put_format(s,
>>>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>>>> +                          port_id->original, port_id->id);
>>>>>> +        } else {
>>>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>>>> +        }
>>>>>>         } else {
>>>>>>             ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>>>         }
>>>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>>>>         return 0;
>>>>>>     }
>>>>>>     
>>>>>> +static void
>>>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>>>> +{
>>>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>>>> +
>>>>>> +    count->shared = 0;
>>>>>> +    /* Each flow has a single count action, so no need of id */
>>>>>> +    count->id = 0;
>>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>>>> +}
>>>>>> +
>>>>>> +static void
>>>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>>>> +                                    struct netdev *outdev)
>>>>>> +{
>>>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>>>> +
>>>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>>>> +}
>>>>>> +
>>>>>> +static int
>>>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>>>> +                                   const struct nlattr *nla,
>>>>>> +                                   struct offload_info *info)
>>>>>> +{
>>>>>> +    struct netdev *outdev;
>>>>>> +    odp_port_t port;
>>>>>> +
>>>>>> +    port = nl_attr_get_odp_port(nla);
>>>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>>>> +    if (outdev == NULL) {
>>>>>> +        VLOG_DBG_RL(&error_rl,
>>>>>> +                    "Cannot find netdev for odp port %d", port);
>>>>>> +        return -1;
>>>>>> +    }
>>>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>>> This doesn't look correct.  I mean, maybe we need to check if port is
>>>>> really the port on a same piece of hardware.  Will the flow setup fail
>>>>> in this case?  Will code fallback to the MARK+RSS?
>>>> We cannot check if the port is on the same HW, and it is also wrong from
>>>> the application point of view. If the operation cannot be supported, it
>>>> is in the driver (DPDK) scope to fail it.
>>> BTW, function netdev_dpdk_flow_api_supported() is not intended to be called
>>> in the offloading process.  It's only for initialization phase.  You can see
>>> the "/* TODO: Check if we able to offload some minimal flow. */" in the code
>>> and that might be destructive and unwanted for offloading process.
>> Actually, there is no need to call it at all, as we get here only if we
>> found the netdev by netdev_ports_get, which means we found a device that
>> can have offloads.
> 'port_to_netdev' hash map contains all the netdevs created by the ofproto
> regardless of the flow API initialization.  This is done to allow delayed
> initialization if hw offloading was enabled in runtime.  'netdev_ports_get'
> just traverses above hash map.  So, the flow API could be not initialized.

I don't care if flow API is not initialized on the output netdev. I 
check validity of its DPDK port and that should be enough. Please see in V2.

BTW, AFAIK, changing hw-offload is not supported on the fly and requires 
OVS to be restarted.

>
>> I enhanced for error handling of netdev_dpdk_get_port_id instead of
>> introducing such new code, that I think not required.
>>
>>> You, probably, wanted something like this instead (not tested):
>>>
>>> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
>>> index 4e1c4251d..92876f3a3 100644
>>> --- a/lib/netdev-offload-provider.h
>>> +++ b/lib/netdev-offload-provider.h
>>> @@ -90,6 +90,9 @@ struct netdev_flow_api {
>>>    int netdev_register_flow_api_provider(const struct netdev_flow_api *);
>>>    int netdev_unregister_flow_api_provider(const char *type);
>>>    
>>> +bool netdev_flow_api_equals(const struct netdev *,
>>> +                            const struct netdev_flow_api *);
>>> +
>>>    #ifdef __linux__
>>>    extern const struct netdev_flow_api netdev_offload_tc;
>>>    #endif
>>> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
>>> index ae01acda6..1970b1bae 100644
>>> --- a/lib/netdev-offload.c
>>> +++ b/lib/netdev-offload.c
>>> @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type)
>>>        return error;
>>>    }
>>>    
>>> +bool
>>> +netdev_flow_api_equals(const struct netdev *netdev,
>>> +                       const struct netdev_flow_api *flow_api)
>>> +{
>>> +    const struct netdev_flow_api *netdev_flow_api =
>>> +        ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
>>> +
>>> +    return netdev_flow_api == flow_api;
>>> +}
>>> +
>>>    static int
>>>    netdev_assign_flow_api(struct netdev *netdev)
>>>    {
>>> ---
>>>
>>> BTW2, We, probably, need to call rte_flow_validate() somewhere.
>> what for? if we fail by rte_flow_create, we will handle the error. the
>> validate may be used in the future to query driver capabilities, but
>> this is something much more complex and not related to this patch-set.
> OK.
Li,Rongqing via dev Dec. 3, 2019, 3:19 p.m. UTC | #8
On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
>
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> ---
>  NEWS                           |  2 +
>  lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 85 insertions(+), 4 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 88b818948..ca9c2b230 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,6 +10,8 @@ Post-v2.12.0
>         if supported by libbpf.
>       * Add option to enable, disable and query TCP sequence checking in
>         conntrack.
> +   - DPDK:
> +     * Add hardware offload support for output actions.
>
>  v2.12.0 - 03 Sep 2019
>  ---------------------
> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
> index dbbc45e99..6e7efb315 100644
> --- a/lib/netdev-offload-dpdk-flow.c
> +++ b/lib/netdev-offload-dpdk-flow.c
> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>          } else {
>              ds_put_cstr(s, "  RSS = null\n");
>          }
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
> +        const struct rte_flow_action_count *count = actions->conf;
> +
> +        ds_put_cstr(s, "rte flow count action:\n");
> +        if (count) {
> +            ds_put_format(s,
> +                          "  Count: shared=%d, id=%d\n",
> +                          count->shared, count->id);
> +        } else {
> +            ds_put_cstr(s, "  Count = null\n");
> +        }
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
> +        const struct rte_flow_action_port_id *port_id = actions->conf;
> +
> +        ds_put_cstr(s, "rte flow port-id action:\n");
> +        if (port_id) {
> +            ds_put_format(s,
> +                          "  Port-id: original=%d, id=%d\n",
> +                          port_id->original, port_id->id);
> +        } else {
> +            ds_put_cstr(s, "  Port-id = null\n");
> +        }
>      } else {
>          ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>      }
> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>      return 0;
>  }
>
> +static void
> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
> +{
> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
> +
> +    count->shared = 0;
> +    /* Each flow has a single count action, so no need of id */
> +    count->id = 0;
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
> +}
> +
> +static void
> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
> +                                    struct netdev *outdev)
> +{
> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
> +
> +    port_id->id = netdev_dpdk_get_port_id(outdev);
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
> +}
> +
> +static int
> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
> +                                   const struct nlattr *nla,
> +                                   struct offload_info *info)
> +{
> +    struct netdev *outdev;
> +    odp_port_t port;
> +
> +    port = nl_attr_get_odp_port(nla);
> +    outdev = netdev_ports_get(port, info->dpif_class);
> +    if (outdev == NULL) {
> +        VLOG_DBG_RL(&error_rl,
> +                    "Cannot find netdev for odp port %d", port);
> +        return -1;
> +    }
> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
> +        VLOG_DBG_RL(&error_rl,
> +                    "Output to %s cannot be offloaded",
> +                    netdev_get_name(outdev));
> +        return -1;
> +    }
> +
> +    netdev_dpdk_flow_add_count_action(actions);
> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
> +    netdev_close(outdev);
> +
> +    return 0;
> +}
> +
>  int
>  netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>                               struct nlattr *nl_actions,
>                               size_t nl_actions_len,
> -                             struct offload_info *info OVS_UNUSED)
> +                             struct offload_info *info)
>  {
>      struct nlattr *nla;
>      size_t left;
>
>      NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> -        VLOG_DBG_RL(&error_rl,
> -                    "Unsupported action type %d", nl_attr_type(nla));
> -        return -1;
> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
> +            left <= NLA_ALIGN(nla->nla_len)) {
> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
> +                return -1;
> +            }

The check in netdev_dpdk_flow_add_output_action() is insufficient to
decide whether the device is capable of full-offload, since it assumes
that the output action is supported if netdev_dpdk type is
DPDK_DEV_ETH.

There are a couple of issues with this approach:

1. For a device that is not capable of full-offload, it results in 2
calls to the PMD for every flow-offload request. The first attempt for
full-offload fails, followed by another call for partial-offload.

2. Even for devices that support full-offload, the offload might fail
if the in-port and out-port are not in the same switch-domain. For
example, if only partial-offload is configured (vhost-user ports) in
the vSwitch, then the PMD should check if the in-port and out-port are
on the same switch (domain-id must match) and fail the request
otherwise. This check is important to alert the user of
misconfigurations. If OVS doesn't do this check, all pmd drivers will
have to do this check and that duplication is not desirable.

Here's how this could be addressed, by identifying whether the device
is capable of full-offload or partial-offload.

1. Check if the target device (in-port) is full-offload capable. This
could be done by checking if rte_flow api is supported, along with a
valid switch-domain-id. We could even improve this further, similar to
the kernel switchdev framework in which the device mode is
configurable (legacy/eswitch modes) and available to consumers, but
this might require some more DPDK framework changes.

2. If the device is full-offload capable, and if output action is
specified,  check if in-port and out-port are on the same switch
(switch-domain-id match mentioned above).

If both 1 and 2 succeed, then the device is capable of full-offload
and we can proceed with full-offload item/action setup, followed by
rte_flow_create().

Otherwise, we fallback to partial-offload code path. Here, we can
handle the case where a vhost-user interface is either the in-port or
out-port of the flow being offloaded. Depending on whether the
vhost-user interface is the in-port or out-port, if the other port
associated with the flow is offload capable (DPDK_DEV_ETH), then we
offload the flow to that device. This would help us build further
extensions to the existing partial-offload framework by adding more
actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
the output action that still needs to be processed in software.

Thanks,
-Harsha

> +        } else {
> +            VLOG_DBG_RL(&error_rl,
> +                        "Unsupported action type %d", nl_attr_type(nla));
> +            return -1;
> +        }
>      }
>
>      add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> --
> 2.14.5
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eli Britstein Dec. 4, 2019, 3:25 p.m. UTC | #9
On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>> ---
>>   NEWS                           |  2 +
>>   lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>   2 files changed, 85 insertions(+), 4 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 88b818948..ca9c2b230 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>          if supported by libbpf.
>>        * Add option to enable, disable and query TCP sequence checking in
>>          conntrack.
>> +   - DPDK:
>> +     * Add hardware offload support for output actions.
>>
>>   v2.12.0 - 03 Sep 2019
>>   ---------------------
>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>> index dbbc45e99..6e7efb315 100644
>> --- a/lib/netdev-offload-dpdk-flow.c
>> +++ b/lib/netdev-offload-dpdk-flow.c
>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>           } else {
>>               ds_put_cstr(s, "  RSS = null\n");
>>           }
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>> +        const struct rte_flow_action_count *count = actions->conf;
>> +
>> +        ds_put_cstr(s, "rte flow count action:\n");
>> +        if (count) {
>> +            ds_put_format(s,
>> +                          "  Count: shared=%d, id=%d\n",
>> +                          count->shared, count->id);
>> +        } else {
>> +            ds_put_cstr(s, "  Count = null\n");
>> +        }
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>> +
>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>> +        if (port_id) {
>> +            ds_put_format(s,
>> +                          "  Port-id: original=%d, id=%d\n",
>> +                          port_id->original, port_id->id);
>> +        } else {
>> +            ds_put_cstr(s, "  Port-id = null\n");
>> +        }
>>       } else {
>>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>       }
>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>       return 0;
>>   }
>>
>> +static void
>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>> +{
>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>> +
>> +    count->shared = 0;
>> +    /* Each flow has a single count action, so no need of id */
>> +    count->id = 0;
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>> +}
>> +
>> +static void
>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>> +                                    struct netdev *outdev)
>> +{
>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>> +
>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>> +}
>> +
>> +static int
>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>> +                                   const struct nlattr *nla,
>> +                                   struct offload_info *info)
>> +{
>> +    struct netdev *outdev;
>> +    odp_port_t port;
>> +
>> +    port = nl_attr_get_odp_port(nla);
>> +    outdev = netdev_ports_get(port, info->dpif_class);
>> +    if (outdev == NULL) {
>> +        VLOG_DBG_RL(&error_rl,
>> +                    "Cannot find netdev for odp port %d", port);
>> +        return -1;
>> +    }
>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>> +        VLOG_DBG_RL(&error_rl,
>> +                    "Output to %s cannot be offloaded",
>> +                    netdev_get_name(outdev));
>> +        return -1;
>> +    }
>> +
>> +    netdev_dpdk_flow_add_count_action(actions);
>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>> +    netdev_close(outdev);
>> +
>> +    return 0;
>> +}
>> +
>>   int
>>   netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>                                struct nlattr *nl_actions,
>>                                size_t nl_actions_len,
>> -                             struct offload_info *info OVS_UNUSED)
>> +                             struct offload_info *info)
>>   {
>>       struct nlattr *nla;
>>       size_t left;
>>
>>       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>> -        VLOG_DBG_RL(&error_rl,
>> -                    "Unsupported action type %d", nl_attr_type(nla));
>> -        return -1;
>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>> +            left <= NLA_ALIGN(nla->nla_len)) {
>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>> +                return -1;
>> +            }
> The check in netdev_dpdk_flow_add_output_action() is insufficient to
> decide whether the device is capable of full-offload, since it assumes
> that the output action is supported if netdev_dpdk type is
> DPDK_DEV_ETH.
>
> There are a couple of issues with this approach:
>
> 1. For a device that is not capable of full-offload, it results in 2
> calls to the PMD for every flow-offload request. The first attempt for
> full-offload fails, followed by another call for partial-offload.
We cannot avoid this, as we want to do full offload, and only fallback 
to partial if it doesn't work.
>
> 2. Even for devices that support full-offload, the offload might fail
> if the in-port and out-port are not in the same switch-domain. For
> example, if only partial-offload is configured (vhost-user ports) in
> the vSwitch, then the PMD should check if the in-port and out-port are
> on the same switch (domain-id must match) and fail the request
> otherwise. This check is important to alert the user of
> misconfigurations. If OVS doesn't do this check, all pmd drivers will
> have to do this check and that duplication is not desirable.

I don't think it's in the OVS scope to do such checks, but it should be 
in the driver's level (PMD), the same way it is in the kernel offloading 
using TC.

I agree that the argument of logging is important, but this is something 
that I think out of scope of this series. All prints in DPDK go to 
/dev/null, and not logged into OVS' log.

BTW, in kernel, there is a similar issue with drivers that logs using 
extack. Those messages are not dumped to dmesg, and OVS also doesn't 
read them to its log. Also out of scope of this series...

>
> Here's how this could be addressed, by identifying whether the device
> is capable of full-offload or partial-offload.
>
> 1. Check if the target device (in-port) is full-offload capable. This
> could be done by checking if rte_flow api is supported, along with a
> valid switch-domain-id. We could even improve this further, similar to
> the kernel switchdev framework in which the device mode is
> configurable (legacy/eswitch modes) and available to consumers, but
> this might require some more DPDK framework changes.
I think we can enhance to check if both are ETH. However, As I wrote 
above, I think checking other HW properties should not be in OVS scope, 
but in driver's domain.
>
> 2. If the device is full-offload capable, and if output action is
> specified,  check if in-port and out-port are on the same switch
> (switch-domain-id match mentioned above).
>
> If both 1 and 2 succeed, then the device is capable of full-offload
> and we can proceed with full-offload item/action setup, followed by
> rte_flow_create().
>
> Otherwise, we fallback to partial-offload code path. Here, we can
> handle the case where a vhost-user interface is either the in-port or
> out-port of the flow being offloaded. Depending on whether the
> vhost-user interface is the in-port or out-port, if the other port
> associated with the flow is offload capable (DPDK_DEV_ETH), then we
> offload the flow to that device. This would help us build further
> extensions to the existing partial-offload framework by adding more
> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
> the output action that still needs to be processed in software.
>
> Thanks,
> -Harsha
>
>> +        } else {
>> +            VLOG_DBG_RL(&error_rl,
>> +                        "Unsupported action type %d", nl_attr_type(nla));
>> +            return -1;
>> +        }
>>       }
>>
>>       add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>> --
>> 2.14.5
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&amp;sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&amp;reserved=0
Ilya Maximets Dec. 4, 2019, 3:42 p.m. UTC | #10
On 04.12.2019 16:25, Eli Britstein wrote:
> 
> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>> ---
>>>   NEWS                           |  2 +
>>>   lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>   2 files changed, 85 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/NEWS b/NEWS
>>> index 88b818948..ca9c2b230 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>          if supported by libbpf.
>>>        * Add option to enable, disable and query TCP sequence checking in
>>>          conntrack.
>>> +   - DPDK:
>>> +     * Add hardware offload support for output actions.
>>>
>>>   v2.12.0 - 03 Sep 2019
>>>   ---------------------
>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>> index dbbc45e99..6e7efb315 100644
>>> --- a/lib/netdev-offload-dpdk-flow.c
>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>           } else {
>>>               ds_put_cstr(s, "  RSS = null\n");
>>>           }
>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>> +        const struct rte_flow_action_count *count = actions->conf;
>>> +
>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>> +        if (count) {
>>> +            ds_put_format(s,
>>> +                          "  Count: shared=%d, id=%d\n",
>>> +                          count->shared, count->id);
>>> +        } else {
>>> +            ds_put_cstr(s, "  Count = null\n");
>>> +        }
>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>> +
>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>> +        if (port_id) {
>>> +            ds_put_format(s,
>>> +                          "  Port-id: original=%d, id=%d\n",
>>> +                          port_id->original, port_id->id);
>>> +        } else {
>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>> +        }
>>>       } else {
>>>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>       }
>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>       return 0;
>>>   }
>>>
>>> +static void
>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>> +{
>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>> +
>>> +    count->shared = 0;
>>> +    /* Each flow has a single count action, so no need of id */
>>> +    count->id = 0;
>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>> +}
>>> +
>>> +static void
>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>> +                                    struct netdev *outdev)
>>> +{
>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>> +
>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>> +}
>>> +
>>> +static int
>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>> +                                   const struct nlattr *nla,
>>> +                                   struct offload_info *info)
>>> +{
>>> +    struct netdev *outdev;
>>> +    odp_port_t port;
>>> +
>>> +    port = nl_attr_get_odp_port(nla);
>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>> +    if (outdev == NULL) {
>>> +        VLOG_DBG_RL(&error_rl,
>>> +                    "Cannot find netdev for odp port %d", port);
>>> +        return -1;
>>> +    }
>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>> +        VLOG_DBG_RL(&error_rl,
>>> +                    "Output to %s cannot be offloaded",
>>> +                    netdev_get_name(outdev));
>>> +        return -1;
>>> +    }
>>> +
>>> +    netdev_dpdk_flow_add_count_action(actions);
>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>>> +    netdev_close(outdev);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   int
>>>   netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>>                                struct nlattr *nl_actions,
>>>                                size_t nl_actions_len,
>>> -                             struct offload_info *info OVS_UNUSED)
>>> +                             struct offload_info *info)
>>>   {
>>>       struct nlattr *nla;
>>>       size_t left;
>>>
>>>       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>>> -        VLOG_DBG_RL(&error_rl,
>>> -                    "Unsupported action type %d", nl_attr_type(nla));
>>> -        return -1;
>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>>> +            left <= NLA_ALIGN(nla->nla_len)) {
>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>>> +                return -1;
>>> +            }
>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
>> decide whether the device is capable of full-offload, since it assumes
>> that the output action is supported if netdev_dpdk type is
>> DPDK_DEV_ETH.
>>
>> There are a couple of issues with this approach:
>>
>> 1. For a device that is not capable of full-offload, it results in 2
>> calls to the PMD for every flow-offload request. The first attempt for
>> full-offload fails, followed by another call for partial-offload.
> We cannot avoid this, as we want to do full offload, and only fallback 
> to partial if it doesn't work.
>>
>> 2. Even for devices that support full-offload, the offload might fail
>> if the in-port and out-port are not in the same switch-domain. For
>> example, if only partial-offload is configured (vhost-user ports) in
>> the vSwitch, then the PMD should check if the in-port and out-port are
>> on the same switch (domain-id must match) and fail the request
>> otherwise. This check is important to alert the user of
>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
>> have to do this check and that duplication is not desirable.
> 
> I don't think it's in the OVS scope to do such checks, but it should be 
> in the driver's level (PMD), the same way it is in the kernel offloading 
> using TC.
> 
> I agree that the argument of logging is important, but this is something 
> that I think out of scope of this series. All prints in DPDK go to 
> /dev/null, and not logged into OVS' log.

This is not correct.  DPDK logs are redirected to OVS log.  So, if DPDK driver
uses DPDK logging properly, all the messages should go to ovs-vswitchd.log
depending on the configured log levels.

> 
> BTW, in kernel, there is a similar issue with drivers that logs using 
> extack. Those messages are not dumped to dmesg, and OVS also doesn't 
> read them to its log. Also out of scope of this series...
> 
>>
>> Here's how this could be addressed, by identifying whether the device
>> is capable of full-offload or partial-offload.
>>
>> 1. Check if the target device (in-port) is full-offload capable. This
>> could be done by checking if rte_flow api is supported, along with a
>> valid switch-domain-id. We could even improve this further, similar to
>> the kernel switchdev framework in which the device mode is
>> configurable (legacy/eswitch modes) and available to consumers, but
>> this might require some more DPDK framework changes.
> I think we can enhance to check if both are ETH. However, As I wrote 
> above, I think checking other HW properties should not be in OVS scope, 
> but in driver's domain.
>>
>> 2. If the device is full-offload capable, and if output action is
>> specified,  check if in-port and out-port are on the same switch
>> (switch-domain-id match mentioned above).
>>
>> If both 1 and 2 succeed, then the device is capable of full-offload
>> and we can proceed with full-offload item/action setup, followed by
>> rte_flow_create().
>>
>> Otherwise, we fallback to partial-offload code path. Here, we can
>> handle the case where a vhost-user interface is either the in-port or
>> out-port of the flow being offloaded. Depending on whether the
>> vhost-user interface is the in-port or out-port, if the other port
>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
>> offload the flow to that device. This would help us build further
>> extensions to the existing partial-offload framework by adding more
>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
>> the output action that still needs to be processed in software.
>>
>> Thanks,
>> -Harsha
>>
>>> +        } else {
>>> +            VLOG_DBG_RL(&error_rl,
>>> +                        "Unsupported action type %d", nl_attr_type(nla));
>>> +            return -1;
>>> +        }
>>>       }
>>>
>>>       add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>> --
>>> 2.14.5
>>>
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&amp;sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&amp;reserved=0
Eli Britstein Dec. 4, 2019, 4:16 p.m. UTC | #11
On 12/4/2019 5:42 PM, Ilya Maximets wrote:
> On 04.12.2019 16:25, Eli Britstein wrote:
>> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
>>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>>> ---
>>>>    NEWS                           |  2 +
>>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/NEWS b/NEWS
>>>> index 88b818948..ca9c2b230 100644
>>>> --- a/NEWS
>>>> +++ b/NEWS
>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>           if supported by libbpf.
>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>           conntrack.
>>>> +   - DPDK:
>>>> +     * Add hardware offload support for output actions.
>>>>
>>>>    v2.12.0 - 03 Sep 2019
>>>>    ---------------------
>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>>> index dbbc45e99..6e7efb315 100644
>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>>            } else {
>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>            }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>> +        if (count) {
>>>> +            ds_put_format(s,
>>>> +                          "  Count: shared=%d, id=%d\n",
>>>> +                          count->shared, count->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>> +        }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>> +        if (port_id) {
>>>> +            ds_put_format(s,
>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>> +                          port_id->original, port_id->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>> +        }
>>>>        } else {
>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>        }
>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>>        return 0;
>>>>    }
>>>>
>>>> +static void
>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>> +{
>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>> +
>>>> +    count->shared = 0;
>>>> +    /* Each flow has a single count action, so no need of id */
>>>> +    count->id = 0;
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>> +}
>>>> +
>>>> +static void
>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>> +                                    struct netdev *outdev)
>>>> +{
>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>> +
>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>> +}
>>>> +
>>>> +static int
>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>> +                                   const struct nlattr *nla,
>>>> +                                   struct offload_info *info)
>>>> +{
>>>> +    struct netdev *outdev;
>>>> +    odp_port_t port;
>>>> +
>>>> +    port = nl_attr_get_odp_port(nla);
>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>> +    if (outdev == NULL) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Cannot find netdev for odp port %d", port);
>>>> +        return -1;
>>>> +    }
>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Output to %s cannot be offloaded",
>>>> +                    netdev_get_name(outdev));
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    netdev_dpdk_flow_add_count_action(actions);
>>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>>>> +    netdev_close(outdev);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    int
>>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>>>                                 struct nlattr *nl_actions,
>>>>                                 size_t nl_actions_len,
>>>> -                             struct offload_info *info OVS_UNUSED)
>>>> +                             struct offload_info *info)
>>>>    {
>>>>        struct nlattr *nla;
>>>>        size_t left;
>>>>
>>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>>>> -        VLOG_DBG_RL(&error_rl,
>>>> -                    "Unsupported action type %d", nl_attr_type(nla));
>>>> -        return -1;
>>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>>>> +            left <= NLA_ALIGN(nla->nla_len)) {
>>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>>>> +                return -1;
>>>> +            }
>>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
>>> decide whether the device is capable of full-offload, since it assumes
>>> that the output action is supported if netdev_dpdk type is
>>> DPDK_DEV_ETH.
>>>
>>> There are a couple of issues with this approach:
>>>
>>> 1. For a device that is not capable of full-offload, it results in 2
>>> calls to the PMD for every flow-offload request. The first attempt for
>>> full-offload fails, followed by another call for partial-offload.
>> We cannot avoid this, as we want to do full offload, and only fallback
>> to partial if it doesn't work.
>>> 2. Even for devices that support full-offload, the offload might fail
>>> if the in-port and out-port are not in the same switch-domain. For
>>> example, if only partial-offload is configured (vhost-user ports) in
>>> the vSwitch, then the PMD should check if the in-port and out-port are
>>> on the same switch (domain-id must match) and fail the request
>>> otherwise. This check is important to alert the user of
>>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
>>> have to do this check and that duplication is not desirable.
>> I don't think it's in the OVS scope to do such checks, but it should be
>> in the driver's level (PMD), the same way it is in the kernel offloading
>> using TC.
>>
>> I agree that the argument of logging is important, but this is something
>> that I think out of scope of this series. All prints in DPDK go to
>> /dev/null, and not logged into OVS' log.
> This is not correct.  DPDK logs are redirected to OVS log.  So, if DPDK driver
> uses DPDK logging properly, all the messages should go to ovs-vswitchd.log
> depending on the configured log levels.
OK, so in relevance to the previous comment, if the DPDK PMD logs 
(properly) a message that it is not the same HW, the user will be 
alerted of possible invalid configuration.
>
>> BTW, in kernel, there is a similar issue with drivers that logs using
>> extack. Those messages are not dumped to dmesg, and OVS also doesn't
>> read them to its log. Also out of scope of this series...
>>
>>> Here's how this could be addressed, by identifying whether the device
>>> is capable of full-offload or partial-offload.
>>>
>>> 1. Check if the target device (in-port) is full-offload capable. This
>>> could be done by checking if rte_flow api is supported, along with a
>>> valid switch-domain-id. We could even improve this further, similar to
>>> the kernel switchdev framework in which the device mode is
>>> configurable (legacy/eswitch modes) and available to consumers, but
>>> this might require some more DPDK framework changes.
>> I think we can enhance to check if both are ETH. However, As I wrote
>> above, I think checking other HW properties should not be in OVS scope,
>> but in driver's domain.
When I try to implement it, I see the netdev_dpdk_flow_api_supported API 
checks exactly this. Could you please explain again why its usage is 
restricted to init phase only?
>>> 2. If the device is full-offload capable, and if output action is
>>> specified,  check if in-port and out-port are on the same switch
>>> (switch-domain-id match mentioned above).
>>>
>>> If both 1 and 2 succeed, then the device is capable of full-offload
>>> and we can proceed with full-offload item/action setup, followed by
>>> rte_flow_create().
>>>
>>> Otherwise, we fallback to partial-offload code path. Here, we can
>>> handle the case where a vhost-user interface is either the in-port or
>>> out-port of the flow being offloaded. Depending on whether the
>>> vhost-user interface is the in-port or out-port, if the other port
>>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
>>> offload the flow to that device. This would help us build further
>>> extensions to the existing partial-offload framework by adding more
>>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
>>> the output action that still needs to be processed in software.
>>>
>>> Thanks,
>>> -Harsha
>>>
>>>> +        } else {
>>>> +            VLOG_DBG_RL(&error_rl,
>>>> +                        "Unsupported action type %d", nl_attr_type(nla));
>>>> +            return -1;
>>>> +        }
>>>>        }
>>>>
>>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>> --
>>>> 2.14.5
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org
>>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7Ccddf11ececd44fe5046c08d778d08a98%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637110709393981707&amp;sdata=rTGgfKo%2F6%2Faf7BIwBZi962tbyBBR97WGWgQunYhyGek%3D&amp;reserved=0
Eli Britstein Dec. 5, 2019, 6:48 a.m. UTC | #12
On 12/2/2019 4:25 PM, Ilya Maximets wrote:
> BTW, function netdev_dpdk_flow_api_supported() is not intended to be called
> in the offloading process.  It's only for initialization phase.  You can see
> the "/* TODO: Check if we able to offload some minimal flow. */" in the code
> and that might be destructive and unwanted for offloading process.

I guess the thought you had in mind when writing it was to call 
rte_flow_validate once or more in order to find out offload capabilities 
of that device, but for now it is only a comment.

In the future if/when we do call rte_flow_validate, I think it should 
not be in this function, but in a function maybe more specific for that, 
like

netdev_offload_dpdk_cap(), or something like that.

In current code for sure, I don't see any issue to call this function in 
the offloading process, and not only during init.
Li,Rongqing via dev Dec. 5, 2019, 4:46 p.m. UTC | #13
On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote:
>
>
> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
> > On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
> >> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> >> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> >> ---
> >>   NEWS                           |  2 +
> >>   lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
> >>   2 files changed, 85 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/NEWS b/NEWS
> >> index 88b818948..ca9c2b230 100644
> >> --- a/NEWS
> >> +++ b/NEWS
> >> @@ -10,6 +10,8 @@ Post-v2.12.0
> >>          if supported by libbpf.
> >>        * Add option to enable, disable and query TCP sequence checking in
> >>          conntrack.
> >> +   - DPDK:
> >> +     * Add hardware offload support for output actions.
> >>
> >>   v2.12.0 - 03 Sep 2019
> >>   ---------------------
> >> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
> >> index dbbc45e99..6e7efb315 100644
> >> --- a/lib/netdev-offload-dpdk-flow.c
> >> +++ b/lib/netdev-offload-dpdk-flow.c
> >> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
> >>           } else {
> >>               ds_put_cstr(s, "  RSS = null\n");
> >>           }
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
> >> +        const struct rte_flow_action_count *count = actions->conf;
> >> +
> >> +        ds_put_cstr(s, "rte flow count action:\n");
> >> +        if (count) {
> >> +            ds_put_format(s,
> >> +                          "  Count: shared=%d, id=%d\n",
> >> +                          count->shared, count->id);
> >> +        } else {
> >> +            ds_put_cstr(s, "  Count = null\n");
> >> +        }
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
> >> +        const struct rte_flow_action_port_id *port_id = actions->conf;
> >> +
> >> +        ds_put_cstr(s, "rte flow port-id action:\n");
> >> +        if (port_id) {
> >> +            ds_put_format(s,
> >> +                          "  Port-id: original=%d, id=%d\n",
> >> +                          port_id->original, port_id->id);
> >> +        } else {
> >> +            ds_put_cstr(s, "  Port-id = null\n");
> >> +        }
> >>       } else {
> >>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >>       }
> >> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
> >>       return 0;
> >>   }
> >>
> >> +static void
> >> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
> >> +{
> >> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
> >> +
> >> +    count->shared = 0;
> >> +    /* Each flow has a single count action, so no need of id */
> >> +    count->id = 0;
> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
> >> +}
> >> +
> >> +static void
> >> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
> >> +                                    struct netdev *outdev)
> >> +{
> >> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
> >> +
> >> +    port_id->id = netdev_dpdk_get_port_id(outdev);
> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
> >> +}
> >> +
> >> +static int
> >> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
> >> +                                   const struct nlattr *nla,
> >> +                                   struct offload_info *info)
> >> +{
> >> +    struct netdev *outdev;
> >> +    odp_port_t port;
> >> +
> >> +    port = nl_attr_get_odp_port(nla);
> >> +    outdev = netdev_ports_get(port, info->dpif_class);
> >> +    if (outdev == NULL) {
> >> +        VLOG_DBG_RL(&error_rl,
> >> +                    "Cannot find netdev for odp port %d", port);
> >> +        return -1;
> >> +    }
> >> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
> >> +        VLOG_DBG_RL(&error_rl,
> >> +                    "Output to %s cannot be offloaded",
> >> +                    netdev_get_name(outdev));
> >> +        return -1;
> >> +    }
> >> +
> >> +    netdev_dpdk_flow_add_count_action(actions);
> >> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
> >> +    netdev_close(outdev);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>   int
> >>   netdev_dpdk_flow_actions_add(struct flow_actions *actions,
> >>                                struct nlattr *nl_actions,
> >>                                size_t nl_actions_len,
> >> -                             struct offload_info *info OVS_UNUSED)
> >> +                             struct offload_info *info)
> >>   {
> >>       struct nlattr *nla;
> >>       size_t left;
> >>
> >>       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> >> -        VLOG_DBG_RL(&error_rl,
> >> -                    "Unsupported action type %d", nl_attr_type(nla));
> >> -        return -1;
> >> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
> >> +            left <= NLA_ALIGN(nla->nla_len)) {
> >> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
> >> +                return -1;
> >> +            }
> > The check in netdev_dpdk_flow_add_output_action() is insufficient to
> > decide whether the device is capable of full-offload, since it assumes
> > that the output action is supported if netdev_dpdk type is
> > DPDK_DEV_ETH.
> >
> > There are a couple of issues with this approach:
> >
> > 1. For a device that is not capable of full-offload, it results in 2
> > calls to the PMD for every flow-offload request. The first attempt for
> > full-offload fails, followed by another call for partial-offload.
> We cannot avoid this, as we want to do full offload, and only fallback
> to partial if it doesn't work.
> >
> > 2. Even for devices that support full-offload, the offload might fail
> > if the in-port and out-port are not in the same switch-domain. For
> > example, if only partial-offload is configured (vhost-user ports) in
> > the vSwitch, then the PMD should check if the in-port and out-port are
> > on the same switch (domain-id must match) and fail the request
> > otherwise. This check is important to alert the user of
> > misconfigurations. If OVS doesn't do this check, all pmd drivers will
> > have to do this check and that duplication is not desirable.
>
> I don't think it's in the OVS scope to do such checks, but it should be
> in the driver's level (PMD), the same way it is in the kernel offloading
> using TC.
>

IMO, it is not out-of-scope for OVS to add these checks to make better
offload decisions:
- This layer of OVS (netdev-offload-dpdk) is already offload-aware and
it is using a set of offload APIs (rte_flow)
- Full offload may not be the preferred configuration for a customer
(since it requires additional config like SRIOV). That is, the code
should not make the distinction that full-offload is always preferred
and partial-offload is only a fallback option.
- Without such checks, offload rate of partial-offload configuration
could be potentially impacted with a large number of flows.
- Similar improvement (checking/logging in OVS as opposed to
individual drivers) could be made in TC kernel offloads too

> I agree that the argument of logging is important, but this is something
> that I think out of scope of this series. All prints in DPDK go to
> /dev/null, and not logged into OVS' log.
>
> BTW, in kernel, there is a similar issue with drivers that logs using
> extack. Those messages are not dumped to dmesg, and OVS also doesn't
> read them to its log. Also out of scope of this series...
>
> >
> > Here's how this could be addressed, by identifying whether the device
> > is capable of full-offload or partial-offload.
> >
> > 1. Check if the target device (in-port) is full-offload capable. This
> > could be done by checking if rte_flow api is supported, along with a
> > valid switch-domain-id. We could even improve this further, similar to
> > the kernel switchdev framework in which the device mode is
> > configurable (legacy/eswitch modes) and available to consumers, but
> > this might require some more DPDK framework changes.
> I think we can enhance to check if both are ETH. However, As I wrote
> above, I think checking other HW properties should not be in OVS scope,
> but in driver's domain.
> >
> > 2. If the device is full-offload capable, and if output action is
> > specified,  check if in-port and out-port are on the same switch
> > (switch-domain-id match mentioned above).
> >
> > If both 1 and 2 succeed, then the device is capable of full-offload
> > and we can proceed with full-offload item/action setup, followed by
> > rte_flow_create().
> >
> > Otherwise, we fallback to partial-offload code path. Here, we can
> > handle the case where a vhost-user interface is either the in-port or
> > out-port of the flow being offloaded. Depending on whether the
> > vhost-user interface is the in-port or out-port, if the other port
> > associated with the flow is offload capable (DPDK_DEV_ETH), then we
> > offload the flow to that device. This would help us build further
> > extensions to the existing partial-offload framework by adding more
> > actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
> > the output action that still needs to be processed in software.
> >
> > Thanks,
> > -Harsha
> >
> >> +        } else {
> >> +            VLOG_DBG_RL(&error_rl,
> >> +                        "Unsupported action type %d", nl_attr_type(nla));
> >> +            return -1;
> >> +        }
> >>       }
> >>
> >>       add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >> --
> >> 2.14.5
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&amp;sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&amp;reserved=0
Eli Britstein Dec. 8, 2019, 9:38 a.m. UTC | #14
On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote:
> On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote:
>>
>> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
>>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>>> ---
>>>>    NEWS                           |  2 +
>>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/NEWS b/NEWS
>>>> index 88b818948..ca9c2b230 100644
>>>> --- a/NEWS
>>>> +++ b/NEWS
>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>           if supported by libbpf.
>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>           conntrack.
>>>> +   - DPDK:
>>>> +     * Add hardware offload support for output actions.
>>>>
>>>>    v2.12.0 - 03 Sep 2019
>>>>    ---------------------
>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>>> index dbbc45e99..6e7efb315 100644
>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>>            } else {
>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>            }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>> +        if (count) {
>>>> +            ds_put_format(s,
>>>> +                          "  Count: shared=%d, id=%d\n",
>>>> +                          count->shared, count->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>> +        }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>> +        if (port_id) {
>>>> +            ds_put_format(s,
>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>> +                          port_id->original, port_id->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>> +        }
>>>>        } else {
>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>        }
>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>>        return 0;
>>>>    }
>>>>
>>>> +static void
>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>> +{
>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>> +
>>>> +    count->shared = 0;
>>>> +    /* Each flow has a single count action, so no need of id */
>>>> +    count->id = 0;
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>> +}
>>>> +
>>>> +static void
>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>> +                                    struct netdev *outdev)
>>>> +{
>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>> +
>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>> +}
>>>> +
>>>> +static int
>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>> +                                   const struct nlattr *nla,
>>>> +                                   struct offload_info *info)
>>>> +{
>>>> +    struct netdev *outdev;
>>>> +    odp_port_t port;
>>>> +
>>>> +    port = nl_attr_get_odp_port(nla);
>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>> +    if (outdev == NULL) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Cannot find netdev for odp port %d", port);
>>>> +        return -1;
>>>> +    }
>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Output to %s cannot be offloaded",
>>>> +                    netdev_get_name(outdev));
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    netdev_dpdk_flow_add_count_action(actions);
>>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>>>> +    netdev_close(outdev);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    int
>>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>>>                                 struct nlattr *nl_actions,
>>>>                                 size_t nl_actions_len,
>>>> -                             struct offload_info *info OVS_UNUSED)
>>>> +                             struct offload_info *info)
>>>>    {
>>>>        struct nlattr *nla;
>>>>        size_t left;
>>>>
>>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>>>> -        VLOG_DBG_RL(&error_rl,
>>>> -                    "Unsupported action type %d", nl_attr_type(nla));
>>>> -        return -1;
>>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>>>> +            left <= NLA_ALIGN(nla->nla_len)) {
>>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>>>> +                return -1;
>>>> +            }
>>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
>>> decide whether the device is capable of full-offload, since it assumes
>>> that the output action is supported if netdev_dpdk type is
>>> DPDK_DEV_ETH.
>>>
>>> There are a couple of issues with this approach:
>>>
>>> 1. For a device that is not capable of full-offload, it results in 2
>>> calls to the PMD for every flow-offload request. The first attempt for
>>> full-offload fails, followed by another call for partial-offload.
>> We cannot avoid this, as we want to do full offload, and only fallback
>> to partial if it doesn't work.
>>> 2. Even for devices that support full-offload, the offload might fail
>>> if the in-port and out-port are not in the same switch-domain. For
>>> example, if only partial-offload is configured (vhost-user ports) in
>>> the vSwitch, then the PMD should check if the in-port and out-port are
>>> on the same switch (domain-id must match) and fail the request
>>> otherwise. This check is important to alert the user of
>>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
>>> have to do this check and that duplication is not desirable.
>> I don't think it's in the OVS scope to do such checks, but it should be
>> in the driver's level (PMD), the same way it is in the kernel offloading
>> using TC.
>>
> IMO, it is not out-of-scope for OVS to add these checks to make better
> offload decisions:
> - This layer of OVS (netdev-offload-dpdk) is already offload-aware and
> it is using a set of offload APIs (rte_flow)
It is aware of the offload APIs, but not on specific HW capabilities, so 
in some cases

trying to make such restrictions won't cover all use cases in best case and result in
redundant code.

For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with
embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but
we can in ConnectX-6.

It is not something exposed at all to the user, specifically to OVS.

So, I think those kind of decisions that involve HW specifics should be done in the
layer that has the HW knowledge, e.g. - the driver.

> - Full offload may not be the preferred configuration for a customer
> (since it requires additional config like SRIOV). That is, the code
> should not make the distinction that full-offload is always preferred
> and partial-offload is only a fallback option.
At the moment, any kind of offload requires SRIOV. BTW, to resolve this, 
after full

offload is accepted, we plan to resubmit vDPA (see
https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for
virtio as well.

> - Without such checks, offload rate of partial-offload configuration
> could be potentially impacted with a large number of flows.
I don't think failing in the PMD level will be much slower than failing 
in OVS level.
> - Similar improvement (checking/logging in OVS as opposed to
> individual drivers) could be made in TC kernel offloads too
In order to add such checks, we can maybe think in the future of a 
capability

querying mechanism, to query each device (at init time, maybe using
rte_flow_validate) and make decisions based on that.

However, I think this mechanism is neither a simple one, nor should be a
prerequisite for this patch-set.

>
>> I agree that the argument of logging is important, but this is something
>> that I think out of scope of this series. All prints in DPDK go to
>> /dev/null, and not logged into OVS' log.
>>
>> BTW, in kernel, there is a similar issue with drivers that logs using
>> extack. Those messages are not dumped to dmesg, and OVS also doesn't
>> read them to its log. Also out of scope of this series...
>>
>>> Here's how this could be addressed, by identifying whether the device
>>> is capable of full-offload or partial-offload.
>>>
>>> 1. Check if the target device (in-port) is full-offload capable. This
>>> could be done by checking if rte_flow api is supported, along with a
>>> valid switch-domain-id. We could even improve this further, similar to
>>> the kernel switchdev framework in which the device mode is
>>> configurable (legacy/eswitch modes) and available to consumers, but
>>> this might require some more DPDK framework changes.
>> I think we can enhance to check if both are ETH. However, As I wrote
>> above, I think checking other HW properties should not be in OVS scope,
>> but in driver's domain.
>>> 2. If the device is full-offload capable, and if output action is
>>> specified,  check if in-port and out-port are on the same switch
>>> (switch-domain-id match mentioned above).
>>>
>>> If both 1 and 2 succeed, then the device is capable of full-offload
>>> and we can proceed with full-offload item/action setup, followed by
>>> rte_flow_create().
>>>
>>> Otherwise, we fallback to partial-offload code path. Here, we can
>>> handle the case where a vhost-user interface is either the in-port or
>>> out-port of the flow being offloaded. Depending on whether the
>>> vhost-user interface is the in-port or out-port, if the other port
>>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
>>> offload the flow to that device. This would help us build further
>>> extensions to the existing partial-offload framework by adding more
>>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
>>> the output action that still needs to be processed in software.
>>>
>>> Thanks,
>>> -Harsha
>>>
>>>> +        } else {
>>>> +            VLOG_DBG_RL(&error_rl,
>>>> +                        "Unsupported action type %d", nl_attr_type(nla));
>>>> +            return -1;
>>>> +        }
>>>>        }
>>>>
>>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>> --
>>>> 2.14.5
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org
>>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&amp;sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&amp;reserved=0
Ilya Maximets Dec. 10, 2019, 6:15 p.m. UTC | #15
On 08.12.2019 10:38, Eli Britstein wrote:
> 
> On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote:
>> On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote:
>>>
>>> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
>>>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
>>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>>>>> ---
>>>>>    NEWS                           |  2 +
>>>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
>>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/NEWS b/NEWS
>>>>> index 88b818948..ca9c2b230 100644
>>>>> --- a/NEWS
>>>>> +++ b/NEWS
>>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>>           if supported by libbpf.
>>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>>           conntrack.
>>>>> +   - DPDK:
>>>>> +     * Add hardware offload support for output actions.
>>>>>
>>>>>    v2.12.0 - 03 Sep 2019
>>>>>    ---------------------
>>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
>>>>> index dbbc45e99..6e7efb315 100644
>>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>>>>            } else {
>>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>>            }
>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>>> +
>>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>>> +        if (count) {
>>>>> +            ds_put_format(s,
>>>>> +                          "  Count: shared=%d, id=%d\n",
>>>>> +                          count->shared, count->id);
>>>>> +        } else {
>>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>>> +        }
>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>>> +
>>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>>> +        if (port_id) {
>>>>> +            ds_put_format(s,
>>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>>> +                          port_id->original, port_id->id);
>>>>> +        } else {
>>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>>> +        }
>>>>>        } else {
>>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>>        }
>>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
>>>>>        return 0;
>>>>>    }
>>>>>
>>>>> +static void
>>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>>> +{
>>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>>> +
>>>>> +    count->shared = 0;
>>>>> +    /* Each flow has a single count action, so no need of id */
>>>>> +    count->id = 0;
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>>> +                                    struct netdev *outdev)
>>>>> +{
>>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>>> +
>>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>>> +}
>>>>> +
>>>>> +static int
>>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>>> +                                   const struct nlattr *nla,
>>>>> +                                   struct offload_info *info)
>>>>> +{
>>>>> +    struct netdev *outdev;
>>>>> +    odp_port_t port;
>>>>> +
>>>>> +    port = nl_attr_get_odp_port(nla);
>>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>>> +    if (outdev == NULL) {
>>>>> +        VLOG_DBG_RL(&error_rl,
>>>>> +                    "Cannot find netdev for odp port %d", port);
>>>>> +        return -1;
>>>>> +    }
>>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>>> +        VLOG_DBG_RL(&error_rl,
>>>>> +                    "Output to %s cannot be offloaded",
>>>>> +                    netdev_get_name(outdev));
>>>>> +        return -1;
>>>>> +    }
>>>>> +
>>>>> +    netdev_dpdk_flow_add_count_action(actions);
>>>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>>>>> +    netdev_close(outdev);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>    int
>>>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>>>>                                 struct nlattr *nl_actions,
>>>>>                                 size_t nl_actions_len,
>>>>> -                             struct offload_info *info OVS_UNUSED)
>>>>> +                             struct offload_info *info)
>>>>>    {
>>>>>        struct nlattr *nla;
>>>>>        size_t left;
>>>>>
>>>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>>>>> -        VLOG_DBG_RL(&error_rl,
>>>>> -                    "Unsupported action type %d", nl_attr_type(nla));
>>>>> -        return -1;
>>>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>>>>> +            left <= NLA_ALIGN(nla->nla_len)) {
>>>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>>>>> +                return -1;
>>>>> +            }
>>>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
>>>> decide whether the device is capable of full-offload, since it assumes
>>>> that the output action is supported if netdev_dpdk type is
>>>> DPDK_DEV_ETH.
>>>>
>>>> There are a couple of issues with this approach:
>>>>
>>>> 1. For a device that is not capable of full-offload, it results in 2
>>>> calls to the PMD for every flow-offload request. The first attempt for
>>>> full-offload fails, followed by another call for partial-offload.
>>> We cannot avoid this, as we want to do full offload, and only fallback
>>> to partial if it doesn't work.
>>>> 2. Even for devices that support full-offload, the offload might fail
>>>> if the in-port and out-port are not in the same switch-domain. For
>>>> example, if only partial-offload is configured (vhost-user ports) in
>>>> the vSwitch, then the PMD should check if the in-port and out-port are
>>>> on the same switch (domain-id must match) and fail the request
>>>> otherwise. This check is important to alert the user of
>>>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
>>>> have to do this check and that duplication is not desirable.
>>> I don't think it's in the OVS scope to do such checks, but it should be
>>> in the driver's level (PMD), the same way it is in the kernel offloading
>>> using TC.
>>>
>> IMO, it is not out-of-scope for OVS to add these checks to make better
>> offload decisions:
>> - This layer of OVS (netdev-offload-dpdk) is already offload-aware and
>> it is using a set of offload APIs (rte_flow)
> It is aware of the offload APIs, but not on specific HW capabilities, so 
> in some cases
> 
> trying to make such restrictions won't cover all use cases in best case and result in
> redundant code.
> 
> For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with
> embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but
> we can in ConnectX-6.
> 
> It is not something exposed at all to the user, specifically to OVS.
> 
> So, I think those kind of decisions that involve HW specifics should be done in the
> layer that has the HW knowledge, e.g. - the driver.
> 
>> - Full offload may not be the preferred configuration for a customer
>> (since it requires additional config like SRIOV). That is, the code
>> should not make the distinction that full-offload is always preferred
>> and partial-offload is only a fallback option.
> At the moment, any kind of offload requires SRIOV.


I don't think that SRIOV required to use Flow Director on Intel NICs.
So, partial offloading should work on Intel NICs wihtout enabling SRIOV.


> BTW, to resolve this, 
> after full
> 
> offload is accepted, we plan to resubmit vDPA (see
> https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for
> virtio as well.
> 
>> - Without such checks, offload rate of partial-offload configuration
>> could be potentially impacted with a large number of flows.
> I don't think failing in the PMD level will be much slower than failing 
> in OVS level.
>> - Similar improvement (checking/logging in OVS as opposed to
>> individual drivers) could be made in TC kernel offloads too
> In order to add such checks, we can maybe think in the future of a 
> capability
> 
> querying mechanism, to query each device (at init time, maybe using
> rte_flow_validate) and make decisions based on that.
> 
> However, I think this mechanism is neither a simple one, nor should be a
> prerequisite for this patch-set.
> 
>>
>>> I agree that the argument of logging is important, but this is something
>>> that I think out of scope of this series. All prints in DPDK go to
>>> /dev/null, and not logged into OVS' log.
>>>
>>> BTW, in kernel, there is a similar issue with drivers that logs using
>>> extack. Those messages are not dumped to dmesg, and OVS also doesn't
>>> read them to its log. Also out of scope of this series...
>>>
>>>> Here's how this could be addressed, by identifying whether the device
>>>> is capable of full-offload or partial-offload.
>>>>
>>>> 1. Check if the target device (in-port) is full-offload capable. This
>>>> could be done by checking if rte_flow api is supported, along with a
>>>> valid switch-domain-id. We could even improve this further, similar to
>>>> the kernel switchdev framework in which the device mode is
>>>> configurable (legacy/eswitch modes) and available to consumers, but
>>>> this might require some more DPDK framework changes.
>>> I think we can enhance to check if both are ETH. However, As I wrote
>>> above, I think checking other HW properties should not be in OVS scope,
>>> but in driver's domain.
>>>> 2. If the device is full-offload capable, and if output action is
>>>> specified,  check if in-port and out-port are on the same switch
>>>> (switch-domain-id match mentioned above).
>>>>
>>>> If both 1 and 2 succeed, then the device is capable of full-offload
>>>> and we can proceed with full-offload item/action setup, followed by
>>>> rte_flow_create().
>>>>
>>>> Otherwise, we fallback to partial-offload code path. Here, we can
>>>> handle the case where a vhost-user interface is either the in-port or
>>>> out-port of the flow being offloaded. Depending on whether the
>>>> vhost-user interface is the in-port or out-port, if the other port
>>>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
>>>> offload the flow to that device. This would help us build further
>>>> extensions to the existing partial-offload framework by adding more
>>>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
>>>> the output action that still needs to be processed in software.
>>>>
>>>> Thanks,
>>>> -Harsha
>>>>
>>>>> +        } else {
>>>>> +            VLOG_DBG_RL(&error_rl,
>>>>> +                        "Unsupported action type %d", nl_attr_type(nla));
>>>>> +            return -1;
>>>>> +        }
>>>>>        }
>>>>>
>>>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>>> --
>>>>> 2.14.5
>>>>>
>>>>> _______________________________________________
>>>>> dev mailing list
>>>>> dev@openvswitch.org
>>>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&amp;sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&amp;reserved=0
Li,Rongqing via dev Dec. 16, 2019, 8:47 a.m. UTC | #16
On Sun, Dec 8, 2019 at 3:08 PM Eli Britstein <elibr@mellanox.com> wrote:
>
>
> On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote:
> > On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>
> >> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
> >>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> >>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> >>>> ---
> >>>>    NEWS                           |  2 +
> >>>>    lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++--
> >>>>    2 files changed, 85 insertions(+), 4 deletions(-)
> >>>>
> >>>> diff --git a/NEWS b/NEWS
> >>>> index 88b818948..ca9c2b230 100644
> >>>> --- a/NEWS
> >>>> +++ b/NEWS
> >>>> @@ -10,6 +10,8 @@ Post-v2.12.0
> >>>>           if supported by libbpf.
> >>>>         * Add option to enable, disable and query TCP sequence checking in
> >>>>           conntrack.
> >>>> +   - DPDK:
> >>>> +     * Add hardware offload support for output actions.
> >>>>
> >>>>    v2.12.0 - 03 Sep 2019
> >>>>    ---------------------
> >>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
> >>>> index dbbc45e99..6e7efb315 100644
> >>>> --- a/lib/netdev-offload-dpdk-flow.c
> >>>> +++ b/lib/netdev-offload-dpdk-flow.c
> >>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
> >>>>            } else {
> >>>>                ds_put_cstr(s, "  RSS = null\n");
> >>>>            }
> >>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
> >>>> +        const struct rte_flow_action_count *count = actions->conf;
> >>>> +
> >>>> +        ds_put_cstr(s, "rte flow count action:\n");
> >>>> +        if (count) {
> >>>> +            ds_put_format(s,
> >>>> +                          "  Count: shared=%d, id=%d\n",
> >>>> +                          count->shared, count->id);
> >>>> +        } else {
> >>>> +            ds_put_cstr(s, "  Count = null\n");
> >>>> +        }
> >>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
> >>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
> >>>> +
> >>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
> >>>> +        if (port_id) {
> >>>> +            ds_put_format(s,
> >>>> +                          "  Port-id: original=%d, id=%d\n",
> >>>> +                          port_id->original, port_id->id);
> >>>> +        } else {
> >>>> +            ds_put_cstr(s, "  Port-id = null\n");
> >>>> +        }
> >>>>        } else {
> >>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >>>>        }
> >>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
> >>>>        return 0;
> >>>>    }
> >>>>
> >>>> +static void
> >>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
> >>>> +{
> >>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
> >>>> +
> >>>> +    count->shared = 0;
> >>>> +    /* Each flow has a single count action, so no need of id */
> >>>> +    count->id = 0;
> >>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
> >>>> +}
> >>>> +
> >>>> +static void
> >>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
> >>>> +                                    struct netdev *outdev)
> >>>> +{
> >>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
> >>>> +
> >>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
> >>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
> >>>> +}
> >>>> +
> >>>> +static int
> >>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
> >>>> +                                   const struct nlattr *nla,
> >>>> +                                   struct offload_info *info)
> >>>> +{
> >>>> +    struct netdev *outdev;
> >>>> +    odp_port_t port;
> >>>> +
> >>>> +    port = nl_attr_get_odp_port(nla);
> >>>> +    outdev = netdev_ports_get(port, info->dpif_class);
> >>>> +    if (outdev == NULL) {
> >>>> +        VLOG_DBG_RL(&error_rl,
> >>>> +                    "Cannot find netdev for odp port %d", port);
> >>>> +        return -1;
> >>>> +    }
> >>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
> >>>> +        VLOG_DBG_RL(&error_rl,
> >>>> +                    "Output to %s cannot be offloaded",
> >>>> +                    netdev_get_name(outdev));
> >>>> +        return -1;
> >>>> +    }
> >>>> +
> >>>> +    netdev_dpdk_flow_add_count_action(actions);
> >>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
> >>>> +    netdev_close(outdev);
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>>    int
> >>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
> >>>>                                 struct nlattr *nl_actions,
> >>>>                                 size_t nl_actions_len,
> >>>> -                             struct offload_info *info OVS_UNUSED)
> >>>> +                             struct offload_info *info)
> >>>>    {
> >>>>        struct nlattr *nla;
> >>>>        size_t left;
> >>>>
> >>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> >>>> -        VLOG_DBG_RL(&error_rl,
> >>>> -                    "Unsupported action type %d", nl_attr_type(nla));
> >>>> -        return -1;
> >>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
> >>>> +            left <= NLA_ALIGN(nla->nla_len)) {
> >>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
> >>>> +                return -1;
> >>>> +            }
> >>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
> >>> decide whether the device is capable of full-offload, since it assumes
> >>> that the output action is supported if netdev_dpdk type is
> >>> DPDK_DEV_ETH.
> >>>
> >>> There are a couple of issues with this approach:
> >>>
> >>> 1. For a device that is not capable of full-offload, it results in 2
> >>> calls to the PMD for every flow-offload request. The first attempt for
> >>> full-offload fails, followed by another call for partial-offload.
> >> We cannot avoid this, as we want to do full offload, and only fallback
> >> to partial if it doesn't work.
> >>> 2. Even for devices that support full-offload, the offload might fail
> >>> if the in-port and out-port are not in the same switch-domain. For
> >>> example, if only partial-offload is configured (vhost-user ports) in
> >>> the vSwitch, then the PMD should check if the in-port and out-port are
> >>> on the same switch (domain-id must match) and fail the request
> >>> otherwise. This check is important to alert the user of
> >>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
> >>> have to do this check and that duplication is not desirable.
> >> I don't think it's in the OVS scope to do such checks, but it should be
> >> in the driver's level (PMD), the same way it is in the kernel offloading
> >> using TC.
> >>
> > IMO, it is not out-of-scope for OVS to add these checks to make better
> > offload decisions:
> > - This layer of OVS (netdev-offload-dpdk) is already offload-aware and
> > it is using a set of offload APIs (rte_flow)
> It is aware of the offload APIs, but not on specific HW capabilities, so
> in some cases
>
> trying to make such restrictions won't cover all use cases in best case and result in
> redundant code.
>
> For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with
> embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but
> we can in ConnectX-6.

OVS will only do the first level validation that the 2 ports belong to
the same eSwitch, as per the switchdev architecture. If there are
additional chip specific restrictions (i.e, something not compliant
with switchdev spec), those of course could be validated and rejected
by the driver.  I'm not suggesting that the driver shouldn't have such
chip-specific additional checks, but the first level check should be
done by OVS.

>
> It is not something exposed at all to the user, specifically to OVS.
>
> So, I think those kind of decisions that involve HW specifics should be done in the
> layer that has the HW knowledge, e.g. - the driver.
>
> > - Full offload may not be the preferred configuration for a customer
> > (since it requires additional config like SRIOV). That is, the code
> > should not make the distinction that full-offload is always preferred
> > and partial-offload is only a fallback option.
> At the moment, any kind of offload requires SRIOV. BTW, to resolve this,
> after full
>
> offload is accepted, we plan to resubmit vDPA (see
> https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for
> virtio as well.

But vDPA requires more infrastructure changes that may not be needed
for a simple vhost-user configuration and it shouldn't stop us from
supporting additional actions for vhost-user.

Thanks,
-Harsha

>
> > - Without such checks, offload rate of partial-offload configuration
> > could be potentially impacted with a large number of flows.
> I don't think failing in the PMD level will be much slower than failing
> in OVS level.
> > - Similar improvement (checking/logging in OVS as opposed to
> > individual drivers) could be made in TC kernel offloads too
> In order to add such checks, we can maybe think in the future of a
> capability
>
> querying mechanism, to query each device (at init time, maybe using
> rte_flow_validate) and make decisions based on that.
>
> However, I think this mechanism is neither a simple one, nor should be a
> prerequisite for this patch-set.
>
> >
> >> I agree that the argument of logging is important, but this is something
> >> that I think out of scope of this series. All prints in DPDK go to
> >> /dev/null, and not logged into OVS' log.
> >>
> >> BTW, in kernel, there is a similar issue with drivers that logs using
> >> extack. Those messages are not dumped to dmesg, and OVS also doesn't
> >> read them to its log. Also out of scope of this series...
> >>
> >>> Here's how this could be addressed, by identifying whether the device
> >>> is capable of full-offload or partial-offload.
> >>>
> >>> 1. Check if the target device (in-port) is full-offload capable. This
> >>> could be done by checking if rte_flow api is supported, along with a
> >>> valid switch-domain-id. We could even improve this further, similar to
> >>> the kernel switchdev framework in which the device mode is
> >>> configurable (legacy/eswitch modes) and available to consumers, but
> >>> this might require some more DPDK framework changes.
> >> I think we can enhance to check if both are ETH. However, As I wrote
> >> above, I think checking other HW properties should not be in OVS scope,
> >> but in driver's domain.
> >>> 2. If the device is full-offload capable, and if output action is
> >>> specified,  check if in-port and out-port are on the same switch
> >>> (switch-domain-id match mentioned above).
> >>>
> >>> If both 1 and 2 succeed, then the device is capable of full-offload
> >>> and we can proceed with full-offload item/action setup, followed by
> >>> rte_flow_create().
> >>>
> >>> Otherwise, we fallback to partial-offload code path. Here, we can
> >>> handle the case where a vhost-user interface is either the in-port or
> >>> out-port of the flow being offloaded. Depending on whether the
> >>> vhost-user interface is the in-port or out-port, if the other port
> >>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
> >>> offload the flow to that device. This would help us build further
> >>> extensions to the existing partial-offload framework by adding more
> >>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
> >>> the output action that still needs to be processed in software.
> >>>
> >>> Thanks,
> >>> -Harsha
> >>>
> >>>> +        } else {
> >>>> +            VLOG_DBG_RL(&error_rl,
> >>>> +                        "Unsupported action type %d", nl_attr_type(nla));
> >>>> +            return -1;
> >>>> +        }
> >>>>        }
> >>>>
> >>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >>>> --
> >>>> 2.14.5
> >>>>
> >>>> _______________________________________________
> >>>> dev mailing list
> >>>> dev@openvswitch.org
> >>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&amp;sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&amp;reserved=0
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 88b818948..ca9c2b230 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@  Post-v2.12.0
        if supported by libbpf.
      * Add option to enable, disable and query TCP sequence checking in
        conntrack.
+   - DPDK:
+     * Add hardware offload support for output actions.
 
 v2.12.0 - 03 Sep 2019
 ---------------------
diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c
index dbbc45e99..6e7efb315 100644
--- a/lib/netdev-offload-dpdk-flow.c
+++ b/lib/netdev-offload-dpdk-flow.c
@@ -274,6 +274,28 @@  ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
         } else {
             ds_put_cstr(s, "  RSS = null\n");
         }
+    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
+        const struct rte_flow_action_count *count = actions->conf;
+
+        ds_put_cstr(s, "rte flow count action:\n");
+        if (count) {
+            ds_put_format(s,
+                          "  Count: shared=%d, id=%d\n",
+                          count->shared, count->id);
+        } else {
+            ds_put_cstr(s, "  Count = null\n");
+        }
+    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
+        const struct rte_flow_action_port_id *port_id = actions->conf;
+
+        ds_put_cstr(s, "rte flow port-id action:\n");
+        if (port_id) {
+            ds_put_format(s,
+                          "  Port-id: original=%d, id=%d\n",
+                          port_id->original, port_id->id);
+        } else {
+            ds_put_cstr(s, "  Port-id = null\n");
+        }
     } else {
         ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
     }
@@ -531,19 +553,76 @@  netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns,
     return 0;
 }
 
+static void
+netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
+{
+    struct rte_flow_action_count *count = xzalloc(sizeof *count);
+
+    count->shared = 0;
+    /* Each flow has a single count action, so no need of id */
+    count->id = 0;
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
+}
+
+static void
+netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
+                                    struct netdev *outdev)
+{
+    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
+
+    port_id->id = netdev_dpdk_get_port_id(outdev);
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
+}
+
+static int
+netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
+                                   const struct nlattr *nla,
+                                   struct offload_info *info)
+{
+    struct netdev *outdev;
+    odp_port_t port;
+
+    port = nl_attr_get_odp_port(nla);
+    outdev = netdev_ports_get(port, info->dpif_class);
+    if (outdev == NULL) {
+        VLOG_DBG_RL(&error_rl,
+                    "Cannot find netdev for odp port %d", port);
+        return -1;
+    }
+    if (!netdev_dpdk_flow_api_supported(outdev)) {
+        VLOG_DBG_RL(&error_rl,
+                    "Output to %s cannot be offloaded",
+                    netdev_get_name(outdev));
+        return -1;
+    }
+
+    netdev_dpdk_flow_add_count_action(actions);
+    netdev_dpdk_flow_add_port_id_action(actions, outdev);
+    netdev_close(outdev);
+
+    return 0;
+}
+
 int
 netdev_dpdk_flow_actions_add(struct flow_actions *actions,
                              struct nlattr *nl_actions,
                              size_t nl_actions_len,
-                             struct offload_info *info OVS_UNUSED)
+                             struct offload_info *info)
 {
     struct nlattr *nla;
     size_t left;
 
     NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
-        VLOG_DBG_RL(&error_rl,
-                    "Unsupported action type %d", nl_attr_type(nla));
-        return -1;
+        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
+            left <= NLA_ALIGN(nla->nla_len)) {
+            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
+                return -1;
+            }
+        } else {
+            VLOG_DBG_RL(&error_rl,
+                        "Unsupported action type %d", nl_attr_type(nla));
+            return -1;
+        }
     }
 
     add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);