Message ID | 20191120152826.25074-16-elibr@mellanox.com |
---|---|
State | Changes Requested |
Delegated to: | Ilya Maximets |
Headers | show |
Series | netdev datapath actions offload | expand |
Bleep bloop. Greetings Eli Britstein, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. git-am: Failed to merge in the changes. Patch failed at 0001 netdev-offload-dpdk-flow: Support offload of output action The copy of the patch that failed is found in: /var/lib/jenkins/jobs/upstream_build_from_pw/workspace/.git/rebase-apply/patch When you have resolved this problem, run "git am --resolved". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort". Please check this out. If you feel there has been an error, please email aconole@redhat.com Thanks, 0-day Robot
On 20.11.2019 16:28, Eli Britstein wrote: > Signed-off-by: Eli Britstein <elibr@mellanox.com> > Reviewed-by: Oz Shlomo <ozsh@mellanox.com> > --- > NEWS | 2 + > lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 85 insertions(+), 4 deletions(-) > > diff --git a/NEWS b/NEWS > index 88b818948..ca9c2b230 100644 > --- a/NEWS > +++ b/NEWS > @@ -10,6 +10,8 @@ Post-v2.12.0 > if supported by libbpf. > * Add option to enable, disable and query TCP sequence checking in > conntrack. > + - DPDK: > + * Add hardware offload support for output actions. > > v2.12.0 - 03 Sep 2019 > --------------------- > diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c > index dbbc45e99..6e7efb315 100644 > --- a/lib/netdev-offload-dpdk-flow.c > +++ b/lib/netdev-offload-dpdk-flow.c > @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) > } else { > ds_put_cstr(s, " RSS = null\n"); > } > + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { > + const struct rte_flow_action_count *count = actions->conf; > + > + ds_put_cstr(s, "rte flow count action:\n"); > + if (count) { > + ds_put_format(s, > + " Count: shared=%d, id=%d\n", > + count->shared, count->id); > + } else { > + ds_put_cstr(s, " Count = null\n"); > + } > + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { > + const struct rte_flow_action_port_id *port_id = actions->conf; > + > + ds_put_cstr(s, "rte flow port-id action:\n"); > + if (port_id) { > + ds_put_format(s, > + " Port-id: original=%d, id=%d\n", > + port_id->original, port_id->id); > + } else { > + ds_put_cstr(s, " Port-id = null\n"); > + } > } else { > ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); > } > @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, > return 0; > } > > +static void > +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) > +{ > + struct rte_flow_action_count *count = xzalloc(sizeof *count); > + > + count->shared = 0; > + /* Each flow has a single count action, so no need of id */ > + count->id = 0; > + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); > +} > + > +static void > +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, > + struct netdev *outdev) > +{ > + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); > + > + port_id->id = netdev_dpdk_get_port_id(outdev); > + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); > +} > + > +static int > +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, > + const struct nlattr *nla, > + struct offload_info *info) > +{ > + struct netdev *outdev; > + odp_port_t port; > + > + port = nl_attr_get_odp_port(nla); > + outdev = netdev_ports_get(port, info->dpif_class); > + if (outdev == NULL) { > + VLOG_DBG_RL(&error_rl, > + "Cannot find netdev for odp port %d", port); > + return -1; > + } > + if (!netdev_dpdk_flow_api_supported(outdev)) { This doesn't look correct. I mean, maybe we need to check if port is really the port on a same piece of hardware. Will the flow setup fail in this case? Will code fallback to the MARK+RSS? > + VLOG_DBG_RL(&error_rl, > + "Output to %s cannot be offloaded", > + netdev_get_name(outdev)); > + return -1; > + } > + > + netdev_dpdk_flow_add_count_action(actions); > + netdev_dpdk_flow_add_port_id_action(actions, outdev); > + netdev_close(outdev); > + > + return 0; > +} > + > int > netdev_dpdk_flow_actions_add(struct flow_actions *actions, > struct nlattr *nl_actions, > size_t nl_actions_len, > - struct offload_info *info OVS_UNUSED) > + struct offload_info *info) > { > struct nlattr *nla; > size_t left; > > NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { > - VLOG_DBG_RL(&error_rl, > - "Unsupported action type %d", nl_attr_type(nla)); > - return -1; > + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && > + left <= NLA_ALIGN(nla->nla_len)) { > + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { > + return -1; > + } > + } else { > + VLOG_DBG_RL(&error_rl, > + "Unsupported action type %d", nl_attr_type(nla)); > + return -1; > + } > } > > add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >
On 11/22/2019 6:19 PM, Ilya Maximets wrote: > On 20.11.2019 16:28, Eli Britstein wrote: >> Signed-off-by: Eli Britstein <elibr@mellanox.com> >> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >> --- >> NEWS | 2 + >> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >> 2 files changed, 85 insertions(+), 4 deletions(-) >> >> diff --git a/NEWS b/NEWS >> index 88b818948..ca9c2b230 100644 >> --- a/NEWS >> +++ b/NEWS >> @@ -10,6 +10,8 @@ Post-v2.12.0 >> if supported by libbpf. >> * Add option to enable, disable and query TCP sequence checking in >> conntrack. >> + - DPDK: >> + * Add hardware offload support for output actions. >> >> v2.12.0 - 03 Sep 2019 >> --------------------- >> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >> index dbbc45e99..6e7efb315 100644 >> --- a/lib/netdev-offload-dpdk-flow.c >> +++ b/lib/netdev-offload-dpdk-flow.c >> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >> } else { >> ds_put_cstr(s, " RSS = null\n"); >> } >> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >> + const struct rte_flow_action_count *count = actions->conf; >> + >> + ds_put_cstr(s, "rte flow count action:\n"); >> + if (count) { >> + ds_put_format(s, >> + " Count: shared=%d, id=%d\n", >> + count->shared, count->id); >> + } else { >> + ds_put_cstr(s, " Count = null\n"); >> + } >> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >> + const struct rte_flow_action_port_id *port_id = actions->conf; >> + >> + ds_put_cstr(s, "rte flow port-id action:\n"); >> + if (port_id) { >> + ds_put_format(s, >> + " Port-id: original=%d, id=%d\n", >> + port_id->original, port_id->id); >> + } else { >> + ds_put_cstr(s, " Port-id = null\n"); >> + } >> } else { >> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >> } >> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >> return 0; >> } >> >> +static void >> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >> +{ >> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >> + >> + count->shared = 0; >> + /* Each flow has a single count action, so no need of id */ >> + count->id = 0; >> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >> +} >> + >> +static void >> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >> + struct netdev *outdev) >> +{ >> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >> + >> + port_id->id = netdev_dpdk_get_port_id(outdev); >> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >> +} >> + >> +static int >> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >> + const struct nlattr *nla, >> + struct offload_info *info) >> +{ >> + struct netdev *outdev; >> + odp_port_t port; >> + >> + port = nl_attr_get_odp_port(nla); >> + outdev = netdev_ports_get(port, info->dpif_class); >> + if (outdev == NULL) { >> + VLOG_DBG_RL(&error_rl, >> + "Cannot find netdev for odp port %d", port); >> + return -1; >> + } >> + if (!netdev_dpdk_flow_api_supported(outdev)) { > This doesn't look correct. I mean, maybe we need to check if port is > really the port on a same piece of hardware. Will the flow setup fail > in this case? Will code fallback to the MARK+RSS? We cannot check if the port is on the same HW, and it is also wrong from the application point of view. If the operation cannot be supported, it is in the driver (DPDK) scope to fail it. You are right about the fallback. I'll fix it in v2. > >> + VLOG_DBG_RL(&error_rl, >> + "Output to %s cannot be offloaded", >> + netdev_get_name(outdev)); >> + return -1; >> + } >> + >> + netdev_dpdk_flow_add_count_action(actions); >> + netdev_dpdk_flow_add_port_id_action(actions, outdev); >> + netdev_close(outdev); >> + >> + return 0; >> +} >> + >> int >> netdev_dpdk_flow_actions_add(struct flow_actions *actions, >> struct nlattr *nl_actions, >> size_t nl_actions_len, >> - struct offload_info *info OVS_UNUSED) >> + struct offload_info *info) >> { >> struct nlattr *nla; >> size_t left; >> >> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { >> - VLOG_DBG_RL(&error_rl, >> - "Unsupported action type %d", nl_attr_type(nla)); >> - return -1; >> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && >> + left <= NLA_ALIGN(nla->nla_len)) { >> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { >> + return -1; >> + } >> + } else { >> + VLOG_DBG_RL(&error_rl, >> + "Unsupported action type %d", nl_attr_type(nla)); >> + return -1; >> + } >> } >> >> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>
On 24.11.2019 14:22, Eli Britstein wrote: > > On 11/22/2019 6:19 PM, Ilya Maximets wrote: >> On 20.11.2019 16:28, Eli Britstein wrote: >>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>> --- >>> NEWS | 2 + >>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>> 2 files changed, 85 insertions(+), 4 deletions(-) >>> >>> diff --git a/NEWS b/NEWS >>> index 88b818948..ca9c2b230 100644 >>> --- a/NEWS >>> +++ b/NEWS >>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>> if supported by libbpf. >>> * Add option to enable, disable and query TCP sequence checking in >>> conntrack. >>> + - DPDK: >>> + * Add hardware offload support for output actions. >>> >>> v2.12.0 - 03 Sep 2019 >>> --------------------- >>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>> index dbbc45e99..6e7efb315 100644 >>> --- a/lib/netdev-offload-dpdk-flow.c >>> +++ b/lib/netdev-offload-dpdk-flow.c >>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>> } else { >>> ds_put_cstr(s, " RSS = null\n"); >>> } >>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>> + const struct rte_flow_action_count *count = actions->conf; >>> + >>> + ds_put_cstr(s, "rte flow count action:\n"); >>> + if (count) { >>> + ds_put_format(s, >>> + " Count: shared=%d, id=%d\n", >>> + count->shared, count->id); >>> + } else { >>> + ds_put_cstr(s, " Count = null\n"); >>> + } >>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>> + >>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>> + if (port_id) { >>> + ds_put_format(s, >>> + " Port-id: original=%d, id=%d\n", >>> + port_id->original, port_id->id); >>> + } else { >>> + ds_put_cstr(s, " Port-id = null\n"); >>> + } >>> } else { >>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>> } >>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>> return 0; >>> } >>> >>> +static void >>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>> +{ >>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>> + >>> + count->shared = 0; >>> + /* Each flow has a single count action, so no need of id */ >>> + count->id = 0; >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>> +} >>> + >>> +static void >>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>> + struct netdev *outdev) >>> +{ >>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>> + >>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>> +} >>> + >>> +static int >>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>> + const struct nlattr *nla, >>> + struct offload_info *info) >>> +{ >>> + struct netdev *outdev; >>> + odp_port_t port; >>> + >>> + port = nl_attr_get_odp_port(nla); >>> + outdev = netdev_ports_get(port, info->dpif_class); >>> + if (outdev == NULL) { >>> + VLOG_DBG_RL(&error_rl, >>> + "Cannot find netdev for odp port %d", port); >>> + return -1; >>> + } >>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >> This doesn't look correct. I mean, maybe we need to check if port is >> really the port on a same piece of hardware. Will the flow setup fail >> in this case? Will code fallback to the MARK+RSS? > > We cannot check if the port is on the same HW, and it is also wrong from > the application point of view. If the operation cannot be supported, it > is in the driver (DPDK) scope to fail it. BTW, function netdev_dpdk_flow_api_supported() is not intended to be called in the offloading process. It's only for initialization phase. You can see the "/* TODO: Check if we able to offload some minimal flow. */" in the code and that might be destructive and unwanted for offloading process. You, probably, wanted something like this instead (not tested): diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h index 4e1c4251d..92876f3a3 100644 --- a/lib/netdev-offload-provider.h +++ b/lib/netdev-offload-provider.h @@ -90,6 +90,9 @@ struct netdev_flow_api { int netdev_register_flow_api_provider(const struct netdev_flow_api *); int netdev_unregister_flow_api_provider(const char *type); +bool netdev_flow_api_equals(const struct netdev *, + const struct netdev_flow_api *); + #ifdef __linux__ extern const struct netdev_flow_api netdev_offload_tc; #endif diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c index ae01acda6..1970b1bae 100644 --- a/lib/netdev-offload.c +++ b/lib/netdev-offload.c @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type) return error; } +bool +netdev_flow_api_equals(const struct netdev *netdev, + const struct netdev_flow_api *flow_api) +{ + const struct netdev_flow_api *netdev_flow_api = + ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api); + + return netdev_flow_api == flow_api; +} + static int netdev_assign_flow_api(struct netdev *netdev) { --- BTW2, We, probably, need to call rte_flow_validate() somewhere. Best regards, Ilya Maximets.
On 11/30/2019 1:59 PM, Ilya Maximets wrote: > On 24.11.2019 14:22, Eli Britstein wrote: >> On 11/22/2019 6:19 PM, Ilya Maximets wrote: >>> On 20.11.2019 16:28, Eli Britstein wrote: >>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>>> --- >>>> NEWS | 2 + >>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>>> 2 files changed, 85 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/NEWS b/NEWS >>>> index 88b818948..ca9c2b230 100644 >>>> --- a/NEWS >>>> +++ b/NEWS >>>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>>> if supported by libbpf. >>>> * Add option to enable, disable and query TCP sequence checking in >>>> conntrack. >>>> + - DPDK: >>>> + * Add hardware offload support for output actions. >>>> >>>> v2.12.0 - 03 Sep 2019 >>>> --------------------- >>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>>> index dbbc45e99..6e7efb315 100644 >>>> --- a/lib/netdev-offload-dpdk-flow.c >>>> +++ b/lib/netdev-offload-dpdk-flow.c >>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>>> } else { >>>> ds_put_cstr(s, " RSS = null\n"); >>>> } >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>>> + const struct rte_flow_action_count *count = actions->conf; >>>> + >>>> + ds_put_cstr(s, "rte flow count action:\n"); >>>> + if (count) { >>>> + ds_put_format(s, >>>> + " Count: shared=%d, id=%d\n", >>>> + count->shared, count->id); >>>> + } else { >>>> + ds_put_cstr(s, " Count = null\n"); >>>> + } >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>>> + >>>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>>> + if (port_id) { >>>> + ds_put_format(s, >>>> + " Port-id: original=%d, id=%d\n", >>>> + port_id->original, port_id->id); >>>> + } else { >>>> + ds_put_cstr(s, " Port-id = null\n"); >>>> + } >>>> } else { >>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>>> } >>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>>> return 0; >>>> } >>>> >>>> +static void >>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>>> +{ >>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>>> + >>>> + count->shared = 0; >>>> + /* Each flow has a single count action, so no need of id */ >>>> + count->id = 0; >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>>> +} >>>> + >>>> +static void >>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>>> + struct netdev *outdev) >>>> +{ >>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>>> + >>>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>>> +} >>>> + >>>> +static int >>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>>> + const struct nlattr *nla, >>>> + struct offload_info *info) >>>> +{ >>>> + struct netdev *outdev; >>>> + odp_port_t port; >>>> + >>>> + port = nl_attr_get_odp_port(nla); >>>> + outdev = netdev_ports_get(port, info->dpif_class); >>>> + if (outdev == NULL) { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Cannot find netdev for odp port %d", port); >>>> + return -1; >>>> + } >>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>> This doesn't look correct. I mean, maybe we need to check if port is >>> really the port on a same piece of hardware. Will the flow setup fail >>> in this case? Will code fallback to the MARK+RSS? >> We cannot check if the port is on the same HW, and it is also wrong from >> the application point of view. If the operation cannot be supported, it >> is in the driver (DPDK) scope to fail it. > BTW, function netdev_dpdk_flow_api_supported() is not intended to be called > in the offloading process. It's only for initialization phase. You can see > the "/* TODO: Check if we able to offload some minimal flow. */" in the code > and that might be destructive and unwanted for offloading process. Actually, there is no need to call it at all, as we get here only if we found the netdev by netdev_ports_get, which means we found a device that can have offloads. I enhanced for error handling of netdev_dpdk_get_port_id instead of introducing such new code, that I think not required. > > You, probably, wanted something like this instead (not tested): > > diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h > index 4e1c4251d..92876f3a3 100644 > --- a/lib/netdev-offload-provider.h > +++ b/lib/netdev-offload-provider.h > @@ -90,6 +90,9 @@ struct netdev_flow_api { > int netdev_register_flow_api_provider(const struct netdev_flow_api *); > int netdev_unregister_flow_api_provider(const char *type); > > +bool netdev_flow_api_equals(const struct netdev *, > + const struct netdev_flow_api *); > + > #ifdef __linux__ > extern const struct netdev_flow_api netdev_offload_tc; > #endif > diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c > index ae01acda6..1970b1bae 100644 > --- a/lib/netdev-offload.c > +++ b/lib/netdev-offload.c > @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type) > return error; > } > > +bool > +netdev_flow_api_equals(const struct netdev *netdev, > + const struct netdev_flow_api *flow_api) > +{ > + const struct netdev_flow_api *netdev_flow_api = > + ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api); > + > + return netdev_flow_api == flow_api; > +} > + > static int > netdev_assign_flow_api(struct netdev *netdev) > { > --- > > BTW2, We, probably, need to call rte_flow_validate() somewhere. what for? if we fail by rte_flow_create, we will handle the error. the validate may be used in the future to query driver capabilities, but this is something much more complex and not related to this patch-set. > > Best regards, Ilya Maximets.
On 01.12.2019 9:29, Eli Britstein wrote: > > On 11/30/2019 1:59 PM, Ilya Maximets wrote: >> On 24.11.2019 14:22, Eli Britstein wrote: >>> On 11/22/2019 6:19 PM, Ilya Maximets wrote: >>>> On 20.11.2019 16:28, Eli Britstein wrote: >>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>>>> --- >>>>> NEWS | 2 + >>>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>>>> 2 files changed, 85 insertions(+), 4 deletions(-) >>>>> >>>>> diff --git a/NEWS b/NEWS >>>>> index 88b818948..ca9c2b230 100644 >>>>> --- a/NEWS >>>>> +++ b/NEWS >>>>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>>>> if supported by libbpf. >>>>> * Add option to enable, disable and query TCP sequence checking in >>>>> conntrack. >>>>> + - DPDK: >>>>> + * Add hardware offload support for output actions. >>>>> >>>>> v2.12.0 - 03 Sep 2019 >>>>> --------------------- >>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>>>> index dbbc45e99..6e7efb315 100644 >>>>> --- a/lib/netdev-offload-dpdk-flow.c >>>>> +++ b/lib/netdev-offload-dpdk-flow.c >>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>>>> } else { >>>>> ds_put_cstr(s, " RSS = null\n"); >>>>> } >>>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>>>> + const struct rte_flow_action_count *count = actions->conf; >>>>> + >>>>> + ds_put_cstr(s, "rte flow count action:\n"); >>>>> + if (count) { >>>>> + ds_put_format(s, >>>>> + " Count: shared=%d, id=%d\n", >>>>> + count->shared, count->id); >>>>> + } else { >>>>> + ds_put_cstr(s, " Count = null\n"); >>>>> + } >>>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>>>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>>>> + >>>>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>>>> + if (port_id) { >>>>> + ds_put_format(s, >>>>> + " Port-id: original=%d, id=%d\n", >>>>> + port_id->original, port_id->id); >>>>> + } else { >>>>> + ds_put_cstr(s, " Port-id = null\n"); >>>>> + } >>>>> } else { >>>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>>>> } >>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>>>> return 0; >>>>> } >>>>> >>>>> +static void >>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>>>> +{ >>>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>>>> + >>>>> + count->shared = 0; >>>>> + /* Each flow has a single count action, so no need of id */ >>>>> + count->id = 0; >>>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>>>> +} >>>>> + >>>>> +static void >>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>>>> + struct netdev *outdev) >>>>> +{ >>>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>>>> + >>>>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>>>> +} >>>>> + >>>>> +static int >>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>>>> + const struct nlattr *nla, >>>>> + struct offload_info *info) >>>>> +{ >>>>> + struct netdev *outdev; >>>>> + odp_port_t port; >>>>> + >>>>> + port = nl_attr_get_odp_port(nla); >>>>> + outdev = netdev_ports_get(port, info->dpif_class); >>>>> + if (outdev == NULL) { >>>>> + VLOG_DBG_RL(&error_rl, >>>>> + "Cannot find netdev for odp port %d", port); >>>>> + return -1; >>>>> + } >>>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>>> This doesn't look correct. I mean, maybe we need to check if port is >>>> really the port on a same piece of hardware. Will the flow setup fail >>>> in this case? Will code fallback to the MARK+RSS? >>> We cannot check if the port is on the same HW, and it is also wrong from >>> the application point of view. If the operation cannot be supported, it >>> is in the driver (DPDK) scope to fail it. >> BTW, function netdev_dpdk_flow_api_supported() is not intended to be called >> in the offloading process. It's only for initialization phase. You can see >> the "/* TODO: Check if we able to offload some minimal flow. */" in the code >> and that might be destructive and unwanted for offloading process. > > Actually, there is no need to call it at all, as we get here only if we > found the netdev by netdev_ports_get, which means we found a device that > can have offloads. 'port_to_netdev' hash map contains all the netdevs created by the ofproto regardless of the flow API initialization. This is done to allow delayed initialization if hw offloading was enabled in runtime. 'netdev_ports_get' just traverses above hash map. So, the flow API could be not initialized. > > I enhanced for error handling of netdev_dpdk_get_port_id instead of > introducing such new code, that I think not required. > >> >> You, probably, wanted something like this instead (not tested): >> >> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h >> index 4e1c4251d..92876f3a3 100644 >> --- a/lib/netdev-offload-provider.h >> +++ b/lib/netdev-offload-provider.h >> @@ -90,6 +90,9 @@ struct netdev_flow_api { >> int netdev_register_flow_api_provider(const struct netdev_flow_api *); >> int netdev_unregister_flow_api_provider(const char *type); >> >> +bool netdev_flow_api_equals(const struct netdev *, >> + const struct netdev_flow_api *); >> + >> #ifdef __linux__ >> extern const struct netdev_flow_api netdev_offload_tc; >> #endif >> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c >> index ae01acda6..1970b1bae 100644 >> --- a/lib/netdev-offload.c >> +++ b/lib/netdev-offload.c >> @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type) >> return error; >> } >> >> +bool >> +netdev_flow_api_equals(const struct netdev *netdev, >> + const struct netdev_flow_api *flow_api) >> +{ >> + const struct netdev_flow_api *netdev_flow_api = >> + ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api); >> + >> + return netdev_flow_api == flow_api; >> +} >> + >> static int >> netdev_assign_flow_api(struct netdev *netdev) >> { >> --- >> >> BTW2, We, probably, need to call rte_flow_validate() somewhere. > what for? if we fail by rte_flow_create, we will handle the error. the > validate may be used in the future to query driver capabilities, but > this is something much more complex and not related to this patch-set. OK.
On 12/2/2019 4:25 PM, Ilya Maximets wrote: > On 01.12.2019 9:29, Eli Britstein wrote: >> On 11/30/2019 1:59 PM, Ilya Maximets wrote: >>> On 24.11.2019 14:22, Eli Britstein wrote: >>>> On 11/22/2019 6:19 PM, Ilya Maximets wrote: >>>>> On 20.11.2019 16:28, Eli Britstein wrote: >>>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>>>>> --- >>>>>> NEWS | 2 + >>>>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>>>>> 2 files changed, 85 insertions(+), 4 deletions(-) >>>>>> >>>>>> diff --git a/NEWS b/NEWS >>>>>> index 88b818948..ca9c2b230 100644 >>>>>> --- a/NEWS >>>>>> +++ b/NEWS >>>>>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>>>>> if supported by libbpf. >>>>>> * Add option to enable, disable and query TCP sequence checking in >>>>>> conntrack. >>>>>> + - DPDK: >>>>>> + * Add hardware offload support for output actions. >>>>>> >>>>>> v2.12.0 - 03 Sep 2019 >>>>>> --------------------- >>>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>>>>> index dbbc45e99..6e7efb315 100644 >>>>>> --- a/lib/netdev-offload-dpdk-flow.c >>>>>> +++ b/lib/netdev-offload-dpdk-flow.c >>>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>>>>> } else { >>>>>> ds_put_cstr(s, " RSS = null\n"); >>>>>> } >>>>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>>>>> + const struct rte_flow_action_count *count = actions->conf; >>>>>> + >>>>>> + ds_put_cstr(s, "rte flow count action:\n"); >>>>>> + if (count) { >>>>>> + ds_put_format(s, >>>>>> + " Count: shared=%d, id=%d\n", >>>>>> + count->shared, count->id); >>>>>> + } else { >>>>>> + ds_put_cstr(s, " Count = null\n"); >>>>>> + } >>>>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>>>>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>>>>> + >>>>>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>>>>> + if (port_id) { >>>>>> + ds_put_format(s, >>>>>> + " Port-id: original=%d, id=%d\n", >>>>>> + port_id->original, port_id->id); >>>>>> + } else { >>>>>> + ds_put_cstr(s, " Port-id = null\n"); >>>>>> + } >>>>>> } else { >>>>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>>>>> } >>>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>>>>> return 0; >>>>>> } >>>>>> >>>>>> +static void >>>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>>>>> +{ >>>>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>>>>> + >>>>>> + count->shared = 0; >>>>>> + /* Each flow has a single count action, so no need of id */ >>>>>> + count->id = 0; >>>>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>>>>> +} >>>>>> + >>>>>> +static void >>>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>>>>> + struct netdev *outdev) >>>>>> +{ >>>>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>>>>> + >>>>>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>>>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>>>>> +} >>>>>> + >>>>>> +static int >>>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>>>>> + const struct nlattr *nla, >>>>>> + struct offload_info *info) >>>>>> +{ >>>>>> + struct netdev *outdev; >>>>>> + odp_port_t port; >>>>>> + >>>>>> + port = nl_attr_get_odp_port(nla); >>>>>> + outdev = netdev_ports_get(port, info->dpif_class); >>>>>> + if (outdev == NULL) { >>>>>> + VLOG_DBG_RL(&error_rl, >>>>>> + "Cannot find netdev for odp port %d", port); >>>>>> + return -1; >>>>>> + } >>>>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>>>> This doesn't look correct. I mean, maybe we need to check if port is >>>>> really the port on a same piece of hardware. Will the flow setup fail >>>>> in this case? Will code fallback to the MARK+RSS? >>>> We cannot check if the port is on the same HW, and it is also wrong from >>>> the application point of view. If the operation cannot be supported, it >>>> is in the driver (DPDK) scope to fail it. >>> BTW, function netdev_dpdk_flow_api_supported() is not intended to be called >>> in the offloading process. It's only for initialization phase. You can see >>> the "/* TODO: Check if we able to offload some minimal flow. */" in the code >>> and that might be destructive and unwanted for offloading process. >> Actually, there is no need to call it at all, as we get here only if we >> found the netdev by netdev_ports_get, which means we found a device that >> can have offloads. > 'port_to_netdev' hash map contains all the netdevs created by the ofproto > regardless of the flow API initialization. This is done to allow delayed > initialization if hw offloading was enabled in runtime. 'netdev_ports_get' > just traverses above hash map. So, the flow API could be not initialized. I don't care if flow API is not initialized on the output netdev. I check validity of its DPDK port and that should be enough. Please see in V2. BTW, AFAIK, changing hw-offload is not supported on the fly and requires OVS to be restarted. > >> I enhanced for error handling of netdev_dpdk_get_port_id instead of >> introducing such new code, that I think not required. >> >>> You, probably, wanted something like this instead (not tested): >>> >>> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h >>> index 4e1c4251d..92876f3a3 100644 >>> --- a/lib/netdev-offload-provider.h >>> +++ b/lib/netdev-offload-provider.h >>> @@ -90,6 +90,9 @@ struct netdev_flow_api { >>> int netdev_register_flow_api_provider(const struct netdev_flow_api *); >>> int netdev_unregister_flow_api_provider(const char *type); >>> >>> +bool netdev_flow_api_equals(const struct netdev *, >>> + const struct netdev_flow_api *); >>> + >>> #ifdef __linux__ >>> extern const struct netdev_flow_api netdev_offload_tc; >>> #endif >>> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c >>> index ae01acda6..1970b1bae 100644 >>> --- a/lib/netdev-offload.c >>> +++ b/lib/netdev-offload.c >>> @@ -156,6 +156,16 @@ netdev_unregister_flow_api_provider(const char *type) >>> return error; >>> } >>> >>> +bool >>> +netdev_flow_api_equals(const struct netdev *netdev, >>> + const struct netdev_flow_api *flow_api) >>> +{ >>> + const struct netdev_flow_api *netdev_flow_api = >>> + ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api); >>> + >>> + return netdev_flow_api == flow_api; >>> +} >>> + >>> static int >>> netdev_assign_flow_api(struct netdev *netdev) >>> { >>> --- >>> >>> BTW2, We, probably, need to call rte_flow_validate() somewhere. >> what for? if we fail by rte_flow_create, we will handle the error. the >> validate may be used in the future to query driver capabilities, but >> this is something much more complex and not related to this patch-set. > OK.
On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: > > Signed-off-by: Eli Britstein <elibr@mellanox.com> > Reviewed-by: Oz Shlomo <ozsh@mellanox.com> > --- > NEWS | 2 + > lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 85 insertions(+), 4 deletions(-) > > diff --git a/NEWS b/NEWS > index 88b818948..ca9c2b230 100644 > --- a/NEWS > +++ b/NEWS > @@ -10,6 +10,8 @@ Post-v2.12.0 > if supported by libbpf. > * Add option to enable, disable and query TCP sequence checking in > conntrack. > + - DPDK: > + * Add hardware offload support for output actions. > > v2.12.0 - 03 Sep 2019 > --------------------- > diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c > index dbbc45e99..6e7efb315 100644 > --- a/lib/netdev-offload-dpdk-flow.c > +++ b/lib/netdev-offload-dpdk-flow.c > @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) > } else { > ds_put_cstr(s, " RSS = null\n"); > } > + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { > + const struct rte_flow_action_count *count = actions->conf; > + > + ds_put_cstr(s, "rte flow count action:\n"); > + if (count) { > + ds_put_format(s, > + " Count: shared=%d, id=%d\n", > + count->shared, count->id); > + } else { > + ds_put_cstr(s, " Count = null\n"); > + } > + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { > + const struct rte_flow_action_port_id *port_id = actions->conf; > + > + ds_put_cstr(s, "rte flow port-id action:\n"); > + if (port_id) { > + ds_put_format(s, > + " Port-id: original=%d, id=%d\n", > + port_id->original, port_id->id); > + } else { > + ds_put_cstr(s, " Port-id = null\n"); > + } > } else { > ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); > } > @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, > return 0; > } > > +static void > +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) > +{ > + struct rte_flow_action_count *count = xzalloc(sizeof *count); > + > + count->shared = 0; > + /* Each flow has a single count action, so no need of id */ > + count->id = 0; > + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); > +} > + > +static void > +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, > + struct netdev *outdev) > +{ > + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); > + > + port_id->id = netdev_dpdk_get_port_id(outdev); > + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); > +} > + > +static int > +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, > + const struct nlattr *nla, > + struct offload_info *info) > +{ > + struct netdev *outdev; > + odp_port_t port; > + > + port = nl_attr_get_odp_port(nla); > + outdev = netdev_ports_get(port, info->dpif_class); > + if (outdev == NULL) { > + VLOG_DBG_RL(&error_rl, > + "Cannot find netdev for odp port %d", port); > + return -1; > + } > + if (!netdev_dpdk_flow_api_supported(outdev)) { > + VLOG_DBG_RL(&error_rl, > + "Output to %s cannot be offloaded", > + netdev_get_name(outdev)); > + return -1; > + } > + > + netdev_dpdk_flow_add_count_action(actions); > + netdev_dpdk_flow_add_port_id_action(actions, outdev); > + netdev_close(outdev); > + > + return 0; > +} > + > int > netdev_dpdk_flow_actions_add(struct flow_actions *actions, > struct nlattr *nl_actions, > size_t nl_actions_len, > - struct offload_info *info OVS_UNUSED) > + struct offload_info *info) > { > struct nlattr *nla; > size_t left; > > NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { > - VLOG_DBG_RL(&error_rl, > - "Unsupported action type %d", nl_attr_type(nla)); > - return -1; > + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && > + left <= NLA_ALIGN(nla->nla_len)) { > + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { > + return -1; > + } The check in netdev_dpdk_flow_add_output_action() is insufficient to decide whether the device is capable of full-offload, since it assumes that the output action is supported if netdev_dpdk type is DPDK_DEV_ETH. There are a couple of issues with this approach: 1. For a device that is not capable of full-offload, it results in 2 calls to the PMD for every flow-offload request. The first attempt for full-offload fails, followed by another call for partial-offload. 2. Even for devices that support full-offload, the offload might fail if the in-port and out-port are not in the same switch-domain. For example, if only partial-offload is configured (vhost-user ports) in the vSwitch, then the PMD should check if the in-port and out-port are on the same switch (domain-id must match) and fail the request otherwise. This check is important to alert the user of misconfigurations. If OVS doesn't do this check, all pmd drivers will have to do this check and that duplication is not desirable. Here's how this could be addressed, by identifying whether the device is capable of full-offload or partial-offload. 1. Check if the target device (in-port) is full-offload capable. This could be done by checking if rte_flow api is supported, along with a valid switch-domain-id. We could even improve this further, similar to the kernel switchdev framework in which the device mode is configurable (legacy/eswitch modes) and available to consumers, but this might require some more DPDK framework changes. 2. If the device is full-offload capable, and if output action is specified, check if in-port and out-port are on the same switch (switch-domain-id match mentioned above). If both 1 and 2 succeed, then the device is capable of full-offload and we can proceed with full-offload item/action setup, followed by rte_flow_create(). Otherwise, we fallback to partial-offload code path. Here, we can handle the case where a vhost-user interface is either the in-port or out-port of the flow being offloaded. Depending on whether the vhost-user interface is the in-port or out-port, if the other port associated with the flow is offload capable (DPDK_DEV_ETH), then we offload the flow to that device. This would help us build further extensions to the existing partial-offload framework by adding more actions such as VLAN and Tunnel actions (apart from MARK/RSS), except the output action that still needs to be processed in software. Thanks, -Harsha > + } else { > + VLOG_DBG_RL(&error_rl, > + "Unsupported action type %d", nl_attr_type(nla)); > + return -1; > + } > } > > add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); > -- > 2.14.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: > On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: >> Signed-off-by: Eli Britstein <elibr@mellanox.com> >> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >> --- >> NEWS | 2 + >> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >> 2 files changed, 85 insertions(+), 4 deletions(-) >> >> diff --git a/NEWS b/NEWS >> index 88b818948..ca9c2b230 100644 >> --- a/NEWS >> +++ b/NEWS >> @@ -10,6 +10,8 @@ Post-v2.12.0 >> if supported by libbpf. >> * Add option to enable, disable and query TCP sequence checking in >> conntrack. >> + - DPDK: >> + * Add hardware offload support for output actions. >> >> v2.12.0 - 03 Sep 2019 >> --------------------- >> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >> index dbbc45e99..6e7efb315 100644 >> --- a/lib/netdev-offload-dpdk-flow.c >> +++ b/lib/netdev-offload-dpdk-flow.c >> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >> } else { >> ds_put_cstr(s, " RSS = null\n"); >> } >> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >> + const struct rte_flow_action_count *count = actions->conf; >> + >> + ds_put_cstr(s, "rte flow count action:\n"); >> + if (count) { >> + ds_put_format(s, >> + " Count: shared=%d, id=%d\n", >> + count->shared, count->id); >> + } else { >> + ds_put_cstr(s, " Count = null\n"); >> + } >> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >> + const struct rte_flow_action_port_id *port_id = actions->conf; >> + >> + ds_put_cstr(s, "rte flow port-id action:\n"); >> + if (port_id) { >> + ds_put_format(s, >> + " Port-id: original=%d, id=%d\n", >> + port_id->original, port_id->id); >> + } else { >> + ds_put_cstr(s, " Port-id = null\n"); >> + } >> } else { >> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >> } >> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >> return 0; >> } >> >> +static void >> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >> +{ >> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >> + >> + count->shared = 0; >> + /* Each flow has a single count action, so no need of id */ >> + count->id = 0; >> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >> +} >> + >> +static void >> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >> + struct netdev *outdev) >> +{ >> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >> + >> + port_id->id = netdev_dpdk_get_port_id(outdev); >> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >> +} >> + >> +static int >> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >> + const struct nlattr *nla, >> + struct offload_info *info) >> +{ >> + struct netdev *outdev; >> + odp_port_t port; >> + >> + port = nl_attr_get_odp_port(nla); >> + outdev = netdev_ports_get(port, info->dpif_class); >> + if (outdev == NULL) { >> + VLOG_DBG_RL(&error_rl, >> + "Cannot find netdev for odp port %d", port); >> + return -1; >> + } >> + if (!netdev_dpdk_flow_api_supported(outdev)) { >> + VLOG_DBG_RL(&error_rl, >> + "Output to %s cannot be offloaded", >> + netdev_get_name(outdev)); >> + return -1; >> + } >> + >> + netdev_dpdk_flow_add_count_action(actions); >> + netdev_dpdk_flow_add_port_id_action(actions, outdev); >> + netdev_close(outdev); >> + >> + return 0; >> +} >> + >> int >> netdev_dpdk_flow_actions_add(struct flow_actions *actions, >> struct nlattr *nl_actions, >> size_t nl_actions_len, >> - struct offload_info *info OVS_UNUSED) >> + struct offload_info *info) >> { >> struct nlattr *nla; >> size_t left; >> >> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { >> - VLOG_DBG_RL(&error_rl, >> - "Unsupported action type %d", nl_attr_type(nla)); >> - return -1; >> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && >> + left <= NLA_ALIGN(nla->nla_len)) { >> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { >> + return -1; >> + } > The check in netdev_dpdk_flow_add_output_action() is insufficient to > decide whether the device is capable of full-offload, since it assumes > that the output action is supported if netdev_dpdk type is > DPDK_DEV_ETH. > > There are a couple of issues with this approach: > > 1. For a device that is not capable of full-offload, it results in 2 > calls to the PMD for every flow-offload request. The first attempt for > full-offload fails, followed by another call for partial-offload. We cannot avoid this, as we want to do full offload, and only fallback to partial if it doesn't work. > > 2. Even for devices that support full-offload, the offload might fail > if the in-port and out-port are not in the same switch-domain. For > example, if only partial-offload is configured (vhost-user ports) in > the vSwitch, then the PMD should check if the in-port and out-port are > on the same switch (domain-id must match) and fail the request > otherwise. This check is important to alert the user of > misconfigurations. If OVS doesn't do this check, all pmd drivers will > have to do this check and that duplication is not desirable. I don't think it's in the OVS scope to do such checks, but it should be in the driver's level (PMD), the same way it is in the kernel offloading using TC. I agree that the argument of logging is important, but this is something that I think out of scope of this series. All prints in DPDK go to /dev/null, and not logged into OVS' log. BTW, in kernel, there is a similar issue with drivers that logs using extack. Those messages are not dumped to dmesg, and OVS also doesn't read them to its log. Also out of scope of this series... > > Here's how this could be addressed, by identifying whether the device > is capable of full-offload or partial-offload. > > 1. Check if the target device (in-port) is full-offload capable. This > could be done by checking if rte_flow api is supported, along with a > valid switch-domain-id. We could even improve this further, similar to > the kernel switchdev framework in which the device mode is > configurable (legacy/eswitch modes) and available to consumers, but > this might require some more DPDK framework changes. I think we can enhance to check if both are ETH. However, As I wrote above, I think checking other HW properties should not be in OVS scope, but in driver's domain. > > 2. If the device is full-offload capable, and if output action is > specified, check if in-port and out-port are on the same switch > (switch-domain-id match mentioned above). > > If both 1 and 2 succeed, then the device is capable of full-offload > and we can proceed with full-offload item/action setup, followed by > rte_flow_create(). > > Otherwise, we fallback to partial-offload code path. Here, we can > handle the case where a vhost-user interface is either the in-port or > out-port of the flow being offloaded. Depending on whether the > vhost-user interface is the in-port or out-port, if the other port > associated with the flow is offload capable (DPDK_DEV_ETH), then we > offload the flow to that device. This would help us build further > extensions to the existing partial-offload framework by adding more > actions such as VLAN and Tunnel actions (apart from MARK/RSS), except > the output action that still needs to be processed in software. > > Thanks, > -Harsha > >> + } else { >> + VLOG_DBG_RL(&error_rl, >> + "Unsupported action type %d", nl_attr_type(nla)); >> + return -1; >> + } >> } >> >> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >> -- >> 2.14.5 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&reserved=0
On 04.12.2019 16:25, Eli Britstein wrote: > > On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: >> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: >>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>> --- >>> NEWS | 2 + >>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>> 2 files changed, 85 insertions(+), 4 deletions(-) >>> >>> diff --git a/NEWS b/NEWS >>> index 88b818948..ca9c2b230 100644 >>> --- a/NEWS >>> +++ b/NEWS >>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>> if supported by libbpf. >>> * Add option to enable, disable and query TCP sequence checking in >>> conntrack. >>> + - DPDK: >>> + * Add hardware offload support for output actions. >>> >>> v2.12.0 - 03 Sep 2019 >>> --------------------- >>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>> index dbbc45e99..6e7efb315 100644 >>> --- a/lib/netdev-offload-dpdk-flow.c >>> +++ b/lib/netdev-offload-dpdk-flow.c >>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>> } else { >>> ds_put_cstr(s, " RSS = null\n"); >>> } >>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>> + const struct rte_flow_action_count *count = actions->conf; >>> + >>> + ds_put_cstr(s, "rte flow count action:\n"); >>> + if (count) { >>> + ds_put_format(s, >>> + " Count: shared=%d, id=%d\n", >>> + count->shared, count->id); >>> + } else { >>> + ds_put_cstr(s, " Count = null\n"); >>> + } >>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>> + >>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>> + if (port_id) { >>> + ds_put_format(s, >>> + " Port-id: original=%d, id=%d\n", >>> + port_id->original, port_id->id); >>> + } else { >>> + ds_put_cstr(s, " Port-id = null\n"); >>> + } >>> } else { >>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>> } >>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>> return 0; >>> } >>> >>> +static void >>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>> +{ >>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>> + >>> + count->shared = 0; >>> + /* Each flow has a single count action, so no need of id */ >>> + count->id = 0; >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>> +} >>> + >>> +static void >>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>> + struct netdev *outdev) >>> +{ >>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>> + >>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>> +} >>> + >>> +static int >>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>> + const struct nlattr *nla, >>> + struct offload_info *info) >>> +{ >>> + struct netdev *outdev; >>> + odp_port_t port; >>> + >>> + port = nl_attr_get_odp_port(nla); >>> + outdev = netdev_ports_get(port, info->dpif_class); >>> + if (outdev == NULL) { >>> + VLOG_DBG_RL(&error_rl, >>> + "Cannot find netdev for odp port %d", port); >>> + return -1; >>> + } >>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>> + VLOG_DBG_RL(&error_rl, >>> + "Output to %s cannot be offloaded", >>> + netdev_get_name(outdev)); >>> + return -1; >>> + } >>> + >>> + netdev_dpdk_flow_add_count_action(actions); >>> + netdev_dpdk_flow_add_port_id_action(actions, outdev); >>> + netdev_close(outdev); >>> + >>> + return 0; >>> +} >>> + >>> int >>> netdev_dpdk_flow_actions_add(struct flow_actions *actions, >>> struct nlattr *nl_actions, >>> size_t nl_actions_len, >>> - struct offload_info *info OVS_UNUSED) >>> + struct offload_info *info) >>> { >>> struct nlattr *nla; >>> size_t left; >>> >>> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { >>> - VLOG_DBG_RL(&error_rl, >>> - "Unsupported action type %d", nl_attr_type(nla)); >>> - return -1; >>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && >>> + left <= NLA_ALIGN(nla->nla_len)) { >>> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { >>> + return -1; >>> + } >> The check in netdev_dpdk_flow_add_output_action() is insufficient to >> decide whether the device is capable of full-offload, since it assumes >> that the output action is supported if netdev_dpdk type is >> DPDK_DEV_ETH. >> >> There are a couple of issues with this approach: >> >> 1. For a device that is not capable of full-offload, it results in 2 >> calls to the PMD for every flow-offload request. The first attempt for >> full-offload fails, followed by another call for partial-offload. > We cannot avoid this, as we want to do full offload, and only fallback > to partial if it doesn't work. >> >> 2. Even for devices that support full-offload, the offload might fail >> if the in-port and out-port are not in the same switch-domain. For >> example, if only partial-offload is configured (vhost-user ports) in >> the vSwitch, then the PMD should check if the in-port and out-port are >> on the same switch (domain-id must match) and fail the request >> otherwise. This check is important to alert the user of >> misconfigurations. If OVS doesn't do this check, all pmd drivers will >> have to do this check and that duplication is not desirable. > > I don't think it's in the OVS scope to do such checks, but it should be > in the driver's level (PMD), the same way it is in the kernel offloading > using TC. > > I agree that the argument of logging is important, but this is something > that I think out of scope of this series. All prints in DPDK go to > /dev/null, and not logged into OVS' log. This is not correct. DPDK logs are redirected to OVS log. So, if DPDK driver uses DPDK logging properly, all the messages should go to ovs-vswitchd.log depending on the configured log levels. > > BTW, in kernel, there is a similar issue with drivers that logs using > extack. Those messages are not dumped to dmesg, and OVS also doesn't > read them to its log. Also out of scope of this series... > >> >> Here's how this could be addressed, by identifying whether the device >> is capable of full-offload or partial-offload. >> >> 1. Check if the target device (in-port) is full-offload capable. This >> could be done by checking if rte_flow api is supported, along with a >> valid switch-domain-id. We could even improve this further, similar to >> the kernel switchdev framework in which the device mode is >> configurable (legacy/eswitch modes) and available to consumers, but >> this might require some more DPDK framework changes. > I think we can enhance to check if both are ETH. However, As I wrote > above, I think checking other HW properties should not be in OVS scope, > but in driver's domain. >> >> 2. If the device is full-offload capable, and if output action is >> specified, check if in-port and out-port are on the same switch >> (switch-domain-id match mentioned above). >> >> If both 1 and 2 succeed, then the device is capable of full-offload >> and we can proceed with full-offload item/action setup, followed by >> rte_flow_create(). >> >> Otherwise, we fallback to partial-offload code path. Here, we can >> handle the case where a vhost-user interface is either the in-port or >> out-port of the flow being offloaded. Depending on whether the >> vhost-user interface is the in-port or out-port, if the other port >> associated with the flow is offload capable (DPDK_DEV_ETH), then we >> offload the flow to that device. This would help us build further >> extensions to the existing partial-offload framework by adding more >> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except >> the output action that still needs to be processed in software. >> >> Thanks, >> -Harsha >> >>> + } else { >>> + VLOG_DBG_RL(&error_rl, >>> + "Unsupported action type %d", nl_attr_type(nla)); >>> + return -1; >>> + } >>> } >>> >>> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>> -- >>> 2.14.5 >>> >>> _______________________________________________ >>> dev mailing list >>> dev@openvswitch.org >>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&reserved=0
On 12/4/2019 5:42 PM, Ilya Maximets wrote: > On 04.12.2019 16:25, Eli Britstein wrote: >> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: >>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: >>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>>> --- >>>> NEWS | 2 + >>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>>> 2 files changed, 85 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/NEWS b/NEWS >>>> index 88b818948..ca9c2b230 100644 >>>> --- a/NEWS >>>> +++ b/NEWS >>>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>>> if supported by libbpf. >>>> * Add option to enable, disable and query TCP sequence checking in >>>> conntrack. >>>> + - DPDK: >>>> + * Add hardware offload support for output actions. >>>> >>>> v2.12.0 - 03 Sep 2019 >>>> --------------------- >>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>>> index dbbc45e99..6e7efb315 100644 >>>> --- a/lib/netdev-offload-dpdk-flow.c >>>> +++ b/lib/netdev-offload-dpdk-flow.c >>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>>> } else { >>>> ds_put_cstr(s, " RSS = null\n"); >>>> } >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>>> + const struct rte_flow_action_count *count = actions->conf; >>>> + >>>> + ds_put_cstr(s, "rte flow count action:\n"); >>>> + if (count) { >>>> + ds_put_format(s, >>>> + " Count: shared=%d, id=%d\n", >>>> + count->shared, count->id); >>>> + } else { >>>> + ds_put_cstr(s, " Count = null\n"); >>>> + } >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>>> + >>>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>>> + if (port_id) { >>>> + ds_put_format(s, >>>> + " Port-id: original=%d, id=%d\n", >>>> + port_id->original, port_id->id); >>>> + } else { >>>> + ds_put_cstr(s, " Port-id = null\n"); >>>> + } >>>> } else { >>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>>> } >>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>>> return 0; >>>> } >>>> >>>> +static void >>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>>> +{ >>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>>> + >>>> + count->shared = 0; >>>> + /* Each flow has a single count action, so no need of id */ >>>> + count->id = 0; >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>>> +} >>>> + >>>> +static void >>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>>> + struct netdev *outdev) >>>> +{ >>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>>> + >>>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>>> +} >>>> + >>>> +static int >>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>>> + const struct nlattr *nla, >>>> + struct offload_info *info) >>>> +{ >>>> + struct netdev *outdev; >>>> + odp_port_t port; >>>> + >>>> + port = nl_attr_get_odp_port(nla); >>>> + outdev = netdev_ports_get(port, info->dpif_class); >>>> + if (outdev == NULL) { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Cannot find netdev for odp port %d", port); >>>> + return -1; >>>> + } >>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Output to %s cannot be offloaded", >>>> + netdev_get_name(outdev)); >>>> + return -1; >>>> + } >>>> + >>>> + netdev_dpdk_flow_add_count_action(actions); >>>> + netdev_dpdk_flow_add_port_id_action(actions, outdev); >>>> + netdev_close(outdev); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> int >>>> netdev_dpdk_flow_actions_add(struct flow_actions *actions, >>>> struct nlattr *nl_actions, >>>> size_t nl_actions_len, >>>> - struct offload_info *info OVS_UNUSED) >>>> + struct offload_info *info) >>>> { >>>> struct nlattr *nla; >>>> size_t left; >>>> >>>> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { >>>> - VLOG_DBG_RL(&error_rl, >>>> - "Unsupported action type %d", nl_attr_type(nla)); >>>> - return -1; >>>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && >>>> + left <= NLA_ALIGN(nla->nla_len)) { >>>> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { >>>> + return -1; >>>> + } >>> The check in netdev_dpdk_flow_add_output_action() is insufficient to >>> decide whether the device is capable of full-offload, since it assumes >>> that the output action is supported if netdev_dpdk type is >>> DPDK_DEV_ETH. >>> >>> There are a couple of issues with this approach: >>> >>> 1. For a device that is not capable of full-offload, it results in 2 >>> calls to the PMD for every flow-offload request. The first attempt for >>> full-offload fails, followed by another call for partial-offload. >> We cannot avoid this, as we want to do full offload, and only fallback >> to partial if it doesn't work. >>> 2. Even for devices that support full-offload, the offload might fail >>> if the in-port and out-port are not in the same switch-domain. For >>> example, if only partial-offload is configured (vhost-user ports) in >>> the vSwitch, then the PMD should check if the in-port and out-port are >>> on the same switch (domain-id must match) and fail the request >>> otherwise. This check is important to alert the user of >>> misconfigurations. If OVS doesn't do this check, all pmd drivers will >>> have to do this check and that duplication is not desirable. >> I don't think it's in the OVS scope to do such checks, but it should be >> in the driver's level (PMD), the same way it is in the kernel offloading >> using TC. >> >> I agree that the argument of logging is important, but this is something >> that I think out of scope of this series. All prints in DPDK go to >> /dev/null, and not logged into OVS' log. > This is not correct. DPDK logs are redirected to OVS log. So, if DPDK driver > uses DPDK logging properly, all the messages should go to ovs-vswitchd.log > depending on the configured log levels. OK, so in relevance to the previous comment, if the DPDK PMD logs (properly) a message that it is not the same HW, the user will be alerted of possible invalid configuration. > >> BTW, in kernel, there is a similar issue with drivers that logs using >> extack. Those messages are not dumped to dmesg, and OVS also doesn't >> read them to its log. Also out of scope of this series... >> >>> Here's how this could be addressed, by identifying whether the device >>> is capable of full-offload or partial-offload. >>> >>> 1. Check if the target device (in-port) is full-offload capable. This >>> could be done by checking if rte_flow api is supported, along with a >>> valid switch-domain-id. We could even improve this further, similar to >>> the kernel switchdev framework in which the device mode is >>> configurable (legacy/eswitch modes) and available to consumers, but >>> this might require some more DPDK framework changes. >> I think we can enhance to check if both are ETH. However, As I wrote >> above, I think checking other HW properties should not be in OVS scope, >> but in driver's domain. When I try to implement it, I see the netdev_dpdk_flow_api_supported API checks exactly this. Could you please explain again why its usage is restricted to init phase only? >>> 2. If the device is full-offload capable, and if output action is >>> specified, check if in-port and out-port are on the same switch >>> (switch-domain-id match mentioned above). >>> >>> If both 1 and 2 succeed, then the device is capable of full-offload >>> and we can proceed with full-offload item/action setup, followed by >>> rte_flow_create(). >>> >>> Otherwise, we fallback to partial-offload code path. Here, we can >>> handle the case where a vhost-user interface is either the in-port or >>> out-port of the flow being offloaded. Depending on whether the >>> vhost-user interface is the in-port or out-port, if the other port >>> associated with the flow is offload capable (DPDK_DEV_ETH), then we >>> offload the flow to that device. This would help us build further >>> extensions to the existing partial-offload framework by adding more >>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except >>> the output action that still needs to be processed in software. >>> >>> Thanks, >>> -Harsha >>> >>>> + } else { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Unsupported action type %d", nl_attr_type(nla)); >>>> + return -1; >>>> + } >>>> } >>>> >>>> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>>> -- >>>> 2.14.5 >>>> >>>> _______________________________________________ >>>> dev mailing list >>>> dev@openvswitch.org >>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7Ccddf11ececd44fe5046c08d778d08a98%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637110709393981707&sdata=rTGgfKo%2F6%2Faf7BIwBZi962tbyBBR97WGWgQunYhyGek%3D&reserved=0
On 12/2/2019 4:25 PM, Ilya Maximets wrote: > BTW, function netdev_dpdk_flow_api_supported() is not intended to be called > in the offloading process. It's only for initialization phase. You can see > the "/* TODO: Check if we able to offload some minimal flow. */" in the code > and that might be destructive and unwanted for offloading process. I guess the thought you had in mind when writing it was to call rte_flow_validate once or more in order to find out offload capabilities of that device, but for now it is only a comment. In the future if/when we do call rte_flow_validate, I think it should not be in this function, but in a function maybe more specific for that, like netdev_offload_dpdk_cap(), or something like that. In current code for sure, I don't see any issue to call this function in the offloading process, and not only during init.
On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote: > > > On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: > > On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: > >> Signed-off-by: Eli Britstein <elibr@mellanox.com> > >> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> > >> --- > >> NEWS | 2 + > >> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- > >> 2 files changed, 85 insertions(+), 4 deletions(-) > >> > >> diff --git a/NEWS b/NEWS > >> index 88b818948..ca9c2b230 100644 > >> --- a/NEWS > >> +++ b/NEWS > >> @@ -10,6 +10,8 @@ Post-v2.12.0 > >> if supported by libbpf. > >> * Add option to enable, disable and query TCP sequence checking in > >> conntrack. > >> + - DPDK: > >> + * Add hardware offload support for output actions. > >> > >> v2.12.0 - 03 Sep 2019 > >> --------------------- > >> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c > >> index dbbc45e99..6e7efb315 100644 > >> --- a/lib/netdev-offload-dpdk-flow.c > >> +++ b/lib/netdev-offload-dpdk-flow.c > >> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) > >> } else { > >> ds_put_cstr(s, " RSS = null\n"); > >> } > >> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { > >> + const struct rte_flow_action_count *count = actions->conf; > >> + > >> + ds_put_cstr(s, "rte flow count action:\n"); > >> + if (count) { > >> + ds_put_format(s, > >> + " Count: shared=%d, id=%d\n", > >> + count->shared, count->id); > >> + } else { > >> + ds_put_cstr(s, " Count = null\n"); > >> + } > >> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { > >> + const struct rte_flow_action_port_id *port_id = actions->conf; > >> + > >> + ds_put_cstr(s, "rte flow port-id action:\n"); > >> + if (port_id) { > >> + ds_put_format(s, > >> + " Port-id: original=%d, id=%d\n", > >> + port_id->original, port_id->id); > >> + } else { > >> + ds_put_cstr(s, " Port-id = null\n"); > >> + } > >> } else { > >> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); > >> } > >> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, > >> return 0; > >> } > >> > >> +static void > >> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) > >> +{ > >> + struct rte_flow_action_count *count = xzalloc(sizeof *count); > >> + > >> + count->shared = 0; > >> + /* Each flow has a single count action, so no need of id */ > >> + count->id = 0; > >> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); > >> +} > >> + > >> +static void > >> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, > >> + struct netdev *outdev) > >> +{ > >> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); > >> + > >> + port_id->id = netdev_dpdk_get_port_id(outdev); > >> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); > >> +} > >> + > >> +static int > >> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, > >> + const struct nlattr *nla, > >> + struct offload_info *info) > >> +{ > >> + struct netdev *outdev; > >> + odp_port_t port; > >> + > >> + port = nl_attr_get_odp_port(nla); > >> + outdev = netdev_ports_get(port, info->dpif_class); > >> + if (outdev == NULL) { > >> + VLOG_DBG_RL(&error_rl, > >> + "Cannot find netdev for odp port %d", port); > >> + return -1; > >> + } > >> + if (!netdev_dpdk_flow_api_supported(outdev)) { > >> + VLOG_DBG_RL(&error_rl, > >> + "Output to %s cannot be offloaded", > >> + netdev_get_name(outdev)); > >> + return -1; > >> + } > >> + > >> + netdev_dpdk_flow_add_count_action(actions); > >> + netdev_dpdk_flow_add_port_id_action(actions, outdev); > >> + netdev_close(outdev); > >> + > >> + return 0; > >> +} > >> + > >> int > >> netdev_dpdk_flow_actions_add(struct flow_actions *actions, > >> struct nlattr *nl_actions, > >> size_t nl_actions_len, > >> - struct offload_info *info OVS_UNUSED) > >> + struct offload_info *info) > >> { > >> struct nlattr *nla; > >> size_t left; > >> > >> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { > >> - VLOG_DBG_RL(&error_rl, > >> - "Unsupported action type %d", nl_attr_type(nla)); > >> - return -1; > >> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && > >> + left <= NLA_ALIGN(nla->nla_len)) { > >> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { > >> + return -1; > >> + } > > The check in netdev_dpdk_flow_add_output_action() is insufficient to > > decide whether the device is capable of full-offload, since it assumes > > that the output action is supported if netdev_dpdk type is > > DPDK_DEV_ETH. > > > > There are a couple of issues with this approach: > > > > 1. For a device that is not capable of full-offload, it results in 2 > > calls to the PMD for every flow-offload request. The first attempt for > > full-offload fails, followed by another call for partial-offload. > We cannot avoid this, as we want to do full offload, and only fallback > to partial if it doesn't work. > > > > 2. Even for devices that support full-offload, the offload might fail > > if the in-port and out-port are not in the same switch-domain. For > > example, if only partial-offload is configured (vhost-user ports) in > > the vSwitch, then the PMD should check if the in-port and out-port are > > on the same switch (domain-id must match) and fail the request > > otherwise. This check is important to alert the user of > > misconfigurations. If OVS doesn't do this check, all pmd drivers will > > have to do this check and that duplication is not desirable. > > I don't think it's in the OVS scope to do such checks, but it should be > in the driver's level (PMD), the same way it is in the kernel offloading > using TC. > IMO, it is not out-of-scope for OVS to add these checks to make better offload decisions: - This layer of OVS (netdev-offload-dpdk) is already offload-aware and it is using a set of offload APIs (rte_flow) - Full offload may not be the preferred configuration for a customer (since it requires additional config like SRIOV). That is, the code should not make the distinction that full-offload is always preferred and partial-offload is only a fallback option. - Without such checks, offload rate of partial-offload configuration could be potentially impacted with a large number of flows. - Similar improvement (checking/logging in OVS as opposed to individual drivers) could be made in TC kernel offloads too > I agree that the argument of logging is important, but this is something > that I think out of scope of this series. All prints in DPDK go to > /dev/null, and not logged into OVS' log. > > BTW, in kernel, there is a similar issue with drivers that logs using > extack. Those messages are not dumped to dmesg, and OVS also doesn't > read them to its log. Also out of scope of this series... > > > > > Here's how this could be addressed, by identifying whether the device > > is capable of full-offload or partial-offload. > > > > 1. Check if the target device (in-port) is full-offload capable. This > > could be done by checking if rte_flow api is supported, along with a > > valid switch-domain-id. We could even improve this further, similar to > > the kernel switchdev framework in which the device mode is > > configurable (legacy/eswitch modes) and available to consumers, but > > this might require some more DPDK framework changes. > I think we can enhance to check if both are ETH. However, As I wrote > above, I think checking other HW properties should not be in OVS scope, > but in driver's domain. > > > > 2. If the device is full-offload capable, and if output action is > > specified, check if in-port and out-port are on the same switch > > (switch-domain-id match mentioned above). > > > > If both 1 and 2 succeed, then the device is capable of full-offload > > and we can proceed with full-offload item/action setup, followed by > > rte_flow_create(). > > > > Otherwise, we fallback to partial-offload code path. Here, we can > > handle the case where a vhost-user interface is either the in-port or > > out-port of the flow being offloaded. Depending on whether the > > vhost-user interface is the in-port or out-port, if the other port > > associated with the flow is offload capable (DPDK_DEV_ETH), then we > > offload the flow to that device. This would help us build further > > extensions to the existing partial-offload framework by adding more > > actions such as VLAN and Tunnel actions (apart from MARK/RSS), except > > the output action that still needs to be processed in software. > > > > Thanks, > > -Harsha > > > >> + } else { > >> + VLOG_DBG_RL(&error_rl, > >> + "Unsupported action type %d", nl_attr_type(nla)); > >> + return -1; > >> + } > >> } > >> > >> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); > >> -- > >> 2.14.5 > >> > >> _______________________________________________ > >> dev mailing list > >> dev@openvswitch.org > >> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&reserved=0
On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote: > On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote: >> >> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: >>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: >>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>>> --- >>>> NEWS | 2 + >>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>>> 2 files changed, 85 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/NEWS b/NEWS >>>> index 88b818948..ca9c2b230 100644 >>>> --- a/NEWS >>>> +++ b/NEWS >>>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>>> if supported by libbpf. >>>> * Add option to enable, disable and query TCP sequence checking in >>>> conntrack. >>>> + - DPDK: >>>> + * Add hardware offload support for output actions. >>>> >>>> v2.12.0 - 03 Sep 2019 >>>> --------------------- >>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>>> index dbbc45e99..6e7efb315 100644 >>>> --- a/lib/netdev-offload-dpdk-flow.c >>>> +++ b/lib/netdev-offload-dpdk-flow.c >>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>>> } else { >>>> ds_put_cstr(s, " RSS = null\n"); >>>> } >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>>> + const struct rte_flow_action_count *count = actions->conf; >>>> + >>>> + ds_put_cstr(s, "rte flow count action:\n"); >>>> + if (count) { >>>> + ds_put_format(s, >>>> + " Count: shared=%d, id=%d\n", >>>> + count->shared, count->id); >>>> + } else { >>>> + ds_put_cstr(s, " Count = null\n"); >>>> + } >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>>> + >>>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>>> + if (port_id) { >>>> + ds_put_format(s, >>>> + " Port-id: original=%d, id=%d\n", >>>> + port_id->original, port_id->id); >>>> + } else { >>>> + ds_put_cstr(s, " Port-id = null\n"); >>>> + } >>>> } else { >>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>>> } >>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>>> return 0; >>>> } >>>> >>>> +static void >>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>>> +{ >>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>>> + >>>> + count->shared = 0; >>>> + /* Each flow has a single count action, so no need of id */ >>>> + count->id = 0; >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>>> +} >>>> + >>>> +static void >>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>>> + struct netdev *outdev) >>>> +{ >>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>>> + >>>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>>> +} >>>> + >>>> +static int >>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>>> + const struct nlattr *nla, >>>> + struct offload_info *info) >>>> +{ >>>> + struct netdev *outdev; >>>> + odp_port_t port; >>>> + >>>> + port = nl_attr_get_odp_port(nla); >>>> + outdev = netdev_ports_get(port, info->dpif_class); >>>> + if (outdev == NULL) { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Cannot find netdev for odp port %d", port); >>>> + return -1; >>>> + } >>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Output to %s cannot be offloaded", >>>> + netdev_get_name(outdev)); >>>> + return -1; >>>> + } >>>> + >>>> + netdev_dpdk_flow_add_count_action(actions); >>>> + netdev_dpdk_flow_add_port_id_action(actions, outdev); >>>> + netdev_close(outdev); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> int >>>> netdev_dpdk_flow_actions_add(struct flow_actions *actions, >>>> struct nlattr *nl_actions, >>>> size_t nl_actions_len, >>>> - struct offload_info *info OVS_UNUSED) >>>> + struct offload_info *info) >>>> { >>>> struct nlattr *nla; >>>> size_t left; >>>> >>>> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { >>>> - VLOG_DBG_RL(&error_rl, >>>> - "Unsupported action type %d", nl_attr_type(nla)); >>>> - return -1; >>>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && >>>> + left <= NLA_ALIGN(nla->nla_len)) { >>>> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { >>>> + return -1; >>>> + } >>> The check in netdev_dpdk_flow_add_output_action() is insufficient to >>> decide whether the device is capable of full-offload, since it assumes >>> that the output action is supported if netdev_dpdk type is >>> DPDK_DEV_ETH. >>> >>> There are a couple of issues with this approach: >>> >>> 1. For a device that is not capable of full-offload, it results in 2 >>> calls to the PMD for every flow-offload request. The first attempt for >>> full-offload fails, followed by another call for partial-offload. >> We cannot avoid this, as we want to do full offload, and only fallback >> to partial if it doesn't work. >>> 2. Even for devices that support full-offload, the offload might fail >>> if the in-port and out-port are not in the same switch-domain. For >>> example, if only partial-offload is configured (vhost-user ports) in >>> the vSwitch, then the PMD should check if the in-port and out-port are >>> on the same switch (domain-id must match) and fail the request >>> otherwise. This check is important to alert the user of >>> misconfigurations. If OVS doesn't do this check, all pmd drivers will >>> have to do this check and that duplication is not desirable. >> I don't think it's in the OVS scope to do such checks, but it should be >> in the driver's level (PMD), the same way it is in the kernel offloading >> using TC. >> > IMO, it is not out-of-scope for OVS to add these checks to make better > offload decisions: > - This layer of OVS (netdev-offload-dpdk) is already offload-aware and > it is using a set of offload APIs (rte_flow) It is aware of the offload APIs, but not on specific HW capabilities, so in some cases trying to make such restrictions won't cover all use cases in best case and result in redundant code. For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but we can in ConnectX-6. It is not something exposed at all to the user, specifically to OVS. So, I think those kind of decisions that involve HW specifics should be done in the layer that has the HW knowledge, e.g. - the driver. > - Full offload may not be the preferred configuration for a customer > (since it requires additional config like SRIOV). That is, the code > should not make the distinction that full-offload is always preferred > and partial-offload is only a fallback option. At the moment, any kind of offload requires SRIOV. BTW, to resolve this, after full offload is accepted, we plan to resubmit vDPA (see https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for virtio as well. > - Without such checks, offload rate of partial-offload configuration > could be potentially impacted with a large number of flows. I don't think failing in the PMD level will be much slower than failing in OVS level. > - Similar improvement (checking/logging in OVS as opposed to > individual drivers) could be made in TC kernel offloads too In order to add such checks, we can maybe think in the future of a capability querying mechanism, to query each device (at init time, maybe using rte_flow_validate) and make decisions based on that. However, I think this mechanism is neither a simple one, nor should be a prerequisite for this patch-set. > >> I agree that the argument of logging is important, but this is something >> that I think out of scope of this series. All prints in DPDK go to >> /dev/null, and not logged into OVS' log. >> >> BTW, in kernel, there is a similar issue with drivers that logs using >> extack. Those messages are not dumped to dmesg, and OVS also doesn't >> read them to its log. Also out of scope of this series... >> >>> Here's how this could be addressed, by identifying whether the device >>> is capable of full-offload or partial-offload. >>> >>> 1. Check if the target device (in-port) is full-offload capable. This >>> could be done by checking if rte_flow api is supported, along with a >>> valid switch-domain-id. We could even improve this further, similar to >>> the kernel switchdev framework in which the device mode is >>> configurable (legacy/eswitch modes) and available to consumers, but >>> this might require some more DPDK framework changes. >> I think we can enhance to check if both are ETH. However, As I wrote >> above, I think checking other HW properties should not be in OVS scope, >> but in driver's domain. >>> 2. If the device is full-offload capable, and if output action is >>> specified, check if in-port and out-port are on the same switch >>> (switch-domain-id match mentioned above). >>> >>> If both 1 and 2 succeed, then the device is capable of full-offload >>> and we can proceed with full-offload item/action setup, followed by >>> rte_flow_create(). >>> >>> Otherwise, we fallback to partial-offload code path. Here, we can >>> handle the case where a vhost-user interface is either the in-port or >>> out-port of the flow being offloaded. Depending on whether the >>> vhost-user interface is the in-port or out-port, if the other port >>> associated with the flow is offload capable (DPDK_DEV_ETH), then we >>> offload the flow to that device. This would help us build further >>> extensions to the existing partial-offload framework by adding more >>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except >>> the output action that still needs to be processed in software. >>> >>> Thanks, >>> -Harsha >>> >>>> + } else { >>>> + VLOG_DBG_RL(&error_rl, >>>> + "Unsupported action type %d", nl_attr_type(nla)); >>>> + return -1; >>>> + } >>>> } >>>> >>>> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>>> -- >>>> 2.14.5 >>>> >>>> _______________________________________________ >>>> dev mailing list >>>> dev@openvswitch.org >>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&reserved=0
On 08.12.2019 10:38, Eli Britstein wrote: > > On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote: >> On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote: >>> >>> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: >>>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: >>>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> >>>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> >>>>> --- >>>>> NEWS | 2 + >>>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- >>>>> 2 files changed, 85 insertions(+), 4 deletions(-) >>>>> >>>>> diff --git a/NEWS b/NEWS >>>>> index 88b818948..ca9c2b230 100644 >>>>> --- a/NEWS >>>>> +++ b/NEWS >>>>> @@ -10,6 +10,8 @@ Post-v2.12.0 >>>>> if supported by libbpf. >>>>> * Add option to enable, disable and query TCP sequence checking in >>>>> conntrack. >>>>> + - DPDK: >>>>> + * Add hardware offload support for output actions. >>>>> >>>>> v2.12.0 - 03 Sep 2019 >>>>> --------------------- >>>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c >>>>> index dbbc45e99..6e7efb315 100644 >>>>> --- a/lib/netdev-offload-dpdk-flow.c >>>>> +++ b/lib/netdev-offload-dpdk-flow.c >>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) >>>>> } else { >>>>> ds_put_cstr(s, " RSS = null\n"); >>>>> } >>>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { >>>>> + const struct rte_flow_action_count *count = actions->conf; >>>>> + >>>>> + ds_put_cstr(s, "rte flow count action:\n"); >>>>> + if (count) { >>>>> + ds_put_format(s, >>>>> + " Count: shared=%d, id=%d\n", >>>>> + count->shared, count->id); >>>>> + } else { >>>>> + ds_put_cstr(s, " Count = null\n"); >>>>> + } >>>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { >>>>> + const struct rte_flow_action_port_id *port_id = actions->conf; >>>>> + >>>>> + ds_put_cstr(s, "rte flow port-id action:\n"); >>>>> + if (port_id) { >>>>> + ds_put_format(s, >>>>> + " Port-id: original=%d, id=%d\n", >>>>> + port_id->original, port_id->id); >>>>> + } else { >>>>> + ds_put_cstr(s, " Port-id = null\n"); >>>>> + } >>>>> } else { >>>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); >>>>> } >>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, >>>>> return 0; >>>>> } >>>>> >>>>> +static void >>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) >>>>> +{ >>>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); >>>>> + >>>>> + count->shared = 0; >>>>> + /* Each flow has a single count action, so no need of id */ >>>>> + count->id = 0; >>>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); >>>>> +} >>>>> + >>>>> +static void >>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, >>>>> + struct netdev *outdev) >>>>> +{ >>>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); >>>>> + >>>>> + port_id->id = netdev_dpdk_get_port_id(outdev); >>>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); >>>>> +} >>>>> + >>>>> +static int >>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, >>>>> + const struct nlattr *nla, >>>>> + struct offload_info *info) >>>>> +{ >>>>> + struct netdev *outdev; >>>>> + odp_port_t port; >>>>> + >>>>> + port = nl_attr_get_odp_port(nla); >>>>> + outdev = netdev_ports_get(port, info->dpif_class); >>>>> + if (outdev == NULL) { >>>>> + VLOG_DBG_RL(&error_rl, >>>>> + "Cannot find netdev for odp port %d", port); >>>>> + return -1; >>>>> + } >>>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { >>>>> + VLOG_DBG_RL(&error_rl, >>>>> + "Output to %s cannot be offloaded", >>>>> + netdev_get_name(outdev)); >>>>> + return -1; >>>>> + } >>>>> + >>>>> + netdev_dpdk_flow_add_count_action(actions); >>>>> + netdev_dpdk_flow_add_port_id_action(actions, outdev); >>>>> + netdev_close(outdev); >>>>> + >>>>> + return 0; >>>>> +} >>>>> + >>>>> int >>>>> netdev_dpdk_flow_actions_add(struct flow_actions *actions, >>>>> struct nlattr *nl_actions, >>>>> size_t nl_actions_len, >>>>> - struct offload_info *info OVS_UNUSED) >>>>> + struct offload_info *info) >>>>> { >>>>> struct nlattr *nla; >>>>> size_t left; >>>>> >>>>> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { >>>>> - VLOG_DBG_RL(&error_rl, >>>>> - "Unsupported action type %d", nl_attr_type(nla)); >>>>> - return -1; >>>>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && >>>>> + left <= NLA_ALIGN(nla->nla_len)) { >>>>> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { >>>>> + return -1; >>>>> + } >>>> The check in netdev_dpdk_flow_add_output_action() is insufficient to >>>> decide whether the device is capable of full-offload, since it assumes >>>> that the output action is supported if netdev_dpdk type is >>>> DPDK_DEV_ETH. >>>> >>>> There are a couple of issues with this approach: >>>> >>>> 1. For a device that is not capable of full-offload, it results in 2 >>>> calls to the PMD for every flow-offload request. The first attempt for >>>> full-offload fails, followed by another call for partial-offload. >>> We cannot avoid this, as we want to do full offload, and only fallback >>> to partial if it doesn't work. >>>> 2. Even for devices that support full-offload, the offload might fail >>>> if the in-port and out-port are not in the same switch-domain. For >>>> example, if only partial-offload is configured (vhost-user ports) in >>>> the vSwitch, then the PMD should check if the in-port and out-port are >>>> on the same switch (domain-id must match) and fail the request >>>> otherwise. This check is important to alert the user of >>>> misconfigurations. If OVS doesn't do this check, all pmd drivers will >>>> have to do this check and that duplication is not desirable. >>> I don't think it's in the OVS scope to do such checks, but it should be >>> in the driver's level (PMD), the same way it is in the kernel offloading >>> using TC. >>> >> IMO, it is not out-of-scope for OVS to add these checks to make better >> offload decisions: >> - This layer of OVS (netdev-offload-dpdk) is already offload-aware and >> it is using a set of offload APIs (rte_flow) > It is aware of the offload APIs, but not on specific HW capabilities, so > in some cases > > trying to make such restrictions won't cover all use cases in best case and result in > redundant code. > > For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with > embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but > we can in ConnectX-6. > > It is not something exposed at all to the user, specifically to OVS. > > So, I think those kind of decisions that involve HW specifics should be done in the > layer that has the HW knowledge, e.g. - the driver. > >> - Full offload may not be the preferred configuration for a customer >> (since it requires additional config like SRIOV). That is, the code >> should not make the distinction that full-offload is always preferred >> and partial-offload is only a fallback option. > At the moment, any kind of offload requires SRIOV. I don't think that SRIOV required to use Flow Director on Intel NICs. So, partial offloading should work on Intel NICs wihtout enabling SRIOV. > BTW, to resolve this, > after full > > offload is accepted, we plan to resubmit vDPA (see > https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for > virtio as well. > >> - Without such checks, offload rate of partial-offload configuration >> could be potentially impacted with a large number of flows. > I don't think failing in the PMD level will be much slower than failing > in OVS level. >> - Similar improvement (checking/logging in OVS as opposed to >> individual drivers) could be made in TC kernel offloads too > In order to add such checks, we can maybe think in the future of a > capability > > querying mechanism, to query each device (at init time, maybe using > rte_flow_validate) and make decisions based on that. > > However, I think this mechanism is neither a simple one, nor should be a > prerequisite for this patch-set. > >> >>> I agree that the argument of logging is important, but this is something >>> that I think out of scope of this series. All prints in DPDK go to >>> /dev/null, and not logged into OVS' log. >>> >>> BTW, in kernel, there is a similar issue with drivers that logs using >>> extack. Those messages are not dumped to dmesg, and OVS also doesn't >>> read them to its log. Also out of scope of this series... >>> >>>> Here's how this could be addressed, by identifying whether the device >>>> is capable of full-offload or partial-offload. >>>> >>>> 1. Check if the target device (in-port) is full-offload capable. This >>>> could be done by checking if rte_flow api is supported, along with a >>>> valid switch-domain-id. We could even improve this further, similar to >>>> the kernel switchdev framework in which the device mode is >>>> configurable (legacy/eswitch modes) and available to consumers, but >>>> this might require some more DPDK framework changes. >>> I think we can enhance to check if both are ETH. However, As I wrote >>> above, I think checking other HW properties should not be in OVS scope, >>> but in driver's domain. >>>> 2. If the device is full-offload capable, and if output action is >>>> specified, check if in-port and out-port are on the same switch >>>> (switch-domain-id match mentioned above). >>>> >>>> If both 1 and 2 succeed, then the device is capable of full-offload >>>> and we can proceed with full-offload item/action setup, followed by >>>> rte_flow_create(). >>>> >>>> Otherwise, we fallback to partial-offload code path. Here, we can >>>> handle the case where a vhost-user interface is either the in-port or >>>> out-port of the flow being offloaded. Depending on whether the >>>> vhost-user interface is the in-port or out-port, if the other port >>>> associated with the flow is offload capable (DPDK_DEV_ETH), then we >>>> offload the flow to that device. This would help us build further >>>> extensions to the existing partial-offload framework by adding more >>>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except >>>> the output action that still needs to be processed in software. >>>> >>>> Thanks, >>>> -Harsha >>>> >>>>> + } else { >>>>> + VLOG_DBG_RL(&error_rl, >>>>> + "Unsupported action type %d", nl_attr_type(nla)); >>>>> + return -1; >>>>> + } >>>>> } >>>>> >>>>> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>>>> -- >>>>> 2.14.5 >>>>> >>>>> _______________________________________________ >>>>> dev mailing list >>>>> dev@openvswitch.org >>>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&reserved=0
On Sun, Dec 8, 2019 at 3:08 PM Eli Britstein <elibr@mellanox.com> wrote: > > > On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote: > > On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <elibr@mellanox.com> wrote: > >> > >> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote: > >>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <elibr@mellanox.com> wrote: > >>>> Signed-off-by: Eli Britstein <elibr@mellanox.com> > >>>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com> > >>>> --- > >>>> NEWS | 2 + > >>>> lib/netdev-offload-dpdk-flow.c | 87 ++++++++++++++++++++++++++++++++++++++++-- > >>>> 2 files changed, 85 insertions(+), 4 deletions(-) > >>>> > >>>> diff --git a/NEWS b/NEWS > >>>> index 88b818948..ca9c2b230 100644 > >>>> --- a/NEWS > >>>> +++ b/NEWS > >>>> @@ -10,6 +10,8 @@ Post-v2.12.0 > >>>> if supported by libbpf. > >>>> * Add option to enable, disable and query TCP sequence checking in > >>>> conntrack. > >>>> + - DPDK: > >>>> + * Add hardware offload support for output actions. > >>>> > >>>> v2.12.0 - 03 Sep 2019 > >>>> --------------------- > >>>> diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c > >>>> index dbbc45e99..6e7efb315 100644 > >>>> --- a/lib/netdev-offload-dpdk-flow.c > >>>> +++ b/lib/netdev-offload-dpdk-flow.c > >>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) > >>>> } else { > >>>> ds_put_cstr(s, " RSS = null\n"); > >>>> } > >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { > >>>> + const struct rte_flow_action_count *count = actions->conf; > >>>> + > >>>> + ds_put_cstr(s, "rte flow count action:\n"); > >>>> + if (count) { > >>>> + ds_put_format(s, > >>>> + " Count: shared=%d, id=%d\n", > >>>> + count->shared, count->id); > >>>> + } else { > >>>> + ds_put_cstr(s, " Count = null\n"); > >>>> + } > >>>> + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { > >>>> + const struct rte_flow_action_port_id *port_id = actions->conf; > >>>> + > >>>> + ds_put_cstr(s, "rte flow port-id action:\n"); > >>>> + if (port_id) { > >>>> + ds_put_format(s, > >>>> + " Port-id: original=%d, id=%d\n", > >>>> + port_id->original, port_id->id); > >>>> + } else { > >>>> + ds_put_cstr(s, " Port-id = null\n"); > >>>> + } > >>>> } else { > >>>> ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); > >>>> } > >>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, > >>>> return 0; > >>>> } > >>>> > >>>> +static void > >>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) > >>>> +{ > >>>> + struct rte_flow_action_count *count = xzalloc(sizeof *count); > >>>> + > >>>> + count->shared = 0; > >>>> + /* Each flow has a single count action, so no need of id */ > >>>> + count->id = 0; > >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); > >>>> +} > >>>> + > >>>> +static void > >>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, > >>>> + struct netdev *outdev) > >>>> +{ > >>>> + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); > >>>> + > >>>> + port_id->id = netdev_dpdk_get_port_id(outdev); > >>>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); > >>>> +} > >>>> + > >>>> +static int > >>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, > >>>> + const struct nlattr *nla, > >>>> + struct offload_info *info) > >>>> +{ > >>>> + struct netdev *outdev; > >>>> + odp_port_t port; > >>>> + > >>>> + port = nl_attr_get_odp_port(nla); > >>>> + outdev = netdev_ports_get(port, info->dpif_class); > >>>> + if (outdev == NULL) { > >>>> + VLOG_DBG_RL(&error_rl, > >>>> + "Cannot find netdev for odp port %d", port); > >>>> + return -1; > >>>> + } > >>>> + if (!netdev_dpdk_flow_api_supported(outdev)) { > >>>> + VLOG_DBG_RL(&error_rl, > >>>> + "Output to %s cannot be offloaded", > >>>> + netdev_get_name(outdev)); > >>>> + return -1; > >>>> + } > >>>> + > >>>> + netdev_dpdk_flow_add_count_action(actions); > >>>> + netdev_dpdk_flow_add_port_id_action(actions, outdev); > >>>> + netdev_close(outdev); > >>>> + > >>>> + return 0; > >>>> +} > >>>> + > >>>> int > >>>> netdev_dpdk_flow_actions_add(struct flow_actions *actions, > >>>> struct nlattr *nl_actions, > >>>> size_t nl_actions_len, > >>>> - struct offload_info *info OVS_UNUSED) > >>>> + struct offload_info *info) > >>>> { > >>>> struct nlattr *nla; > >>>> size_t left; > >>>> > >>>> NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { > >>>> - VLOG_DBG_RL(&error_rl, > >>>> - "Unsupported action type %d", nl_attr_type(nla)); > >>>> - return -1; > >>>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && > >>>> + left <= NLA_ALIGN(nla->nla_len)) { > >>>> + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { > >>>> + return -1; > >>>> + } > >>> The check in netdev_dpdk_flow_add_output_action() is insufficient to > >>> decide whether the device is capable of full-offload, since it assumes > >>> that the output action is supported if netdev_dpdk type is > >>> DPDK_DEV_ETH. > >>> > >>> There are a couple of issues with this approach: > >>> > >>> 1. For a device that is not capable of full-offload, it results in 2 > >>> calls to the PMD for every flow-offload request. The first attempt for > >>> full-offload fails, followed by another call for partial-offload. > >> We cannot avoid this, as we want to do full offload, and only fallback > >> to partial if it doesn't work. > >>> 2. Even for devices that support full-offload, the offload might fail > >>> if the in-port and out-port are not in the same switch-domain. For > >>> example, if only partial-offload is configured (vhost-user ports) in > >>> the vSwitch, then the PMD should check if the in-port and out-port are > >>> on the same switch (domain-id must match) and fail the request > >>> otherwise. This check is important to alert the user of > >>> misconfigurations. If OVS doesn't do this check, all pmd drivers will > >>> have to do this check and that duplication is not desirable. > >> I don't think it's in the OVS scope to do such checks, but it should be > >> in the driver's level (PMD), the same way it is in the kernel offloading > >> using TC. > >> > > IMO, it is not out-of-scope for OVS to add these checks to make better > > offload decisions: > > - This layer of OVS (netdev-offload-dpdk) is already offload-aware and > > it is using a set of offload APIs (rte_flow) > It is aware of the offload APIs, but not on specific HW capabilities, so > in some cases > > trying to make such restrictions won't cover all use cases in best case and result in > redundant code. > > For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with > embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but > we can in ConnectX-6. OVS will only do the first level validation that the 2 ports belong to the same eSwitch, as per the switchdev architecture. If there are additional chip specific restrictions (i.e, something not compliant with switchdev spec), those of course could be validated and rejected by the driver. I'm not suggesting that the driver shouldn't have such chip-specific additional checks, but the first level check should be done by OVS. > > It is not something exposed at all to the user, specifically to OVS. > > So, I think those kind of decisions that involve HW specifics should be done in the > layer that has the HW knowledge, e.g. - the driver. > > > - Full offload may not be the preferred configuration for a customer > > (since it requires additional config like SRIOV). That is, the code > > should not make the distinction that full-offload is always preferred > > and partial-offload is only a fallback option. > At the moment, any kind of offload requires SRIOV. BTW, to resolve this, > after full > > offload is accepted, we plan to resubmit vDPA (see > https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads for > virtio as well. But vDPA requires more infrastructure changes that may not be needed for a simple vhost-user configuration and it shouldn't stop us from supporting additional actions for vhost-user. Thanks, -Harsha > > > - Without such checks, offload rate of partial-offload configuration > > could be potentially impacted with a large number of flows. > I don't think failing in the PMD level will be much slower than failing > in OVS level. > > - Similar improvement (checking/logging in OVS as opposed to > > individual drivers) could be made in TC kernel offloads too > In order to add such checks, we can maybe think in the future of a > capability > > querying mechanism, to query each device (at init time, maybe using > rte_flow_validate) and make decisions based on that. > > However, I think this mechanism is neither a simple one, nor should be a > prerequisite for this patch-set. > > > > >> I agree that the argument of logging is important, but this is something > >> that I think out of scope of this series. All prints in DPDK go to > >> /dev/null, and not logged into OVS' log. > >> > >> BTW, in kernel, there is a similar issue with drivers that logs using > >> extack. Those messages are not dumped to dmesg, and OVS also doesn't > >> read them to its log. Also out of scope of this series... > >> > >>> Here's how this could be addressed, by identifying whether the device > >>> is capable of full-offload or partial-offload. > >>> > >>> 1. Check if the target device (in-port) is full-offload capable. This > >>> could be done by checking if rte_flow api is supported, along with a > >>> valid switch-domain-id. We could even improve this further, similar to > >>> the kernel switchdev framework in which the device mode is > >>> configurable (legacy/eswitch modes) and available to consumers, but > >>> this might require some more DPDK framework changes. > >> I think we can enhance to check if both are ETH. However, As I wrote > >> above, I think checking other HW properties should not be in OVS scope, > >> but in driver's domain. > >>> 2. If the device is full-offload capable, and if output action is > >>> specified, check if in-port and out-port are on the same switch > >>> (switch-domain-id match mentioned above). > >>> > >>> If both 1 and 2 succeed, then the device is capable of full-offload > >>> and we can proceed with full-offload item/action setup, followed by > >>> rte_flow_create(). > >>> > >>> Otherwise, we fallback to partial-offload code path. Here, we can > >>> handle the case where a vhost-user interface is either the in-port or > >>> out-port of the flow being offloaded. Depending on whether the > >>> vhost-user interface is the in-port or out-port, if the other port > >>> associated with the flow is offload capable (DPDK_DEV_ETH), then we > >>> offload the flow to that device. This would help us build further > >>> extensions to the existing partial-offload framework by adding more > >>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except > >>> the output action that still needs to be processed in software. > >>> > >>> Thanks, > >>> -Harsha > >>> > >>>> + } else { > >>>> + VLOG_DBG_RL(&error_rl, > >>>> + "Unsupported action type %d", nl_attr_type(nla)); > >>>> + return -1; > >>>> + } > >>>> } > >>>> > >>>> add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); > >>>> -- > >>>> 2.14.5 > >>>> > >>>> _______________________________________________ > >>>> dev mailing list > >>>> dev@openvswitch.org > >>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&reserved=0
diff --git a/NEWS b/NEWS index 88b818948..ca9c2b230 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ Post-v2.12.0 if supported by libbpf. * Add option to enable, disable and query TCP sequence checking in conntrack. + - DPDK: + * Add hardware offload support for output actions. v2.12.0 - 03 Sep 2019 --------------------- diff --git a/lib/netdev-offload-dpdk-flow.c b/lib/netdev-offload-dpdk-flow.c index dbbc45e99..6e7efb315 100644 --- a/lib/netdev-offload-dpdk-flow.c +++ b/lib/netdev-offload-dpdk-flow.c @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions) } else { ds_put_cstr(s, " RSS = null\n"); } + } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { + const struct rte_flow_action_count *count = actions->conf; + + ds_put_cstr(s, "rte flow count action:\n"); + if (count) { + ds_put_format(s, + " Count: shared=%d, id=%d\n", + count->shared, count->id); + } else { + ds_put_cstr(s, " Count = null\n"); + } + } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { + const struct rte_flow_action_port_id *port_id = actions->conf; + + ds_put_cstr(s, "rte flow port-id action:\n"); + if (port_id) { + ds_put_format(s, + " Port-id: original=%d, id=%d\n", + port_id->original, port_id->id); + } else { + ds_put_cstr(s, " Port-id = null\n"); + } } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns *patterns, return 0; } +static void +netdev_dpdk_flow_add_count_action(struct flow_actions *actions) +{ + struct rte_flow_action_count *count = xzalloc(sizeof *count); + + count->shared = 0; + /* Each flow has a single count action, so no need of id */ + count->id = 0; + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); +} + +static void +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions, + struct netdev *outdev) +{ + struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id); + + port_id->id = netdev_dpdk_get_port_id(outdev); + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); +} + +static int +netdev_dpdk_flow_add_output_action(struct flow_actions *actions, + const struct nlattr *nla, + struct offload_info *info) +{ + struct netdev *outdev; + odp_port_t port; + + port = nl_attr_get_odp_port(nla); + outdev = netdev_ports_get(port, info->dpif_class); + if (outdev == NULL) { + VLOG_DBG_RL(&error_rl, + "Cannot find netdev for odp port %d", port); + return -1; + } + if (!netdev_dpdk_flow_api_supported(outdev)) { + VLOG_DBG_RL(&error_rl, + "Output to %s cannot be offloaded", + netdev_get_name(outdev)); + return -1; + } + + netdev_dpdk_flow_add_count_action(actions); + netdev_dpdk_flow_add_port_id_action(actions, outdev); + netdev_close(outdev); + + return 0; +} + int netdev_dpdk_flow_actions_add(struct flow_actions *actions, struct nlattr *nl_actions, size_t nl_actions_len, - struct offload_info *info OVS_UNUSED) + struct offload_info *info) { struct nlattr *nla; size_t left; NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { - VLOG_DBG_RL(&error_rl, - "Unsupported action type %d", nl_attr_type(nla)); - return -1; + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT && + left <= NLA_ALIGN(nla->nla_len)) { + if (netdev_dpdk_flow_add_output_action(actions, nla, info )) { + return -1; + } + } else { + VLOG_DBG_RL(&error_rl, + "Unsupported action type %d", nl_attr_type(nla)); + return -1; + } } add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);