[ovs-dev,V3,18/19] netdev-offload-dpdk: Support offload of clone tnl_push/output actions

Message ID: 20191208132304.15521-19-elibr@mellanox.com
State: Changes Requested
Delegated to: Ilya Maximets
Series: netdev datapath actions offload

Commit Message

Eli Britstein Dec. 8, 2019, 1:23 p.m. UTC
Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
---
 Documentation/howto/dpdk.rst |  2 ++
 NEWS                         |  4 ++--
 lib/netdev-dpdk.c            | 14 +++++++++++++
 lib/netdev-offload-dpdk.c    | 48 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 2 deletions(-)

Comments

Sathya Perla via dev Dec. 10, 2019, 6:52 a.m. UTC | #1
On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
>
...
> +static int
> +parse_clone_actions(struct netdev *netdev,
> +                    struct flow_actions *actions,
> +                    const struct nlattr *clone_actions,
> +                    const size_t clone_actions_len,
> +                    struct offload_info *info)
> +{
> +    const struct nlattr *ca;
> +    unsigned int cleft;
> +
> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
> +        int clone_type = nl_attr_type(ca);
> +
> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> +            struct rte_flow_action_raw_encap *raw_encap =
> +                xzalloc(sizeof *raw_encap);
> +
> +            raw_encap->data = (uint8_t *)tnl_push->header;
> +            raw_encap->preserve = NULL;
> +            raw_encap->size = tnl_push->header_len;
> +
> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> +                            raw_encap);

Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
information provided by OVS. RAW_ENCAP provides the tunnel header just
as a blob of bits, which may not be ideal for NIC HW to offload.

How about using the tnl_push->tnl_type field to parse the header and
compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
RTE_NVGRE_ENCAP, etc.?
This would also be more in line with how it's done with TC-offload.

thanks!
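
For illustration, a minimal sketch of the suggested translation (hypothetical
code, not part of the patch; it assumes tnl_type identifies a VXLAN tunnel and
that the outer header is Ethernet/IPv4/UDP/VXLAN):

    /* Sketch: translate a VXLAN tnl_push into
     * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP instead of RAW_ENCAP.  rte_flow
     * expects the outer headers as an RTE_FLOW_ITEM_TYPE_END-terminated
     * item list. */
    static struct rte_flow_action_vxlan_encap *
    build_vxlan_encap(const struct ovs_action_push_tnl *tnl_push)
    {
        struct rte_flow_action_vxlan_encap *encap = xzalloc(sizeof *encap);
        struct rte_flow_item *items = xcalloc(5, sizeof *items);

        items[0].type = RTE_FLOW_ITEM_TYPE_ETH;
        items[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
        items[2].type = RTE_FLOW_ITEM_TYPE_UDP;
        items[3].type = RTE_FLOW_ITEM_TYPE_VXLAN;
        items[4].type = RTE_FLOW_ITEM_TYPE_END;
        /* The item specs would be filled in by parsing tnl_push->header;
         * the parsing is elided here. */

        encap->definition = items;
        return encap;
    }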
Eli Britstein Dec. 10, 2019, 7:50 a.m. UTC | #2
On 12/10/2019 8:52 AM, Sathya Perla wrote:
> On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
> ...
>> +static int
>> +parse_clone_actions(struct netdev *netdev,
>> +                    struct flow_actions *actions,
>> +                    const struct nlattr *clone_actions,
>> +                    const size_t clone_actions_len,
>> +                    struct offload_info *info)
>> +{
>> +    const struct nlattr *ca;
>> +    unsigned int cleft;
>> +
>> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
>> +        int clone_type = nl_attr_type(ca);
>> +
>> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
>> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
>> +            struct rte_flow_action_raw_encap *raw_encap =
>> +                xzalloc(sizeof *raw_encap);
>> +
>> +            raw_encap->data = (uint8_t *)tnl_push->header;
>> +            raw_encap->preserve = NULL;
>> +            raw_encap->size = tnl_push->header_len;
>> +
>> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
>> +                            raw_encap);
> Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
> information provided by OVS. RAW_ENCAP provides the tunnel header just
> as a blob of bits, which may not be ideal for NIC HW to offload.
>
> How about using the tnl_push->tnl_type field to parse the header and
> compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
> RTE_NVGRE_ENCAP, etc.?
> This would also be more in line with how it's done with TC-offload.

I see your point. However, struct ovs_action_push_tnl has the "header"
field just as a raw binary buffer, not broken out into fields as in TC.
"tnl_type" has a comment /* For logging. */. It is indeed used for
logging, as in lib/odp-util.c, in function format_odp_tnl_push_header.

Using VXLAN_ENCAP instead of RAW_ENCAP will make the push less generic,
as I will have to parse the header to build the vxlan_encap fields, just
so they can be rebuilt into a raw header at the PMD level, so I don't see
the offload benefit of it.

Another point is that this way it will support any header, not just 
VXLAN (as an example).

>
> thanks!
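
For context, a rough sketch of the parsing step being discussed (assumptions:
an untagged Ethernet/IPv4/UDP/VXLAN header with no IP options, and the struct
definitions from OVS's lib/packets.h):

    /* Sketch: recover individual fields from the prebuilt header buffer,
     * which a VXLAN_ENCAP translation would require OVS to do. */
    const struct eth_header *eth =
        (const struct eth_header *) tnl_push->header;
    const struct ip_header *ip = (const struct ip_header *) (eth + 1);
    const struct udp_header *udp = (const struct udp_header *) (ip + 1);
    const struct vxlanhdr *vxlan = (const struct vxlanhdr *) (udp + 1);

    /* ...copy eth/ip/udp/vxlan fields into rte_flow_item specs, only for
     * the PMD to serialize them back into a raw header. */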
Sathya Perla via dev Dec. 10, 2019, 10:09 a.m. UTC | #3
On Tue, Dec 10, 2019 at 1:21 PM Eli Britstein <elibr@mellanox.com> wrote:
>
>
> On 12/10/2019 8:52 AM, Sathya Perla wrote:
> > On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
> > ...
> >> +static int
> >> +parse_clone_actions(struct netdev *netdev,
> >> +                    struct flow_actions *actions,
> >> +                    const struct nlattr *clone_actions,
> >> +                    const size_t clone_actions_len,
> >> +                    struct offload_info *info)
> >> +{
> >> +    const struct nlattr *ca;
> >> +    unsigned int cleft;
> >> +
> >> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
> >> +        int clone_type = nl_attr_type(ca);
> >> +
> >> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> >> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> >> +            struct rte_flow_action_raw_encap *raw_encap =
> >> +                xzalloc(sizeof *raw_encap);
> >> +
> >> +            raw_encap->data = (uint8_t *)tnl_push->header;
> >> +            raw_encap->preserve = NULL;
> >> +            raw_encap->size = tnl_push->header_len;
> >> +
> >> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> >> +                            raw_encap);
> > Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
> > information provided by OVS. RAW_ENCAP provides the tunnel header just
> > as a blob of bits, which may not be ideal for NIC HW to offload.
> >
> > How about using the tnl_push->tnl_type field to parse the header and
> > compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
> > RTE_NVGRE_ENCAP, etc.?
> > This would also be more in line with how it's done with TC-offload.
>
> I see your point. However, struct ovs_action_push_tnl has the "header"
> field just as a raw binary buffer, not broken out into fields as in TC.
> "tnl_type" has a comment /* For logging. */. It is indeed used for
> logging, as in lib/odp-util.c, in function format_odp_tnl_push_header.

This is not entirely true. Check out propagate_tunnel_data_to_flow(),
where tnl_type is being used to figure out nw_proto.

>
> Using VXLAN_ENCAP instead of RAW_ENCAP will make the push less generic,
> as I will have to parse the header to build the vxlan_encap fields, just
> so they can be rebuilt into a raw header at the PMD level, so I don't see
> the offload benefit of it.

Why are you assuming that all PMDs will rebuild the tunnel header
fields as a 'raw header'?
What if the NIC HW needs tunnel specific headers to be programmed
separately for each tunnel type?

>
> Another point is that this way it will support any header, not just
> VXLAN (as an example).

Given that VXLAN and NVGRE ENCAP and DECAP actions are already defined
in rte_flow, how about
we use them for vxlan and nvgre and use RAW for the rest of the tunnel types?
We can support all tunnel headers this way too.

thanks!
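
A minimal sketch of that hybrid dispatch (hypothetical; it assumes tnl_type
holds OVS_VPORT_TYPE_* values, which is how propagate_tunnel_data_to_flow()
treats the field):

    switch (tnl_push->tnl_type) {
    case OVS_VPORT_TYPE_VXLAN:
        /* Parse the header and emit RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. */
        break;
    case OVS_VPORT_TYPE_GRE:
        /* Parse the header and emit RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP. */
        break;
    default:
        /* Fall back to RTE_FLOW_ACTION_TYPE_RAW_ENCAP as in this patch. */
        break;
    }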
Eli Britstein Dec. 10, 2019, 11:55 a.m. UTC | #4
On 12/10/2019 12:09 PM, Sathya Perla wrote:
> On Tue, Dec 10, 2019 at 1:21 PM Eli Britstein <elibr@mellanox.com> wrote:
>>
>> On 12/10/2019 8:52 AM, Sathya Perla wrote:
>>> On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
>>> ...
>>>> +static int
>>>> +parse_clone_actions(struct netdev *netdev,
>>>> +                    struct flow_actions *actions,
>>>> +                    const struct nlattr *clone_actions,
>>>> +                    const size_t clone_actions_len,
>>>> +                    struct offload_info *info)
>>>> +{
>>>> +    const struct nlattr *ca;
>>>> +    unsigned int cleft;
>>>> +
>>>> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
>>>> +        int clone_type = nl_attr_type(ca);
>>>> +
>>>> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
>>>> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
>>>> +            struct rte_flow_action_raw_encap *raw_encap =
>>>> +                xzalloc(sizeof *raw_encap);
>>>> +
>>>> +            raw_encap->data = (uint8_t *)tnl_push->header;
>>>> +            raw_encap->preserve = NULL;
>>>> +            raw_encap->size = tnl_push->header_len;
>>>> +
>>>> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
>>>> +                            raw_encap);
>>> Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
>>> information provided by OVS. RAW_ENCAP provides the tunnel header just
>>> as a blob of bits, which may not be ideal for NIC HW to offload.
>>>
>>> How about using the tnl_push->tnl_type field to parse the header and
>>> compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
>>> RTE_NVGRE_ENCAP, etc.?
>>> This would also be more in line with how it's done with TC-offload.
>> I see your point. However, struct ovs_action_push_tnl has the "header"
>> field just as a raw binary buffer, not broken out into fields as in TC.
>> "tnl_type" has a comment /* For logging. */. It is indeed used for
>> logging, as in lib/odp-util.c, in function format_odp_tnl_push_header.
> This is not entirely true. Check out propagate_tunnel_data_to_flow(),
> where tnl_type is being used to figure out nw_proto.
>
>> Using VXLAN_ENCAP instead of RAW_ENCAP will make the push less generic,
>> as I will have to parse the header to build the vxlan_encap fields, just
>> so they can be rebuilt into a raw header at the PMD level, so I don't see
>> the offload benefit of it.
> Why are you assuming that all PMDs will rebuild the tunnel header
> fields as a 'raw header'?
As far as I've seen in the DPDK code, if I'm not wrong, all PMDs that
support it do it like this. Anyway, it is indeed not relevant.
> What if the NIC HW needs tunnel specific headers to be programmed
> separately for each tunnel type?
>
>> Another point is that this way it will support any header, not just
>> VXLAN (as an example).
> Given that VXLAN and NVGRE ENCAP and DECAP actions are already defined
> in rte_flow, how about
> we use them for vxlan and nvgre and use RAW for the rest of the tunnel types?
> We can support all tunnel headers this way too.

Again, I see your point.

However, at the OVS level, we have the encap as a raw buffer, and DPDK
supports it natively using RAW_ENCAP. For that reason I think we should
use the straightforward method.

I think that if there is a PMD that requires the fields separately, it
is its responsibility to parse them, rather than forcing the application
to do it.

As it's a valid attribute in DPDK, and it's the natural way for OVS to
use, I think we should use it. If it is for some reason not valid, in
the future, DPDK will deprecate it.

>
> thanks!
Sathya Perla via dev Dec. 10, 2019, 12:53 p.m. UTC | #5
On Tue, Dec 10, 2019 at 5:25 PM Eli Britstein <elibr@mellanox.com> wrote:
>
>
> On 12/10/2019 12:09 PM, Sathya Perla wrote:
> > On Tue, Dec 10, 2019 at 1:21 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>
> >> On 12/10/2019 8:52 AM, Sathya Perla wrote:
> >>> On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>> ...
> >>>> +static int
> >>>> +parse_clone_actions(struct netdev *netdev,
> >>>> +                    struct flow_actions *actions,
> >>>> +                    const struct nlattr *clone_actions,
> >>>> +                    const size_t clone_actions_len,
> >>>> +                    struct offload_info *info)
> >>>> +{
> >>>> +    const struct nlattr *ca;
> >>>> +    unsigned int cleft;
> >>>> +
> >>>> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
> >>>> +        int clone_type = nl_attr_type(ca);
> >>>> +
> >>>> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> >>>> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> >>>> +            struct rte_flow_action_raw_encap *raw_encap =
> >>>> +                xzalloc(sizeof *raw_encap);
> >>>> +
> >>>> +            raw_encap->data = (uint8_t *)tnl_push->header;
> >>>> +            raw_encap->preserve = NULL;
> >>>> +            raw_encap->size = tnl_push->header_len;
> >>>> +
> >>>> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> >>>> +                            raw_encap);
> >>> Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
> >>> information provided by OVS. RAW_ENCAP provides the tunnel header just
> >>> as a blob of bits, which may not be ideal for NIC HW to offload.
> >>>
> >>> How about using the tnl_push->tnl_type field to parse the header and
> >>> compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
> >>> RTE_NVGRE_ENCAP, etc.?
> >>> This would also be more in line with how it's done with TC-offload.
> >> I see your point. However, struct ovs_action_push_tnl has the "header"
> >> field just as a raw binary buffer, not broken out into fields as in TC.
> >> "tnl_type" has a comment /* For logging. */. It is indeed used for
> >> logging, as in lib/odp-util.c, in function format_odp_tnl_push_header.
> > This is not entirely true. Check out propagate_tunnel_data_to_flow(),
> > where tnl_type is being used to figure out nw_proto.
> >
> >> Using VXLAN_ENCAP instead of RAW_ENCAP will make the push less generic,
> >> as I will have to parse the header to build the vxlan_encap fields, just
> >> so they can be rebuilt into a raw header at the PMD level, so I don't see
> >> the offload benefit of it.
> > Why are you assuming that all PMDs will rebuild the tunnel header
> > fields as a 'raw header'?
> As far as I've seen in the DPDK code, if I'm not wrong, all PMDs that
> support it do it like this. Anyway, it is indeed not relevant.
> > What if the NIC HW needs tunnel specific headers to be programmed
> > separately for each tunnel type?
> >
> >> Another point is that this way it will support any header, not just
> >> VXLAN (as an example).
> > Given that VXLAN and NVGRE ENCAP and DECAP actions are already defined
> > in rte_flow, how about
> > we use them for vxlan and nvgre and use RAW for the rest of the tunnel types?
> > We can support all tunnel headers this way too.
>
> Again, I see your point.
>
> However, at the OVS level, we have the encap as a raw buffer, and DPDK
> supports it natively using RAW_ENCAP. For that reason I think we should
> use the straightforward method.
>
> I think that if there is a PMD that requires the fields separately, it
> is its responsibility to parse them, rather than forcing the application
> to do it.

How should a PMD parse the header buffer? For example, for VXLAN, should
the PMD look for dst UDP port 4789? What if a different port number is
used for VXLAN, as is the case in some deployments?
I think it's important to deal with this in OVS, because the 'tnl_type'
info is available in OVS and it should be relayed to the PMD
in some form.

>
> As it's a valid attribute in DPDK, and it's the natural way for OVS to
> use, I think we should use it. If it is for some reason not valid, in
> the future, DPDK will deprecate it.

I agree RAW_ENCAP is a valid RTE action. But I feel we must choose the
action that conveys/translates as much data as possible from the OVS
layer to the PMD. This will enable offload support from more NIC HWs.

thanks!
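
A concrete example of the ambiguity described above: OVS allows a VXLAN port
to use a non-default UDP destination port, so a PMD classifying the raw
header by port number alone would misidentify such tunnels:

    # VXLAN tunnel on UDP port 8472 instead of the IANA default 4789.
    ovs-vsctl add-port br0 vxlan0 -- set interface vxlan0 type=vxlan \
        options:remote_ip=192.168.1.2 options:dst_port=8472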
Eli Britstein Dec. 10, 2019, 2:26 p.m. UTC | #6
On 12/10/2019 2:53 PM, Sathya Perla wrote:
> On Tue, Dec 10, 2019 at 5:25 PM Eli Britstein <elibr@mellanox.com> wrote:
>>
>> On 12/10/2019 12:09 PM, Sathya Perla wrote:
>>> On Tue, Dec 10, 2019 at 1:21 PM Eli Britstein <elibr@mellanox.com> wrote:
>>>> On 12/10/2019 8:52 AM, Sathya Perla wrote:
>>>>> On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
>>>>> ...
>>>>>> +static int
>>>>>> +parse_clone_actions(struct netdev *netdev,
>>>>>> +                    struct flow_actions *actions,
>>>>>> +                    const struct nlattr *clone_actions,
>>>>>> +                    const size_t clone_actions_len,
>>>>>> +                    struct offload_info *info)
>>>>>> +{
>>>>>> +    const struct nlattr *ca;
>>>>>> +    unsigned int cleft;
>>>>>> +
>>>>>> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
>>>>>> +        int clone_type = nl_attr_type(ca);
>>>>>> +
>>>>>> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
>>>>>> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
>>>>>> +            struct rte_flow_action_raw_encap *raw_encap =
>>>>>> +                xzalloc(sizeof *raw_encap);
>>>>>> +
>>>>>> +            raw_encap->data = (uint8_t *)tnl_push->header;
>>>>>> +            raw_encap->preserve = NULL;
>>>>>> +            raw_encap->size = tnl_push->header_len;
>>>>>> +
>>>>>> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
>>>>>> +                            raw_encap);
>>>>> Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
>>>>> information provided by OVS. RAW_ENCAP provides the tunnel header just
>>>>> as a blob of bits, which may not be ideal for NIC HW to offload.
>>>>>
>>>>> How about using the tnl_push->tnl_type field to parse the header and
>>>>> compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
>>>>> RTE_NVGRE_ENCAP, etc.?
>>>>> This would also be more in line with how it's done with TC-offload.
>>>> I see your point. However, struct ovs_action_push_tnl has the "header"
>>>> field just as a raw binary buffer, not broken out into fields as in TC.
>>>> "tnl_type" has a comment /* For logging. */. It is indeed used for
>>>> logging, as in lib/odp-util.c, in function format_odp_tnl_push_header.
>>> This is not entirely true. Check out propagate_tunnel_data_to_flow(),
>>> where tnl_type is being used to figure out nw_proto.
>>>
>>>> Using VXLAN_ENCAP instead of RAW_ENCAP will make the push less generic,
>>>> as I will have to parse the header to build the vxlan_encap fields, just
>>>> so they can be rebuilt into a raw header at the PMD level, so I don't see
>>>> the offload benefit of it.
>>> Why are you assuming that all PMDs will rebuild the tunnel header
>>> fields as a 'raw header'?
>> As far as I've seen in the DPDK code, if I'm not wrong, all PMDs that
>> support it do it like this. Anyway, it is indeed not relevant.
>>> What if the NIC HW needs tunnel specific headers to be programmed
>>> separately for each tunnel type?
>>>
>>>> Another point is that this way it will support any header, not just
>>>> VXLAN (as an example).
>>> Given that VXLAN and NVGRE ENCAP and DECAP actions are already defined
>>> in rte_flow, how about
>>> we use them for vxlan and nvgre and use RAW for the rest of the tunnel types?
>>> We can support all tunnel headers this way too.
>> Again, I see your point.
>>
>> However, at the OVS level, we have the encap as a raw buffer, and DPDK
>> supports it natively using RAW_ENCAP. For that reason I think we should
>> use the straightforward method.
>>
>> I think that if there is a PMD that requires the fields separately, it
>> is its responsibility to parse them, rather than forcing the application
>> to do it.
> How should a PMD parse the header buffer? For example, for VXLAN, should
> the PMD look for dst UDP port 4789? What if a different port number is
> used for VXLAN, as is the case in some deployments?
> I think it's important to deal with this in OVS, because the 'tnl_type'
> info is available in OVS and it should be relayed to the PMD
> in some form.
>
>> As it's a valid attribute in DPDK, and it's the natural way for OVS to
>> use, I think we should use it. If it is for some reason not valid, in
>> the future, DPDK will deprecate it.
> I agree RAW_ENCAP is a valid RTE action. But I feel we must choose the
> action that conveys/translates as much data as possible from the OVS
> layer to the PMD. This will enable offload support from more NIC HWs.

AFAIU, currently there is no NIC HW that actually uses the
additional info in VXLAN_ENCAP vs RAW_ENCAP; they would just
rebuild the header as if it had been received with RAW, so for now I
don't see any benefit of doing so. In the future, if there is such HW as
you suggest, maybe it can be enhanced.

What do you think?

>
> thanks!
Sathya Perla via dev Dec. 10, 2019, 4:29 p.m. UTC | #7
On Tue, Dec 10, 2019 at 7:56 PM Eli Britstein <elibr@mellanox.com> wrote:
>
>
> On 12/10/2019 2:53 PM, Sathya Perla wrote:
> > On Tue, Dec 10, 2019 at 5:25 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>
> >> On 12/10/2019 12:09 PM, Sathya Perla wrote:
> >>> On Tue, Dec 10, 2019 at 1:21 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>>> On 12/10/2019 8:52 AM, Sathya Perla wrote:
> >>>>> On Sun, Dec 8, 2019 at 6:53 PM Eli Britstein <elibr@mellanox.com> wrote:
> >>>>> ...
> >>>>>> +static int
> >>>>>> +parse_clone_actions(struct netdev *netdev,
> >>>>>> +                    struct flow_actions *actions,
> >>>>>> +                    const struct nlattr *clone_actions,
> >>>>>> +                    const size_t clone_actions_len,
> >>>>>> +                    struct offload_info *info)
> >>>>>> +{
> >>>>>> +    const struct nlattr *ca;
> >>>>>> +    unsigned int cleft;
> >>>>>> +
> >>>>>> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
> >>>>>> +        int clone_type = nl_attr_type(ca);
> >>>>>> +
> >>>>>> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> >>>>>> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> >>>>>> +            struct rte_flow_action_raw_encap *raw_encap =
> >>>>>> +                xzalloc(sizeof *raw_encap);
> >>>>>> +
> >>>>>> +            raw_encap->data = (uint8_t *)tnl_push->header;
> >>>>>> +            raw_encap->preserve = NULL;
> >>>>>> +            raw_encap->size = tnl_push->header_len;
> >>>>>> +
> >>>>>> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> >>>>>> +                            raw_encap);
> >>>>> Hi, converting OVS_TUNNEL_PUSH into RTE_RAW_ENCAP loses the 'tnl_type'
> >>>>> information provided by OVS. RAW_ENCAP provides the tunnel header just
> >>>>> as a blob of bits, which may not be ideal for NIC HW to offload.
> >>>>>
> >>>>> How about using the tnl_push->tnl_type field to parse the header and
> >>>>> compose specific tunnel encap actions like RTE_VXLAN_ENCAP,
> >>>>> RTE_NVGRE_ENCAP, etc.?
> >>>>> This would also be more in line with how it's done with TC-offload.
> >>>> I see your point. However, struct ovs_action_push_tnl has the "header"
> >>>> field just as a raw binary buffer, not broken out into fields as in TC.
> >>>> "tnl_type" has a comment /* For logging. */. It is indeed used for
> >>>> logging, as in lib/odp-util.c, in function format_odp_tnl_push_header.
> >>> This is not entirely true. Check out propagate_tunnel_data_to_flow(),
> >>> where tnl_type is being used to figure out nw_proto.
> >>>
> >>>> Using VXLAN_ENCAP instead of RAW_ENCAP will make the push less generic,
> >>>> as I will have to parse the header to build the vxlan_encap fields, just
> >>>> so they can be rebuilt into a raw header at the PMD level, so I don't see
> >>>> the offload benefit of it.
> >>> Why are you assuming that all PMDs will rebuild the tunnel header
> >>> fields as a 'raw header'?
> >> As far as I've seen in the DPDK code, if I'm not wrong, all PMDs that
> >> support it do it like this. Anyway, it is indeed not relevant.
> >>> What if the NIC HW needs tunnel specific headers to be programmed
> >>> separately for each tunnel type?
> >>>
> >>>> Another point is that this way it will support any header, not just
> >>>> VXLAN (as an example).
> >>> Given that VXLAN and NVGRE ENCAP and DECAP actions are already defined
> >>> in rte_flow, how about
> >>> we use them for vxlan and nvgre and use RAW for the rest of the tunnel types?
> >>> We can support all tunnel headers this way too.
> >> Again, I see your point.
> >>
> >> However, at the OVS level, we have the encap as a raw buffer, and DPDK
> >> supports it natively using RAW_ENCAP. For that reason I think we should
> >> use the straightforward method.
> >>
> >> I think that if there is a PMD that requires the fields separately, it
> >> is its responsibility to parse them, rather than forcing the application
> >> to do it.
> > How should a PMD parse the header buffer? For example, for VXLAN, should
> > the PMD look for dst UDP port 4789? What if a different port number is
> > used for VXLAN, as is the case in some deployments?
> > I think it's important to deal with this in OVS, because the 'tnl_type'
> > info is available in OVS and it should be relayed to the PMD
> > in some form.
> >
> >> As it's a valid attribute in DPDK, and it's the natural way for OVS to
> >> use, I think we should use it. If it is for some reason not valid, in
> >> the future, DPDK will deprecate it.
> > I agree RAW_ENCAP is a valid RTE action. But I feel we must choose the
> > action that conveys/translates as much data as possible from the OVS
> > layer to the PMD. This will enable offload support from more NIC HWs.
>
> AFAIU, currently there is no NIC HW that actually uses the
> additional info in VXLAN_ENCAP vs RAW_ENCAP; they would just
> rebuild the header as if it had been received with RAW, so for now I
> don't see any benefit of doing so. In the future, if there is such HW as
> you suggest, maybe it can be enhanced.
>
> What do you think?

If you look at TC offload in the bnxt_en driver (bnxt_tc.c), it programs
VXLAN encap by setting the L2, L3, L4 header fields specifically. The
bnxt PMD would have to do something very similar!
So, we do have NIC HW that suits/needs this.

I'm fine if this is a follow-on patch....but it is best done now, because
as soon as this patch-set is committed,
PMDs may start integrating with the RTE attributes used here....

If you agree with this in principle, maybe we can help in enhancing this patch?
Please let me know.
thanks!
Ilya Maximets Dec. 10, 2019, 8:43 p.m. UTC | #8
On 08.12.2019 14:23, Eli Britstein wrote:
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> ---
>  Documentation/howto/dpdk.rst |  2 ++
>  NEWS                         |  4 ++--
>  lib/netdev-dpdk.c            | 14 +++++++++++++
>  lib/netdev-offload-dpdk.c    | 48 ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 66 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> index b9be83002..b9aaf26bf 100644
> --- a/Documentation/howto/dpdk.rst
> +++ b/Documentation/howto/dpdk.rst
> @@ -394,6 +394,8 @@ Supported actions for hardware offload are:
>  - Drop.
>  - Modification of Ethernet (mod_dl_src/mod_dl_dst).
>  - Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl).
> +- Clone with tnl_push and output (a single clone, as the last action,
> +  with a single output, as the last nested clone action).
>  
>  Further Reading
>  ---------------
> diff --git a/NEWS b/NEWS
> index 297ca6fff..25125801b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -26,8 +26,8 @@ Post-v2.12.0
>       * DPDK ring ports (dpdkr) are deprecated and will be removed in next
>         releases.
>       * Add support for DPDK 19.11.
> -     * Add hardware offload support for output, drop and set actions of
> -       MAC and IPv4 (experimental).
> +     * Add hardware offload support for output, drop, set of MAC, IPv4
> +       and tunnel push-output actions (experimental).
>  
>  v2.12.0 - 03 Sep 2019
>  ---------------------
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index c4606363d..14f5d6f3b 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -4762,6 +4762,20 @@ ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
>          } else {
>              ds_put_cstr(s, "  Set-ttl = null\n");
>          }
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
> +        const struct rte_flow_action_raw_encap *raw_encap = actions->conf;
> +
> +        ds_put_cstr(s, "rte flow raw-encap action:\n");
> +        if (raw_encap) {
> +            ds_put_format(s,
> +                          "  Raw-encap: size=%ld\n",
> +                          raw_encap->size);
> +            ds_put_format(s,
> +                          "  Raw-encap: encap=\n");

Why split this across so many lines?

> +            ds_put_hex_dump(s, raw_encap->data, raw_encap->size, 0, false);
> +        } else {
> +            ds_put_cstr(s, "  Raw-encap = null\n");
> +        }
>      } else {
>          ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>      }
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 3fccac956..b0403a085 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -634,6 +634,45 @@ parse_set_actions(struct flow_actions *actions,
>      return 0;
>  }
>  
> +static int
> +parse_clone_actions(struct netdev *netdev,
> +                    struct flow_actions *actions,
> +                    const struct nlattr *clone_actions,
> +                    const size_t clone_actions_len,
> +                    struct offload_info *info)
> +{
> +    const struct nlattr *ca;
> +    unsigned int cleft;
> +
> +    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
> +        int clone_type = nl_attr_type(ca);
> +
> +        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> +            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> +            struct rte_flow_action_raw_encap *raw_encap =
> +                xzalloc(sizeof *raw_encap);
> +
> +            raw_encap->data = (uint8_t *)tnl_push->header;
> +            raw_encap->preserve = NULL;
> +            raw_encap->size = tnl_push->header_len;
> +
> +            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
> +                            raw_encap);
> +        } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
> +            if (!(cleft <= NLA_ALIGN(ca->nla_len)) ||

The same question as for the simple output action:
why should it be the last one here?  I'd guess any combination of
encaps and outputs should be valid here, shouldn't it?

> +                add_output_action(netdev, actions, ca, info)) {
> +                return -1;
> +            }
> +        } else {
> +            VLOG_DBG_RL(&error_rl,
> +                        "Unsupported clone action. clone_type=%d", clone_type);

Maybe, "Unsupported nested action inside clone(), action type: %d" ?

> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int
>  parse_flow_actions(struct netdev *netdev,
>                     struct flow_actions *actions,
> @@ -661,6 +700,15 @@ parse_flow_actions(struct netdev *netdev,
>                                    masked)) {
>                  return -1;
>              }
> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE) {
> +            const struct nlattr *clone_actions = nl_attr_get(nla);
> +            size_t clone_actions_len = nl_attr_get_size(nla);
> +
> +            if (!(left <= NLA_ALIGN(nla->nla_len)) ||
> +                parse_clone_actions(netdev, actions, clone_actions,
> +                                    clone_actions_len, info)) {
> +                return -1;
> +            }
>          } else {
>              VLOG_DBG_RL(&error_rl,
>                          "Unsupported action type %d", nl_attr_type(nla));
>
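
For reference, a sketch of the single-call formatting hinted at above (also
using PRIuSIZE, OVS's portable size_t specifier, which would avoid the
%ld/size_t mismatch on 32-bit builds):

    ds_put_format(s, "  Raw-encap: size=%"PRIuSIZE"\n  Raw-encap: encap=\n",
                  raw_encap->size);
    ds_put_hex_dump(s, raw_encap->data, raw_encap->size, 0, false);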

Patch

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index b9be83002..b9aaf26bf 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -394,6 +394,8 @@  Supported actions for hardware offload are:
 - Drop.
 - Modification of Ethernet (mod_dl_src/mod_dl_dst).
 - Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl).
+- Clone with tnl_push and output (a single clone, as the last action,
+  with a single output, as the last nested clone action).
 
 Further Reading
 ---------------
diff --git a/NEWS b/NEWS
index 297ca6fff..25125801b 100644
--- a/NEWS
+++ b/NEWS
@@ -26,8 +26,8 @@  Post-v2.12.0
      * DPDK ring ports (dpdkr) are deprecated and will be removed in next
        releases.
      * Add support for DPDK 19.11.
-     * Add hardware offload support for output, drop and set actions of
-       MAC and IPv4 (experimental).
+     * Add hardware offload support for output, drop, set of MAC, IPv4
+       and tunnel push-output actions (experimental).
 
 v2.12.0 - 03 Sep 2019
 ---------------------
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c4606363d..14f5d6f3b 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -4762,6 +4762,20 @@  ds_put_flow_action(struct ds *s, const struct rte_flow_action *actions)
         } else {
             ds_put_cstr(s, "  Set-ttl = null\n");
         }
+    } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
+        const struct rte_flow_action_raw_encap *raw_encap = actions->conf;
+
+        ds_put_cstr(s, "rte flow raw-encap action:\n");
+        if (raw_encap) {
+            ds_put_format(s,
+                          "  Raw-encap: size=%ld\n",
+                          raw_encap->size);
+            ds_put_format(s,
+                          "  Raw-encap: encap=\n");
+            ds_put_hex_dump(s, raw_encap->data, raw_encap->size, 0, false);
+        } else {
+            ds_put_cstr(s, "  Raw-encap = null\n");
+        }
     } else {
         ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
     }
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 3fccac956..b0403a085 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -634,6 +634,45 @@  parse_set_actions(struct flow_actions *actions,
     return 0;
 }
 
+static int
+parse_clone_actions(struct netdev *netdev,
+                    struct flow_actions *actions,
+                    const struct nlattr *clone_actions,
+                    const size_t clone_actions_len,
+                    struct offload_info *info)
+{
+    const struct nlattr *ca;
+    unsigned int cleft;
+
+    NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) {
+        int clone_type = nl_attr_type(ca);
+
+        if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
+            const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
+            struct rte_flow_action_raw_encap *raw_encap =
+                xzalloc(sizeof *raw_encap);
+
+            raw_encap->data = (uint8_t *)tnl_push->header;
+            raw_encap->preserve = NULL;
+            raw_encap->size = tnl_push->header_len;
+
+            add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
+                            raw_encap);
+        } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
+            if (!(cleft <= NLA_ALIGN(ca->nla_len)) ||
+                add_output_action(netdev, actions, ca, info)) {
+                return -1;
+            }
+        } else {
+            VLOG_DBG_RL(&error_rl,
+                        "Unsupported clone action. clone_type=%d", clone_type);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 static int
 parse_flow_actions(struct netdev *netdev,
                    struct flow_actions *actions,
@@ -661,6 +700,15 @@  parse_flow_actions(struct netdev *netdev,
                                   masked)) {
                 return -1;
             }
+        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE) {
+            const struct nlattr *clone_actions = nl_attr_get(nla);
+            size_t clone_actions_len = nl_attr_get_size(nla);
+
+            if (!(left <= NLA_ALIGN(nla->nla_len)) ||
+                parse_clone_actions(netdev, actions, clone_actions,
+                                    clone_actions_len, info)) {
+                return -1;
+            }
         } else {
             VLOG_DBG_RL(&error_rl,
                         "Unsupported action type %d", nl_attr_type(nla));