diff mbox series

[ovs-dev,v1] netdev-rte-offloads: Reassign vport netdev functions.

Message ID 1555490103-10275-1-git-send-email-ophirmu@mellanox.com
State Superseded
Headers show
Series [ovs-dev,v1] netdev-rte-offloads: Reassign vport netdev functions. | expand

Commit Message

Ophir Munk April 17, 2019, 8:35 a.m. UTC
From: Roni Bar Yanai <roniba@mellanox.com>

vport offloaded functions should have a different implementation for
kernel based OVS versus dpdk based OVS. Currently there is an unconditional
execution of a kernel based calls even if the vport was added by dpif-netdev
rather than by dpif-netlink. Before this commit and in case hw-offload=true
adding a netdev datapath vport resulted in an error since the vport kernel
based APIs were called. In practice the API returned immediately on a
get_ifindex() failure (see [1]), which caused no harm, but it is required
to avoid such scenarios in advance. In case of a netdev datapath vport flow
functions must be updated at runtime. This commit reassigns flow functions
to NULL when such a vport is added. It uses a duplicated vport class to
keep the original default kernel vport class intact. This enables using
vports in a mixed environment of kernel and netdev (userspace) bridges at
the same time.

[1]
ovs|00002|netdev_tc_offloads(dp_netdev_flow_5)|ERR|flow_put: failed to
get ifindex for vxlan_sys_4789: No such device

Reviewed-by: Asaf Penso <asafp@mellanox.com>
Co-authored-by: Ophir Munk <ophirmu@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
---
 lib/dpif-netdev.c         |   2 +
 lib/netdev-rte-offloads.c |  47 +++++++++++
 lib/netdev-rte-offloads.h |   5 ++
 lib/netdev-vport.c        | 211 ++++++++++++++++++++++++++++------------------
 lib/netdev-vport.h        |   4 +
 5 files changed, 188 insertions(+), 81 deletions(-)

Comments

0-day Robot April 17, 2019, 9:49 a.m. UTC | #1
Bleep bloop.  Greetings Ophir Munk, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


build:
/bin/sh ./libtool  --tag=CXX   --mode=compile g++ -std=gnu++11 -DHAVE_CONFIG_H -I.    -I ./include -I ./include -I ./lib -I ./lib     -g -O2 -MT include/openvswitch/cxxtest.lo -MD -MP -MF $depbase.Tpo -c -o include/openvswitch/cxxtest.lo include/openvswitch/cxxtest.cc &&\
mv -f $depbase.Tpo $depbase.Plo
libtool: compile:  g++ -std=gnu++11 -DHAVE_CONFIG_H -I. -I ./include -I ./include -I ./lib -I ./lib -g -O2 -MT include/openvswitch/cxxtest.lo -MD -MP -MF include/openvswitch/.deps/cxxtest.Tpo -c include/openvswitch/cxxtest.cc -o include/openvswitch/cxxtest.o
/bin/sh ./libtool  --tag=CXX   --mode=link g++ -std=gnu++11  -g -O2     -o include/openvswitch/libcxxtest.la  include/openvswitch/cxxtest.lo  -lpthread -lrt -lm  -lunbound
libtool: link: ar cru include/openvswitch/.libs/libcxxtest.a  include/openvswitch/cxxtest.o
libtool: link: ranlib include/openvswitch/.libs/libcxxtest.a
libtool: link: ( cd "include/openvswitch/.libs" && rm -f "libcxxtest.la" && ln -s "../libcxxtest.la" "libcxxtest.la" )
depbase=`echo utilities/ovs-appctl.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\
gcc -std=gnu99 -DHAVE_CONFIG_H -I.    -I ./include -I ./include -I ./lib -I ./lib    -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g -O2 -MT utilities/ovs-appctl.o -MD -MP -MF $depbase.Tpo -c -o utilities/ovs-appctl.o utilities/ovs-appctl.c &&\
mv -f $depbase.Tpo $depbase.Po
/bin/sh ./libtool  --tag=CC   --mode=link gcc -std=gnu99 -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g -O2     -o utilities/ovs-appctl utilities/ovs-appctl.o lib/libopenvswitch.la -lpthread -lrt -lm  -lunbound
libtool: link: gcc -std=gnu99 -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror -g -O2 -o utilities/ovs-appctl utilities/ovs-appctl.o  lib/.libs/libopenvswitch.a -lssl -lcrypto -lcap-ng -lpthread -lrt -lm -lunbound
depbase=`echo utilities/ovs-testcontroller.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\
gcc -std=gnu99 -DHAVE_CONFIG_H -I.    -I ./include -I ./include -I ./lib -I ./lib    -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g -O2 -MT utilities/ovs-testcontroller.o -MD -MP -MF $depbase.Tpo -c -o utilities/ovs-testcontroller.o utilities/ovs-testcontroller.c &&\
mv -f $depbase.Tpo $depbase.Po
/bin/sh ./libtool  --tag=CC   --mode=link gcc -std=gnu99 -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g -O2     -o utilities/ovs-testcontroller utilities/ovs-testcontroller.o lib/libopenvswitch.la -lssl -lcrypto   -lpthread -lrt -lm  -lunbound
libtool: link: gcc -std=gnu99 -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror -g -O2 -o utilities/ovs-testcontroller utilities/ovs-testcontroller.o  lib/.libs/libopenvswitch.a -lcap-ng -lssl -lcrypto -lpthread -lrt -lm -lunbound
lib/.libs/libopenvswitch.a(dpif-netdev.o): In function `dpif_netdev_port_add':
/var/lib/jenkins/jobs/upstream_build_from_pw/workspace/lib/dpif-netdev.c:1869: undefined reference to `netdev_rte_offloads_port_add'
collect2: error: ld returned 1 exit status
make[2]: *** [utilities/ovs-testcontroller] Error 1
make[2]: Leaving directory `/var/lib/jenkins/jobs/upstream_build_from_pw/workspace'
make[1]: *** [all-recursive] Error 1
make[1]: Leaving directory `/var/lib/jenkins/jobs/upstream_build_from_pw/workspace'
make: *** [all] Error 2


Please check this out.  If you feel there has been an error, please email aconole@bytheb.org

Thanks,
0-day Robot
Aaron Conole April 17, 2019, 3:45 p.m. UTC | #2
0-day Robot <robot@bytheb.org> writes:

> Bleep bloop.  Greetings Ophir Munk, I am a robot and I have tried out your patch.
> Thanks for your contribution.
>
> I encountered some error that I wasn't expecting.  See the details below.
>
>
> build:
...
> libtool: link: gcc -std=gnu99 -Wstrict-prototypes -Wall -Wextra
> -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security
> -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align
> -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes
> -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror
> -Werror -g -O2 -o utilities/ovs-testcontroller
> utilities/ovs-testcontroller.o lib/.libs/libopenvswitch.a -lcap-ng
> -lssl -lcrypto -lpthread -lrt -lm -lunbound
> lib/.libs/libopenvswitch.a(dpif-netdev.o): In function `dpif_netdev_port_add':
> /var/lib/jenkins/jobs/upstream_build_from_pw/workspace/lib/dpif-netdev.c:1869:
> undefined reference to `netdev_rte_offloads_port_add'
> collect2: error: ld returned 1 exit status
> make[2]: *** [utilities/ovs-testcontroller] Error 1
> make[2]: Leaving directory `/var/lib/jenkins/jobs/upstream_build_from_pw/workspace'
> make[1]: *** [all-recursive] Error 1
> make[1]: Leaving directory `/var/lib/jenkins/jobs/upstream_build_from_pw/workspace'
> make: *** [all] Error 2

I guess you'll need to provide an implementation for the non-dpdk
builds.

I also suggest a different name than netdev_rte_offloads_port_add

rte comes from dpdk as an acronym for Run Time Environment.  Maybe even
just dropping the 'rte_' portion?

>
> Thanks,
> 0-day Robot
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ben Pfaff April 17, 2019, 4:34 p.m. UTC | #3
On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
> rte comes from dpdk as an acronym for Run Time Environment.  Maybe even
> just dropping the 'rte_' portion?

*That* is what rte stands for?  What a ridiculously generic name.  It's
like naming a library Operating System.
Gregory Rose April 17, 2019, 4:39 p.m. UTC | #4
On 4/17/2019 9:34 AM, Ben Pfaff wrote:
> On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
>> rte comes from dpdk as an acronym for Run Time Environment.  Maybe even
>> just dropping the 'rte_' portion?
> *That* is what rte stands for?  What a ridiculously generic name.  It's
> like naming a library Operating System.

Like libOS?

https://lwn.net/Articles/637658/

;^)

> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ben Pfaff April 17, 2019, 4:54 p.m. UTC | #5
On Wed, Apr 17, 2019 at 09:39:45AM -0700, Gregory Rose wrote:
> 
> 
> On 4/17/2019 9:34 AM, Ben Pfaff wrote:
> > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
> > > rte comes from dpdk as an acronym for Run Time Environment.  Maybe even
> > > just dropping the 'rte_' portion?
> > *That* is what rte stands for?  What a ridiculously generic name.  It's
> > like naming a library Operating System.
> 
> Like libOS?
> 
> https://lwn.net/Articles/637658/

Yeah... bad name.
Stokes, Ian April 18, 2019, 3:58 p.m. UTC | #6
On 4/17/2019 5:34 PM, Ben Pfaff wrote:
> On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
>> rte comes from dpdk as an acronym for Run Time Environment.  Maybe even
>> just dropping the 'rte_' portion?
> 
> *That* is what rte stands for?  What a ridiculously generic name.  It's
> like naming a library Operating System.

This piqued my interest also, with DPDK in the early days it was 
targeting bare metal comms systems, so the original API was LWRTE 
(LiteWeight Run Time Environment) which became RTE as it moved on from 
bare metal, so it seems more of a legacy convention.

Ian
Ben Pfaff April 18, 2019, 7:52 p.m. UTC | #7
On Thu, Apr 18, 2019 at 04:58:58PM +0100, Ian Stokes wrote:
> On 4/17/2019 5:34 PM, Ben Pfaff wrote:
> > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
> > > rte comes from dpdk as an acronym for Run Time Environment.  Maybe even
> > > just dropping the 'rte_' portion?
> > 
> > *That* is what rte stands for?  What a ridiculously generic name.  It's
> > like naming a library Operating System.
> 
> This piqued my interest also, with DPDK in the early days it was targeting
> bare metal comms systems, so the original API was LWRTE (LiteWeight Run Time
> Environment) which became RTE as it moved on from bare metal, so it seems
> more of a legacy convention.

Oh, so originally it *was* the basis of an operating system.  That makes
a little bit of sense now.
Ophir Munk April 21, 2019, 9:11 a.m. UTC | #8
Thomas - would you like to explain more on the origins of "rte"?

Please note that dpdk API and its public data structures use an rte_ prefix, for example:
rte_create_flow(). This is a convenient way to know when we are directly calling/accessing the DPDK library.
In parallel we should avoid prefixing rte_ to any OVS owned APIs or structures.
In any case "rte" is widely used inside OVS with things related to dpdk.

I have sent v2 which fixes the compilation error (when compiled for kernel only without dpdk)
which was caused by an unreferenced call inside dpif-netdev. 
This call is only relevant to dpdk and is only relevant when configuring
"--with-dpdk=".

Regards,
Ophir

> -----Original Message-----
> From: ovs-dev-bounces@openvswitch.org <ovs-dev-
> bounces@openvswitch.org> On Behalf Of Ian Stokes
> Sent: Thursday, April 18, 2019 6:59 PM
> To: Ben Pfaff <blp@ovn.org>; Aaron Conole <aconole@redhat.com>
> Cc: dev@openvswitch.org
> Subject: Re: [ovs-dev] [ovs-dev, v1] netdev-rte-offloads: Reassign vport
> netdev functions.
> 
> On 4/17/2019 5:34 PM, Ben Pfaff wrote:
> > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
> >> rte comes from dpdk as an acronym for Run Time Environment.  Maybe
> >> even just dropping the 'rte_' portion?
> >
> > *That* is what rte stands for?  What a ridiculously generic name.
> > It's like naming a library Operating System.
> 
> This piqued my interest also, with DPDK in the early days it was targeting bare
> metal comms systems, so the original API was LWRTE (LiteWeight Run Time
> Environment) which became RTE as it moved on from bare metal, so it seems
> more of a legacy convention.
> 
> Ian
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.
> openvswitch.org%2Fmailman%2Flistinfo%2Fovs-
> dev&amp;data=02%7C01%7Cophirmu%40mellanox.com%7C7961fde350f149
> b17ede08d6c416d1f3%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C1%7
> C636911999634142236&amp;sdata=m5LKiyrnAtWO3vpVTyWiQXsnLlhq5ma14
> MydfFiXmfI%3D&amp;reserved=0
Thomas Monjalon April 21, 2019, 4:50 p.m. UTC | #9
21/04/2019 11:11, Ophir Munk:
> Thomas - would you like to explain more on the origins of "rte"?

Ian explained (below) the origin quite clearly.
It has been decided in the early days by Intel.

> From: Ian Stokes
> > On 4/17/2019 5:34 PM, Ben Pfaff wrote:
> > > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
> > >> rte comes from dpdk as an acronym for Run Time Environment.  Maybe
> > >> even just dropping the 'rte_' portion?
> > >
> > > *That* is what rte stands for?  What a ridiculously generic name.
> > > It's like naming a library Operating System.

Yes I agree that it's ridiculous :)

I already proposed to replace rte_ with dpdk_ prefix
but the vast majority was against a big replacement.
Would you support such a change?

> > This piqued my interest also, with DPDK in the early days it was targeting bare
> > metal comms systems, so the original API was LWRTE (LiteWeight Run Time
> > Environment) which became RTE as it moved on from bare metal, so it seems
> > more of a legacy convention.
Ben Pfaff April 22, 2019, 4:13 p.m. UTC | #10
On Sun, Apr 21, 2019 at 06:50:48PM +0200, Thomas Monjalon wrote:
> 21/04/2019 11:11, Ophir Munk:
> > Thomas - would you like to explain more on the origins of "rte"?
> 
> Ian explained (below) the origin quite clearly.
> It has been decided in the early days by Intel.
> 
> > From: Ian Stokes
> > > On 4/17/2019 5:34 PM, Ben Pfaff wrote:
> > > > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
> > > >> rte comes from dpdk as an acronym for Run Time Environment.  Maybe
> > > >> even just dropping the 'rte_' portion?
> > > >
> > > > *That* is what rte stands for?  What a ridiculously generic name.
> > > > It's like naming a library Operating System.
> 
> Yes I agree that it's ridiculous :)
> 
> I already proposed to replace rte_ with dpdk_ prefix
> but the vast majority was against a big replacement.
> Would you support such a change?

Yes, though I don't think my opinion should matter much here.
Ilya Maximets April 23, 2019, 7:46 a.m. UTC | #11
> 21/04/2019 11:11, Ophir Munk:
>> Thomas - would you like to explain more on the origins of "rte"?
> 
> Ian explained (below) the origin quite clearly.
> It has been decided in the early days by Intel.
> 
>> From: Ian Stokes
>> > On 4/17/2019 5:34 PM, Ben Pfaff wrote:
>> > > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
>> > >> rte comes from dpdk as an acronym for Run Time Environment.  Maybe
>> > >> even just dropping the 'rte_' portion?
>> > >
>> > > *That* is what rte stands for?  What a ridiculously generic name.
>> > > It's like naming a library Operating System.
> 
> Yes I agree that it's ridiculous :)

The best header is "rte_eal.h". It looks like DPDK tries to abstract from itself.

> 
> I already proposed to replace rte_ with dpdk_ prefix
> but the vast majority was against a big replacement.
> Would you support such a change?

I'm not contributing much to dpdk these days, but I'd support such a change.
This would be a big step toward apps that tries to work with DPDK as a library
and not as a run-time environment.
And, probably, right now is the last chance for DPDK do make such a huge API break.
There was way too much discussions about API stability and I'm afraid that DPDK
will petrify soon without ability to change anything.

> 
>> > This piqued my interest also, with DPDK in the early days it was targeting bare
>> > metal comms systems, so the original API was LWRTE (LiteWeight Run Time
>> > Environment) which became RTE as it moved on from bare metal, so it seems
>> > more of a legacy convention.
>
Aaron Conole April 29, 2019, 1:44 p.m. UTC | #12
Ilya Maximets <i.maximets@samsung.com> writes:

>> 21/04/2019 11:11, Ophir Munk:
>>> Thomas - would you like to explain more on the origins of "rte"?
>> 
>> Ian explained (below) the origin quite clearly.
>> It has been decided in the early days by Intel.
>> 
>>> From: Ian Stokes
>>> > On 4/17/2019 5:34 PM, Ben Pfaff wrote:
>>> > > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
>>> > >> rte comes from dpdk as an acronym for Run Time Environment.  Maybe
>>> > >> even just dropping the 'rte_' portion?
>>> > >
>>> > > *That* is what rte stands for?  What a ridiculously generic name.
>>> > > It's like naming a library Operating System.
>> 
>> Yes I agree that it's ridiculous :)
>
> The best header is "rte_eal.h". It looks like DPDK tries to abstract from itself.
>
>> 
>> I already proposed to replace rte_ with dpdk_ prefix
>> but the vast majority was against a big replacement.
>> Would you support such a change?
>
> I'm not contributing much to dpdk these days, but I'd support such a
> change.

I have a longer (and probably useless, but w/e) opinion below.

> This would be a big step toward apps that tries to work with DPDK as a library
> and not as a run-time environment.
> And, probably, right now is the last chance for DPDK do make such a huge API break.
> There was way too much discussions about API stability and I'm afraid that DPDK
> will petrify soon without ability to change anything.

+ .5

I think it makes sense to do this change at the time DPDK project
declares a stronger ABI&API guarantee.  "This is the new ossified
ABI/API, and we don't break it."  It's a strong declaration to users.
And API / ABI are really just user interfaces.  Compilers, computers,
etc. don't "care".

Then again, history is littered with crazier ossification in software.
After all, we still use CAR,CDR in lisp and I don't think most people
know what that's all about either.  Heck, supposedly indices start at 0
rather than 1 to correct for yacht handicapping interrupting the
compiler[1][2] and it turns out that isn't a terrible thing.  "rte_"
*is* one character less than "dpdk_" after all.

All this to say, I don't think it really truly matters.  As seen in the
original patch, people now just think "rte == dpdk" and so it's probably
a lot of hassle for not as much gain.  Anyone who googles "rte_*" upon
encountering it in code will find DPDK.  And whatever the reason for
using "rte_" originally won't matter.

So I guess... do whatever you'd like in your existing code.  :)

OTOH, for code in OvS, I'll continue to advocate against using *rte*
because it's new code and *rte* doesn't really mean anything to OvS
anyway.  At least in the DPDK project, it meant something.

1: https://en.wikipedia.org/wiki/Zero-based_numbering#Origin
2: http://exple.tive.org/blarg/2013/10/22/citation-needed/

>> 
>>> > This piqued my interest also, with DPDK in the early days it was targeting bare
>>> > metal comms systems, so the original API was LWRTE (LiteWeight Run Time
>>> > Environment) which became RTE as it moved on from bare metal, so it seems
>>> > more of a legacy convention.
>>
Roni Bar Yanai April 30, 2019, 11:29 a.m. UTC | #13
Aaron, please see inline.

>-----Original Message-----
>From: ovs-dev-bounces@openvswitch.org <ovs-dev-bounces@openvswitch.org>
>On Behalf Of Aaron Conole
>Sent: Monday, April 29, 2019 4:44 PM
>To: Ilya Maximets <i.maximets@samsung.com>
>Cc: ovs-dev@openvswitch.org; Thomas Monjalon <thomas@monjalon.net>
>Subject: Re: [ovs-dev] [ovs-dev, v1] netdev-rte-offloads: Reassign vport netdev
>functions.
>
>Ilya Maximets <i.maximets@samsung.com> writes:
>
>>> 21/04/2019 11:11, Ophir Munk:
>>>> Thomas - would you like to explain more on the origins of "rte"?
>>>
>>> Ian explained (below) the origin quite clearly.
>>> It has been decided in the early days by Intel.
>>>
>>>> From: Ian Stokes
>>>> > On 4/17/2019 5:34 PM, Ben Pfaff wrote:
>>>> > > On Wed, Apr 17, 2019 at 11:45:33AM -0400, Aaron Conole wrote:
>>>> > >> rte comes from dpdk as an acronym for Run Time Environment.  Maybe
>>>> > >> even just dropping the 'rte_' portion?
>>>> > >
>>>> > > *That* is what rte stands for?  What a ridiculously generic name.
>>>> > > It's like naming a library Operating System.
>>>
>>> Yes I agree that it's ridiculous :)
>>
>> The best header is "rte_eal.h". It looks like DPDK tries to abstract from itself.
>>
>>>
>>> I already proposed to replace rte_ with dpdk_ prefix
>>> but the vast majority was against a big replacement.
>>> Would you support such a change?
>>
>> I'm not contributing much to dpdk these days, but I'd support such a
>> change.
>
>I have a longer (and probably useless, but w/e) opinion below.
>
>> This would be a big step toward apps that tries to work with DPDK as a library
>> and not as a run-time environment.
>> And, probably, right now is the last chance for DPDK do make such a huge API
>break.
>> There was way too much discussions about API stability and I'm afraid that DPDK
>> will petrify soon without ability to change anything.
>
>+ .5
>
>I think it makes sense to do this change at the time DPDK project
>declares a stronger ABI&API guarantee.  "This is the new ossified
>ABI/API, and we don't break it."  It's a strong declaration to users.
>And API / ABI are really just user interfaces.  Compilers, computers,
>etc. don't "care".
>
>Then again, history is littered with crazier ossification in software.
>After all, we still use CAR,CDR in lisp and I don't think most people
>know what that's all about either.  Heck, supposedly indices start at 0
>rather than 1 to correct for yacht handicapping interrupting the
>compiler[1][2] and it turns out that isn't a terrible thing.  "rte_"
>*is* one character less than "dpdk_" after all.
>
>All this to say, I don't think it really truly matters.  As seen in the
>original patch, people now just think "rte == dpdk" and so it's probably
>a lot of hassle for not as much gain.  Anyone who googles "rte_*" upon
>encountering it in code will find DPDK.  And whatever the reason for
>using "rte_" originally won't matter.
>
>So I guess... do whatever you'd like in your existing code.  :)
>
>OTOH, for code in OvS, I'll continue to advocate against using *rte*
>because it's new code and *rte* doesn't really mean anything to OvS
>anyway.  At least in the DPDK project, it meant something.
>
>1:
>https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fen.wikipe
>dia.org%2Fwiki%2FZero-
>based_numbering%23Origin&amp;data=02%7C01%7Croniba%40mellanox.com%7
>C6de746f1250948803ec608d6cca94ccd%7Ca652971c7d2e4d9ba6a4d149256f461b%7
>C0%7C0%7C636921424851725523&amp;sdata=IvceIDWnPxohuOl2smsbmf2Pz43tk
>8KgnUm5AVXDWfU%3D&amp;reserved=0
>2:
>https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fexple.tive.o
>rg%2Fblarg%2F2013%2F10%2F22%2Fcitation-
>needed%2F&amp;data=02%7C01%7Croniba%40mellanox.com%7C6de746f125094
>8803ec608d6cca94ccd%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636
>921424851725523&amp;sdata=6ZlgjwSktRL8ltjAOUHcDJHksoWH8rKqo%2BenInMJ
>6Y8%3D&amp;reserved=0
>
Agree that for most developers dpdk == rte, and when looking at the code rte* will not
be confusing. Having said that, totally agree that it makes more sense for OVS code to 
use dpdk and not rte in the API. We have tc-offload and dpdk-offload and we have 
netdev-dpdk...etc. 
This patch is not relevant anymore as the required functionality was implemented by Ilya
as infrastructure in a different patch, but will follow the convention on future patches.
Thanks,
Roni
>>>
>>>> > This piqued my interest also, with DPDK in the early days it was targeting
>bare
>>>> > metal comms systems, so the original API was LWRTE (LiteWeight Run Time
>>>> > Environment) which became RTE as it moved on from bare metal, so it
>seems
>>>> > more of a legacy convention.
>>>
>_______________________________________________
>dev mailing list
>dev@openvswitch.org
>https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.open
>vswitch.org%2Fmailman%2Flistinfo%2Fovs-
>dev&amp;data=02%7C01%7Croniba%40mellanox.com%7C6de746f1250948803ec60
>8d6cca94ccd%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C63692142485
>1735523&amp;sdata=ITEQBetXzMxsyR7wU02rwrpx4epHc4LARdEhEDTXq3M%3D&
>amp;reserved=0
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4d6d0c3..9a48038 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -51,6 +51,7 @@ 
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-provider.h"
+#include "netdev-rte-offloads.h"
 #include "netdev-vport.h"
 #include "netlink.h"
 #include "odp-execute.h"
@@ -1865,6 +1866,7 @@  dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
     if (!error) {
         *port_nop = port_no;
         error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
+        netdev_rte_offloads_port_add(netdev);
     }
     ovs_mutex_unlock(&dp->port_mutex);
 
diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c
index e9ab086..85f01e5 100644
--- a/lib/netdev-rte-offloads.c
+++ b/lib/netdev-rte-offloads.c
@@ -22,6 +22,7 @@ 
 #include "cmap.h"
 #include "dpif-netdev.h"
 #include "netdev-provider.h"
+#include "netdev-vport.h"
 #include "openvswitch/match.h"
 #include "openvswitch/vlog.h"
 #include "packets.h"
@@ -731,3 +732,49 @@  netdev_rte_offloads_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
 
     return netdev_rte_offloads_destroy_flow(netdev, ufid, rte_flow);
 }
+
+/*
+ * Vport netdev flow pointers are initialized by default to kernel calls.
+ * They should be nullified or be set to a valid netdev (userspace) calls.
+ */
+#define NULLIFY(f) (ndc->f = NULL)
+static void
+netdev_rte_offloads_vxlan_init(struct netdev *netdev)
+{
+    /*
+     * Clone default function pointers some
+     * of which may be kernel flow pointers.
+     */
+    struct netdev_class *ndc = netdev_vport_dup_class_once(netdev);
+    if (!ndc) {
+        VLOG_DBG("Vport netdev_class offload api cannot be updated.");
+        return;
+    }
+
+    /* Override kernel flow pointers. */
+    NULLIFY(flow_put);
+    NULLIFY(flow_flush);
+    NULLIFY(flow_dump_create);
+    NULLIFY(flow_dump_destroy);
+    NULLIFY(flow_dump_next);
+    NULLIFY(flow_put);
+    NULLIFY(flow_get);
+    NULLIFY(flow_del);
+    NULLIFY(init_flow_api);
+
+    netdev_vport_update_class(netdev, ndc);
+}
+
+/*
+ * This function is called as part of adding a new dpif netdev port.
+ * In case of vport class of "vxlan" type we update it to match netdev
+ * datapath apis.
+ */
+void
+netdev_rte_offloads_port_add(struct netdev *netdev)
+{
+    const char *type = netdev_get_type(netdev);
+    if (!strcmp("vxlan", type)) {
+        netdev_rte_offloads_vxlan_init(netdev);
+    }
+}
diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h
index 18c8a75..7fbe414 100644
--- a/lib/netdev-rte-offloads.h
+++ b/lib/netdev-rte-offloads.h
@@ -50,6 +50,11 @@  int netdev_rte_offloads_flow_put(struct netdev *netdev, struct match *match,
 int netdev_rte_offloads_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
                                  struct dpif_flow_stats *stats);
 
+/*
+ * Called by dpif netdev when a port is added
+ */
+void netdev_rte_offloads_port_add(struct netdev *netdev);
+
 #define DPDK_FLOW_OFFLOAD_API                   \
     .flow_put = netdev_rte_offloads_flow_put,   \
     .flow_del = netdev_rte_offloads_flow_del
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 808a43f..e27bc5e 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -1131,90 +1131,95 @@  netdev_vport_get_ifindex(const struct netdev *netdev_)
     .get_tunnel_config = get_netdev_tunnel_config,  \
     .get_status = tunnel_get_status
 
+/* The name of the dpif_port should be short enough to accomodate adding
+ * a port number to the end if one is necessary. */
+static struct vport_class vport_classes[] = {
+    { "genev_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "geneve",
+          .build_header = netdev_geneve_build_header,
+          .push_header = netdev_tnl_push_udp_header,
+          .pop_header = netdev_geneve_pop_header,
+          .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "gre_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "gre",
+          .build_header = netdev_gre_build_header,
+          .push_header = netdev_gre_push_header,
+          .pop_header = netdev_gre_pop_header,
+          .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "vxlan_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "vxlan",
+          .build_header = netdev_vxlan_build_header,
+          .push_header = netdev_tnl_push_udp_header,
+          .pop_header = netdev_vxlan_pop_header,
+          .get_ifindex = NETDEV_VPORT_GET_IFINDEX
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "lisp_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "lisp"
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "stt_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "stt"
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "erspan_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "erspan",
+          .build_header = netdev_erspan_build_header,
+          .push_header = netdev_erspan_push_header,
+          .pop_header = netdev_erspan_pop_header
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "ip6erspan_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "ip6erspan",
+          .build_header = netdev_erspan_build_header,
+          .push_header = netdev_erspan_push_header,
+          .pop_header = netdev_erspan_pop_header
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+    { "ip6gre_sys",
+      {
+          TUNNEL_FUNCTIONS_COMMON,
+          .type = "ip6gre",
+          .build_header = netdev_gre_build_header,
+          .push_header = netdev_gre_push_header,
+          .pop_header = netdev_gre_pop_header
+      },
+      {{NULL, NULL, 0, 0}}
+    },
+};
+
+#define NUM_VPORT_CLASSES (sizeof vport_classes / sizeof vport_classes[0])
+
+static struct vport_class dup_vport_classes[NUM_VPORT_CLASSES];
+
 void
 netdev_vport_tunnel_register(void)
 {
-    /* The name of the dpif_port should be short enough to accomodate adding
-     * a port number to the end if one is necessary. */
-    static struct vport_class vport_classes[] = {
-        { "genev_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "geneve",
-              .build_header = netdev_geneve_build_header,
-              .push_header = netdev_tnl_push_udp_header,
-              .pop_header = netdev_geneve_pop_header,
-              .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "gre_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "gre",
-              .build_header = netdev_gre_build_header,
-              .push_header = netdev_gre_push_header,
-              .pop_header = netdev_gre_pop_header,
-              .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "vxlan_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "vxlan",
-              .build_header = netdev_vxlan_build_header,
-              .push_header = netdev_tnl_push_udp_header,
-              .pop_header = netdev_vxlan_pop_header,
-              .get_ifindex = NETDEV_VPORT_GET_IFINDEX
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "lisp_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "lisp"
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "stt_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "stt"
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "erspan_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "erspan",
-              .build_header = netdev_erspan_build_header,
-              .push_header = netdev_erspan_push_header,
-              .pop_header = netdev_erspan_pop_header
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "ip6erspan_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "ip6erspan",
-              .build_header = netdev_erspan_build_header,
-              .push_header = netdev_erspan_push_header,
-              .pop_header = netdev_erspan_pop_header
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-        { "ip6gre_sys",
-          {
-              TUNNEL_FUNCTIONS_COMMON,
-              .type = "ip6gre",
-              .build_header = netdev_gre_build_header,
-              .push_header = netdev_gre_push_header,
-              .pop_header = netdev_gre_pop_header
-          },
-          {{NULL, NULL, 0, 0}}
-        },
-    };
     static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 
     if (ovsthread_once_start(&once)) {
@@ -1225,6 +1230,8 @@  netdev_vport_tunnel_register(void)
             netdev_register_provider(&vport_classes[i].netdev_class);
         }
 
+        memset(dup_vport_classes, 0, sizeof dup_vport_classes);
+
         unixctl_command_register("tnl/egress_port_range", "min max", 0, 2,
                                  netdev_tnl_egress_port_range, NULL);
 
@@ -1247,3 +1254,45 @@  netdev_vport_patch_register(void)
     simap_init(&patch_class.global_cfg_tracker);
     netdev_register_provider(&patch_class.netdev_class);
 }
+
+/*
+ * This functions receives a netdev pointer which contains default
+ * netdev_class fields (such as kernel flow functions pointers).
+ * It returns a duplicated netdev_class that can be overriden by the caller
+ */
+struct netdev_class *
+netdev_vport_dup_class_once(struct netdev *netdev)
+{
+    unsigned int i;
+    struct vport_class *vpclass;
+    struct netdev_class *ndclass = NULL;
+
+    if (!is_vport_class(netdev->netdev_class)) {
+        goto out;
+    }
+
+    /* Get the vport_class which contains the netdev_class. */
+    vpclass = vport_class_cast(netdev_get_class(netdev));
+    /* Calculate vpclass entry index in the array 'vport_classes' */
+    i = vpclass - &vport_classes[0];
+    if (i >= NUM_VPORT_CLASSES) {
+        goto out;
+    }
+    /* Entry duplication should be done only once */
+    if (!dup_vport_classes[i].dpif_port) {
+        memcpy(&dup_vport_classes[i], vpclass, sizeof *vpclass);
+    }
+    /* Get the duplicated netdev_class pointer */
+    ndclass = &dup_vport_classes[i].netdev_class;
+out:
+    return ndclass;
+}
+
+void
+netdev_vport_update_class(struct netdev *netdev, struct netdev_class *ndclass)
+{
+    if (is_vport_class(netdev->netdev_class)) {
+        netdev->netdev_class = ndclass;
+    }
+}
+
diff --git a/lib/netdev-vport.h b/lib/netdev-vport.h
index 9d756a2..d100f76 100644
--- a/lib/netdev-vport.h
+++ b/lib/netdev-vport.h
@@ -42,6 +42,10 @@  void netdev_vport_inc_tx(const struct netdev *,
 bool netdev_vport_is_vport_class(const struct netdev_class *);
 const char *netdev_vport_class_get_dpif_port(const struct netdev_class *);
 
+struct netdev_class *netdev_vport_dup_class_once(struct netdev *netdev);
+void netdev_vport_update_class(struct netdev *netdev,
+                               struct netdev_class *ndclass);
+
 #ifndef _WIN32
 enum { NETDEV_VPORT_NAME_BUFSIZE = 16 };
 #else