diff mbox series

[ovs-dev,v3] ovs-tcpdump: Improve performance with dummy interface

Message ID 0f8c9cf19942e0e28682f729676544b222f6317c.camel@redhat.com
State Superseded
Headers show
Series [ovs-dev,v3] ovs-tcpdump: Improve performance with dummy interface | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Mike Pattrick Oct. 27, 2021, 3:48 p.m. UTC
Currently the ovs-tcpdump utility creates a virtual tunnel to send
packets to. This method functions perfectly fine, however, it can
greatly impact performance of the monitored port.

It has been reported to reduce packet throughput significantly. I was
able to reproduce a reduction in throughput of up 70 percent in some
tests with a simple setup of two hosts communicating through a single
bridge on Linux with the kernel module datapath. Another more complex
test was configured for DPDK and the usermode datapath. This test 
involved a data path going from a VM, through a port into one OVS
bridge, out through a network card which could be DPDK enabled for the
relevant tests, in to a different network interface, then into a 
different OVS bridge, through another port, and then into a virtual
machine.

Using the dummy driver resulted in the following impact to performance
compared to no ovs-tcpdump. Due to intra-test variance and fluctuations
during the first few seconds after installing a tap; multiple samples
were taken over multiple test runs. The first few seconds worth of 
results were discarded and then results were averaged out.

Original Script
===============
Category             Impact on Throughput
Kernel datapath    - 65%
Usermode datapath  - 26%
DPDK datapath      - 37%

New Script
==========
Category             Impact on Throughput
Kernel datapath    - 5%
Usermode datapath  - 16%
DPDK datapath      - 29%

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 utilities/ovs-tcpdump.in | 42 +++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

Comments

Aaron Conole Oct. 27, 2021, 5:48 p.m. UTC | #1
Mike Pattrick <mkp@redhat.com> writes:

> Currently the ovs-tcpdump utility creates a virtual tunnel to send
> packets to. This method functions perfectly fine, however, it can
> greatly impact performance of the monitored port.
>
> It has been reported to reduce packet throughput significantly. I was
> able to reproduce a reduction in throughput of up 70 percent in some
> tests with a simple setup of two hosts communicating through a single
> bridge on Linux with the kernel module datapath. Another more complex
> test was configured for DPDK and the usermode datapath. This test 
> involved a data path going from a VM, through a port into one OVS
> bridge, out through a network card which could be DPDK enabled for the
> relevant tests, in to a different network interface, then into a 
> different OVS bridge, through another port, and then into a virtual
> machine.
>
> Using the dummy driver resulted in the following impact to performance
> compared to no ovs-tcpdump. Due to intra-test variance and fluctuations
> during the first few seconds after installing a tap; multiple samples
> were taken over multiple test runs. The first few seconds worth of 
> results were discarded and then results were averaged out.
>
> Original Script
> ===============
> Category             Impact on Throughput
> Kernel datapath    - 65%
> Usermode datapath  - 26%
> DPDK datapath      - 37%
>
> New Script
> ==========
> Category             Impact on Throughput
> Kernel datapath    - 5%
> Usermode datapath  - 16%
> DPDK datapath      - 29%
>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---

Looks good to me.  Thanks for preserving the port teardown as well.

Acked-by: Aaron Conole <aconole@redhat.com>

>  utilities/ovs-tcpdump.in | 42 +++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 20 deletions(-)
>
> diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
> index 5ec02383c..10989dfc1 100755
> --- a/utilities/ovs-tcpdump.in
> +++ b/utilities/ovs-tcpdump.in
> @@ -14,12 +14,9 @@
>  # See the License for the specific language governing permissions and
>  # limitations under the License.
>  
> -import fcntl
> -
>  import os
>  import pwd
>  from random import randint
> -import struct
>  import subprocess
>  import sys
>  import time
> @@ -52,8 +49,8 @@ except Exception:
>      print("       the correct location.")
>      sys.exit(1)
>  
> -tapdev_fd = None
>  _make_taps = {}
> +_del_taps = {}
>  _make_mirror_name = {}
>  IFNAMSIZ_LINUX = 15      # this is the max name size, excluding the null byte.
>  
> @@ -67,21 +64,11 @@ def _doexec(*args, **kwargs):
>      return proc
>  
>  
> -def _install_tap_linux(tap_name, mtu_value=None):
> -    """Uses /dev/net/tun to create a tap device"""
> -    global tapdev_fd
> -
> -    IFF_TAP = 0x0002
> -    IFF_NO_PI = 0x1000
> -    TUNSETIFF = 0x400454CA  # This is derived by printf() of TUNSETIFF
> -    TUNSETOWNER = TUNSETIFF + 2
> -
> -    tapdev_fd = os.open('/dev/net/tun', os.O_RDWR)
> -    ifr = struct.pack('16sH', tap_name.encode('utf8'), IFF_TAP | IFF_NO_PI)
> -    fcntl.ioctl(tapdev_fd, TUNSETIFF, ifr)
> -    fcntl.ioctl(tapdev_fd, TUNSETOWNER, os.getegid())
> +def _install_dst_if_linux(tap_name, mtu_value=None):
> +    _doexec(
> +        *['ip', 'link', 'add', str(tap_name), 'type', 'dummy']
> +        ).wait()
>  
> -    time.sleep(1)  # required to give the new device settling time
>      if mtu_value is not None:
>          pipe = _doexec(
>              *(['ip', 'link', 'set', 'dev', str(tap_name), 'mtu',
> @@ -93,14 +80,22 @@ def _install_tap_linux(tap_name, mtu_value=None):
>      pipe.wait()
>  
>  
> +def _remove_dst_if_linux(tap_name):
> +    _doexec(
> +        *['ip', 'link', 'del', str(tap_name)]
> +        ).wait()
> +
> +
>  def _make_linux_mirror_name(interface_name):
>      if len(interface_name) > IFNAMSIZ_LINUX - 2:
>          return "ovsmi%06d" % randint(1, 999999)
>      return "mi%s" % interface_name
>  
>  
> -_make_taps['linux'] = _install_tap_linux
> -_make_taps['linux2'] = _install_tap_linux
> +_make_taps['linux'] = _install_dst_if_linux
> +_make_taps['linux2'] = _install_dst_if_linux
> +_del_taps['linux'] = _remove_dst_if_linux
> +_del_taps['linux2'] = _remove_dst_if_linux
>  _make_mirror_name['linux'] = _make_linux_mirror_name
>  _make_mirror_name['linux2'] = _make_linux_mirror_name
>  
> @@ -455,6 +450,9 @@ def main():
>         mirror_interface not in interfaces():
>          _make_taps[sys.platform](mirror_interface,
>                                   ovsdb.interface_mtu(interface))
> +        tap_created = True
> +    else:
> +        tap_created = False
>  
>      if mirror_interface not in interfaces():
>          print("ERROR: Please create an interface called `%s`" %
> @@ -480,6 +478,8 @@ def main():
>          print("ERROR: Unable to properly setup the mirror: %s." % str(oe))
>          try:
>              ovsdb.destroy_port(mirror_interface, ovsdb.port_bridge(interface))
> +            if tap_created is True:
> +                _del_taps[sys.platform](mirror_interface)
>          except Exception:
>              pass
>          sys.exit(1)
> @@ -498,6 +498,8 @@ def main():
>  
>          ovsdb.destroy_mirror(interface, ovsdb.port_bridge(interface))
>          ovsdb.destroy_port(mirror_interface, ovsdb.port_bridge(interface))
> +        if tap_created is True:
> +            _del_taps[sys.platform](mirror_interface)
>      except Exception:
>          print("Unable to tear down the ports and mirrors.")
>          print("Please use ovs-vsctl to remove the ports and mirrors created.")
Gaetan Rivet Nov. 1, 2021, 9:16 p.m. UTC | #2
On Wed, Oct 27, 2021, at 17:48, Mike Pattrick wrote:
> Currently the ovs-tcpdump utility creates a virtual tunnel to send
> packets to. This method functions perfectly fine, however, it can
> greatly impact performance of the monitored port.
>
> It has been reported to reduce packet throughput significantly. I was
> able to reproduce a reduction in throughput of up 70 percent in some
> tests with a simple setup of two hosts communicating through a single
> bridge on Linux with the kernel module datapath. Another more complex
> test was configured for DPDK and the usermode datapath. This test 
> involved a data path going from a VM, through a port into one OVS
> bridge, out through a network card which could be DPDK enabled for the
> relevant tests, in to a different network interface, then into a 
> different OVS bridge, through another port, and then into a virtual
> machine.
>
> Using the dummy driver resulted in the following impact to performance
> compared to no ovs-tcpdump. Due to intra-test variance and fluctuations
> during the first few seconds after installing a tap; multiple samples
> were taken over multiple test runs. The first few seconds worth of 
> results were discarded and then results were averaged out.
>
> Original Script
> ===============
> Category             Impact on Throughput
> Kernel datapath    - 65%
> Usermode datapath  - 26%
> DPDK datapath      - 37%
>
> New Script
> ==========
> Category             Impact on Throughput
> Kernel datapath    - 5%
> Usermode datapath  - 16%
> DPDK datapath      - 29%
>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

The code looks good to me and the performance improvements are welcome, thanks.
Acked-by: Gaetan Rivet <grive@u256.net>
Ilya Maximets Nov. 3, 2021, 3:53 p.m. UTC | #3
On 10/27/21 17:48, Mike Pattrick wrote:
> Currently the ovs-tcpdump utility creates a virtual tunnel to send
> packets to. This method functions perfectly fine, however, it can
> greatly impact performance of the monitored port.
> 
> It has been reported to reduce packet throughput significantly. I was
> able to reproduce a reduction in throughput of up 70 percent in some
> tests with a simple setup of two hosts communicating through a single
> bridge on Linux with the kernel module datapath. Another more complex
> test was configured for DPDK and the usermode datapath. This test 
> involved a data path going from a VM, through a port into one OVS
> bridge, out through a network card which could be DPDK enabled for the
> relevant tests, in to a different network interface, then into a 
> different OVS bridge, through another port, and then into a virtual
> machine.
> 
> Using the dummy driver resulted in the following impact to performance
> compared to no ovs-tcpdump. Due to intra-test variance and fluctuations
> during the first few seconds after installing a tap; multiple samples
> were taken over multiple test runs. The first few seconds worth of 
> results were discarded and then results were averaged out.
> 
> Original Script
> ===============
> Category             Impact on Throughput
> Kernel datapath    - 65%
> Usermode datapath  - 26%
> DPDK datapath      - 37%
> 
> New Script
> ==========
> Category             Impact on Throughput
> Kernel datapath    - 5%
> Usermode datapath  - 16%
> DPDK datapath      - 29%
> 
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---
>  utilities/ovs-tcpdump.in | 42 +++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 20 deletions(-)
> 

Hi, Mike.  Thanks for working on this!

I didn't test myself, but numbers in the commit message looks impressive.

The performance impact on kernel datapath surprised me though.  Do you
know why there is so big difference?  I'd expect this kind of performance
drop for userspace datapath, but not for a kernel.

One comment to the commit message is: Please don't use the term 'DPDK datapath'.
There is no such thing.  It's userspace datapath with dpdk ports.  Usage of
the 'DPDK datapth' term creates a lot of confusion for users.

And for the implementation:
'dummy' module is mostly used for kernel testing and it's not typically loaded
by default on various systems (I don't have it loaded on any of my systems).
So, IIUC, ovs-tcpdump will fail by default.  At least we should print some
meaningful error message in this case.  Another option is to emit a warning
and fall back to usual tap interface.

What do you think?  Aaron?  Gaetan?

It's also a bit weird that code uses 'tap' in the names of variables, but
not a big deal, I guess.

Best regards, Ilya Maximets.
Mike Pattrick Nov. 3, 2021, 4:48 p.m. UTC | #4
On Wed, 2021-11-03 at 16:53 +0100, Ilya Maximets wrote:
> On 10/27/21 17:48, Mike Pattrick wrote:
> > Currently the ovs-tcpdump utility creates a virtual tunnel to send
> > packets to. This method functions perfectly fine, however, it can
> > greatly impact performance of the monitored port.
> > 
> > It has been reported to reduce packet throughput significantly. I was
> > able to reproduce a reduction in throughput of up 70 percent in some
> > tests with a simple setup of two hosts communicating through a single
> > bridge on Linux with the kernel module datapath. Another more complex
> > test was configured for DPDK and the usermode datapath. This test 
> > involved a data path going from a VM, through a port into one OVS
> > bridge, out through a network card which could be DPDK enabled for the
> > relevant tests, in to a different network interface, then into a 
> > different OVS bridge, through another port, and then into a virtual
> > machine.
> > 
> > Using the dummy driver resulted in the following impact to performance
> > compared to no ovs-tcpdump. Due to intra-test variance and fluctuations
> > during the first few seconds after installing a tap; multiple samples
> > were taken over multiple test runs. The first few seconds worth of 
> > results were discarded and then results were averaged out.
> > 
> > Original Script
> > ===============
> > Category             Impact on Throughput
> > Kernel datapath    - 65%
> > Usermode datapath  - 26%
> > DPDK datapath      - 37%
> > 
> > New Script
> > ==========
> > Category             Impact on Throughput
> > Kernel datapath    - 5%
> > Usermode datapath  - 16%
> > DPDK datapath      - 29%
> > 
> > Signed-off-by: Mike Pattrick <mkp@redhat.com>
> > ---
> >  utilities/ovs-tcpdump.in | 42 +++++++++++++++++++++-------------------
> >  1 file changed, 22 insertions(+), 20 deletions(-)
> > 
> 
> Hi, Mike.  Thanks for working on this!
> 
> I didn't test myself, but numbers in the commit message looks impressive.
> 
> The performance impact on kernel datapath surprised me though.  Do you
> know why there is so big difference?  I'd expect this kind of performance
> drop for userspace datapath, but not for a kernel.

I didn't investigate this thoroughly, just reproduced it a few times on different
distributions. If there's some interest I could deep dive into this.

> One comment to the commit message is: Please don't use the term 'DPDK datapath'.
> There is no such thing.  It's userspace datapath with dpdk ports.  Usage of
> the 'DPDK datapth' term creates a lot of confusion for users.

Noted, I will be more careful about this nomenclature in future.

> And for the implementation:
> 'dummy' module is mostly used for kernel testing and it's not typically loaded
> by default on various systems (I don't have it loaded on any of my systems).
> So, IIUC, ovs-tcpdump will fail by default.  At least we should print some
> meaningful error message in this case.  Another option is to emit a warning
> and fall back to usual tap interface.

That's a good point. We could fall back onto a variety of devices if dummy is
missing. Out of curiosity, what distros don't include dummy?

> What do you think?  Aaron?  Gaetan?
> 
> It's also a bit weird that code uses 'tap' in the names of variables, but
> not a big deal, I guess.

Mixed feelings about the use of "tap". I changed a few locations, but I believe
tap is widely referenced in the networking world as "test access point". At least
Gigamon and Cisco use "tap" in reference to passive packet capture. This is a
stylistic thing, but using tap in this context really feels natural to me.

> 
> Best regards, Ilya Maximets.
>
diff mbox series

Patch

diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
index 5ec02383c..10989dfc1 100755
--- a/utilities/ovs-tcpdump.in
+++ b/utilities/ovs-tcpdump.in
@@ -14,12 +14,9 @@ 
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import fcntl
-
 import os
 import pwd
 from random import randint
-import struct
 import subprocess
 import sys
 import time
@@ -52,8 +49,8 @@  except Exception:
     print("       the correct location.")
     sys.exit(1)
 
-tapdev_fd = None
 _make_taps = {}
+_del_taps = {}
 _make_mirror_name = {}
 IFNAMSIZ_LINUX = 15      # this is the max name size, excluding the null byte.
 
@@ -67,21 +64,11 @@  def _doexec(*args, **kwargs):
     return proc
 
 
-def _install_tap_linux(tap_name, mtu_value=None):
-    """Uses /dev/net/tun to create a tap device"""
-    global tapdev_fd
-
-    IFF_TAP = 0x0002
-    IFF_NO_PI = 0x1000
-    TUNSETIFF = 0x400454CA  # This is derived by printf() of TUNSETIFF
-    TUNSETOWNER = TUNSETIFF + 2
-
-    tapdev_fd = os.open('/dev/net/tun', os.O_RDWR)
-    ifr = struct.pack('16sH', tap_name.encode('utf8'), IFF_TAP | IFF_NO_PI)
-    fcntl.ioctl(tapdev_fd, TUNSETIFF, ifr)
-    fcntl.ioctl(tapdev_fd, TUNSETOWNER, os.getegid())
+def _install_dst_if_linux(tap_name, mtu_value=None):
+    _doexec(
+        *['ip', 'link', 'add', str(tap_name), 'type', 'dummy']
+        ).wait()
 
-    time.sleep(1)  # required to give the new device settling time
     if mtu_value is not None:
         pipe = _doexec(
             *(['ip', 'link', 'set', 'dev', str(tap_name), 'mtu',
@@ -93,14 +80,22 @@  def _install_tap_linux(tap_name, mtu_value=None):
     pipe.wait()
 
 
+def _remove_dst_if_linux(tap_name):
+    _doexec(
+        *['ip', 'link', 'del', str(tap_name)]
+        ).wait()
+
+
 def _make_linux_mirror_name(interface_name):
     if len(interface_name) > IFNAMSIZ_LINUX - 2:
         return "ovsmi%06d" % randint(1, 999999)
     return "mi%s" % interface_name
 
 
-_make_taps['linux'] = _install_tap_linux
-_make_taps['linux2'] = _install_tap_linux
+_make_taps['linux'] = _install_dst_if_linux
+_make_taps['linux2'] = _install_dst_if_linux
+_del_taps['linux'] = _remove_dst_if_linux
+_del_taps['linux2'] = _remove_dst_if_linux
 _make_mirror_name['linux'] = _make_linux_mirror_name
 _make_mirror_name['linux2'] = _make_linux_mirror_name
 
@@ -455,6 +450,9 @@  def main():
        mirror_interface not in interfaces():
         _make_taps[sys.platform](mirror_interface,
                                  ovsdb.interface_mtu(interface))
+        tap_created = True
+    else:
+        tap_created = False
 
     if mirror_interface not in interfaces():
         print("ERROR: Please create an interface called `%s`" %
@@ -480,6 +478,8 @@  def main():
         print("ERROR: Unable to properly setup the mirror: %s." % str(oe))
         try:
             ovsdb.destroy_port(mirror_interface, ovsdb.port_bridge(interface))
+            if tap_created is True:
+                _del_taps[sys.platform](mirror_interface)
         except Exception:
             pass
         sys.exit(1)
@@ -498,6 +498,8 @@  def main():
 
         ovsdb.destroy_mirror(interface, ovsdb.port_bridge(interface))
         ovsdb.destroy_port(mirror_interface, ovsdb.port_bridge(interface))
+        if tap_created is True:
+            _del_taps[sys.platform](mirror_interface)
     except Exception:
         print("Unable to tear down the ports and mirrors.")
         print("Please use ovs-vsctl to remove the ports and mirrors created.")