diff mbox series

[ovs-dev,v5,3/3] userspace: Add new option srv6_flowlabel in SRv6 tunnel.

Message ID 20230523035823.32640-4-nmiki@yahoo-corp.jp
State Accepted
Commit 701c2dbfb8970ec5d939e5f0fe24479716174762
Headers show
Series Support flowlabel calculation in SRv6 tunnels | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Nobuhiro MIKI May 23, 2023, 3:58 a.m. UTC
It supports flowlabel based load balancing by controlling the flowlabel
of outer IPv6 header, which is already implemented in Linux kernel as
seg6_flowlabel sysctl [1].

[1]: https://docs.kernel.org/networking/seg6-sysctl.html

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
---
 lib/netdev-native-tnl.c       | 35 ++++++++++++--
 lib/netdev-vport.c            |  8 ++++
 lib/netdev.h                  | 12 +++++
 tests/tunnel-push-pop-ipv6.at | 86 +++++++++++++++++++++++++++++++++++
 vswitchd/vswitch.xml          | 26 +++++++++++
 5 files changed, 164 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index b244e73eb9cb..0dcabac55eca 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -884,7 +884,12 @@  netdev_srv6_build_header(const struct netdev *netdev,
         goto out;
     }
 
-    srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING, 0);
+    /* Writes the netdev_srv6_flowlabel enum value to the ipv6
+     * flowlabel field. It must later be replaced by a valid value
+     * in the header push. */
+    srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING,
+                                     htonl(tnl_cfg->srv6_flowlabel));
+
     srh->rt_hdr.segments_left = nr_segs - 1;
     srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
     srh->rt_hdr.hdrlen = 2 * nr_segs;
@@ -923,10 +928,34 @@  netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
                         struct dp_packet *packet,
                         const struct ovs_action_push_tnl *data)
 {
+    struct ovs_16aligned_ip6_hdr *inner_ip6, *outer_ip6;
+    enum netdev_srv6_flowlabel srv6_flowlabel;
+    ovs_be32 ipv6_label = 0;
     int ip_tot_size;
+    uint32_t flow;
 
-    netdev_tnl_push_ip_header(packet, data->header, data->header_len,
-                              &ip_tot_size, 0);
+    inner_ip6 = dp_packet_l3(packet);
+    outer_ip6 = netdev_tnl_ipv6_hdr((void *) data->header);
+    srv6_flowlabel = ntohl(get_16aligned_be32(&outer_ip6->ip6_flow)) &
+                     IPV6_LABEL_MASK;
+
+    switch (srv6_flowlabel) {
+    case SRV6_FLOWLABEL_COPY:
+        flow = ntohl(get_16aligned_be32(&inner_ip6->ip6_flow));
+        ipv6_label = (flow >> 28) == 6 ? htonl(flow & IPV6_LABEL_MASK) : 0;
+        break;
+
+    case SRV6_FLOWLABEL_ZERO:
+        ipv6_label = 0;
+        break;
+
+    case SRV6_FLOWLABEL_COMPUTE:
+        ipv6_label = htonl(dp_packet_get_rss_hash(packet) & IPV6_LABEL_MASK);
+        break;
+    }
+
+    netdev_tnl_push_ip_header(packet, data->header,
+                              data->header_len, &ip_tot_size, ipv6_label);
 }
 
 struct dp_packet *
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 663ee8606c3b..2141621cf23e 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -792,6 +792,14 @@  set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
                               name, node->value);
                 break;
             }
+        } else if (!strcmp(node->key, "srv6_flowlabel")) {
+            if (!strcmp(node->value, "zero")) {
+                tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_ZERO;
+            } else if (!strcmp(node->value, "compute")) {
+                tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COMPUTE;
+            } else {
+                tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COPY;
+            }
         } else if (!strcmp(node->key, "payload_type")) {
             if (!strcmp(node->value, "mpls")) {
                  tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS);
diff --git a/lib/netdev.h b/lib/netdev.h
index ff207f56c28c..3d339fe12ae9 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -97,6 +97,17 @@  enum netdev_pt_mode {
     NETDEV_PT_LEGACY_L3,
 };
 
+enum netdev_srv6_flowlabel {
+    /* Copy the flowlabel of inner packet. */
+    SRV6_FLOWLABEL_COPY,
+
+    /* Simply set flowlabel to 0. */
+    SRV6_FLOWLABEL_ZERO,
+
+    /* Set flowlabel to a hash over L3/L4 fields of the inner packet. */
+    SRV6_FLOWLABEL_COMPUTE,
+};
+
 /* Configuration specific to tunnels. */
 struct netdev_tunnel_config {
     ovs_be64 in_key;
@@ -144,6 +155,7 @@  struct netdev_tunnel_config {
     uint8_t srv6_num_segs;
     #define SRV6_MAX_SEGS 6
     struct in6_addr srv6_segs[SRV6_MAX_SEGS];
+    enum netdev_srv6_flowlabel srv6_flowlabel;
 };
 
 void netdev_run(void);
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index e300fe3a0d26..eb55c88f0a9e 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -1,5 +1,91 @@ 
 AT_BANNER([tunnel_push_pop_ipv6])
 
+AT_SETUP([tunnel_push_pop_ipv6 - srv6])
+
+OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00 options:pcap=p0.pcap])
+AT_CHECK([ovs-vsctl add-br int-br1 -- set bridge int-br1 datapath_type=dummy], [0])
+AT_CHECK([ovs-vsctl add-br int-br2 -- set bridge int-br2 datapath_type=dummy], [0])
+AT_CHECK([ovs-vsctl add-br int-br3 -- set bridge int-br3 datapath_type=dummy], [0])
+AT_CHECK([ovs-vsctl add-port int-br1 t1 -- set Interface t1 type=srv6 \
+                       options:remote_ip=2001:cafe::91 ofport_request=2 \
+                       options:srv6_flowlabel=copy \
+                       ], [0])
+AT_CHECK([ovs-vsctl add-port int-br2 t2 -- set Interface t2 type=srv6 \
+                       options:remote_ip=2001:cafe::92 ofport_request=3 \
+                       options:srv6_flowlabel=zero \
+                       ], [0])
+AT_CHECK([ovs-vsctl add-port int-br3 t3 -- set Interface t3 type=srv6 \
+                       options:remote_ip=2001:cafe::93 ofport_request=4 \
+                       options:srv6_flowlabel=compute \
+                       ], [0])
+
+dnl First setup dummy interface IP address, then add the route
+dnl so that tnl-port table can get valid IP address for the device.
+AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/24], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/add 2001:cafe::0/24 br0], [0], [OK
+])
+AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::91 aa:55:aa:55:00:01], [0], [OK
+])
+AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::92 aa:55:aa:55:00:02], [0], [OK
+])
+AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::93 aa:55:aa:55:00:03], [0], [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=1])
+AT_CHECK([ovs-ofctl add-flow int-br1 action=2])
+AT_CHECK([ovs-ofctl add-flow int-br2 action=3])
+AT_CHECK([ovs-ofctl add-flow int-br3 action=4])
+
+dnl Check "srv6_flowlabel=copy".
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o 'ipv6_label=0x[[0-9a-f]]*'], [0], [dnl
+ipv6_label=0x00000
+ipv6_label=0x00000
+ipv6_label=0x00002
+ipv6_label=0x00003
+])
+
+dnl Check "srv6_flowlabel=zero".
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o 'ipv6_label=0x[[0-9a-f]]*'], [0], [dnl
+ipv6_label=0x00000
+ipv6_label=0x00000
+ipv6_label=0x00000
+ipv6_label=0x00000
+])
+
+dnl dnl Check "srv6_flowlabel=compute" for different flows.
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o 'ipv6_label=0x[[0-9a-f]]*'| sort | uniq -c | wc -l], [0], [dnl
+4
+])
+
+dnl dnl Check "srv6_flowlabel=compute" for same IPv4/TCP flow.
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=100,dst=200),tcp_flags(0x002)'])
+AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 2 | grep -o 'ipv6_label=0x[[0-9a-f]]*' | sort | uniq -c | wc -l], [0], [dnl
+1
+])
+
+dnl dnl Check "srv6_flowlabel=compute" for same IPv6/TCP flow.
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x001)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=3,proto=6,tclass=0x0,hlimit=64),tcp(src=100,dst=200),tcp_flags(0x002)'])
+AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 2 | grep -o 'ipv6_label=0x[[0-9a-f]]*' | sort | uniq -c | wc -l], [0], [dnl
+1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([tunnel_push_pop_ipv6 - ip6gre])
 
 OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00])
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index edb5eafa04c3..59c404bbbc7a 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3287,6 +3287,32 @@ 
            <ref column="options" key="remote_ip"/>.
         </p>
       </column>
+      <column name="options" key="srv6_flowlabel"
+              type='{"type": "string",
+                     "enum": ["set", ["zero", "copy", "compute"]]}'>
+        <p>
+          Optional.
+          This option controls how flowlabel in outer IPv6 header is
+          configured. It gives the benefit of IPv6 flow label based
+          load balancing, which is supported by some popular vendor
+          appliances. Like net.ipv6.seg6_flowlabel sysconfig, it is
+          one of the three values below:
+        </p>
+        <ul>
+          <li>
+            By default, or if this option is <code>copy</code>, copy the
+            flowlabel of inner IPv6 header to the flowlabel of outer IPv6
+            header. If inner header is not IPv6, it is set to 0.
+          </li>
+          <li>
+            If this option is <code>zero</code>, simply set flowlabel to 0.
+          </li>
+          <li>
+            If this option is <code>compute</code>, set flowlabel to a hash
+            over the L3/L4 fields of the inner packet.
+          </li>
+        </ul>
+      </column>
     </group>
 
     <group title="Patch Options">