diff mbox series

[ovs-dev,v2] netdev-tc-offloads: Don't offload header modification on ip fragments.

Message ID 0352f12cfc3aca0b830f282be0151b844269fd88.1715089950.git.echaudro@redhat.com
State New
Headers show
Series [ovs-dev,v2] netdev-tc-offloads: Don't offload header modification on ip fragments. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Eelco Chaudron May 7, 2024, 1:52 p.m. UTC
While offloading header modifications to TC, OVS is using {TCA_PEDIT} +
{TCA_CSUM} combination as that it the only way to represent header
rewrite.  However, {TCA_CSUM} is unable to calculate L4 checksums for
IP fragments.

Since TC already applies fragmentation bit masking, this patch simply
needs to prevent these packets from being processed through TC.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
v2: - Fixed and added some comments.
    - Use ovs-pcap to compare packets.

NOTE: This patch needs an AVX512 fix before it can be applied.
      Intel is working on this.
---
 lib/netdev-offload-tc.c | 32 ++++++++++++++
 lib/tc.c                |  5 ++-
 tests/system-traffic.at | 93 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 129 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 921d52317..bdd307933 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1488,6 +1488,30 @@  parse_put_flow_ct_action(struct tc_flower *flower,
         return 0;
 }
 
+static bool
+will_tc_add_l4_checksum(struct tc_flower *flower, int type)
+{
+    /* This function returns true if the tc layer will add a l4 checksum action
+     * for this set action.  Refer to the csum_update_flag() function for
+     * detailed logic.  Note that even the kernel only supports updating TCP,
+     * UDP and ICMPv6. */
+    switch (type) {
+    case OVS_KEY_ATTR_IPV4:
+    case OVS_KEY_ATTR_IPV6:
+    case OVS_KEY_ATTR_TCP:
+    case OVS_KEY_ATTR_UDP:
+        switch (flower->key.ip_proto) {
+        case IPPROTO_TCP:
+        case IPPROTO_UDP:
+        case IPPROTO_ICMPV6:
+        case IPPROTO_UDPLITE:
+            return true;
+        }
+        break;
+    }
+    return false;
+}
+
 static int
 parse_put_flow_set_masked_action(struct tc_flower *flower,
                                  struct tc_action *action,
@@ -1520,6 +1544,14 @@  parse_put_flow_set_masked_action(struct tc_flower *flower,
         return EOPNOTSUPP;
     }
 
+    if (flower->key.flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
+        && will_tc_add_l4_checksum(flower, type)) {
+        VLOG_DBG_RL(&rl, "set action type %d not supported on fragments "
+                    "due to checksum limitation", type);
+        ofpbuf_uninit(&set_buf);
+        return EOPNOTSUPP;
+    }
+
     for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
         struct netlink_field *f = &set_flower_map[type][i];
 
diff --git a/lib/tc.c b/lib/tc.c
index e9bcae4e4..20472d13c 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -2958,7 +2958,10 @@  csum_update_flag(struct tc_flower *flower,
      * eth(dst=<mac>),eth_type(0x0800) actions=set(ipv4(src=<new_ip>))
      * we need to force a more specific flow as this can, for example,
      * need a recalculation of icmp checksum if the packet that passes
-     * is ICMPv6 and tcp checksum if its tcp. */
+     * is ICMPv6 and tcp checksum if its tcp.
+     *
+     * Ensure that you keep the pre-check function in netdev-offload-tc.c,
+     * will_tc_add_l4_checksum(), in sync if you make any changes here. */
 
     switch (htype) {
     case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index bd7647cbe..8f392abd5 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -8977,3 +8977,96 @@  OVS_WAIT_UNTIL([cat p2.pcap | grep -E "0x0050: *0000 *0000 *5002 *2000 *b85e *00
 
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([datapath - mod_nw_src/set_field on IP fragments])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", 36:b1:ee:7c:01:03)
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", 36:b1:ee:7c:01:02)
+
+AT_DATA([flows.txt], [dnl
+  in_port=ovs-p0,ip,nw_src=10.1.1.1 actions=mod_nw_src=11.1.1.1,ovs-p1
+  in_port=ovs-p0,ipv6,ipv6_src=fc00::1 actions=set_field:fc00::100->ipv6_src,ovs-p1
+])
+
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl -Oopenflow13 add-flows br0 flows.txt])
+
+NETNS_DAEMONIZE([at_ns1],
+                [tcpdump -l -nn -xx -U -i p1 -w p1.pcap 2> tcpdump.err],
+                [tcpdump.pid])
+OVS_WAIT_UNTIL([grep "listening" tcpdump.err])
+
+dnl IPv4 Packet content:
+dnl   Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
+dnl       Type: IPv4 (0x0800)
+dnl   Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2
+dnl       0100 .... = Version: 4
+dnl       .... 0101 = Header Length: 20 bytes (5)
+dnl       Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT)
+dnl       Total Length: 38
+dnl       Identification: 0x0001 (1)
+dnl       001. .... = Flags: 0x1, More fragments
+dnl           0... .... = Reserved bit: Not set
+dnl           .0.. .... = Don't fragment: Not set
+dnl           ..1. .... = More fragments: Set
+dnl       ...0 0000 0000 0000 = Fragment Offset: 0
+dnl       Time to Live: 64
+dnl       Protocol: UDP (17)
+dnl       Header Checksum: 0x44c2
+dnl   Data (18 bytes)
+eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 08 00"
+ip="45 00 00 26 00 01 20 00 40 11 44 c2 0a 01 01 01 0a 01 01 02"
+data="0b c4 08 84 00 26 e9 64 01 02 03 04 05 06 07 08 09 0a"
+packet="${eth} ${ip} ${data}"
+
+dnl We send each packet multiple times, ones for learning which will flow
+dnl through the used datapath for learning, and the others will go through the
+dnl actual datapath.
+for i in 1 2 3 4 5; do
+  NS_CHECK_EXEC([at_ns0],
+                [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
+done
+
+dnl Update source address, and checksums in original packet for comparison.
+packet=$(echo "$packet" | sed -e 's/ //g' \
+         -e 's/0a010101/0b010101/g' -e 's/44c2/43c2/g' -e 's/e964/e864/g')
+OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])
+
+dnl Repeat similar test with IPv6.
+dnl Packet content:
+dnl   Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
+dnl       Type: IPv6 (0x86dd)
+dnl   Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2
+dnl       Payload Length: 24
+dnl       Next Header: Fragment Header for IPv6 (44)
+dnl       Hop Limit: 64
+dnl       Fragment Header for IPv6
+dnl           Next header: UDP (17)
+dnl           Reserved octet: 0x00
+dnl           0000 0000 0000 0... = Offset: 0 (0 bytes)
+dnl           .... .... .... .00. = Reserved bits: 0
+dnl           .... .... .... ...1 = More Fragments: Yes
+dnl           Identification: 0x2316ab36
+dnl   Data (16 bytes)
+eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 86 dd"
+ip="60 00 00 00 00 18 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \
+    fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 00 01 23 16 ab 36"
+data="0b c4 08 84 00 26 07 65 01 02 03 04 05 06 07 08"
+packet="${eth} ${ip} ${data}"
+
+for i in 1 2 3 4 5; do
+  NS_CHECK_EXEC([at_ns0],
+                [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
+done
+
+dnl Update checksum and source address in original packet for comparison.
+packet=$(echo "$packet" | sed -e 's/ //g' -e 's/0765/0666/g' -e \
+       's/fc000000000000000000000000000001/fc000000000000000000000000000100/g')
+OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP