
[ovs-dev,v2] netdev-offload-dpdk: Support QinQ offload.

Message ID 20241223120712.1907-1-allen.chen@jaguarmicro.com
State Rejected
Delegated to: Kevin Traynor

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Allen Chen Dec. 23, 2024, 12:07 p.m. UTC
Support QinQ offload

Signed-off-by: Allen Chen <allen.chen@jaguarmicro.com>

---
/*step 1*/ Add an OpenFlow rule. Note that normalization drops the nw_dst match because the rule does not also match on an IP dl_type:
[root@localhost ~]# ovs-ofctl add-flow br-jmnd in_port=dpdk0,dl_dst=00:00:00:13:40:20,nw_dst=192.168.0.10,action=pop_vlan,output:dpdk1
2018-06-23T02:26:05Z|00001|ofp_match|INFO|normalization changed ofp_match, details:
2018-06-23T02:26:05Z|00002|ofp_match|INFO| pre: in_port=1,dl_dst=00:00:00:13:40:20,nw_dst=192.168.0.10
2018-06-23T02:26:05Z|00003|ofp_match|INFO|post: in_port=1,dl_dst=00:00:00:13:40:20

/*step 2*/ Dump the OpenFlow table:
[root@localhost ~]# ovs-ofctl dump-flows br-jmnd
 cookie=0x0, duration=9.404s, table=0, n_packets=0, n_bytes=0, in_port=dpdk0,dl_dst=00:00:00:13:40:20 actions=strip_vlan,output:dpdk1

/*step 3*/ Enable debug logging for the offload module and raise the VLAN match limit so OVS matches both tags:
ovs-appctl vlog/set netdev_offload_dpdk:dbg
ovs-vsctl set Open_vSwitch . other_config:vlan-limit=2

/*step 4*/ Send a double-tagged packet (eth/vlan/vlan/ipv4).

/*step 5*/ GDB session showing the rte_flow pattern and actions passed to the PMD:
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
0x0000ffff882ba8dc in poll () from /lib64/libc.so.6
(gdb) b jmnd_flow_add
Breakpoint 1 at 0xbdbc8c: file ../drivers/net/jmnd/jmnd_flow/jmnd_flow.c, line 629.
(gdb) c
Continuing.
[Switching to Thread 0xffff8228e500 (LWP 981115)]
Thread 10 "hw_offload5" hit Breakpoint 1, jmnd_flow_add (dev=0x6293740 <rte_eth_devices>, attr=0xffff8228d748, patterns=0xfffe68001ee0, actions=0xfffe680034c0, flow=0x10037bac0,
    error=0xffff8228d6e8) at ../drivers/net/jmnd/jmnd_flow/jmnd_flow.c:629
629     ../drivers/net/jmnd/jmnd_flow/jmnd_flow.c: No such file or directory.
(gdb) p patterns[0]
$1 = {type = RTE_FLOW_ITEM_TYPE_ETH, spec = 0xfffe68001a50, last = 0x0, mask = 0xfffe68002570}
(gdb) p patterns[1]
$2 = {type = RTE_FLOW_ITEM_TYPE_VLAN, spec = 0xfffe680020e0, last = 0x0, mask = 0xfffe68001b80}
(gdb) p patterns[2]
$3 = {type = RTE_FLOW_ITEM_TYPE_VLAN, spec = 0xfffe68001ba0, last = 0x0, mask = 0xfffe68001de0}
(gdb) p patterns[3]
$4 = {type = RTE_FLOW_ITEM_TYPE_IPV4, spec = 0xfffe68001e00, last = 0x0, mask = 0xfffe680029e0}
(gdb) p patterns[4]
$5 = {type = RTE_FLOW_ITEM_TYPE_END, spec = 0x0, last = 0x0, mask = 0x0}
(gdb) p/x *(struct rte_flow_item_eth *)0xfffe68001a50
$6 = {{{dst = {addr_bytes = {0x0, 0x0, 0x0, 0x13, 0x40, 0x20}}, src = {addr_bytes = {0x0, 0x1, 0x0, 0x0, 0x0, 0x2}}, type = 0x81}, hdr = {dst_addr = {addr_bytes = {0x0, 0x0, 0x0, 0x13, 0x40,
          0x20}}, src_addr = {addr_bytes = {0x0, 0x1, 0x0, 0x0, 0x0, 0x2}}, ether_type = 0x81}}, has_vlan = 0x1, reserved = 0x0}
(gdb) p/x *(struct rte_flow_item_eth *)0xfffe68002570
$7 = {{{dst = {addr_bytes = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, src = {addr_bytes = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, type = 0xffff}, hdr = {dst_addr = {addr_bytes = {0xff, 0xff, 0xff, 0xff,
          0xff, 0xff}}, src_addr = {addr_bytes = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, ether_type = 0xffff}}, has_vlan = 0x1, reserved = 0x0}
(gdb) p/x *(struct rte_flow_item_vlan *)0xfffe680020e0
$8 = {{{tci = 0xe903, inner_type = 0x81}, hdr = {vlan_tci = 0xe903, eth_proto = 0x81}}, has_more_vlan = 0x1, reserved = 0x0}
(gdb) p/x *(struct rte_flow_item_vlan *)0xfffe68001b80
$9 = {{{tci = 0xffef, inner_type = 0xffff}, hdr = {vlan_tci = 0xffef, eth_proto = 0xffff}}, has_more_vlan = 0x1, reserved = 0x0}
(gdb) p/x *(struct rte_flow_item_vlan *)0xfffe68001ba0
$10 = {{{tci = 0xd207, inner_type = 0x8}, hdr = {vlan_tci = 0xd207, eth_proto = 0x8}}, has_more_vlan = 0x0, reserved = 0x0}
(gdb) p/x *(struct rte_flow_item_vlan *)0xfffe68001de0
$11 = {{{tci = 0xffef, inner_type = 0xffff}, hdr = {vlan_tci = 0xffef, eth_proto = 0xffff}}, has_more_vlan = 0x0, reserved = 0x0}
(gdb) p/x *(struct rte_flow_item_ipv4 *)0xfffe68001e00
$12 = {hdr = {{version_ihl = 0x0, {ihl = 0x0, version = 0x0}}, type_of_service = 0x0, total_length = 0x0, packet_id = 0x0, fragment_offset = 0x0, time_to_live = 0x80, next_proto_id = 0xfd,
    hdr_checksum = 0x0, src_addr = 0x200a8c0, dst_addr = 0xa00a8c0}}
(gdb) p/x *(struct rte_flow_item_ipv4 *)0xfffe680029e0
$13 = {hdr = {{version_ihl = 0x0, {ihl = 0x0, version = 0x0}}, type_of_service = 0x0, total_length = 0x0, packet_id = 0x0, fragment_offset = 0xff3f, time_to_live = 0x0, next_proto_id = 0x0,
    hdr_checksum = 0x0, src_addr = 0x0, dst_addr = 0x0}}
(gdb) p actions[0]
$14 = {type = RTE_FLOW_ACTION_TYPE_COUNT, conf = 0xfffe68002a00}
(gdb) p actions[1]
$15 = {type = RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, conf = 0x0}
(gdb) p actions[2]
$16 = {type = RTE_FLOW_ACTION_TYPE_PORT_ID, conf = 0xfffe68002a20}
(gdb) p actions[3]
$17 = {type = RTE_FLOW_ACTION_TYPE_END, conf = 0x0}
(gdb) bt
    at ../drivers/net/jmnd/jmnd_flow/jmnd_flow.c:629
    at ../drivers/net/jmnd/jmnd_flow/jmnd_flow.c:1781
    at lib/netdev-offload-dpdk.c:1277
    act_vars=0xffff8228d818) at lib/netdev-offload-dpdk.c:2828
    at lib/netdev-offload-dpdk.c:2873
    at lib/netdev-offload-dpdk.c:3007
    at lib/netdev-offload.c:264
(gdb)
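
For reference, the pattern handed to the PMD above can be written out as the following minimal C sketch. This is only an illustration reconstructed from the gdb dump, not the OVS code itself (the real construction is done by parse_flow_match() in lib/netdev-offload-dpdk.c, see the patch below); the helper name is made up, and the masks and actions array are omitted for brevity.

#include <stdint.h>
#include <string.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

/* Rebuild the eth / vlan / vlan / ipv4 items seen in the gdb dump.
 * The caller passes zero-initialized spec structures. */
static void
build_qinq_pattern(struct rte_flow_item items[5],
                   struct rte_flow_item_eth *eth_spec,
                   struct rte_flow_item_vlan *outer_vlan,
                   struct rte_flow_item_vlan *inner_vlan,
                   struct rte_flow_item_ipv4 *ipv4_spec)
{
    static const uint8_t dst_mac[6] = { 0x00, 0x00, 0x00, 0x13, 0x40, 0x20 };

    /* eth: dst 00:00:00:13:40:20, TPID 0x8100, at least one VLAN present. */
    memcpy(eth_spec->dst.addr_bytes, dst_mac, sizeof dst_mac);
    eth_spec->type = RTE_BE16(0x8100);
    eth_spec->has_vlan = 1;

    /* Outer VLAN: VID 1001 (tci 0x03e9), another VLAN tag follows. */
    outer_vlan->tci = RTE_BE16(0x03e9);
    outer_vlan->inner_type = RTE_BE16(0x8100);
    outer_vlan->has_more_vlan = 1;

    /* Inner VLAN: VID 2002 (tci 0x07d2), followed by IPv4. */
    inner_vlan->tci = RTE_BE16(0x07d2);
    inner_vlan->inner_type = RTE_BE16(0x0800);

    items[0] = (struct rte_flow_item) { .type = RTE_FLOW_ITEM_TYPE_ETH,
                                        .spec = eth_spec };
    items[1] = (struct rte_flow_item) { .type = RTE_FLOW_ITEM_TYPE_VLAN,
                                        .spec = outer_vlan };
    items[2] = (struct rte_flow_item) { .type = RTE_FLOW_ITEM_TYPE_VLAN,
                                        .spec = inner_vlan };
    items[3] = (struct rte_flow_item) { .type = RTE_FLOW_ITEM_TYPE_IPV4,
                                        .spec = ipv4_spec };
    items[4] = (struct rte_flow_item) { .type = RTE_FLOW_ITEM_TYPE_END };
}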

/*step 6*/ Check the offload debug log (/var/log/openvswitch/ovs-vswitchd.log):
2018-06-23T02:27:02.177Z|00028|netdev_offload_dpdk(hw_offload5)|DBG|dpdk0: rte_flow 0x10037b980   flow create 0 priority 0 group 0 transfer pattern eth dst is 00:00:00:13:40:20 type is 0x8100 has_vlan is 1 / vlan inner_type is 0x8100 tci spec 0x3e9 tci mask 0xefff / vlan inner_type is 0x800 tci spec 0x7d2 tci mask 0xefff / ipv4 fragment_offset is 0x0 / end actions count / of_pop_vlan / port_id original 0 id 1 / end
2018-06-23T02:27:02.177Z|00029|netdev_offload_dpdk(hw_offload5)|DBG|dpdk0/dpdk0: installed flow 0x10037b980 by ufid 94a50841-30e3-49e1-b2b0-81684ec56a8b
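
As a quick sanity check on the byte order (an illustrative snippet, assuming the little-endian host used in the gdb session above): the raw tci values 0xe903 and 0xd207 from the gdb dump are the big-endian forms of the tci spec 0x3e9 and 0x7d2 printed in the log line, i.e. VLAN IDs 1001 and 2002 as seen in the datapath dump in step 7.

#include <stdio.h>
#include <rte_byteorder.h>

int main(void)
{
    rte_be16_t outer = 0xe903;   /* raw outer-VLAN tci as printed by gdb */
    rte_be16_t inner = 0xd207;   /* raw inner-VLAN tci as printed by gdb */

    /* On a little-endian host rte_be_to_cpu_16() byte-swaps these to 0x03e9
     * and 0x07d2; the low 12 bits are the VLAN IDs 1001 and 2002. */
    printf("outer vid %d, inner vid %d\n",
           rte_be_to_cpu_16(outer) & 0xfff,
           rte_be_to_cpu_16(inner) & 0xfff);
    return 0;
}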

/*step 7*/ Confirm the flow is offloaded:
[root@localhost ~]# ovs-appctl dpctl/dump-flows -m | grep offloaded
ufid:0303692c-8c6e-4428-b169-1745bc313d57, mega_ufid:94a50841-30e3-49e1-b2b0-81684ec56a8b, skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(dpdk0),packet_type(ns=0,id=0),eth(src=00:01:00:00:00:02/00:00:00:00:00:00,dst=00:00:00:13:40:20),eth_type(0x8100),vlan(vid=1001,pcp=0),encap(eth_type(0x8100),vlan(vid=2002,pcp=0),encap(eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.10/0.0.0.0,proto=253/0,tos=0/0,ttl=128/0,frag=no))), packets:46, bytes:5704, dpe_packets:46, dpe_bytes:5704,used:8.926s, offloaded:yes, dp:dpdk, actions:pop_vlan,dpdk1, dp-extra-info:miniflow_bits(5,1)

/*step 8*/ Check that the received packet is single-tagged (eth/vlan/ipv4), i.e. the outer VLAN was popped by the offloaded flow.

/*test cases*/ All test cases are listed below:
1.eth.vlan.vlan.ipv4.icmp action:pop_vlan&port_id
2.eth.vlan.vlan.ipv4.tcp action:pop_vlan&port_id
3.eth.vlan.vlan.ipv4.udp action:pop_vlan&port_id
4.eth.vlan.ipv4.icmp action:pop_vlan&port_id
5.eth.vlan.ipv4.tcp action:pop_vlan&port_id
6.eth.vlan.ipv4.udp action:pop_vlan&port_id
7.eth.ipv4.icmp action:push_vlan&port_id
8.eth.ipv4.tcp action:push_vlan&port_id
9.eth.ipv4.udp action:push_vlan&port_id
10.eth.vlan.ipv4.icmp action:push_vlan&port_id
11.eth.vlan.ipv4.tcp action:push_vlan&port_id
12.eth.vlan.ipv4.udp action:push_vlan&port_id
13.eth.ipv4.udp.vxlan.eth.ipv4 action:vxlan_decap&port_id
14.eth.vlan.ipv4.udp.vxlan.eth.vlan.ipv4.udp action:vxlan_decap&pop_vlan&port_id
15.eth.ipv4.udp.vxlan.eth.vlan.ipv4.udp action:vxlan_decap&pop_vlan&port_id
16.eth.vlan.ipv4.udp.vxlan.eth.vlan.ipv4.tcp action:vxlan_decap&pop_vlan&port_id
17.eth.ipv4.udp.vxlan.eth.vlan.ipv4.tcp action:vxlan_decap&pop_vlan&port_id
18.eth.vlan.ipv4.udp.vxlan.eth.vlan.ipv4.icmp action:vxlan_decap&pop_vlan&port_id
19.eth.ipv4.udp.vxlan.eth.vlan.ipv4.icmp action:vxlan_decap&pop_vlan&port_id
20.eth.vlan.ipv4.udp.vxlan.eth.vlan.vlan.ipv4.udp action:vxlan_decap&pop_vlan&port_id
21.eth.vlan.vlan.ipv4.udp.vxlan.eth.vlan.vlan.ipv4.udp action:vxlan_decap&pop_vlan&port_id
22.eth.ipv4 action:vxlan_encap&port_id
23.eth.ipv4 action:push_vlan&vxlan_encap&port_id
24.eth.vlan.ipv4 action:push_vlan&vxlan_encap&port_id
25.eth.ipv4  action:vxlan_encap&push_vlan&port_id
26.eth.ipv4 action:push_vlan&vxlan_encap&push_vlan&port_id
27.eth.vlan.vlan.ipv4 action:pop_vlan&port_id
---
 NEWS                      |  4 ++++
 lib/netdev-offload-dpdk.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

Comments

Ilya Maximets Jan. 2, 2025, 7:40 p.m. UTC | #1
On 12/23/24 13:07, Allen Chen via dev wrote:
> Support QinQ offload
> 
> Signed-off-by: Allen Chen <allen.chen@jaguarmicro.com>
> 

Hi, Allen.  Thanks for the patch!

Could you please provide a list of cards you tested this change with?

Unfortunately, some DPDK drivers are ignoring vlan configuration, even
though they report supporting these fields in rte_flow API:
  https://bugs.dpdk.org/show_bug.cgi?id=958
And the situation doesn't improve, only gets worse over time.

So, unless this DPDK issue is fixed, we can't actually rely on 'has_vlan'
or 'has_more_vlan' fields, as drivers will just ignore them.

We added support for 'has_vlan' previously, but that was to fix a bug
that we couldn't fix otherwise.  So, it's a bug in OVS vs bug in DPDK
trade-off and also some of the most used DPDK drivers do actually support
'has_vlan'.

'has_more_vlan' is more complicated and more drivers seem to ignore it.
So, I'm not sure if we should use it, unless it is necessary to fix some
other important bug.

Best regards, Ilya Maximets.
Allen Chen Jan. 8, 2025, 9:48 a.m. UTC | #2
Hi Ilya,

I tested this change with an MLX card and with the card made by the company I work for.
I think that, as an independent virtual switch, it is important for OVS to support QinQ.
If some DPDK drivers ignore the VLAN configuration, they can remove the QinQ feature from their product feature list.
So I think the bug is in DPDK rather than in OVS. DPDK drivers can either adapt to the new OVS feature or declare, for now, that they do not support it.
Best wishes.
Kevin Traynor Jan. 14, 2025, 5:04 p.m. UTC | #3
On 08/01/2025 09:48, Allen Chen wrote:
> Hi Ilya,
> 
> I tested this change with an MLX card and with the card made by the company I work for.
> I think that, as an independent virtual switch, it is important for OVS to support QinQ.
> If some DPDK drivers ignore the VLAN configuration, they can remove the QinQ feature from their product feature list.
> So I think the bug is in DPDK rather than in OVS. DPDK drivers can either adapt to the new OVS feature or declare, for now, that they do not support it.
> 

Hi Allen,

There is no pollable feature list in the API, so the offload/validate
attempt for the match/action will be made against any DPDK driver. If a
driver correctly validates the match, it will accept or reject the rule
based on its support. That would be fine.

The issue is that only a few upstream DPDK drivers contain a reference
to the has_more_vlan flag. The concern is that drivers not checking the
presence of has_more_vlan will accept an offload and then not implement
it correctly, leading to incorrect flow rules.

Sometimes as a temporary measure for NIC config, the driver name is
checked to work around a known driver issue. However, there isn't a
facility to do this for specific rte_flow rules.

So while any issue would be with DPDK, not OVS, adding the feature would
expose any issues to OVS users. That is why there is a reluctance to
accept a patch at present.

thanks,
Kevin.
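
To illustrate the driver-side behaviour described above, here is a minimal, hypothetical sketch (not taken from any real PMD) of how a flow-validation path would be expected to reject a has_more_vlan match it cannot implement, rather than silently ignoring the bit and installing an incorrect rule:

#include <errno.h>
#include <rte_flow.h>

/* Hypothetical PMD helper called while parsing a VLAN pattern item. */
static int
example_pmd_parse_vlan(const struct rte_flow_item *item,
                       struct rte_flow_error *error)
{
    const struct rte_flow_item_vlan *mask = item->mask;

    if (mask && mask->has_more_vlan) {
        /* This imaginary hardware cannot tell single-tagged and
         * double-tagged packets apart, so the rule must be refused here. */
        return rte_flow_error_set(error, ENOTSUP,
                                  RTE_FLOW_ERROR_TYPE_ITEM, item,
                                  "has_more_vlan match not supported");
    }
    return 0;
}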


Patch

diff --git a/NEWS b/NEWS
index 6e3f56d73..22218e966 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@ 
+v3.4.0 - 11 Dec 2024
+---------------------
+    - Add support for QinQ offload
+
 Post-v3.4.0
 --------------------
    - The limit on the number of fields for address prefix tracking in flow
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 1a6e100ff..7a519cd68 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -1379,6 +1379,7 @@  parse_flow_match(struct netdev *netdev,
                  struct match *match)
 {
     struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
+    struct rte_flow_item_vlan *vlan_spec = NULL, *vlan_mask = NULL;
     struct flow *consumed_masks;
     uint8_t proto = 0;
 
@@ -1451,6 +1452,9 @@  parse_flow_match(struct netdev *netdev,
             eth_mask->type = match->wc.masks.vlans[0].tpid;
         }
 
+        vlan_spec = spec;
+        vlan_mask = mask;
+
         add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask, NULL);
     }
     /* For untagged matching match->wc.masks.vlans[0].tci is 0xFFFF and
@@ -1459,6 +1463,33 @@  parse_flow_match(struct netdev *netdev,
      */
     memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]);
 
+    /* VLAN */
+    if (match->wc.masks.vlans[1].tci && match->flow.vlans[1].tci) {
+        struct rte_flow_item_vlan *spec, *mask;
+
+        spec = xzalloc(sizeof *spec);
+        mask = xzalloc(sizeof *mask);
+
+        spec->tci = match->flow.vlans[1].tci & ~htons(VLAN_CFI);
+        mask->tci = match->wc.masks.vlans[1].tci & ~htons(VLAN_CFI);
+
+        if (vlan_spec && vlan_mask) {
+            vlan_spec->has_more_vlan = 1;
+            vlan_mask->has_more_vlan = 1;
+            spec->inner_type = vlan_spec->inner_type;
+            mask->inner_type = vlan_mask->inner_type;
+            vlan_spec->inner_type = match->flow.vlans[1].tpid;
+            vlan_mask->inner_type = match->wc.masks.vlans[1].tpid;
+        }
+
+        add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask, NULL);
+    }
+    /* For untagged matching match->wc.masks.vlans[1].tci is 0xFFFF and
+     * match->flow.vlans[1].tci is 0. Consuming is needed outside of the if
+     * scope to handle that.
+     */
+    memset(&consumed_masks->vlans[1], 0, sizeof consumed_masks->vlans[1]);
+
     /* IP v4 */
     if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
         struct rte_flow_item_ipv4 *spec, *mask, *last = NULL;