diff mbox series

[ovs-dev,v1] nedev-dpdk: Fix config with dpdk net_bonding offloads.

Message ID 1712903385-24393-1-git-send-email-junwang01@cestc.cn
State Under Review
Delegated to: Ilya Maximets
Headers show
Series [ovs-dev,v1] nedev-dpdk: Fix config with dpdk net_bonding offloads. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Jun Wang April 12, 2024, 6:29 a.m. UTC
If it's a DPDK net_bonding, it may cause
offload-related configurations to take effect,
leading to offload failure.

/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd \
--system-id=test
ovs-vsctl --no-wait set open . external-ids:ovn-bridge-datapath-type\
=netdev
ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem=\
"4096,4096"
ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=\
0xff0000
ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra=\
"-a 0000:ca:00.0  -a 0000:ca:00.1 --vdev net_bonding2494589023,\
mode=4,member=0000:ca:00.0,member=0000:ca:00.1,xmit_policy=l34,\
lacp_rate=fast"
ovs-vswitchd unix:/var/run/openvswitch/db.sock -vconsole:emer \
-vsyslog:err -vfile:info --mlockall --no-chdir \
--log-file=/var/log/openvswitch/ovs-vswitchd.log \
--pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor
ovs-vsctl  add-br br-tun -- set bridge br-tun datapath_type=netdev
ovs-vsctl --may-exist add-port br-tun dpdk_tun_port -- set Interface \
dpdk_tun_port type=dpdk options:dpdk-devargs="net_bonding2494589023"

{bus_info="bus_name=vdev", driver_name=net_bonding,
 if_descr="DPDK 23.11.0 net_bonding", if_type="6",link_speed="20Gbps",
 max_hash_mac_addrs="0", max_mac_addrs="16", max_rx_pktlen="1618",
 max_rx_queues="1023", max_tx_queues="1023", max_vfs="0",
 max_vmdq_pools="0", min_rx_bufsize="0", n_rxq="4", n_txq="9",
 numa_id="0", port_no="2", rx-steering=rss, rx_csum_offload="false",
 tx_geneve_tso_offload="false", tx_ip_csum_offload="false",
 tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false",
 tx_sctp_csum_offload="false", tx_tcp_csum_offload="false",
 tx_tcp_seg_offload="false", tx_udp_csum_offload="false",
 tx_vxlan_tso_offload="false"}

Fixes: 5d11c47d3ebe ("userspace: Enable IP checksum offloading by default.")

Signed-off-by: Jun Wang <junwang01@cestc.cn>
---
 lib/netdev-dpdk.c | 75 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 30 deletions(-)

Comments

David Marchand April 12, 2024, 7:07 a.m. UTC | #1
Hello,

On Fri, Apr 12, 2024 at 8:30 AM Jun Wang <junwang01@cestc.cn> wrote:
>
> If it's a DPDK net_bonding, it may cause
> offload-related configurations to take effect,
> leading to offload failure.

I did not look at the patch for now.

What is the interest of using a net/bonding DPDK port when there is
native support of bonding in OVS?
I am not familiar with OVN setups so maybe I am missing something on this side.
Jun Wang April 12, 2024, 7:19 a.m. UTC | #2
>Hello,
>
>On Fri, Apr 12, 2024 at 8:30 AM Jun Wang <junwang01@cestc.cn> wrote:
>>
>> If it's a DPDK net_bonding, it may cause
>> offload-related configurations to take effect,
>> leading to offload failure.
>
>I did not look at the patch for now.

>What is the interest of using a net/bonding DPDK port when there is
>native support of bonding in OVS?
>I am not familiar with OVN setups so maybe I am missing something on this side.

Yes, OVS itself has bonding capability. However, there are cases where DPDK
net/bonding is used. Specifically, we have compared the performance of the 
two methods in some scenarios and found some differences. Therefore,
in some scenarios, DPDK net/bonding is chosen. However, OVS did not 
consider the issue of DPDK net/bonding when modifying checksum offload.
Ilya Maximets April 12, 2024, 5:51 p.m. UTC | #3
On 4/12/24 08:29, Jun Wang wrote:
> If it's a DPDK net_bonding, it may cause
> offload-related configurations to take effect,
> leading to offload failure.
> 
> /usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd \
> --system-id=test
> ovs-vsctl --no-wait set open . external-ids:ovn-bridge-datapath-type\
> =netdev
> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem=\
> "4096,4096"
> ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=\
> 0xff0000
> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra=\
> "-a 0000:ca:00.0  -a 0000:ca:00.1 --vdev net_bonding2494589023,\
> mode=4,member=0000:ca:00.0,member=0000:ca:00.1,xmit_policy=l34,\
> lacp_rate=fast"
> ovs-vswitchd unix:/var/run/openvswitch/db.sock -vconsole:emer \
> -vsyslog:err -vfile:info --mlockall --no-chdir \
> --log-file=/var/log/openvswitch/ovs-vswitchd.log \
> --pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor
> ovs-vsctl  add-br br-tun -- set bridge br-tun datapath_type=netdev
> ovs-vsctl --may-exist add-port br-tun dpdk_tun_port -- set Interface \
> dpdk_tun_port type=dpdk options:dpdk-devargs="net_bonding2494589023"
> 
> {bus_info="bus_name=vdev", driver_name=net_bonding,
>  if_descr="DPDK 23.11.0 net_bonding", if_type="6",link_speed="20Gbps",
>  max_hash_mac_addrs="0", max_mac_addrs="16", max_rx_pktlen="1618",
>  max_rx_queues="1023", max_tx_queues="1023", max_vfs="0",
>  max_vmdq_pools="0", min_rx_bufsize="0", n_rxq="4", n_txq="9",
>  numa_id="0", port_no="2", rx-steering=rss, rx_csum_offload="false",
>  tx_geneve_tso_offload="false", tx_ip_csum_offload="false",
>  tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false",
>  tx_sctp_csum_offload="false", tx_tcp_csum_offload="false",
>  tx_tcp_seg_offload="false", tx_udp_csum_offload="false",
>  tx_vxlan_tso_offload="false"}
> 
> Fixes: 5d11c47d3ebe ("userspace: Enable IP checksum offloading by default.")
> 
> Signed-off-by: Jun Wang <junwang01@cestc.cn>
> ---
>  lib/netdev-dpdk.c | 75 +++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 45 insertions(+), 30 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 2111f77..191c83d 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1294,15 +1294,10 @@ dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
>      dev->rx_metadata_delivery_configured = true;
>  }
>  
> -static int
> -dpdk_eth_dev_init(struct netdev_dpdk *dev)
> -    OVS_REQUIRES(dev->mutex)
> +static void
> +dpdk_eth_offload_config(struct netdev_dpdk *dev,
> +                               struct rte_eth_dev_info *info)
>  {
> -    struct rte_pktmbuf_pool_private *mbp_priv;
> -    struct rte_eth_dev_info info;
> -    struct rte_ether_addr eth_addr;
> -    int diag;
> -    int n_rxq, n_txq;
>      uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>                                       RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>                                       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
> @@ -1319,16 +1314,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dpdk_eth_dev_init_rx_metadata(dev);
>      }
>  
> -    rte_eth_dev_info_get(dev->port_id, &info);
> -
> -    if (strstr(info.driver_name, "vf") != NULL) {
> +    if (strstr(info->driver_name, "vf") != NULL) {
>          VLOG_INFO("Virtual function detected, HW_CRC_STRIP will be enabled");
>          dev->hw_ol_features |= NETDEV_RX_HW_CRC_STRIP;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_RX_HW_CRC_STRIP;
>      }
>  
> -    if ((info.rx_offload_capa & rx_chksm_offload_capa) !=
> +    if ((info->rx_offload_capa & rx_chksm_offload_capa) !=
>              rx_chksm_offload_capa) {
>          VLOG_WARN("Rx checksum offload is not supported on port "
>                    DPDK_PORT_ID_FMT, dev->port_id);
> @@ -1337,66 +1330,66 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD;
>      }
>  
> -    if (info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
> +    if (info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>          dev->hw_ol_features |= NETDEV_RX_HW_SCATTER;
>      } else {
>          /* Do not warn on lack of scatter support */
>          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>      }
>  
> -    if (!strcmp(info.driver_name, "net_tap")) {
> +    if (!strcmp(info->driver_name, "net_tap")) {
>          /* FIXME: L4 checksum offloading is broken in DPDK net/tap driver.
>           * This workaround can be removed once the fix makes it to a DPDK
>           * LTS release used by OVS. */
>          VLOG_INFO("%s: disabled Tx L4 checksum offloads for a net/tap port.",
>                    netdev_get_name(&dev->up));
> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>      }
>  
> -    if (!strcmp(info.driver_name, "net_ice")
> -        || !strcmp(info.driver_name, "net_i40e")) {
> +    if (!strcmp(info->driver_name, "net_ice")
> +        || !strcmp(info->driver_name, "net_i40e")) {
>          /* FIXME: Driver advertises the capability but doesn't seem
>           * to actually support it correctly.  Can remove this once
>           * the driver is fixed on DPDK side. */
>          VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
>                    "net/ice or net/i40e port.", netdev_get_name(&dev->up));
> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>      }
>  
> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>          dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>      }
>  
> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>          dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
>      }
>  
> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>          dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
>      }
>  
> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>          dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>      }
>  
> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>          dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>      }
>  
> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>          dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>      } else {
>          dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
> @@ -1404,21 +1397,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>  
>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>      if (userspace_tso_enabled()) {
> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>              dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
>          } else {
>              VLOG_WARN("%s: Tx TSO offload is not supported.",
>                        netdev_get_name(&dev->up));
>          }
>  
> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>              dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD;
>          } else {
>              VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.",
>                        netdev_get_name(&dev->up));
>          }
>  
> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>              dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD;
>          } else {
>              VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
> @@ -1426,6 +1419,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          }
>      }
>  
> +}
> +
> +static int
> +dpdk_eth_dev_init(struct netdev_dpdk *dev)
> +    OVS_REQUIRES(dev->mutex)
> +{
> +    struct rte_pktmbuf_pool_private *mbp_priv;
> +    struct rte_eth_dev_info info;
> +    struct rte_ether_addr eth_addr;
> +    int diag;
> +    int n_rxq, n_txq;
> +
> +    rte_eth_dev_info_get(dev->port_id, &info);
> +    dpdk_eth_offload_config(dev, &info);
> +
>      n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
>      n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
>  
> @@ -1439,6 +1447,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          return -diag;
>      }
>  
> +    rte_eth_dev_info_get(dev->port_id, &info);
> +    if (!strcmp(info.driver_name, "net_bonding")) {
> +        dpdk_eth_offload_config(dev, &info);
> +        VLOG_INFO("%s: configure offloads for a dpdk net_bonding port.",
> +                   netdev_get_name(&dev->up));
> +    }
> +

I'm a bit confused  by this.  Why do we need to check offloads again
after the initialization?  Is there a chance that bonding driver
will enable features we didn't ask for?

In general, since we don't know what kind of device is in the bonding,
we should just disable all offloads for it as long as there is a chance
to have a broken driver downstream of a bond.

Best regards, Ilya Maximets.
Jun Wang April 13, 2024, 4:56 a.m. UTC | #4
>On 4/12/24 08:29, Jun Wang wrote:
>> If it's a DPDK net_bonding, it may cause
>> offload-related configurations to take effect,
>> leading to offload failure.
>>
>> /usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd \
>> --system-id=test
>> ovs-vsctl --no-wait set open . external-ids:ovn-bridge-datapath-type\
>> =netdev
>>ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem=\
>> "4096,4096"
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=\
>> 0xff0000
>> ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra=\
>> "-a 0000:ca:00.0  -a 0000:ca:00.1 --vdev net_bonding2494589023,\
>> mode=4,member=0000:ca:00.0,member=0000:ca:00.1,xmit_policy=l34,\
>> lacp_rate=fast"
>> ovs-vswitchd unix:/var/run/openvswitch/db.sock -vconsole:emer \
>> -vsyslog:err -vfile:info --mlockall --no-chdir \
>> --log-file=/var/log/openvswitch/ovs-vswitchd.log \
>> --pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor
>> ovs-vsctl  add-br br-tun -- set bridge br-tun datapath_type=netdev
>> ovs-vsctl --may-exist add-port br-tun dpdk_tun_port -- set Interface \
>> dpdk_tun_port type=dpdk options:dpdk-devargs="net_bonding2494589023"
>>
>> {bus_info="bus_name=vdev", driver_name=net_bonding,
>>  if_descr="DPDK 23.11.0 net_bonding", if_type="6",link_speed="20Gbps",
>>  max_hash_mac_addrs="0", max_mac_addrs="16", max_rx_pktlen="1618",
>>  max_rx_queues="1023", max_tx_queues="1023", max_vfs="0",
>>  max_vmdq_pools="0", min_rx_bufsize="0", n_rxq="4", n_txq="9",
>>  numa_id="0", port_no="2", rx-steering=rss, rx_csum_offload="false",
>>  tx_geneve_tso_offload="false", tx_ip_csum_offload="false",
>>  tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false",
>>  tx_sctp_csum_offload="false", tx_tcp_csum_offload="false",
>>  tx_tcp_seg_offload="false", tx_udp_csum_offload="false",
>>  tx_vxlan_tso_offload="false"}
>>
>> Fixes: 5d11c47d3ebe ("userspace: Enable IP checksum offloading by default.")
>>
>> Signed-off-by: Jun Wang <junwang01@cestc.cn>
>> ---
>>  lib/netdev-dpdk.c | 75 +++++++++++++++++++++++++++++++++----------------------
>>  1 file changed, 45 insertions(+), 30 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 2111f77..191c83d 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1294,15 +1294,10 @@ dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
>>      dev->rx_metadata_delivery_configured = true;
>>  }
>> 
>> -static int
>> -dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> -    OVS_REQUIRES(dev->mutex)
>> +static void
>> +dpdk_eth_offload_config(struct netdev_dpdk *dev,
>> +                               struct rte_eth_dev_info *info)
>>  {
>> -    struct rte_pktmbuf_pool_private *mbp_priv;
>> -    struct rte_eth_dev_info info;
>> -    struct rte_ether_addr eth_addr;
>> -    int diag;
>> -    int n_rxq, n_txq;
>>      uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>>                                       RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>>                                       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
>> @@ -1319,16 +1314,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          dpdk_eth_dev_init_rx_metadata(dev);
>>      }
>> 
>> -    rte_eth_dev_info_get(dev->port_id, &info);
>> -
>> -    if (strstr(info.driver_name, "vf") != NULL) {
>> +    if (strstr(info->driver_name, "vf") != NULL) {
>>          VLOG_INFO("Virtual function detected, HW_CRC_STRIP will be enabled");
>>          dev->hw_ol_features |= NETDEV_RX_HW_CRC_STRIP;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_RX_HW_CRC_STRIP;
>>      }
>> 
>> -    if ((info.rx_offload_capa & rx_chksm_offload_capa) !=
>> +    if ((info->rx_offload_capa & rx_chksm_offload_capa) !=
>>              rx_chksm_offload_capa) {
>>          VLOG_WARN("Rx checksum offload is not supported on port "
>>                    DPDK_PORT_ID_FMT, dev->port_id);
>> @@ -1337,66 +1330,66 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>> +    if (info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
>>          dev->hw_ol_features |= NETDEV_RX_HW_SCATTER;
>>      } else {
>>          /* Do not warn on lack of scatter support */
>>          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>>      }
>> 
>> -    if (!strcmp(info.driver_name, "net_tap")) {
>> +    if (!strcmp(info->driver_name, "net_tap")) {
>>          /* FIXME: L4 checksum offloading is broken in DPDK net/tap driver.
>>           * This workaround can be removed once the fix makes it to a DPDK
>>           * LTS release used by OVS. */
>>          VLOG_INFO("%s: disabled Tx L4 checksum offloads for a net/tap port.",
>>                    netdev_get_name(&dev->up));
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
>>      }
>> 
>> -    if (!strcmp(info.driver_name, "net_ice")
>> -        || !strcmp(info.driver_name, "net_i40e")) {
>> +    if (!strcmp(info->driver_name, "net_ice")
>> +        || !strcmp(info->driver_name, "net_i40e")) {
>>          /* FIXME: Driver advertises the capability but doesn't seem
>>           * to actually support it correctly.  Can remove this once
>>           * the driver is fixed on DPDK side. */
>>          VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
>>                    "net/ice or net/i40e port.", netdev_get_name(&dev->up));
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
>> -        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
>> +        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
>>      }
>> 
>> -    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>> +    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
>>          dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>>      } else {
>>          dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
>> @@ -1404,21 +1397,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> 
>>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>>      if (userspace_tso_enabled()) {
>> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>>              dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
>>          } else {
>>              VLOG_WARN("%s: Tx TSO offload is not supported.",
>>                        netdev_get_name(&dev->up));
>>          }
>> 
>> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
>>              dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD;
>>          } else {
>>              VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.",
>>                        netdev_get_name(&dev->up));
>>          }
>> 
>> -        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>> +        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
>>              dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD;
>>          } else {
>>              VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
>> @@ -1426,6 +1419,21 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          }
>>      }
>> 
>> +}
>> +
>> +static int
>> +dpdk_eth_dev_init(struct netdev_dpdk *dev)
>> +    OVS_REQUIRES(dev->mutex)
>> +{
>> +    struct rte_pktmbuf_pool_private *mbp_priv;
>> +    struct rte_eth_dev_info info;
>> +    struct rte_ether_addr eth_addr;
>> +    int diag;
>> +    int n_rxq, n_txq;
>> +
>> +    rte_eth_dev_info_get(dev->port_id, &info);
>> +    dpdk_eth_offload_config(dev, &info);
>> +
>>      n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
>>      n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
>> 
>> @@ -1439,6 +1447,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>>          return -diag;
>>      }
>> 
>> +    rte_eth_dev_info_get(dev->port_id, &info);
>> +    if (!strcmp(info.driver_name, "net_bonding")) {
>> +        dpdk_eth_offload_config(dev, &info);
>> +        VLOG_INFO("%s: configure offloads for a dpdk net_bonding port.",
>> +                   netdev_get_name(&dev->up));
>> +    }
>> +
> 
>I'm a bit confused  by this.  Why do we need to check offloads again
>after the initialization?  Is there a chance that bonding driver
>will enable features we didn't ask for?
> 
>In general, since we don't know what kind of device is in the bonding,
>we should just disable all offloads for it as long as there is a chance
>to have a broken driver downstream of a bond.
> 
>Best regards, Ilya Maximets.

I believe the DPDK net/bonding port has not been configured here yet, so all the obtained offloads are false. That's why I reset them again after dpdk_eth_dev_port_config. It seems like the DPDK bond has completed initialization at this point, allowing us to properly obtain the offload flags.I think this should be where the DPDK net/bonding port sets the offload flags, which may be the logic of DPDK net/bonding. Additionally, regarding your second point, shouldn't we consider DPDK's net/bonding? I have verified with CX5 network cards, and after reacquiring the offloads, the functionality works properly.



Jun Wang
diff mbox series

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2111f77..191c83d 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1294,15 +1294,10 @@  dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev)
     dev->rx_metadata_delivery_configured = true;
 }
 
-static int
-dpdk_eth_dev_init(struct netdev_dpdk *dev)
-    OVS_REQUIRES(dev->mutex)
+static void
+dpdk_eth_offload_config(struct netdev_dpdk *dev,
+                               struct rte_eth_dev_info *info)
 {
-    struct rte_pktmbuf_pool_private *mbp_priv;
-    struct rte_eth_dev_info info;
-    struct rte_ether_addr eth_addr;
-    int diag;
-    int n_rxq, n_txq;
     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
@@ -1319,16 +1314,14 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dpdk_eth_dev_init_rx_metadata(dev);
     }
 
-    rte_eth_dev_info_get(dev->port_id, &info);
-
-    if (strstr(info.driver_name, "vf") != NULL) {
+    if (strstr(info->driver_name, "vf") != NULL) {
         VLOG_INFO("Virtual function detected, HW_CRC_STRIP will be enabled");
         dev->hw_ol_features |= NETDEV_RX_HW_CRC_STRIP;
     } else {
         dev->hw_ol_features &= ~NETDEV_RX_HW_CRC_STRIP;
     }
 
-    if ((info.rx_offload_capa & rx_chksm_offload_capa) !=
+    if ((info->rx_offload_capa & rx_chksm_offload_capa) !=
             rx_chksm_offload_capa) {
         VLOG_WARN("Rx checksum offload is not supported on port "
                   DPDK_PORT_ID_FMT, dev->port_id);
@@ -1337,66 +1330,66 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD;
     }
 
-    if (info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
+    if (info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER) {
         dev->hw_ol_features |= NETDEV_RX_HW_SCATTER;
     } else {
         /* Do not warn on lack of scatter support */
         dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
     }
 
-    if (!strcmp(info.driver_name, "net_tap")) {
+    if (!strcmp(info->driver_name, "net_tap")) {
         /* FIXME: L4 checksum offloading is broken in DPDK net/tap driver.
          * This workaround can be removed once the fix makes it to a DPDK
          * LTS release used by OVS. */
         VLOG_INFO("%s: disabled Tx L4 checksum offloads for a net/tap port.",
                   netdev_get_name(&dev->up));
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
     }
 
-    if (!strcmp(info.driver_name, "net_ice")
-        || !strcmp(info.driver_name, "net_i40e")) {
+    if (!strcmp(info->driver_name, "net_ice")
+        || !strcmp(info->driver_name, "net_i40e")) {
         /* FIXME: Driver advertises the capability but doesn't seem
          * to actually support it correctly.  Can remove this once
          * the driver is fixed on DPDK side. */
         VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
                   "net/ice or net/i40e port.", netdev_get_name(&dev->up));
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
+        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
+        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
+        info->tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
     }
 
-    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
+    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     } else {
         dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     }
 
-    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
+    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
     } else {
         dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
     }
 
-    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
+    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
     } else {
         dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
     }
 
-    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
+    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
     } else {
         dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
     }
 
-    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
+    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
     } else {
         dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD;
     }
 
-    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
+    if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
     } else {
         dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD;
@@ -1404,21 +1397,21 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
 
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
-        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
+        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
             dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
         } else {
             VLOG_WARN("%s: Tx TSO offload is not supported.",
                       netdev_get_name(&dev->up));
         }
 
-        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
+        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) {
             dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD;
         } else {
             VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.",
                       netdev_get_name(&dev->up));
         }
 
-        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
+        if (info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) {
             dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD;
         } else {
             VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
@@ -1426,6 +1419,21 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         }
     }
 
+}
+
+static int
+dpdk_eth_dev_init(struct netdev_dpdk *dev)
+    OVS_REQUIRES(dev->mutex)
+{
+    struct rte_pktmbuf_pool_private *mbp_priv;
+    struct rte_eth_dev_info info;
+    struct rte_ether_addr eth_addr;
+    int diag;
+    int n_rxq, n_txq;
+
+    rte_eth_dev_info_get(dev->port_id, &info);
+    dpdk_eth_offload_config(dev, &info);
+
     n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
     n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
 
@@ -1439,6 +1447,13 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         return -diag;
     }
 
+    rte_eth_dev_info_get(dev->port_id, &info);
+    if (!strcmp(info.driver_name, "net_bonding")) {
+        dpdk_eth_offload_config(dev, &info);
+        VLOG_INFO("%s: configure offloads for a dpdk net_bonding port.",
+                   netdev_get_name(&dev->up));
+    }
+
     diag = rte_eth_dev_start(dev->port_id);
     if (diag) {
         VLOG_ERR("Interface %s start error: %s", dev->up.name,