diff mbox series

[net,v3] i40e: Fix crash when rebuild fails in i40e_xdp_setup When attaching XDP program on i40e driver there was a reset and rebuild of the interface to reconfigure the queues for XDP operation. If one of the steps of rebuild failed then the interface wa

Message ID 20230118153102.1325350-1-kamil.maziarz@intel.com
State Superseded
Headers show
Series [net,v3] i40e: Fix crash when rebuild fails in i40e_xdp_setup When attaching XDP program on i40e driver there was a reset and rebuild of the interface to reconfigure the queues for XDP operation. If one of the steps of rebuild failed then the interface wa | expand

Commit Message

Kamil Maziarz Jan. 18, 2023, 3:31 p.m. UTC
From: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>

capability discovery failed, err I40E_ERR_ADMIN_QUEUE_TIMEOUT aq_err OK
BUG: unable to handle kernel NULL pointer dereference at
0000000000000000 Call Trace:
? i40e_reconfig_rss_queues+0x120/0x120 [i40e]
  dev_xdp_install+0x70/0x100
  dev_xdp_attach+0x1d7/0x530
  dev_change_xdp_fd+0x1f4/0x230
  do_setlink+0x45f/0xf30
  ? irq_work_interrupt+0xa/0x20
  ? __nla_validate_parse+0x12d/0x1a0
  rtnl_setlink+0xb5/0x120
  rtnetlink_rcv_msg+0x2b1/0x360
  ? sock_has_perm+0x80/0xa0
  ? rtnl_calcit.isra.42+0x120/0x120
  netlink_rcv_skb+0x4c/0x120
  netlink_unicast+0x196/0x230
  netlink_sendmsg+0x204/0x3d0
  sock_sendmsg+0x4c/0x50
  __sys_sendto+0xee/0x160
  ? handle_mm_fault+0xc1/0x1e0
  ? syscall_trace_enter+0x1fb/0x2c0
  ? __sys_setsockopt+0xd6/0x1d0
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x5b/0x1a0
  entry_SYSCALL_64_after_hwframe+0x65/0xca
  RIP: 0033:0x7f3535d99781

Fix this by removing reset and rebuild from i40e_xdp_setup and replace it by
interface down, reconfigure queues and interface up. This way if any step fails
the interface will remain in a correct state.

Fixes: 0c8493d90b6b ("i40e: add XDP support for pass and drop actions")
Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
Signed-off-by: Piotr Raczynski <piotr.raczynski@intel.com>
Signed-off-by: Andrii Staikov <andrii.staikov@intel.com>
Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
---
v2: don't reinitialize rings while hotswapping program
---
v3: error code 'ret' set to -EIO
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 145 +++++++++++++++-----
 1 file changed, 109 insertions(+), 36 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 53d0083e35da..94ff72d38e06 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -50,6 +50,8 @@  static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 static int i40e_get_capabilities(struct i40e_pf *pf,
 				 enum i40e_admin_queue_opc list_type);
 static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf);
+static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi,
+					      bool is_xdp);
 
 /* i40e_pci_tbl - PCI Device ID Table
  *
@@ -3563,11 +3565,17 @@  static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	if (ring->vsi->type == I40E_VSI_MAIN)
-		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	if (ring->vsi->type == I40E_VSI_MAIN) {
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->queue_index,
+					 ring->q_vector->napi.napi_id);
+	}
 
 	ring->xsk_pool = i40e_xsk_pool(ring);
 	if (ring->xsk_pool) {
+		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
 		ring->rx_buf_len =
 		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
 		/* For AF_XDP ZC, we disallow packets to span on
@@ -13305,6 +13313,34 @@  static netdev_features_t i40e_features_check(struct sk_buff *skb,
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
+/**
+ * i40e_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to changed
+ * @prog: XDP program
+ **/
+static void i40e_vsi_assign_bpf_prog(struct i40e_vsi *vsi,
+						 struct bpf_prog *prog)
+{
+	int i;
+
+	for (i = 0; i < vsi->num_queue_pairs; i++)
+		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * i40e_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
+ * @vsi: VSI to schedule napi on
+ */
+static void i40e_vsi_rx_napi_schedule(struct i40e_vsi *vsi)
+{
+	int i;
+
+	for (i = 0; i < vsi->num_queue_pairs; i++)
+		if (vsi->xdp_rings[i]->xsk_pool)
+			(void)i40e_xsk_wakeup(vsi->netdev, i,
+					      XDP_WAKEUP_RX);
+}
+
 /**
  * i40e_xdp_setup - add/remove an XDP program
  * @vsi: VSI to changed
@@ -13315,10 +13351,12 @@  static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
 			  struct netlink_ext_ack *extack)
 {
 	int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	bool is_xdp_enabled = i40e_enabled_xdp_vsi(vsi);
+	bool if_running = netif_running(vsi->netdev);
+	bool need_reinit = is_xdp_enabled != !!prog;
 	struct i40e_pf *pf = vsi->back;
 	struct bpf_prog *old_prog;
-	bool need_reset;
-	int i;
+	int ret = 0;
 
 	/* Don't allow frames that span over multiple buffers */
 	if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
@@ -13326,35 +13364,59 @@  static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
 		return -EINVAL;
 	}
 
-	/* When turning XDP on->off/off->on we reset and rebuild the rings. */
-	need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
-
-	if (need_reset)
-		i40e_prep_for_reset(pf);
-
 	/* VSI shall be deleted in a moment, just return EINVAL */
 	if (test_bit(__I40E_IN_REMOVE, pf->state))
 		return -EINVAL;
 
 	old_prog = xchg(&vsi->xdp_prog, prog);
 
-	if (need_reset) {
-		if (!prog)
-			/* Wait until ndo_xsk_wakeup completes. */
-			synchronize_rcu();
-		i40e_reset_and_rebuild(pf, true, true);
+	if (!need_reinit)
+		goto assign_prog;
+
+	if (if_running && !test_and_set_bit(__I40E_VSI_DOWN, vsi->state))
+		i40e_down(vsi);
+
+	vsi = i40e_vsi_reinit_setup(vsi, true);
+
+	if (!vsi) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to reinitialize VSI during XDP setup");
+		ret = -EIO;
+		goto err_vsi_setup;
 	}
 
-	if (!i40e_enabled_xdp_vsi(vsi) && prog) {
-		if (i40e_realloc_rx_bi_zc(vsi, true))
-			return -ENOMEM;
-	} else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
-		if (i40e_realloc_rx_bi_zc(vsi, false))
-			return -ENOMEM;
+	/* allocate descriptors */
+	ret = i40e_vsi_setup_tx_resources(vsi);
+	if (ret) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to configure TX resources during XDP setup");
+		goto err_setup_tx;
+	}
+	ret = i40e_vsi_setup_rx_resources(vsi);
+	if (ret) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to configure RX resources during XDP setup");
+		goto err_setup_rx;
 	}
 
-	for (i = 0; i < vsi->num_queue_pairs; i++)
-		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+	if (!is_xdp_enabled && prog)
+		ret = i40e_realloc_rx_bi_zc(vsi, true);
+	else if (is_xdp_enabled && !prog)
+		ret = i40e_realloc_rx_bi_zc(vsi, false);
+
+	if (ret) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to reallocate RX resources during XDP setup");
+		goto err_vsi_setup;
+	}
+
+	if (if_running) {
+		ret = i40e_up(vsi);
+
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to open VSI during XDP setup");
+			goto err_vsi_setup;
+		}
+	}
+
+assign_prog:
+	i40e_vsi_assign_bpf_prog(vsi, prog);
 
 	if (old_prog)
 		bpf_prog_put(old_prog);
@@ -13362,13 +13424,20 @@  static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
 	/* Kick start the NAPI context if there is an AF_XDP socket open
 	 * on that queue id. This so that receiving will start.
 	 */
-	if (need_reset && prog)
-		for (i = 0; i < vsi->num_queue_pairs; i++)
-			if (vsi->xdp_rings[i]->xsk_pool)
-				(void)i40e_xsk_wakeup(vsi->netdev, i,
-						      XDP_WAKEUP_RX);
+	if (need_reinit && prog)
+		i40e_vsi_rx_napi_schedule(vsi);
 
 	return 0;
+
+err_setup_rx:
+	i40e_vsi_free_rx_resources(vsi);
+err_setup_tx:
+	i40e_vsi_free_tx_resources(vsi);
+err_vsi_setup:
+	i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+	i40e_vsi_assign_bpf_prog(vsi, old_prog);
+
+	return ret;
 }
 
 /**
@@ -14310,13 +14379,14 @@  static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
 /**
  * i40e_vsi_reinit_setup - return and reallocate resources for a VSI
  * @vsi: pointer to the vsi.
+ * @is_xdp: flag indicating if this is reinit during XDP setup
  *
  * This re-allocates a vsi's queue resources.
  *
  * Returns pointer to the successfully allocated and configured VSI sw struct
  * on success, otherwise returns NULL on failure.
  **/
-static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
+static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi, bool is_xdp)
 {
 	u16 alloc_queue_pairs;
 	struct i40e_pf *pf;
@@ -14352,12 +14422,14 @@  static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
 	/* Update the FW view of the VSI. Force a reset of TC and queue
 	 * layout configurations.
 	 */
-	enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
-	pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
-	pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
-	i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
-	if (vsi->type == I40E_VSI_MAIN)
-		i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
+	if (!is_xdp) {
+		enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
+		pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
+		pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
+		i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
+		if (vsi->type == I40E_VSI_MAIN)
+			i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
+	}
 
 	/* assign it some queues */
 	ret = i40e_alloc_rings(vsi);
@@ -15123,7 +15195,8 @@  static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui
 		if (pf->lan_vsi == I40E_NO_VSI)
 			vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
 		else if (reinit)
-			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
+			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi],
+						    false);
 		if (!vsi) {
 			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
 			i40e_cloud_filter_exit(pf);