diff mbox series

AF_XDP: soft interrupt takes 100% CPU when APP rx slower than nic's input traffic

Message ID CAPydje-awJLYYs-u_5sEy=AYnxUcY28tteiCKiy5pLMisVOxnA@mail.gmail.com
State Not Applicable
Delegated to: BPF Maintainers
Headers show
Series AF_XDP: soft interrupt takes 100% CPU when APP rx slower than nic's input traffic | expand

Commit Message

Yahui Chen July 1, 2020, 10:59 a.m. UTC
Ring 0 of p6p1 can rx_drop about 1.4 Mpps of traffic (all 64-byte
packets) using the kernel
xdpsock sample, run with the command `./xdpsock -r -z -i p6p1 -m`.
I changed the samples/bpf/xdpsock_user.c code so that it receives only 10 Kpps:
```
git diff xdpsock_user.c
!ixgbe_alloc_rx_buffers_zc(rx_ring,
cleaned_count);
cleaned_count = 0;
+                       if (failuer)
+                               fail_times++;
}
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
@@ -352,6 +355,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
return (int)total_rx_packets;
}
+
+       /* too many failure meaning traffic congestion,
+        * this would eat all of the CPU.
+        * packets should be dropped earlier by HW
+        */
+       if (fail_times > total_rx_packets/IXGBE_RX_BUFFER_WRITE/2){
+               q_vector->rx_congestion = 1;
+               return (int)total_rx_packets
+       }
return failure ? budget : (int)total_rx_packets;
}
```

The expected result is that xdpsock receives 10 Kpps and si (soft interrupt)
takes only a small amount of CPU.
However, the code above did not meet that expectation: xdpsock is
able to receive only 1 Kpps.

So, what is wrong with my code, or how should the rx traffic congestion
caused by AF_XDP be solved?
diff mbox series

Patch

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index c91e913..7bbffec 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -857,6 +857,7 @@  static inline void complete_tx_only(struct
xsk_socket_info *xsk,
}
}
+int payload = 100000 /*10Kpps*/;
static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
{
unsigned int rcvd, i;
@@ -888,6 +889,21 @@  static void rx_drop(struct xsk_socket_info *xsk,
struct pollfd *fds)
char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
hex_dump(pkt, len, addr);
+
+               unsigned long now = get_nsecs();
+               unsigned long prev = now;
+               int j;
+               for (;;){
+                       j=0;
+                       do {
+                               j++;
+                       }while(j<1000);
+
+                       now = get_nsecs();
+                       if (now - prev >= payload)
+                               break;
+               }
+
*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
}
```

Then, run xdpsock with the command `./xdpsock -r -z -i p6p1 -m` and check
the CPU usage with `top`.
Unexpectedly, the si (soft interrupt) usage of p6p1's ring 0 is 99.x%, almost
100%. However, when I did not
modify the xdpsock code, si was about 20% and xdpsock could rx_drop all
the packets. Fewer packets
are processed but more CPU is consumed; this is not correct.

The NIC's driver is ixgbe. This unexpected situation means ixgbe_poll
does not deal with rx traffic
congestion well. A feasible solution is to make ixgbe detect rx
congestion and have the packets dropped
by the hardware.

```
# git diff
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 5ddfc83..b8592d8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -430,6 +430,7 @@  struct ixgbe_ring_container {
u16 work_limit;                 /* total work allowed per interrupt */
u8 count;                       /* total number of rings in vector */
u8 itr;                         /* current ITR setting for ring */
+       int congestion;                 /* traffic congestion flag */
};
/* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index f162b8b..26e63f9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2542,6 +2542,12 @@  static void ixgbe_update_itr(struct
ixgbe_q_vector *q_vector,
if (time_after(next_update, ring_container->next_update))
goto clear_counts;
+       if (ring_container->congestion){
+               itr = ring_container->itr << 1;
+               ring_container->congestion = 0;
+               goto clear_counts;
+       }
+
packets = ring_container->total_packets;
/* We have no packets to actually measure against. This means
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index be9d2a8..2835cf8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -235,6 +235,7 @@  int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
struct ixgbe_adapter *adapter = q_vector->adapter;
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
unsigned int xdp_res, xdp_xmit = 0;
+       int fail_times = 0;
bool failure = false;
struct sk_buff *skb;
@@ -249,6 +250,8 @@  int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,