diff mbox series

[net-next,v4,2/2] net: bpf: add a test for skb_segment in test_bpf module

Message ID 20180321064722.1411857-3-yhs@fb.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [net-next,v4,1/2] net: permit skb_segment on head_frag frag_list skb | expand

Commit Message

Yonghong Song March 21, 2018, 6:47 a.m. UTC
Without the previous commit,
"modprobe test_bpf" will have the following errors:
...
[   98.149165] ------------[ cut here ]------------
[   98.159362] kernel BUG at net/core/skbuff.c:3667!
[   98.169756] invalid opcode: 0000 [#1] SMP PTI
[   98.179370] Modules linked in:
[   98.179371]  test_bpf(+)
...
which triggers the bug the previous commit intends to fix.

The skbs are constructed to mimic what mlx5 may generate.
The packet size/header may not mimic real cases in production. But
the processing flow is similar.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 lib/test_bpf.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 89 insertions(+), 2 deletions(-)

Comments

Eric Dumazet March 21, 2018, 3:03 p.m. UTC | #1
On 03/20/2018 11:47 PM, Yonghong Song wrote:
...

+
> +static __init int test_skb_segment(void)
> +{
> +	netdev_features_t features;
> +	struct sk_buff *skb;
> +	int ret = -1;
> +
> +	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
> +		   NETIF_F_IPV6_CSUM;
> +	features |= NETIF_F_RXCSUM;
> +	skb = build_test_skb();
> +	if (!skb) {
> +		pr_info("%s: failed to build_test_skb", __func__);
> +		goto done;
> +	}
> +
> +	if (skb_segment(skb, features)) {
> +		ret = 0;
> +		pr_info("%s: success in skb_segment!", __func__);
> +	} else {
> +		pr_info("%s: failed in skb_segment!", __func__);
> +	}

If skb_segment() was successful, skb was already freed.

kfree_skb(old_skb) should thus panic the box, if you run this code
on a kernel having some debugging features like KASAN

So if you do not store the return value of skb_segment() in a variable,
you cannot properly free the memory.

> +	kfree_skb(skb);
> +done:
> +	return ret;
> +}
> +
> 
Please make sure to fully test this code.
Eric Dumazet March 21, 2018, 3:26 p.m. UTC | #2
On 03/20/2018 11:47 PM, Yonghong Song wrote:
> +static __init int test_skb_segment(void)
> +{
> +	netdev_features_t features;
> +	struct sk_buff *skb;
> +	int ret = -1;
> +
> +	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
> +		   NETIF_F_IPV6_CSUM;
> +	features |= NETIF_F_RXCSUM;
> +	skb = build_test_skb();
> +	if (!skb) {
> +		pr_info("%s: failed to build_test_skb", __func__);
> +		goto done;
> +	}
> +
> +	if (skb_segment(skb, features)) {
> +		ret = 0;
> +		pr_info("%s: success in skb_segment!", __func__);
> +	} else {
> +		pr_info("%s: failed in skb_segment!", __func__);
> +	}
> +	kfree_skb(skb);

If skb_segment() was successful, the (original) skb was already freed.

kfree_skb(old_skb) should thus panic the box, if you run this code
on a kernel having some debugging features like KASAN

So you must store the return value of skb_segment() in a variable
to be able to free the skb(s) using kfree_skb_list().


> +done:
> +	return ret;
> +}
> +
Yonghong Song March 21, 2018, 8:15 p.m. UTC | #3
On 3/21/18 8:26 AM, Eric Dumazet wrote:
> 
> 
> On 03/20/2018 11:47 PM, Yonghong Song wrote:
>> +static __init int test_skb_segment(void)
>> +{
>> +	netdev_features_t features;
>> +	struct sk_buff *skb;
>> +	int ret = -1;
>> +
>> +	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
>> +		   NETIF_F_IPV6_CSUM;
>> +	features |= NETIF_F_RXCSUM;
>> +	skb = build_test_skb();
>> +	if (!skb) {
>> +		pr_info("%s: failed to build_test_skb", __func__);
>> +		goto done;
>> +	}
>> +
>> +	if (skb_segment(skb, features)) {
>> +		ret = 0;
>> +		pr_info("%s: success in skb_segment!", __func__);
>> +	} else {
>> +		pr_info("%s: failed in skb_segment!", __func__);
>> +	}
>> +	kfree_skb(skb);
> 
> If skb_segment() was successful, the (original) skb was already freed.
> 
> kfree_skb(old_skb) should thus panic the box, if you run this code
> on a kernel having some debugging features like KASAN

I tried with KASAN. It does not panic.
Looking at the code in net/core/dev.c: validate_xmit_skb:

static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct 
net_device *dev, bool *again)
...

         if (netif_needs_gso(skb, features)) {
                 struct sk_buff *segs;

                 segs = skb_gso_segment(skb, features);
                 if (IS_ERR(segs)) {
                         goto out_kfree_skb;
                 } else if (segs) {
                         consume_skb(skb);
                         skb = segs;
                 }
...
out_kfree_skb:
         kfree_skb(skb);

which also indicates kfree_skb/consume_skb probably is the right way
to free skb after skb_gso_segment/skb_segment.

This probably explains why my above kfree_skb(skb) does not crash.

> 
> So you must store in a variable the return of skb_segment(),
> to be able to free skb(s), using kfree_skb_list()

Totally agree. Will make the change. Thanks!

> 
> 
>> +done:
>> +	return ret;
>> +}
>> +
diff mbox series

Patch

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 2efb213..086a231 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6574,6 +6574,91 @@  static bool exclude_test(int test_id)
 	return test_id < test_range[0] || test_id > test_range[1];
 }
 
+/*
+ * Build the two-skb GSO packet used by the skb_segment() test.
+ *
+ * Each skb gets data_size bytes of linear data (head allocated through
+ * dev_alloc_skb()) plus one 64-byte page fragment.  skb[1] is then chained
+ * onto skb[0] via frag_list and skb[0]'s len/data_len/truesize are bumped
+ * to account for it.
+ *
+ * Returns skb[0] on success; freeing it also releases skb[1] and both
+ * pages.  Returns NULL if any allocation failed.
+ */
+static __init struct sk_buff *build_test_skb(void)
+{
+	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+	struct sk_buff *skb[2];
+	struct page *page[2];
+	int i, data_size = 8;
+
+	for (i = 0; i < 2; i++) {
+		page[i] = alloc_page(GFP_KERNEL);
+		if (!page[i]) {
+			if (i == 0)
+				goto err_page0;
+			else
+				goto err_page1;
+		}
+
+		/* this will set skb[i]->head_frag */
+		skb[i] = dev_alloc_skb(headroom + data_size);
+		if (!skb[i]) {
+			if (i == 0)
+				goto err_skb0;
+			else
+				goto err_skb1;
+		}
+
+		skb_reserve(skb[i], headroom);
+		skb_put(skb[i], data_size);
+		skb[i]->protocol = htons(ETH_P_IP);
+		skb_reset_network_header(skb[i]);
+		skb_set_mac_header(skb[i], -ETH_HLEN);
+
+		/* from here on page[i] belongs to skb[i] */
+		skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
+		/* skb_headlen(skb[i]): 8, skb[i]->head_frag = 1 */
+	}
+
+	/* setup shinfo */
+	skb_shinfo(skb[0])->gso_size = 1448;
+	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb[0])->gso_segs = 0;
+	skb_shinfo(skb[0])->frag_list = skb[1];
+
+	/* adjust skb[0]'s len */
+	skb[0]->len += skb[1]->len;
+	skb[0]->data_len += skb[1]->data_len;
+	skb[0]->truesize += skb[1]->truesize;
+
+	return skb[0];
+
+err_skb1:
+	/* page[1] was never attached to an skb, so drop it directly */
+	__free_page(page[1]);
+err_page1:
+	/*
+	 * The first iteration completed, so page[0] is already a fragment
+	 * of skb[0] and is released together with it.  Return here instead
+	 * of falling through, which would free page[0] a second time.
+	 */
+	kfree_skb(skb[0]);
+	return NULL;
+err_skb0:
+	/* skb[0] was never allocated; page[0] is still ours to free */
+	__free_page(page[0]);
+err_page0:
+	return NULL;
+}
+
+/*
+ * Run skb_segment() once on the skb produced by build_test_skb().
+ *
+ * Returns 0 if segmentation succeeded, -1 if the test skb could not be
+ * built or skb_segment() reported an error.
+ */
+static __init int test_skb_segment(void)
+{
+	netdev_features_t features;
+	struct sk_buff *skb, *segs;
+	int ret = -1;
+
+	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+		   NETIF_F_IPV6_CSUM;
+	features |= NETIF_F_RXCSUM;
+	skb = build_test_skb();
+	if (!skb) {
+		pr_info("%s: failed to build_test_skb", __func__);
+		goto done;
+	}
+
+	/*
+	 * skb_segment() reports failure through ERR_PTR(), not NULL, so the
+	 * result must be checked with IS_ERR().  On success it returns a new
+	 * list of segments that the caller owns and has to free.
+	 */
+	segs = skb_segment(skb, features);
+	if (!IS_ERR(segs)) {
+		kfree_skb_list(segs);
+		ret = 0;
+		pr_info("%s: success in skb_segment!", __func__);
+	} else {
+		pr_info("%s: failed in skb_segment!", __func__);
+	}
+	/* the original skb is not consumed by skb_segment(); free it here */
+	kfree_skb(skb);
+done:
+	return ret;
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
@@ -6632,9 +6717,11 @@  static int __init test_bpf_init(void)
 		return ret;
 
 	ret = test_bpf();
-
 	destroy_bpf_tests();
-	return ret;
+	if (ret)
+		return ret;
+
+	return test_skb_segment();
 }
 
 static void __exit test_bpf_exit(void)