Message ID | 20180321064722.1411857-3-yhs@fb.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Series | [net-next,v4,1/2] net: permit skb_segment on head_frag frag_list skb | expand |
On 03/20/2018 11:47 PM, Yonghong Song wrote: ... + > +static __init int test_skb_segment(void) > +{ > + netdev_features_t features; > + struct sk_buff *skb; > + int ret = -1; > + > + features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | > + NETIF_F_IPV6_CSUM; > + features |= NETIF_F_RXCSUM; > + skb = build_test_skb(); > + if (!skb) { > + pr_info("%s: failed to build_test_skb", __func__); > + goto done; > + } > + > + if (skb_segment(skb, features)) { > + ret = 0; > + pr_info("%s: success in skb_segment!", __func__); > + } else { > + pr_info("%s: failed in skb_segment!", __func__); > + } If skb_segment() was successful, skb was already freed. kfree_skb(old_skb) should thus panic the box if you run this code on a kernel having some debugging features like KASAN. So if you do not store the return value of skb_segment() in a variable, you cannot properly free memory. > + kfree_skb(skb); > +done: > + return ret; > +} > + > Please make sure to fully test this code.
On 03/20/2018 11:47 PM, Yonghong Song wrote: > +static __init int test_skb_segment(void) > +{ > + netdev_features_t features; > + struct sk_buff *skb; > + int ret = -1; > + > + features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | > + NETIF_F_IPV6_CSUM; > + features |= NETIF_F_RXCSUM; > + skb = build_test_skb(); > + if (!skb) { > + pr_info("%s: failed to build_test_skb", __func__); > + goto done; > + } > + > + if (skb_segment(skb, features)) { > + ret = 0; > + pr_info("%s: success in skb_segment!", __func__); > + } else { > + pr_info("%s: failed in skb_segment!", __func__); > + } > + kfree_skb(skb); If skb_segment() was successful, the (original) skb was already freed. kfree_skb(old_skb) should thus panic the box if you run this code on a kernel having some debugging features like KASAN. So you must store the return value of skb_segment() in a variable to be able to free the skb(s) using kfree_skb_list(). > +done: > + return ret; > +} > +
On 3/21/18 8:26 AM, Eric Dumazet wrote: > > > On 03/20/2018 11:47 PM, Yonghong Song wrote: >> +static __init int test_skb_segment(void) >> +{ >> + netdev_features_t features; >> + struct sk_buff *skb; >> + int ret = -1; >> + >> + features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | >> + NETIF_F_IPV6_CSUM; >> + features |= NETIF_F_RXCSUM; >> + skb = build_test_skb(); >> + if (!skb) { >> + pr_info("%s: failed to build_test_skb", __func__); >> + goto done; >> + } >> + >> + if (skb_segment(skb, features)) { >> + ret = 0; >> + pr_info("%s: success in skb_segment!", __func__); >> + } else { >> + pr_info("%s: failed in skb_segment!", __func__); >> + } >> + kfree_skb(skb); > > If skb_segment() was successful (original) skb was already freed. > > kfree_skb(old_skb) should thus panic the box, if you run this code > on a kernel having some debugging features like KASAN. I tried with KASAN. It does not panic. Looking at validate_xmit_skb() in net/core/dev.c: static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again) ... if (netif_needs_gso(skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); if (IS_ERR(segs)) { goto out_kfree_skb; } else if (segs) { consume_skb(skb); skb = segs; } ... out_kfree_skb: kfree_skb(skb); which also indicates that kfree_skb/consume_skb is probably the right way to free the skb after skb_gso_segment/skb_segment. This probably explains why my kfree_skb(skb) above does not crash. > > So you must store in a variable the return of skb_segment(), > to be able to free skb(s), using kfree_skb_list() Totally agree. Will make the change. Thanks! > > >> +done: >> + return ret; >> +} >> +
diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2efb213..086a231 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6574,6 +6574,91 @@ static bool exclude_test(int test_id) return test_id < test_range[0] || test_id > test_range[1]; } +static __init struct sk_buff *build_test_skb(void) +{ + u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN; + struct sk_buff *skb[2]; + struct page *page[2]; + int i, data_size = 8; + + for (i = 0; i < 2; i++) { + page[i] = alloc_page(GFP_KERNEL); + if (!page[i]) { + if (i == 0) + goto err_page0; + else + goto err_page1; + } + + /* this will set skb[i]->head_frag */ + skb[i] = dev_alloc_skb(headroom + data_size); + if (!skb[i]) { + if (i == 0) + goto err_skb0; + else + goto err_skb1; + } + + skb_reserve(skb[i], headroom); + skb_put(skb[i], data_size); + skb[i]->protocol = htons(ETH_P_IP); + skb_reset_network_header(skb[i]); + skb_set_mac_header(skb[i], -ETH_HLEN); + + skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64); + // skb_headlen(skb[i]): 8, skb[i]->head_frag = 1 + } + + /* setup shinfo */ + skb_shinfo(skb[0])->gso_size = 1448; + skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb[0])->gso_segs = 0; + skb_shinfo(skb[0])->frag_list = skb[1]; + + /* adjust skb[0]'s len */ + skb[0]->len += skb[1]->len; + skb[0]->data_len += skb[1]->data_len; + skb[0]->truesize += skb[1]->truesize; + + return skb[0]; + +err_skb1: + __free_page(page[1]); +err_page1: + kfree_skb(skb[0]); +err_skb0: + __free_page(page[0]); +err_page0: + return NULL; +} + +static __init int test_skb_segment(void) +{ + netdev_features_t features; + struct sk_buff *skb; + int ret = -1; + + features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM; + features |= NETIF_F_RXCSUM; + skb = build_test_skb(); + if (!skb) { + pr_info("%s: failed to build_test_skb", __func__); + goto done; + } + + if (skb_segment(skb, features)) { + ret = 0; + pr_info("%s: success in skb_segment!", __func__); 
+ } else { + pr_info("%s: failed in skb_segment!", __func__); + } + kfree_skb(skb); +done: + return ret; +} + static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; @@ -6632,9 +6717,11 @@ static int __init test_bpf_init(void) return ret; ret = test_bpf(); - destroy_bpf_tests(); - return ret; + if (ret) + return ret; + + return test_skb_segment(); } static void __exit test_bpf_exit(void)
Without the previous commit, "modprobe test_bpf" will have the following errors: ... [ 98.149165] ------------[ cut here ]------------ [ 98.159362] kernel BUG at net/core/skbuff.c:3667! [ 98.169756] invalid opcode: 0000 [#1] SMP PTI [ 98.179370] Modules linked in: [ 98.179371] test_bpf(+) ... which triggers the bug the previous commit intends to fix. The skbs are constructed to mimic what mlx5 may generate. The packet size/header may not mimic real cases in production. But the processing flow is similar. Signed-off-by: Yonghong Song <yhs@fb.com> --- lib/test_bpf.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 2 deletions(-)