@@ -3475,9 +3475,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+ skb_frag_t *frag = skb_shinfo(head_skb)->frags, head_frag;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+ struct sk_buff *check_list_skb = list_skb;
struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
@@ -3590,6 +3591,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
+ check_list_skb = list_skb;
if (unlikely(!nskb))
goto err;
@@ -3664,21 +3666,37 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(list_skb));
+ if (skb_headlen(list_skb) && check_list_skb == list_skb) {
+ struct page *page;
+
+ BUG_ON(!list_skb->head_frag);
+
+ i = 0;
+ nfrags = 1;
+ page = virt_to_head_page(list_skb->head);
+ head_frag.page.p = page;
+ head_frag.page_offset = list_skb->data -
+ (unsigned char *)page_address(page);
+ head_frag.size = skb_headlen(list_skb);
+ frag = &head_frag;
+ check_list_skb = list_skb->next;
+ } else {
+ BUG_ON(skb_headlen(list_skb) && check_list_skb == list_skb);
- i = 0;
- nfrags = skb_shinfo(list_skb)->nr_frags;
- frag = skb_shinfo(list_skb)->frags;
- frag_skb = list_skb;
+ i = 0;
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
- BUG_ON(!nfrags);
+ BUG_ON(!nfrags);
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
+ if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ goto err;
- list_skb = list_skb->next;
+ list_skb = list_skb->next;
+ check_list_skb = list_skb;
+ }
}
if (unlikely(skb_shinfo(nskb)->nr_frags >=
One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at function skb_segment(), line 3667. The bpf program attaches to clsact ingress, calls bpf_skb_change_proto to change protocol from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect to send the changed packet out. 3472 struct sk_buff *skb_segment(struct sk_buff *head_skb, 3473 netdev_features_t features) 3474 { 3475 struct sk_buff *segs = NULL; 3476 struct sk_buff *tail = NULL; ... 3665 while (pos < offset + len) { 3666 if (i >= nfrags) { 3667 BUG_ON(skb_headlen(list_skb)); 3668 3669 i = 0; 3670 nfrags = skb_shinfo(list_skb)->nr_frags; 3671 frag = skb_shinfo(list_skb)->frags; 3672 frag_skb = list_skb; ... call stack: ... #1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525 #2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc #3 [ffff883ffef03640] oops_end at ffffffff8101d7e7 #4 [ffff883ffef03668] die at ffffffff8101deb2 #5 [ffff883ffef03698] do_trap at ffffffff8101a700 #6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe #7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0 #8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab [exception RIP: skb_segment+3044] RIP: ffffffff817e4dd4 RSP: ffff883ffef03860 RFLAGS: 00010216 RAX: 0000000000002bf6 RBX: ffff883feb7aaa00 RCX: 0000000000000011 RDX: ffff883fb87910c0 RSI: 0000000000000011 RDI: ffff883feb7ab500 RBP: ffff883ffef03928 R8: 0000000000002ce2 R9: 00000000000027da R10: 000001ea00000000 R11: 0000000000002d82 R12: ffff883f90a1ee80 R13: ffff883fb8791120 R14: ffff883feb7abc00 R15: 0000000000002ce2 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7 --- <IRQ stack> --- ... The triggering input skb has the following properties: list_skb = skb->frag_list; skb->nfrags != NULL && skb_headlen(list_skb) != 0 and skb_segment() is not able to handle a frag_list skb if its headlen (list_skb->len - list_skb->data_len) is not 0. This patch addressed the issue by handling skb_headlen(list_skb) != 0 case properly if list_skb->head_frag is true, which is expected in most cases. A one-element frag array is created for the list_skb head and processed before list_skb->frags are processed. Reported-by: Diptanu Gon Choudhury <diptanu@fb.com> Signed-off-by: Yonghong Song <yhs@fb.com> --- net/core/skbuff.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-)