From patchwork Wed Sep 1 17:14:12 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523337 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fW2G7vz9svs; Thu, 2 Sep 2021 03:14:59 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTp9-0002xS-2N; Wed, 01 Sep 2021 17:14:55 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToj-0002ld-24 for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:29 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:23 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOi025421; Wed, 1 Sep 2021 20:14:22 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 1/9] net/sched: act_ct: fix restore the qdisc_skb_cb after defrag Date: Wed, 1 Sep 2021 12:14:12 -0500 Message-Id: <1630516460-16421-2-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: wenxu BugLink: https://bugs.launchpad.net/bugs/1940872 The fragment packets do defrag in tcf_ct_handle_fragments will clear the skb->cb which make the qdisc_skb_cb clear too. So the qdsic_skb_cb should be store before defrag and restore after that. It also update the pkt_len after all the fragments finish the defrag to one packet and make the following actions counter correct. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: wenxu Signed-off-by: David S. Miller (backported from ae372cb1750f6c95370f92fe5f5620e0954663ba) [maor: small conflicts with newer patches] Signed-off-by: Maor Dickman Signed-off-by: Bodong Wang --- net/sched/act_ct.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index c3fd092..39547cf1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -680,9 +680,10 @@ static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) } static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, - u8 family, u16 zone) + u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; + struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; @@ -700,6 +701,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); + cb = *qdisc_skb_cb(skb); if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -710,6 +712,9 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, local_bh_enable(); if (err && err != -EINPROGRESS) return err; + + if (!err) + *defrag = true; } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; @@ -718,12 +723,16 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, err = nf_ct_frag6_gather(net, skb, user); if (err && err != -EINPROGRESS) return err; + + if (!err) + *defrag = true; #else err = -EOPNOTSUPP; goto out_free; #endif } + *qdisc_skb_cb(skb) = cb; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -926,6 +935,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, int nh_ofs, err, retval; struct tcf_ct_params *p; bool skip_add = false; + bool defrag = false; struct nf_conn *ct; u8 family; @@ -957,7 +967,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); - err = tcf_ct_handle_fragments(net, skb, family, p->zone); + err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); if (err == -EINPROGRESS) { retval = TC_ACT_STOLEN; goto out; @@ -1029,6 +1039,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, qdisc_skb_cb(skb)->post_ct = true; out_clear: tcf_action_update_bstats(&c->common, skb); + if (defrag) + qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; drop: From patchwork Wed Sep 1 17:14:13 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523333 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fP71Pdz9sXk; Thu, 2 Sep 2021 03:14:53 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToo-0002n7-RU; Wed, 01 Sep 2021 17:14:34 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToj-0002le-3O for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:29 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:24 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOj025421; Wed, 1 Sep 2021 20:14:23 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 2/9] net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct Date: Wed, 1 Sep 2021 12:14:13 -0500 Message-Id: <1630516460-16421-3-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: wenxu BugLink: https://bugs.launchpad.net/bugs/1940872 When openvswitch conntrack offload with act_ct action. Fragment packets defrag in the ingress tc act_ct action and miss the next chain. Then the packet pass to the openvswitch datapath without the mru. The over mtu packet will be dropped in output action in openvswitch for over mtu. "kernel: net2: dropped over-mtu packet: 1528 > 1500" This patch add mru in the tc_skb_ext for adefrag and miss next chain situation. And also add mru in the qdisc_skb_cb. The act_ct set the mru to the qdisc_skb_cb when the packet defrag. And When the chain miss, The mru is set to tc_skb_ext which can be got by ovs datapath. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: wenxu Reviewed-by: Cong Wang Signed-off-by: David S. Miller (backport from 038ebb1a713d114d54dbf14868a73181c0c92758) [maor: align to 0617099db3d3 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct")] Signed-off-by: Maor Dickman Signed-off-by: Bodong Wang --- include/linux/skbuff.h | 1 + include/net/sch_generic.h | 3 ++- net/openvswitch/flow.c | 1 + net/sched/act_ct.c | 8 ++++++-- net/sched/cls_api.c | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1222db3..62e17a5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -283,6 +283,7 @@ struct nf_bridge_info { */ struct tc_skb_ext { __u32 chain; + __u16 mru; bool post_ct; }; #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c395d63..75e0781 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -413,6 +413,7 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; + u16 mru; bool post_ct; }; @@ -493,7 +494,7 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; - BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb)); BUILD_BUG_ON(sizeof(qcb->data) < sz); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 84f3bf8..3d2e8fd 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -883,6 +883,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (static_branch_unlikely(&tc_recirc_sharing_support)) { tc_ext = skb_ext_find(skb, TC_SKB_EXT); key->recirc_id = tc_ext ? tc_ext->chain : 0; + OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; } else { key->recirc_id = 0; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 39547cf1..5a13ad69 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -713,8 +713,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err && err != -EINPROGRESS) return err; - if (!err) + if (!err) { *defrag = true; + cb.mru = IPCB(skb)->frag_max_size; + } } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; @@ -724,8 +726,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err && err != -EINPROGRESS) return err; - if (!err) + if (!err) { *defrag = true; + cb.mru = IP6CB(skb)->frag_max_size; + } #else err = -EOPNOTSUPP; goto out_free; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9c9afc0..25174e4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1679,6 +1679,7 @@ int tcf_classify_ingress(struct sk_buff *skb, if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; + ext->mru = qdisc_skb_cb(skb)->mru; ext->post_ct = qdisc_skb_cb(skb)->post_ct; } From patchwork Wed Sep 1 17:14:14 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523339 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fh5r6qz9sW8; Thu, 2 Sep 2021 03:15:08 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTpH-000349-Il; Wed, 01 Sep 2021 17:15:03 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToj-0002lf-3N for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:29 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:26 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOk025421; Wed, 1 Sep 2021 20:14:25 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 3/9] net/sched: fix miss init the mru in qdisc_skb_cb Date: Wed, 1 Sep 2021 12:14:14 -0500 Message-Id: <1630516460-16421-4-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: wenxu BugLink: https://bugs.launchpad.net/bugs/1940872 The mru in the qdisc_skb_cb should be init as 0. Only defrag packets in the act_ct will set the value. Fixes: 038ebb1a713d ("net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct") Signed-off-by: wenxu Signed-off-by: Jakub Kicinski (backport from aadaca9e7c392dbf877af8cefb156199f1a67bbe) [maor: align to 0617099db3d3 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct")] Signed-off-by: Maor Dickman Signed-off-by: Bodong Wang --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 46f9b9d..242df5c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3500,6 +3500,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ + qdisc_skb_cb(skb)->mru = 0; qdisc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); @@ -4590,6 +4591,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) } qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_skb_cb(skb)->mru = 0; qdisc_skb_cb(skb)->post_ct = false; skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); From patchwork Wed Sep 1 17:14:15 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523335 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fQ2N3Gz9ssD; Thu, 2 Sep 2021 03:14:54 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTot-0002oS-2I; Wed, 01 Sep 2021 17:14:39 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToj-0002lg-5M for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:29 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:27 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOl025421; Wed, 1 Sep 2021 20:14:26 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 4/9] net/sched: act_ct: fix wild memory access when clearing fragments Date: Wed, 1 Sep 2021 12:14:15 -0500 Message-Id: <1630516460-16421-5-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: Davide Caratti BugLink: https://bugs.launchpad.net/bugs/1940872 while testing re-assembly/re-fragmentation using act_ct, it's possible to observe a crash like the following one: KASAN: maybe wild-memory-access in range [0x0001000000000448-0x000100000000044f] CPU: 50 PID: 0 Comm: swapper/50 Tainted: G S 5.12.0-rc7+ #424 Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017 RIP: 0010:inet_frag_rbtree_purge+0x50/0xc0 Code: 00 fc ff df 48 89 c3 31 ed 48 89 df e8 a9 7a 38 ff 4c 89 fe 48 89 df 49 89 c6 e8 5b 3a 38 ff 48 8d 7b 40 48 89 f8 48 c1 e8 03 <42> 80 3c 20 00 75 59 48 8d bb d0 00 00 00 4c 8b 6b 40 48 89 f8 48 RSP: 0018:ffff888c31449db8 EFLAGS: 00010203 RAX: 0000200000000089 RBX: 000100000000040e RCX: ffffffff989eb960 RDX: 0000000000000140 RSI: ffffffff97cfb977 RDI: 000100000000044e RBP: 0000000000000900 R08: 0000000000000000 R09: ffffed1186289350 R10: 0000000000000003 R11: ffffed1186289350 R12: dffffc0000000000 R13: 000100000000040e R14: 0000000000000000 R15: ffff888155e02160 FS: 0000000000000000(0000) GS:ffff888c31440000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005600cb70a5b8 CR3: 0000000a2c014005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_frag_destroy+0xa9/0x150 call_timer_fn+0x2d/0x180 run_timer_softirq+0x4fe/0xe70 __do_softirq+0x197/0x5a0 irq_exit_rcu+0x1de/0x200 sysvec_apic_timer_interrupt+0x6b/0x80 when act_ct temporarily stores an IP fragment, restoring the skb qdisc cb results in putting random data in FRAG_CB(), and this causes those "wild" memory accesses later, when the rbtree is purged. Never overwrite the skb cb in case tcf_ct_handle_fragments() returns -EINPROGRESS. Fixes: ae372cb1750f ("net/sched: act_ct: fix restore the qdisc_skb_cb after defrag") Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support") Reported-by: Shuang Li Signed-off-by: Davide Caratti Signed-off-by: David S. Miller (cherry picked from f77bd544a6bbe69aa50d9ed09f13494cf36ff806) Signed-off-by: Bodong Wang --- net/sched/act_ct.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 5a13ad69..761fb0d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -736,7 +736,8 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, #endif } - *qdisc_skb_cb(skb) = cb; + if (err != -EINPROGRESS) + *qdisc_skb_cb(skb) = cb; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -974,7 +975,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); if (err == -EINPROGRESS) { retval = TC_ACT_STOLEN; - goto out; + goto out_clear; } if (err) goto drop; @@ -1039,7 +1040,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, out_push: skb_push_rcsum(skb, nh_ofs); -out: qdisc_skb_cb(skb)->post_ct = true; out_clear: tcf_action_update_bstats(&c->common, skb); From patchwork Wed Sep 1 17:14:16 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523340 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fj17Wbz9sXk; Thu, 2 Sep 2021 03:15:09 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTpJ-00035u-Lr; Wed, 01 Sep 2021 17:15:05 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToo-0002mY-7R for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:34 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:28 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOm025421; Wed, 1 Sep 2021 20:14:27 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 5/9] Revert "net/sched: act_ct: Fix skb double-free in tcf_ct_handle_fragments() error flow" Date: Wed, 1 Sep 2021 12:14:16 -0500 Message-Id: <1630516460-16421-6-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: Maor Dickman BugLink: https://bugs.launchpad.net/bugs/1940872 This reverts commit f797fab7280c7492e211220dbe03d3b9677b92d8. Replaced with another solution. Signed-off-by: Maor Dickman Signed-off-by: Bodong Wang --- net/sched/act_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 761fb0d..96fd196 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -724,7 +724,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err && err != -EINPROGRESS) - return err; + goto out_free; if (!err) { *defrag = true; From patchwork Wed Sep 1 17:14:17 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523336 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fQ1zzfz9shn; Thu, 2 Sep 2021 03:14:54 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTp0-0002rv-Mc; Wed, 01 Sep 2021 17:14:46 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToo-0002mZ-7R for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:34 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:29 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOn025421; Wed, 1 Sep 2021 20:14:28 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 6/9] net/sched: act_mirred: refactor the handle of xmit Date: Wed, 1 Sep 2021 12:14:17 -0500 Message-Id: <1630516460-16421-7-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: wenxu BugLink: https://bugs.launchpad.net/bugs/1940872 This one is prepare for the next patch. Signed-off-by: wenxu Signed-off-by: Jakub Kicinski (cherry picked from fa6d639930ee5cd3f932cc314f3407f07a06582d) Signed-off-by: Bodong Wang --- include/net/sch_generic.h | 5 ----- net/sched/act_mirred.c | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 75e0781..77ab4e8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1319,9 +1319,4 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); -static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) -{ - return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb); -} - #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6556003..b73bfef 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -205,6 +205,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return err; } +static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +{ + int err; + + if (!want_ingress) + err = dev_queue_xmit(skb); + else + err = netif_receive_skb(skb); + + return err; +} + static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -289,18 +301,15 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, if (want_ingress) nf_reset_ct(skb); res->ingress = want_ingress; - if (skb_tc_reinsert(skb, res)) + err = tcf_mirred_forward(res->ingress, skb); + if (err) tcf_action_inc_overlimit_qstats(&m->common); __this_cpu_dec(mirred_rec_level); return TC_ACT_CONSUMED; } } - if (!want_ingress) - err = dev_queue_xmit(skb2); - else - err = netif_receive_skb(skb2); - + err = tcf_mirred_forward(want_ingress, skb2); if (err) { out: tcf_action_inc_overlimit_qstats(&m->common); From patchwork Wed Sep 1 17:14:18 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523332 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fP5Fvnz9sXN; Thu, 2 Sep 2021 03:14:53 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTow-0002q3-90; Wed, 01 Sep 2021 17:14:42 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToo-0002mb-8s for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:34 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:31 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOo025421; Wed, 1 Sep 2021 20:14:30 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 7/9] net/sched: The error lable position is corrected in ct_init_module Date: Wed, 1 Sep 2021 12:14:18 -0500 Message-Id: <1630516460-16421-8-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: liujian BugLink: https://bugs.launchpad.net/bugs/1940872 Exchange the positions of the err_tbl_init and err_register labels in ct_init_module function. Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone") Signed-off-by: liujian Signed-off-by: David S. Miller (cherry picked from 8c5c51f5cac676e9065cb6de9feaa3a16a462675) Signed-off-by: Bodong Wang --- net/sched/act_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 96fd196..ce6d9e6 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1556,10 +1556,10 @@ static int __init ct_init_module(void) return 0; -err_tbl_init: - destroy_workqueue(act_ct_wq); err_register: tcf_ct_flow_tables_uninit(); +err_tbl_init: + destroy_workqueue(act_ct_wq); return err; } From patchwork Wed Sep 1 17:14:19 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523338 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fY6XXQz9sX3; Thu, 2 Sep 2021 03:15:01 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTpC-00030Y-Pn; Wed, 01 Sep 2021 17:14:58 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToo-0002mc-94 for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:34 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:32 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOp025421; Wed, 1 Sep 2021 20:14:31 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 8/9] net/sched: sch_frag: add generic packet fragment support. Date: Wed, 1 Sep 2021 12:14:19 -0500 Message-Id: <1630516460-16421-9-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: wenxu BugLink: https://bugs.launchpad.net/bugs/1940872 Currently kernel tc subsystem can do conntrack in cat_ct. But when several fragment packets go through the act_ct, function tcf_ct_handle_fragments will defrag the packets to a big one. But the last action will redirect mirred to a device which maybe lead the reassembly big packet over the mtu of target device. This patch add support for a xmit hook to mirred, that gets executed before xmiting the packet. Then, when act_ct gets loaded, it configs that hook. The frag xmit hook maybe reused by other modules. Signed-off-by: wenxu Acked-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski (cherry picked from c129412f74e99b609f0a8e95fc3915af1fd40f34) Signed-off-by: Bodong Wang --- include/net/act_api.h | 6 ++ include/net/sch_generic.h | 2 + net/sched/Makefile | 1 + net/sched/act_api.c | 16 +++++ net/sched/act_ct.c | 3 + net/sched/act_mirred.c | 2 +- net/sched/sch_frag.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 net/sched/sch_frag.c diff --git a/include/net/act_api.h b/include/net/act_api.h index 5c2fd22..dc26d8c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -235,6 +235,12 @@ int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct netlink_ext_ack *newchain); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, struct tcf_chain *newchain); + +#ifdef CONFIG_INET +DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); +#endif + +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 77ab4e8..62c59db 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1319,4 +1319,6 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); +int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + #endif diff --git a/net/sched/Makefile b/net/sched/Makefile index 415d1e1..c7332fa 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -5,6 +5,7 @@ obj-y := sch_generic.o sch_mq.o +obj-$(CONFIG_INET) += sch_frag.o obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cfcd595..da056b0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -22,6 +22,22 @@ #include #include +#ifdef CONFIG_INET +DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); +EXPORT_SYMBOL_GPL(tcf_frag_xmit_count); +#endif + +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) +{ +#ifdef CONFIG_INET + if (static_branch_unlikely(&tcf_frag_xmit_count)) + return sch_frag_xmit_hook(skb, xmit); +#endif + + return xmit(skb); +} +EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit); + static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ce6d9e6..2ad05a3 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1554,6 +1554,8 @@ static int __init ct_init_module(void) if (err) goto err_register; + static_branch_inc(&tcf_frag_xmit_count); + return 0; err_register: @@ -1565,6 +1567,7 @@ static int __init ct_init_module(void) static void __exit ct_cleanup_module(void) { + static_branch_dec(&tcf_frag_xmit_count); tcf_unregister_action(&act_ct_ops, &ct_net_ops); tcf_ct_flow_tables_uninit(); destroy_workqueue(act_ct_wq); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b73bfef..27e8db2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -210,7 +210,7 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) int err; if (!want_ingress) - err = dev_queue_xmit(skb); + err = tcf_dev_queue_xmit(skb, dev_queue_xmit); else err = netif_receive_skb(skb); diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c new file mode 100644 index 0000000..e1e77d3 --- /dev/null +++ b/net/sched/sch_frag.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +#include +#include +#include +#include +#include + +struct sch_frag_data { + unsigned long dst; + struct qdisc_skb_cb cb; + __be16 inner_protocol; + u16 vlan_tci; + __be16 vlan_proto; + unsigned int l2_len; + u8 l2_data[VLAN_ETH_HLEN]; + int (*xmit)(struct sk_buff *skb); +}; + +static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage); + +static int sch_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct sch_frag_data *data = this_cpu_ptr(&sch_frag_data_storage); + + if (skb_cow_head(skb, data->l2_len) < 0) { + kfree_skb(skb); + return -ENOMEM; + } + + __skb_dst_copy(skb, data->dst); + *qdisc_skb_cb(skb) = data->cb; + skb->inner_protocol = data->inner_protocol; + if (data->vlan_tci & VLAN_CFI_MASK) + __vlan_hwaccel_put_tag(skb, data->vlan_proto, + data->vlan_tci & ~VLAN_CFI_MASK); + else + __vlan_hwaccel_clear_tag(skb); + + /* Reconstruct the MAC header. */ + skb_push(skb, data->l2_len); + memcpy(skb->data, &data->l2_data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_reset_mac_header(skb); + + return data->xmit(skb); +} + +static void sch_frag_prepare_frag(struct sk_buff *skb, + int (*xmit)(struct sk_buff *skb)) +{ + unsigned int hlen = skb_network_offset(skb); + struct sch_frag_data *data; + + data = this_cpu_ptr(&sch_frag_data_storage); + data->dst = skb->_skb_refdst; + data->cb = *qdisc_skb_cb(skb); + data->xmit = xmit; + data->inner_protocol = skb->inner_protocol; + if (skb_vlan_tag_present(skb)) + data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK; + else + data->vlan_tci = 0; + data->vlan_proto = skb->vlan_proto; + data->l2_len = hlen; + memcpy(&data->l2_data, skb->data, hlen); + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + skb_pull(skb, hlen); +} + +static unsigned int +sch_frag_dst_get_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops sch_frag_dst_ops = { + .family = AF_UNSPEC, + .mtu = sch_frag_dst_get_mtu, +}; + +static int sch_fragment(struct net *net, struct sk_buff *skb, + u16 mru, int (*xmit)(struct sk_buff *skb)) +{ + int ret = -1; + + if (skb_network_offset(skb) > VLAN_ETH_HLEN) { + net_warn_ratelimited("L2 header too long to fragment\n"); + goto err; + } + + if (skb_protocol(skb, true) == htons(ETH_P_IP)) { + struct dst_entry sch_frag_dst; + unsigned long orig_dst; + + sch_frag_prepare_frag(skb, xmit); + dst_init(&sch_frag_dst, &sch_frag_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + sch_frag_dst.dev = skb->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &sch_frag_dst); + IPCB(skb)->frag_max_size = mru; + + ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit); + refdst_drop(orig_dst); + } else if (skb_protocol(skb, true) == htons(ETH_P_IPV6)) { + unsigned long orig_dst; + struct rt6_info sch_frag_rt; + + sch_frag_prepare_frag(skb, xmit); + memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + sch_frag_rt.dst.dev = skb->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &sch_frag_rt.dst); + IP6CB(skb)->frag_max_size = mru; + + ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb, + sch_frag_xmit); + refdst_drop(orig_dst); + } else { + net_warn_ratelimited("Fail frag %s: eth=%x, MRU=%d, MTU=%d\n", + netdev_name(skb->dev), + ntohs(skb_protocol(skb, true)), mru, + skb->dev->mtu); + goto err; + } + + return ret; +err: + kfree_skb(skb); + return ret; +} + +int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) +{ + u16 mru = qdisc_skb_cb(skb)->mru; + int err; + + if (mru && skb->len > mru + skb->dev->hard_header_len) + err = sch_fragment(dev_net(skb->dev), skb, mru, xmit); + else + err = xmit(skb); + + return err; +} +EXPORT_SYMBOL_GPL(sch_frag_xmit_hook); From patchwork Wed Sep 1 17:14:20 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bodong Wang X-Patchwork-Id: 1523334 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4H09fQ0pgCz9sf8; Thu, 2 Sep 2021 03:14:54 +1000 (AEST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLTp5-0002uY-4j; Wed, 01 Sep 2021 17:14:51 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1mLToo-0002md-9g for kernel-team@lists.ubuntu.com; Wed, 01 Sep 2021 17:14:34 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from bodong@nvidia.com) with SMTP; 1 Sep 2021 20:14:33 +0300 Received: from sw-mtx-016.mtx.labs.mlnx. (sw-mtx-016.mtx.labs.mlnx [10.9.150.102]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 181HEKOq025421; Wed, 1 Sep 2021 20:14:32 +0300 From: Bodong Wang To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH V2 9/9] ipv6: add ipv6_fragment hook in ipv6_stub Date: Wed, 1 Sep 2021 12:14:20 -0500 Message-Id: <1630516460-16421-10-git-send-email-bodong@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1630516460-16421-1-git-send-email-bodong@nvidia.com> References: <1630516460-16421-1-git-send-email-bodong@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: maord@nvidia.com, majd@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" From: wenxu BugLink: https://bugs.launchpad.net/bugs/1940872 Add ipv6_fragment to ipv6_stub to avoid calling netfilter when access ip6_fragment. Signed-off-by: wenxu Signed-off-by: David S. Miller (backport from 1d97898b36bab91e8ffb38a660cc40eaba613f88) [maor: adjust according to older kernel] Signed-off-by: Maor Dickman Signed-off-by: Bodong Wang --- include/net/ipv6_stubs.h | 2 ++ net/ipv6/addrconf_core.c | 8 ++++++++ net/ipv6/af_inet6.c | 1 + 3 files changed, 11 insertions(+) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 3e7d2c0..04d1097 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -57,6 +57,8 @@ struct ipv6_stub { const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); struct neigh_table *nd_tbl; + int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index ea00ce3..3dc871a 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -190,6 +190,13 @@ static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt) return -EAFNOSUPPORT; } +static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) +{ + kfree_skb(skb); + return -EAFNOSUPPORT; +} + const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, .ipv6_route_input = eafnosupport_ipv6_route_input, @@ -200,6 +207,7 @@ static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt) .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6, .fib6_nh_init = eafnosupport_fib6_nh_init, .ip6_del_rt = eafnosupport_ip6_del_rt, + .ipv6_fragment = eafnosupport_ipv6_fragment, }; EXPORT_SYMBOL_GPL(ipv6_stub); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 14ac1d9..918ee06 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -961,6 +961,7 @@ static int ipv6_route_input(struct sk_buff *skb) .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, .nd_tbl = &nd_tbl, + .ipv6_fragment = ip6_fragment, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {