Message ID | 1528942062-2353-1-git-send-email-makita.toshiaki@lab.ntt.co.jp |
---|---|
State | Accepted, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf,v2] xdp: Fix handling of devmap in generic XDP | expand |
On Wed, Jun 13, 2018 at 7:07 PM, Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed > the return value type of __devmap_lookup_elem() from struct net_device * > to struct bpf_dtab_netdev * but forgot to modify generic XDP code > accordingly. > Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct > net_device is expected, then skb->dev was set to invalid value. > > v2: > - Fix compiler warning without CONFIG_BPF_SYSCALL. > > Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") > Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> > --- > include/linux/bpf.h | 12 ++++++++++++ > include/linux/filter.h | 16 ++++++++++++++++ > kernel/bpf/devmap.c | 14 ++++++++++++++ > net/core/filter.c | 21 ++++----------------- > 4 files changed, 46 insertions(+), 17 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 995c3b1..7df32a3 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -488,12 +488,15 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) > > /* Map specifics */ > struct xdp_buff; > +struct sk_buff; > > struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); > void __dev_map_insert_ctx(struct bpf_map *map, u32 index); > void __dev_map_flush(struct bpf_map *map); > int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, > struct net_device *dev_rx); > +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, > + struct bpf_prog *xdp_prog); > > struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); > void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); > @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, > return 0; > } > > +struct sk_buff; > + > +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, > + struct sk_buff *skb, > + struct bpf_prog *xdp_prog) > +{ > + return 0; should you return an error code here? > +} > + > static inline > struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) > { > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 45fc0f5..8ddff1f 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -19,6 +19,7 @@ > #include <linux/cryptohash.h> > #include <linux/set_memory.h> > #include <linux/kallsyms.h> > +#include <linux/if_vlan.h> > > #include <net/sch_generic.h> > > @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void) > struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, > const struct bpf_insn *patch, u32 len); > > +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, > + struct net_device *fwd) Previously this function is only used in filter.c and now it is only used in devmap.c. Maybe this function should be in devmap.c until it will be used cross different files? > +{ > + unsigned int len; > + > + if (unlikely(!(fwd->flags & IFF_UP))) > + return -ENETDOWN; > + > + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; > + if (skb->len > len) > + return -EMSGSIZE; > + > + return 0; > +} > + > /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the > * same cpu context. Further for best results no more than a single map > * for the do_redirect/do_flush pair should be used. This limitation is > diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c > index a7cc7b3..642c97f 100644 > --- a/kernel/bpf/devmap.c > +++ b/kernel/bpf/devmap.c > @@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, > return bq_enqueue(dst, xdpf, dev_rx); > } > > +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, > + struct bpf_prog *xdp_prog) > +{ > + int err; > + > + err = __xdp_generic_ok_fwd_dev(skb, dst->dev); > + if (unlikely(err)) > + return err; > + skb->dev = dst->dev; > + generic_xdp_tx(skb, xdp_prog); > + > + return 0; > +} > + > static void *dev_map_lookup_elem(struct bpf_map *map, void *key) > { > struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); > diff --git a/net/core/filter.c b/net/core/filter.c > index 3d9ba7e..e7f12e9 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3214,20 +3214,6 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, > } > EXPORT_SYMBOL_GPL(xdp_do_redirect); > > -static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) > -{ > - unsigned int len; > - > - if (unlikely(!(fwd->flags & IFF_UP))) > - return -ENETDOWN; > - > - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; > - if (skb->len > len) > - return -EMSGSIZE; > - > - return 0; > -} > - > static int xdp_do_generic_redirect_map(struct net_device *dev, > struct sk_buff *skb, > struct xdp_buff *xdp, > @@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, > } > > if (map->map_type == BPF_MAP_TYPE_DEVMAP) { > - if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) > + struct bpf_dtab_netdev *dst = fwd; > + > + err = dev_map_generic_redirect(dst, skb, xdp_prog); > + if (unlikely(err)) > goto err; > - skb->dev = fwd; > - generic_xdp_tx(skb, xdp_prog); > } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { > struct xdp_sock *xs = fwd; > > -- > 1.8.3.1 > >
On 2018/06/14 11:56, Y Song wrote: ... >> @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, >> return 0; >> } >> >> +struct sk_buff; >> + >> +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, >> + struct sk_buff *skb, >> + struct bpf_prog *xdp_prog) >> +{ >> + return 0; > > should you return an error code here? My understanding is that this function never be called if CONFIG_BPF_SYSCALL is not set so any value is OK. I aligned it with other functions of devmap, specifically dev_map_enqueue() which returns 0. > >> +} >> + >> static inline >> struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) >> { >> diff --git a/include/linux/filter.h b/include/linux/filter.h >> index 45fc0f5..8ddff1f 100644 >> --- a/include/linux/filter.h >> +++ b/include/linux/filter.h >> @@ -19,6 +19,7 @@ >> #include <linux/cryptohash.h> >> #include <linux/set_memory.h> >> #include <linux/kallsyms.h> >> +#include <linux/if_vlan.h> >> >> #include <net/sch_generic.h> >> >> @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void) >> struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, >> const struct bpf_insn *patch, u32 len); >> >> +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, >> + struct net_device *fwd) > > Previously this function is only used in filter.c and now it is only > used in devmap.c. Maybe this function should be in devmap.c > until it will be used cross different files? This function is also called from xdp_do_generic_redirect() in net/core/filter.c, so I can't move it to devmap.c. Thanks, Toshiaki Makita
On Wed, Jun 13, 2018 at 8:40 PM, Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > On 2018/06/14 11:56, Y Song wrote: > ... >>> @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, >>> return 0; >>> } >>> >>> +struct sk_buff; >>> + >>> +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, >>> + struct sk_buff *skb, >>> + struct bpf_prog *xdp_prog) >>> +{ >>> + return 0; >> >> should you return an error code here? > > My understanding is that this function never be called if > CONFIG_BPF_SYSCALL is not set so any value is OK. > I aligned it with other functions of devmap, specifically > dev_map_enqueue() which returns 0. > >> >>> +} >>> + >>> static inline >>> struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) >>> { >>> diff --git a/include/linux/filter.h b/include/linux/filter.h >>> index 45fc0f5..8ddff1f 100644 >>> --- a/include/linux/filter.h >>> +++ b/include/linux/filter.h >>> @@ -19,6 +19,7 @@ >>> #include <linux/cryptohash.h> >>> #include <linux/set_memory.h> >>> #include <linux/kallsyms.h> >>> +#include <linux/if_vlan.h> >>> >>> #include <net/sch_generic.h> >>> >>> @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void) >>> struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, >>> const struct bpf_insn *patch, u32 len); >>> >>> +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, >>> + struct net_device *fwd) >> >> Previously this function is only used in filter.c and now it is only >> used in devmap.c. Maybe this function should be in devmap.c >> until it will be used cross different files? > > This function is also called from xdp_do_generic_redirect() in > net/core/filter.c, so I can't move it to devmap.c. Thanks for the explanation. Make sense. Acked-by: Yonghong Song <yhs@fb.com>
On Thu, 14 Jun 2018 11:07:42 +0900 Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed > the return value type of __devmap_lookup_elem() from struct net_device * > to struct bpf_dtab_netdev * but forgot to modify generic XDP code > accordingly. > Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct > net_device is expected, then skb->dev was set to invalid value. > > v2: > - Fix compiler warning without CONFIG_BPF_SYSCALL. > > Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") > Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> Thanks for catching this! Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Notice, that the current code works (and does not crash), but it is pure luck. Because struct bpf_dtab_netdev happen to have the net_device as the first member. struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ struct bpf_dtab *dtab; unsigned int bit; struct xdp_bulk_queue __percpu *bulkq; struct rcu_head rcu; };
On 2018/06/14 17:49, Jesper Dangaard Brouer wrote: > On Thu, 14 Jun 2018 11:07:42 +0900 > Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > >> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed >> the return value type of __devmap_lookup_elem() from struct net_device * >> to struct bpf_dtab_netdev * but forgot to modify generic XDP code >> accordingly. >> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct >> net_device is expected, then skb->dev was set to invalid value. >> >> v2: >> - Fix compiler warning without CONFIG_BPF_SYSCALL. >> >> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") >> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> > > Thanks for catching this! > > Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> > > Notice, that the current code works (and does not crash), but it is > pure luck. Because struct bpf_dtab_netdev happen to have the > net_device as the first member. > > struct bpf_dtab_netdev { > struct net_device *dev; /* must be first member, due to tracepoint */ > struct bpf_dtab *dtab; > unsigned int bit; > struct xdp_bulk_queue __percpu *bulkq; > struct rcu_head rcu; > }; > Actually no, the current code does not work and can crash, because we need to dereference the pointer, i.e. need fwd->dev (IOW *fwd) not fwd.
On Thu, 14 Jun 2018 18:00:22 +0900 Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > On 2018/06/14 17:49, Jesper Dangaard Brouer wrote: > > On Thu, 14 Jun 2018 11:07:42 +0900 > > Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > > > >> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed > >> the return value type of __devmap_lookup_elem() from struct net_device * > >> to struct bpf_dtab_netdev * but forgot to modify generic XDP code > >> accordingly. > >> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct > >> net_device is expected, then skb->dev was set to invalid value. > >> > >> v2: > >> - Fix compiler warning without CONFIG_BPF_SYSCALL. > >> > >> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") > >> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> > > > > Thanks for catching this! > > > > Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> > > > > Notice, that the current code works (and does not crash), but it is > > pure luck. Because struct bpf_dtab_netdev happen to have the > > net_device as the first member. > > > > struct bpf_dtab_netdev { > > struct net_device *dev; /* must be first member, due to tracepoint */ > > struct bpf_dtab *dtab; > > unsigned int bit; > > struct xdp_bulk_queue __percpu *bulkq; > > struct rcu_head rcu; > > }; > > > > Actually no, the current code does not work and can crash, because we > need to dereference the pointer, i.e. need fwd->dev (IOW *fwd) not fwd. You are right, I ran some more tests, and yes, I managed to crash the kernel. Strange that is worked in my initial testing. Now it consistently crash. [] general protection fault: 0000 [#1] SMP PTI [] Modules linked in: time_bench_sample(O) time_bench(O) fuse mlx5_ib ib_uverbs ib_core tun nfnetli nllc bpfilter sunrpc coretemp kvm_intel kvm irqbypass intel_cstate intel_uncore intel_rapl_perf pcs phpchp wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad sch_fq_codel hid_generic mlx5_core i40e ml xdevlink mdio i2c_algo_bit ptp sd_mod i2c_core pps_core [last unloaded: x_tables] [] CPU: 0 PID: 8 Comm: ksoftirqd/0 Tainted: G W O 4.17.0-rc7-net-next-xdp-xdp_paper01+ [] Hardware name: Supermicro Super Server/X10SRi-F, BIOS 2.0a 08/01/2016 [] RIP: 0010:netdev_pick_tx+0x3f/0xc0 [] RSP: 0018:ffffc900031c3b98 EFLAGS: 00010296 [] RAX: dead000000000200 RBX: ffff88070f3d2e80 RCX: 0000000000000200 [] RDX: 0000000000000000 RSI: ffff88070b678d00 RDI: ffff88070f3d2e80 [] RBP: ffff88070f3d2e80 R08: ffff88084fda8080 R09: ffff88087c802f00 [] R10: ffffea001c2d1e00 R11: ffff88081e8287f0 R12: ffff88070b678d00 [] R13: ffffc90003843000 R14: 0000000000000000 R15: ffffc900031c3c30 [] FS: 0000000000000000(0000) GS:ffff88087fc00000(0000) knlGS:0000000000000000 [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [] CR2: 00007fc939b36140 CR3: 000000087f20a005 CR4: 00000000003606f0 [] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [] Call Trace: [] generic_xdp_tx+0x24/0x180 [] xdp_do_generic_redirect+0x240/0x390 [] do_xdp_generic+0x250/0x3b0 [] ? kmem_cache_alloc+0x38/0x1c0 [] netif_receive_skb_internal+0x8d/0xe0 [] napi_gro_receive+0xb5/0xd0 [] mlx5e_handle_rx_cqe+0x1a4/0x5d0 [mlx5_core] [] mlx5e_poll_rx_cq+0xbc/0x8d0 [mlx5_core] [] ? mlx5e_post_rx_wqes+0x2bc/0x400 [mlx5_core] [] mlx5e_napi_poll+0xb0/0xcc0 [mlx5_core] [] net_rx_action+0x145/0x3d0 [] ? sort_range+0x20/0x20 [] __do_softirq+0xdc/0x2b4 [] ? sort_range+0x20/0x20 [] run_ksoftirqd+0x18/0x20 [] smpboot_thread_fn+0xdf/0x150 [] kthread+0x111/0x130 [] ? kthread_create_worker_on_cpu+0x70/0x70 [] ret_from_fork+0x1f/0x30 [] Code: 00 83 e8 01 3d ff 1f 00 00 76 10 65 8b 05 3a 02 94 7e 83 c0 01 89 86 ac 00 00 00 83 bd 8c 03 00 00 01 74 52 48 8b 85 e8 01 00 00 <48> 8b 40 30 48 85 c0 74 48 48 c7 c1 50 85 6c 81 4c 89 e6 48 89 [] RIP: netdev_pick_tx+0x3f/0xc0 RSP: ffffc900031c3b98 [] ---[ end trace 8b77c7349af71e1b ]--- [] Kernel panic - not syncing: Fatal exception in interrupt [] Kernel Offset: disabled [] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- (gdb) list *(generic_xdp_tx)+0x24 0xffffffff816cf874 is in generic_xdp_tx (net/core/dev.c:4142). 4137 struct netdev_queue *txq; 4138 bool free_skb = true; 4139 int cpu, rc; 4140 4141 txq = netdev_pick_tx(dev, skb, NULL); 4142 cpu = smp_processor_id(); 4143 HARD_TX_LOCK(dev, txq, cpu); 4144 if (!netif_xmit_stopped(txq)) { 4145 rc = netdev_start_xmit(skb, dev, txq, 0); 4146 if (dev_xmit_complete(rc)) (gdb) list *(netdev_pick_tx)+0x3f 0xffffffff816ceeef is in netdev_pick_tx (net/core/dev.c:3472). 3467 #endif 3468 3469 if (dev->real_num_tx_queues != 1) { 3470 const struct net_device_ops *ops = dev->netdev_ops; 3471 3472 if (ops->ndo_select_queue) 3473 queue_index = ops->ndo_select_queue(dev, skb, accel_priv, 3474 __netdev_pick_tx); 3475 else 3476 queue_index = __netdev_pick_tx(dev, skb); (gdb)
On 06/14/2018 11:33 AM, Jesper Dangaard Brouer wrote: > On Thu, 14 Jun 2018 18:00:22 +0900 > Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: >> On 2018/06/14 17:49, Jesper Dangaard Brouer wrote: >>> On Thu, 14 Jun 2018 11:07:42 +0900 >>> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: >>> >>>> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed >>>> the return value type of __devmap_lookup_elem() from struct net_device * >>>> to struct bpf_dtab_netdev * but forgot to modify generic XDP code >>>> accordingly. >>>> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct >>>> net_device is expected, then skb->dev was set to invalid value. >>>> >>>> v2: >>>> - Fix compiler warning without CONFIG_BPF_SYSCALL. >>>> >>>> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") >>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> >>> >>> Thanks for catching this! >>> >>> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Applied to bpf, thanks Toshiaki!
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 995c3b1..7df32a3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,12 +488,15 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* Map specifics */ struct xdp_buff; +struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return 0; } +struct sk_buff; + +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, + struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + return 0; +} + static inline struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 45fc0f5..8ddff1f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,6 +19,7 @@ #include <linux/cryptohash.h> #include <linux/set_memory.h> #include <linux/kallsyms.h> +#include <linux/if_vlan.h> #include <net/sch_generic.h> @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, + struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a7cc7b3..642c97f 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return bq_enqueue(dst, xdpf, dev_rx); } +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + int err; + + err = __xdp_generic_ok_fwd_dev(skb, dst->dev); + if (unlikely(err)) + return err; + skb->dev = dst->dev; + generic_xdp_tx(skb, xdp_prog); + + return 0; +} + static void *dev_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); diff --git a/net/core/filter.c b/net/core/filter.c index 3d9ba7e..e7f12e9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3214,20 +3214,6 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, } EXPORT_SYMBOL_GPL(xdp_do_redirect); -static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) -{ - unsigned int len; - - if (unlikely(!(fwd->flags & IFF_UP))) - return -ENETDOWN; - - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) - return -EMSGSIZE; - - return 0; -} - static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, @@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, } if (map->map_type == BPF_MAP_TYPE_DEVMAP) { - if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + struct bpf_dtab_netdev *dst = fwd; + + err = dev_map_generic_redirect(dst, skb, xdp_prog); + if (unlikely(err)) goto err; - skb->dev = fwd; - generic_xdp_tx(skb, xdp_prog); } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { struct xdp_sock *xs = fwd;
Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed the return value type of __devmap_lookup_elem() from struct net_device * to struct bpf_dtab_netdev * but forgot to modify generic XDP code accordingly. Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct net_device is expected, then skb->dev was set to invalid value. v2: - Fix compiler warning without CONFIG_BPF_SYSCALL. Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> --- include/linux/bpf.h | 12 ++++++++++++ include/linux/filter.h | 16 ++++++++++++++++ kernel/bpf/devmap.c | 14 ++++++++++++++ net/core/filter.c | 21 ++++----------------- 4 files changed, 46 insertions(+), 17 deletions(-)