[bpf,v2] xdp: Fix handling of devmap in generic XDP

Message ID 1528942062-2353-1-git-send-email-makita.toshiaki@lab.ntt.co.jp
State Accepted
Delegated to: BPF Maintainers
Headers show
Series
  • [bpf,v2] xdp: Fix handling of devmap in generic XDP
Related show

Commit Message

Toshiaki Makita June 14, 2018, 2:07 a.m.
Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed
the return value type of __devmap_lookup_elem() from struct net_device *
to struct bpf_dtab_netdev * but forgot to modify generic XDP code
accordingly.
Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct
net_device is expected, then skb->dev was set to invalid value.

v2:
- Fix compiler warning without CONFIG_BPF_SYSCALL.

Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
---
 include/linux/bpf.h    | 12 ++++++++++++
 include/linux/filter.h | 16 ++++++++++++++++
 kernel/bpf/devmap.c    | 14 ++++++++++++++
 net/core/filter.c      | 21 ++++-----------------
 4 files changed, 46 insertions(+), 17 deletions(-)

Comments

Y Song June 14, 2018, 2:56 a.m. | #1
On Wed, Jun 13, 2018 at 7:07 PM, Toshiaki Makita
<makita.toshiaki@lab.ntt.co.jp> wrote:
> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed
> the return value type of __devmap_lookup_elem() from struct net_device *
> to struct bpf_dtab_netdev * but forgot to modify generic XDP code
> accordingly.
> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct
> net_device is expected, then skb->dev was set to invalid value.
>
> v2:
> - Fix compiler warning without CONFIG_BPF_SYSCALL.
>
> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue")
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> ---
>  include/linux/bpf.h    | 12 ++++++++++++
>  include/linux/filter.h | 16 ++++++++++++++++
>  kernel/bpf/devmap.c    | 14 ++++++++++++++
>  net/core/filter.c      | 21 ++++-----------------
>  4 files changed, 46 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 995c3b1..7df32a3 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -488,12 +488,15 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
>
>  /* Map specifics */
>  struct xdp_buff;
> +struct sk_buff;
>
>  struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
>  void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
>  void __dev_map_flush(struct bpf_map *map);
>  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>                     struct net_device *dev_rx);
> +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
> +                            struct bpf_prog *xdp_prog);
>
>  struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
>  void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
> @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>         return 0;
>  }
>
> +struct sk_buff;
> +
> +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
> +                                          struct sk_buff *skb,
> +                                          struct bpf_prog *xdp_prog)
> +{
> +       return 0;

should you return an error code here?

> +}
> +
>  static inline
>  struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
>  {
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 45fc0f5..8ddff1f 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -19,6 +19,7 @@
>  #include <linux/cryptohash.h>
>  #include <linux/set_memory.h>
>  #include <linux/kallsyms.h>
> +#include <linux/if_vlan.h>
>
>  #include <net/sch_generic.h>
>
> @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void)
>  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
>                                        const struct bpf_insn *patch, u32 len);
>
> +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
> +                                          struct net_device *fwd)

Previously this function is only used in filter.c and now it is only
used in devmap.c. Maybe this function should be in devmap.c
until it will be used cross different files?

> +{
> +       unsigned int len;
> +
> +       if (unlikely(!(fwd->flags & IFF_UP)))
> +               return -ENETDOWN;
> +
> +       len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
> +       if (skb->len > len)
> +               return -EMSGSIZE;
> +
> +       return 0;
> +}
> +
>  /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
>   * same cpu context. Further for best results no more than a single map
>   * for the do_redirect/do_flush pair should be used. This limitation is
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index a7cc7b3..642c97f 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>         return bq_enqueue(dst, xdpf, dev_rx);
>  }
>
> +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
> +                            struct bpf_prog *xdp_prog)
> +{
> +       int err;
> +
> +       err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
> +       if (unlikely(err))
> +               return err;
> +       skb->dev = dst->dev;
> +       generic_xdp_tx(skb, xdp_prog);
> +
> +       return 0;
> +}
> +
>  static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
>  {
>         struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 3d9ba7e..e7f12e9 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3214,20 +3214,6 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
>  }
>  EXPORT_SYMBOL_GPL(xdp_do_redirect);
>
> -static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
> -{
> -       unsigned int len;
> -
> -       if (unlikely(!(fwd->flags & IFF_UP)))
> -               return -ENETDOWN;
> -
> -       len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
> -       if (skb->len > len)
> -               return -EMSGSIZE;
> -
> -       return 0;
> -}
> -
>  static int xdp_do_generic_redirect_map(struct net_device *dev,
>                                        struct sk_buff *skb,
>                                        struct xdp_buff *xdp,
> @@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
>         }
>
>         if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> -               if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
> +               struct bpf_dtab_netdev *dst = fwd;
> +
> +               err = dev_map_generic_redirect(dst, skb, xdp_prog);
> +               if (unlikely(err))
>                         goto err;
> -               skb->dev = fwd;
> -               generic_xdp_tx(skb, xdp_prog);
>         } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
>                 struct xdp_sock *xs = fwd;
>
> --
> 1.8.3.1
>
>
Toshiaki Makita June 14, 2018, 3:40 a.m. | #2
On 2018/06/14 11:56, Y Song wrote:
...
>> @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>>         return 0;
>>  }
>>
>> +struct sk_buff;
>> +
>> +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
>> +                                          struct sk_buff *skb,
>> +                                          struct bpf_prog *xdp_prog)
>> +{
>> +       return 0;
> 
> should you return an error code here?

My understanding is that this function never be called if
CONFIG_BPF_SYSCALL is not set so any value is OK.
I aligned it with other functions of devmap, specifically
dev_map_enqueue() which returns 0.

> 
>> +}
>> +
>>  static inline
>>  struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
>>  {
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 45fc0f5..8ddff1f 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -19,6 +19,7 @@
>>  #include <linux/cryptohash.h>
>>  #include <linux/set_memory.h>
>>  #include <linux/kallsyms.h>
>> +#include <linux/if_vlan.h>
>>
>>  #include <net/sch_generic.h>
>>
>> @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void)
>>  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
>>                                        const struct bpf_insn *patch, u32 len);
>>
>> +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
>> +                                          struct net_device *fwd)
> 
> Previously this function is only used in filter.c and now it is only
> used in devmap.c. Maybe this function should be in devmap.c
> until it will be used cross different files?

This function is also called from xdp_do_generic_redirect() in
net/core/filter.c, so I can't move it to devmap.c.

Thanks,
Toshiaki Makita
Y Song June 14, 2018, 4:36 a.m. | #3
On Wed, Jun 13, 2018 at 8:40 PM, Toshiaki Makita
<makita.toshiaki@lab.ntt.co.jp> wrote:
> On 2018/06/14 11:56, Y Song wrote:
> ...
>>> @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>>>         return 0;
>>>  }
>>>
>>> +struct sk_buff;
>>> +
>>> +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
>>> +                                          struct sk_buff *skb,
>>> +                                          struct bpf_prog *xdp_prog)
>>> +{
>>> +       return 0;
>>
>> should you return an error code here?
>
> My understanding is that this function never be called if
> CONFIG_BPF_SYSCALL is not set so any value is OK.
> I aligned it with other functions of devmap, specifically
> dev_map_enqueue() which returns 0.
>
>>
>>> +}
>>> +
>>>  static inline
>>>  struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
>>>  {
>>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>>> index 45fc0f5..8ddff1f 100644
>>> --- a/include/linux/filter.h
>>> +++ b/include/linux/filter.h
>>> @@ -19,6 +19,7 @@
>>>  #include <linux/cryptohash.h>
>>>  #include <linux/set_memory.h>
>>>  #include <linux/kallsyms.h>
>>> +#include <linux/if_vlan.h>
>>>
>>>  #include <net/sch_generic.h>
>>>
>>> @@ -786,6 +787,21 @@ static inline bool bpf_dump_raw_ok(void)
>>>  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
>>>                                        const struct bpf_insn *patch, u32 len);
>>>
>>> +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
>>> +                                          struct net_device *fwd)
>>
>> Previously this function is only used in filter.c and now it is only
>> used in devmap.c. Maybe this function should be in devmap.c
>> until it will be used cross different files?
>
> This function is also called from xdp_do_generic_redirect() in
> net/core/filter.c, so I can't move it to devmap.c.

Thanks for the explanation. Make sense.
Acked-by: Yonghong Song <yhs@fb.com>
Jesper Dangaard Brouer June 14, 2018, 8:49 a.m. | #4
On Thu, 14 Jun 2018 11:07:42 +0900
Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:

> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed
> the return value type of __devmap_lookup_elem() from struct net_device *
> to struct bpf_dtab_netdev * but forgot to modify generic XDP code
> accordingly.
> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct
> net_device is expected, then skb->dev was set to invalid value.
> 
> v2:
> - Fix compiler warning without CONFIG_BPF_SYSCALL.
> 
> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue")
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

Thanks for catching this!

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

Notice, that the current code works (and does not crash), but it is
pure luck.  Because struct bpf_dtab_netdev happen to have the
net_device as the first member.

struct bpf_dtab_netdev {
	struct net_device *dev; /* must be first member, due to tracepoint */
	struct bpf_dtab *dtab;
	unsigned int bit;
	struct xdp_bulk_queue __percpu *bulkq;
	struct rcu_head rcu;
};
Toshiaki Makita June 14, 2018, 9 a.m. | #5
On 2018/06/14 17:49, Jesper Dangaard Brouer wrote:
> On Thu, 14 Jun 2018 11:07:42 +0900
> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
> 
>> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed
>> the return value type of __devmap_lookup_elem() from struct net_device *
>> to struct bpf_dtab_netdev * but forgot to modify generic XDP code
>> accordingly.
>> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct
>> net_device is expected, then skb->dev was set to invalid value.
>>
>> v2:
>> - Fix compiler warning without CONFIG_BPF_SYSCALL.
>>
>> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue")
>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> 
> Thanks for catching this!
> 
> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
> 
> Notice, that the current code works (and does not crash), but it is
> pure luck.  Because struct bpf_dtab_netdev happen to have the
> net_device as the first member.
> 
> struct bpf_dtab_netdev {
> 	struct net_device *dev; /* must be first member, due to tracepoint */
> 	struct bpf_dtab *dtab;
> 	unsigned int bit;
> 	struct xdp_bulk_queue __percpu *bulkq;
> 	struct rcu_head rcu;
> };
> 

Actually no, the current code does not work and can crash, because we
need to dereference the pointer, i.e. need fwd->dev (IOW *fwd) not fwd.
Jesper Dangaard Brouer June 14, 2018, 9:33 a.m. | #6
On Thu, 14 Jun 2018 18:00:22 +0900
Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:

> On 2018/06/14 17:49, Jesper Dangaard Brouer wrote:
> > On Thu, 14 Jun 2018 11:07:42 +0900
> > Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
> >   
> >> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed
> >> the return value type of __devmap_lookup_elem() from struct net_device *
> >> to struct bpf_dtab_netdev * but forgot to modify generic XDP code
> >> accordingly.
> >> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct
> >> net_device is expected, then skb->dev was set to invalid value.
> >>
> >> v2:
> >> - Fix compiler warning without CONFIG_BPF_SYSCALL.
> >>
> >> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue")
> >> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>  
> > 
> > Thanks for catching this!
> > 
> > Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
> > 
> > Notice, that the current code works (and does not crash), but it is
> > pure luck.  Because struct bpf_dtab_netdev happen to have the
> > net_device as the first member.
> > 
> > struct bpf_dtab_netdev {
> > 	struct net_device *dev; /* must be first member, due to tracepoint */
> > 	struct bpf_dtab *dtab;
> > 	unsigned int bit;
> > 	struct xdp_bulk_queue __percpu *bulkq;
> > 	struct rcu_head rcu;
> > };
> >   
> 
> Actually no, the current code does not work and can crash, because we
> need to dereference the pointer, i.e. need fwd->dev (IOW *fwd) not fwd.

You are right, I ran some more tests, and yes, I managed to crash the
kernel.  Strange that is worked in my initial testing.  Now it
consistently crash.

[] general protection fault: 0000 [#1] SMP PTI
[] Modules linked in: time_bench_sample(O) time_bench(O) fuse mlx5_ib ib_uverbs ib_core tun nfnetli
nllc bpfilter sunrpc coretemp kvm_intel kvm irqbypass intel_cstate intel_uncore intel_rapl_perf pcs
phpchp wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad sch_fq_codel hid_generic mlx5_core i40e ml
xdevlink mdio i2c_algo_bit ptp sd_mod i2c_core pps_core [last unloaded: x_tables]
[] CPU: 0 PID: 8 Comm: ksoftirqd/0 Tainted: G        W  O      4.17.0-rc7-net-next-xdp-xdp_paper01+
 
[] Hardware name: Supermicro Super Server/X10SRi-F, BIOS 2.0a 08/01/2016
[] RIP: 0010:netdev_pick_tx+0x3f/0xc0
[] RSP: 0018:ffffc900031c3b98 EFLAGS: 00010296
[] RAX: dead000000000200 RBX: ffff88070f3d2e80 RCX: 0000000000000200
[] RDX: 0000000000000000 RSI: ffff88070b678d00 RDI: ffff88070f3d2e80
[] RBP: ffff88070f3d2e80 R08: ffff88084fda8080 R09: ffff88087c802f00
[] R10: ffffea001c2d1e00 R11: ffff88081e8287f0 R12: ffff88070b678d00
[] R13: ffffc90003843000 R14: 0000000000000000 R15: ffffc900031c3c30
[] FS:  0000000000000000(0000) GS:ffff88087fc00000(0000) knlGS:0000000000000000
[] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[] CR2: 00007fc939b36140 CR3: 000000087f20a005 CR4: 00000000003606f0
[] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[] Call Trace:
[]  generic_xdp_tx+0x24/0x180
[]  xdp_do_generic_redirect+0x240/0x390
[]  do_xdp_generic+0x250/0x3b0
[]  ? kmem_cache_alloc+0x38/0x1c0
[]  netif_receive_skb_internal+0x8d/0xe0
[]  napi_gro_receive+0xb5/0xd0
[]  mlx5e_handle_rx_cqe+0x1a4/0x5d0 [mlx5_core]
[]  mlx5e_poll_rx_cq+0xbc/0x8d0 [mlx5_core]
[]  ? mlx5e_post_rx_wqes+0x2bc/0x400 [mlx5_core]
[]  mlx5e_napi_poll+0xb0/0xcc0 [mlx5_core]
[]  net_rx_action+0x145/0x3d0
[]  ? sort_range+0x20/0x20
[]  __do_softirq+0xdc/0x2b4
[]  ? sort_range+0x20/0x20
[]  run_ksoftirqd+0x18/0x20
[]  smpboot_thread_fn+0xdf/0x150
[]  kthread+0x111/0x130
[]  ? kthread_create_worker_on_cpu+0x70/0x70
[]  ret_from_fork+0x1f/0x30
[] Code: 00 83 e8 01 3d ff 1f 00 00 76 10 65 8b 05 3a 02 94 7e 83 c0 01 89 86 ac 00 00 00 83 bd 8c 03 00 00 01 74 52 48 8b 85 e8 01 00 00 <48> 8b 40 30 48 85 c0 74 48 48 c7 c1 50 85 6c 81 4c 89 e6 48 89 
[] RIP: netdev_pick_tx+0x3f/0xc0 RSP: ffffc900031c3b98
[] ---[ end trace 8b77c7349af71e1b ]---
[] Kernel panic - not syncing: Fatal exception in interrupt
[] Kernel Offset: disabled
[] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---


(gdb) list *(generic_xdp_tx)+0x24
0xffffffff816cf874 is in generic_xdp_tx (net/core/dev.c:4142).
4137		struct netdev_queue *txq;
4138		bool free_skb = true;
4139		int cpu, rc;
4140	
4141		txq = netdev_pick_tx(dev, skb, NULL);
4142		cpu = smp_processor_id();
4143		HARD_TX_LOCK(dev, txq, cpu);
4144		if (!netif_xmit_stopped(txq)) {
4145			rc = netdev_start_xmit(skb, dev, txq, 0);
4146			if (dev_xmit_complete(rc))


(gdb) list *(netdev_pick_tx)+0x3f
0xffffffff816ceeef is in netdev_pick_tx (net/core/dev.c:3472).
3467	#endif
3468	
3469		if (dev->real_num_tx_queues != 1) {
3470			const struct net_device_ops *ops = dev->netdev_ops;
3471	
3472			if (ops->ndo_select_queue)
3473				queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
3474								    __netdev_pick_tx);
3475			else
3476				queue_index = __netdev_pick_tx(dev, skb);
(gdb)
Daniel Borkmann June 15, 2018, 10:27 p.m. | #7
On 06/14/2018 11:33 AM, Jesper Dangaard Brouer wrote:
> On Thu, 14 Jun 2018 18:00:22 +0900
> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
>> On 2018/06/14 17:49, Jesper Dangaard Brouer wrote:
>>> On Thu, 14 Jun 2018 11:07:42 +0900
>>> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
>>>   
>>>> Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed
>>>> the return value type of __devmap_lookup_elem() from struct net_device *
>>>> to struct bpf_dtab_netdev * but forgot to modify generic XDP code
>>>> accordingly.
>>>> Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct
>>>> net_device is expected, then skb->dev was set to invalid value.
>>>>
>>>> v2:
>>>> - Fix compiler warning without CONFIG_BPF_SYSCALL.
>>>>
>>>> Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue")
>>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>  
>>>
>>> Thanks for catching this!
>>>
>>> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

Applied to bpf, thanks Toshiaki!

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 995c3b1..7df32a3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -488,12 +488,15 @@  static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 
 /* Map specifics */
 struct xdp_buff;
+struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
+			     struct bpf_prog *xdp_prog);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
@@ -586,6 +589,15 @@  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return 0;
 }
 
+struct sk_buff;
+
+static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
+					   struct sk_buff *skb,
+					   struct bpf_prog *xdp_prog)
+{
+	return 0;
+}
+
 static inline
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 45fc0f5..8ddff1f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -19,6 +19,7 @@ 
 #include <linux/cryptohash.h>
 #include <linux/set_memory.h>
 #include <linux/kallsyms.h>
+#include <linux/if_vlan.h>
 
 #include <net/sch_generic.h>
 
@@ -786,6 +787,21 @@  static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
+static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
+					   struct net_device *fwd)
+{
+	unsigned int len;
+
+	if (unlikely(!(fwd->flags & IFF_UP)))
+		return -ENETDOWN;
+
+	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+	if (skb->len > len)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
  * same cpu context. Further for best results no more than a single map
  * for the do_redirect/do_flush pair should be used. This limitation is
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a7cc7b3..642c97f 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -345,6 +345,20 @@  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return bq_enqueue(dst, xdpf, dev_rx);
 }
 
+int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
+			     struct bpf_prog *xdp_prog)
+{
+	int err;
+
+	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	if (unlikely(err))
+		return err;
+	skb->dev = dst->dev;
+	generic_xdp_tx(skb, xdp_prog);
+
+	return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d9ba7e..e7f12e9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3214,20 +3214,6 @@  int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
-static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
-{
-	unsigned int len;
-
-	if (unlikely(!(fwd->flags & IFF_UP)))
-		return -ENETDOWN;
-
-	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-	if (skb->len > len)
-		return -EMSGSIZE;
-
-	return 0;
-}
-
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
@@ -3256,10 +3242,11 @@  static int xdp_do_generic_redirect_map(struct net_device *dev,
 	}
 
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
-		if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+		struct bpf_dtab_netdev *dst = fwd;
+
+		err = dev_map_generic_redirect(dst, skb, xdp_prog);
+		if (unlikely(err))
 			goto err;
-		skb->dev = fwd;
-		generic_xdp_tx(skb, xdp_prog);
 	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 		struct xdp_sock *xs = fwd;