diff mbox series

[bpf-next,v2,1/2] net: xdp: refactor XDP_QUERY_PROG{,_HW} to netdev

Message ID 20190531094215.3729-2-bjorn.topel@gmail.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series net: xdp: refactor the XDP_QUERY_PROG and XDP_QUERY_PROG_HW code | expand

Commit Message

Björn Töpel May 31, 2019, 9:42 a.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
command of ndo_bpf. The query code is fairly generic. This commit
refactors the query code up from the drivers to the netdev level.

The struct net_device has gained two new members: xdp_prog_hw and
xdp_flags. The former is the offloaded XDP program, if any, and the
latter tracks the flags that the supplied when attaching the XDP
program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.

The xdp_prog member, previously only used for SKB_MODE, is shared with
DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
mutually exclusive. To differentiate between the two modes, a new
internal flag is introduced as well.

The program query operations is all done under the rtnl lock. However,
the xdp_prog member is __rcu annotated, and used in a lock-less manner
for the SKB_MODE. Now that xdp_prog member is shared from a query
program perspective, RCU read and assignments functions are still used
when altering xdp_prog, even when only for queries in DRV_MODE. This
is for sparse/lockdep correctness.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/netdevice.h |  15 +++--
 include/net/xdp.h         |   4 ++
 net/core/dev.c            | 138 ++++++++++++++++++++++++--------------
 net/core/rtnetlink.c      |  53 +++++++--------
 net/core/xdp.c            |  17 +++--
 5 files changed, 139 insertions(+), 88 deletions(-)

Comments

Saeed Mahameed May 31, 2019, 7:18 p.m. UTC | #1
On Fri, 2019-05-31 at 11:42 +0200, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
> command of ndo_bpf. The query code is fairly generic. This commit
> refactors the query code up from the drivers to the netdev level.
> 
> The struct net_device has gained two new members: xdp_prog_hw and
> xdp_flags. The former is the offloaded XDP program, if any, and the
> latter tracks the flags that the supplied when attaching the XDP
> program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
> 
> The xdp_prog member, previously only used for SKB_MODE, is shared
> with
> DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
> mutually exclusive. To differentiate between the two modes, a new
> internal flag is introduced as well.

Just thinking out loud, why can't we allow any combination of
HW/DRV/SKB modes? they are totally different attach points in a totally
different checkpoints in a frame life cycle.

Down the road i think we will utilize this fact and start introducing
SKB helpers for SKB mode and driver helpers for DRV mode..

> 
> The program query operations is all done under the rtnl lock.
> However,
> the xdp_prog member is __rcu annotated, and used in a lock-less
> manner
> for the SKB_MODE. Now that xdp_prog member is shared from a query
> program perspective, RCU read and assignments functions are still
> used
> when altering xdp_prog, even when only for queries in DRV_MODE. This
> is for sparse/lockdep correctness.
> 
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  include/linux/netdevice.h |  15 +++--
>  include/net/xdp.h         |   4 ++
>  net/core/dev.c            | 138 ++++++++++++++++++++++++----------
> ----
>  net/core/rtnetlink.c      |  53 +++++++--------
>  net/core/xdp.c            |  17 +++--
>  5 files changed, 139 insertions(+), 88 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 44b47e9df94a..f3a875a52c6c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1940,6 +1940,9 @@ struct net_device {
>  #endif
>  	struct hlist_node	index_hlist;
>  
> +	struct bpf_prog		*xdp_prog_hw;
> +	u32			xdp_flags;
> +
>  /*
>   * Cache lines mostly used on transmit path
>   */
> @@ -2043,11 +2046,14 @@ struct net_device {
>  };
>  #define to_net_dev(d) container_of(d, struct net_device, dev)
>  
> +/* Reusing the XDP flags space for kernel internal flag */
> +#define XDP_FLAGS_KERN_GENERIC (1U << 4)
> +static_assert(!(XDP_FLAGS_KERN_GENERIC & XDP_FLAGS_MASK));
> +
>  static inline bool netif_elide_gro(const struct net_device *dev)
>  {
> -	if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
> -		return true;
> -	return false;
> +	return !(dev->features & NETIF_F_GRO) ||
> +		dev->xdp_flags & XDP_FLAGS_KERN_GENERIC;
>  }
>  
>  #define	NETDEV_ALIGN		32
> @@ -3684,8 +3690,7 @@ struct sk_buff *dev_hard_start_xmit(struct
> sk_buff *skb, struct net_device *dev,
>  typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf
> *bpf);
>  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack
> *extack,
>  		      int fd, u32 flags);
> -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
> -		    enum bpf_netdev_command cmd);
> +u32 dev_xdp_query(struct net_device *dev, unsigned int mode);
>  int xdp_umem_query(struct net_device *dev, u16 queue_id);
>  
>  int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 0f25b3675c5c..3691280c8fc1 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -51,6 +51,7 @@ struct xdp_mem_info {
>  };
>  
>  struct page_pool;
> +struct netlink_ext_ack;
>  
>  struct zero_copy_allocator {
>  	void (*free)(struct zero_copy_allocator *zca, unsigned long
> handle);
> @@ -166,4 +167,7 @@ bool xdp_attachment_flags_ok(struct
> xdp_attachment_info *info,
>  void xdp_attachment_setup(struct xdp_attachment_info *info,
>  			  struct netdev_bpf *bpf);
>  
> +bool xdp_prog_flags_ok(u32 old_flags, u32 new_flags,
> +		       struct netlink_ext_ack *extack);
> +
>  #endif /* __LINUX_NET_XDP_H__ */
> diff --git a/net/core/dev.c b/net/core/dev.c
> index b6b8505cfb3e..1a9da508149a 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -8005,21 +8005,43 @@ int dev_change_proto_down_generic(struct
> net_device *dev, bool proto_down)
>  }
>  EXPORT_SYMBOL(dev_change_proto_down_generic);
>  
> -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
> -		    enum bpf_netdev_command cmd)
> +static u32 dev_xdp_query_generic(struct net_device *dev)
>  {
> -	struct netdev_bpf xdp;
> +	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
>  
> -	if (!bpf_op)
> -		return 0;
> +	return prog && dev->xdp_flags & XDP_FLAGS_KERN_GENERIC ?
> +		prog->aux->id : 0;
> +}
>  
> -	memset(&xdp, 0, sizeof(xdp));
> -	xdp.command = cmd;
> +static u32 dev_xdp_query_drv(struct net_device *dev)
> +{
> +	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
> +
> +	return prog && !(dev->xdp_flags & XDP_FLAGS_KERN_GENERIC) ?
> +		prog->aux->id : 0;
> +}
> +
> +static u32 dev_xdp_current_mode(struct net_device *dev)
> +{
> +	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
>  
> -	/* Query must always succeed. */
> -	WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
> +	if (prog)
> +		return dev_xdp_query_generic(dev) ? XDP_FLAGS_SKB_MODE
> :
> +			XDP_FLAGS_DRV_MODE;
> +	return 0;
> +}
>  
> -	return xdp.prog_id;
> +u32 dev_xdp_query(struct net_device *dev, unsigned int mode)
> +{
> +	switch (mode) {
> +	case XDP_FLAGS_DRV_MODE:
> +		return dev_xdp_query_drv(dev);
> +	case XDP_FLAGS_SKB_MODE:
> +		return dev_xdp_query_generic(dev);
> +	case XDP_FLAGS_HW_MODE:
> +		return dev->xdp_prog_hw ? dev->xdp_prog_hw->aux->id :
> 0;
> +	}
> +	return 0;
>  }
>  
>  static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
> @@ -8027,45 +8049,47 @@ static int dev_xdp_install(struct net_device
> *dev, bpf_op_t bpf_op,
>  			   struct bpf_prog *prog)
>  {
>  	struct netdev_bpf xdp;
> +	int err;
>  
>  	memset(&xdp, 0, sizeof(xdp));
> -	if (flags & XDP_FLAGS_HW_MODE)
> +	if (flags & XDP_FLAGS_HW_MODE) {
>  		xdp.command = XDP_SETUP_PROG_HW;
> -	else
> +		xdp.flags = XDP_FLAGS_HW_MODE;
> +	} else {
>  		xdp.command = XDP_SETUP_PROG;
> +		xdp.flags = flags;
> +	}
>  	xdp.extack = extack;
> -	xdp.flags = flags;
>  	xdp.prog = prog;
>  
> -	return bpf_op(dev, &xdp);
> +	err = bpf_op(dev, &xdp);
> +	if (err)
> +		return err;
> +
> +	if (flags & XDP_FLAGS_HW_MODE) {
> +		dev->xdp_prog_hw = prog;
> +		return 0;
> +	}
> +
> +	rcu_assign_pointer(dev->xdp_prog, prog);
> +	dev->xdp_flags = prog ? flags : 0;
> +	return 0;
>  }
>  
>  static void dev_xdp_uninstall(struct net_device *dev)
>  {
> -	struct netdev_bpf xdp;
> -	bpf_op_t ndo_bpf;
> -
> -	/* Remove generic XDP */
> -	WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0,
> NULL));
> -
> -	/* Remove from the driver */
> -	ndo_bpf = dev->netdev_ops->ndo_bpf;
> -	if (!ndo_bpf)
> -		return;
> -
> -	memset(&xdp, 0, sizeof(xdp));
> -	xdp.command = XDP_QUERY_PROG;
> -	WARN_ON(ndo_bpf(dev, &xdp));
> -	if (xdp.prog_id)
> -		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL,
> xdp.prog_flags,
> -					NULL));
> +	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
> +	bpf_op_t bpf_op;
>  
> -	/* Remove HW offload */
> -	memset(&xdp, 0, sizeof(xdp));
> -	xdp.command = XDP_QUERY_PROG_HW;
> -	if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
> -		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL,
> xdp.prog_flags,
> +	if (prog) {
> +		bpf_op = dev_xdp_query_generic(dev) ?
> +			 generic_xdp_install : dev->netdev_ops-
> >ndo_bpf;
> +		WARN_ON(dev_xdp_install(dev, bpf_op, NULL, dev-
> >xdp_flags,
>  					NULL));
> +	}
> +	if (dev_xdp_query(dev, XDP_FLAGS_HW_MODE))
> +		WARN_ON(dev_xdp_install(dev, dev->netdev_ops->ndo_bpf,
> +					NULL, XDP_FLAGS_HW_MODE,
> NULL));
>  }
>  
>  /**
> @@ -8080,41 +8104,49 @@ static void dev_xdp_uninstall(struct
> net_device *dev)
>  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack
> *extack,
>  		      int fd, u32 flags)
>  {
> -	const struct net_device_ops *ops = dev->netdev_ops;
> -	enum bpf_netdev_command query;
>  	struct bpf_prog *prog = NULL;
> -	bpf_op_t bpf_op, bpf_chk;
> +	u32 mode, curr_mode;
> +	bpf_op_t bpf_op;
>  	bool offload;
>  	int err;
>  
>  	ASSERT_RTNL();
>  
>  	offload = flags & XDP_FLAGS_HW_MODE;
> -	query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
> +	mode = offload ? XDP_FLAGS_HW_MODE : XDP_FLAGS_DRV_MODE;
>  
> -	bpf_op = bpf_chk = ops->ndo_bpf;
> +	bpf_op = dev->netdev_ops->ndo_bpf;
>  	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE |
> XDP_FLAGS_HW_MODE))) {
>  		NL_SET_ERR_MSG(extack, "underlying driver does not
> support XDP in native mode");
>  		return -EOPNOTSUPP;
>  	}
> -	if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
> -		bpf_op = generic_xdp_install;
> -	if (bpf_op == bpf_chk)
> -		bpf_chk = generic_xdp_install;
>  
> -	if (fd >= 0) {
> -		if (!offload && __dev_xdp_query(dev, bpf_chk,
> XDP_QUERY_PROG)) {
> +	if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
> +		mode = XDP_FLAGS_SKB_MODE;
> +
> +	curr_mode = dev_xdp_current_mode(dev);
> +
> +	if (!offload && curr_mode && (mode ^ curr_mode) &
> +	    (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {

if i am reading this correctly this is equivalent to :

if (!offload && (curre_mode != mode)) 
offlad is false then curr_mode and mode must be DRV or GENERIC .. 

better if you keep bitwise operations for actual bitmasks, mode and
curr_mode are not bitmask, they can hold one value each .. according to
your logic.. 


> +		if (fd >= 0) {
>  			NL_SET_ERR_MSG(extack, "native and generic XDP
> can't be active at the same time");
>  			return -EEXIST;
>  		}
> -		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
> -		    __dev_xdp_query(dev, bpf_op, query)) {
> +		return 0;
> +	}
> +
> +	if (!offload && dev_xdp_query(dev, mode) &&
> +	    !xdp_prog_flags_ok(dev->xdp_flags, flags, extack))
> +		return -EBUSY;
> +
> +	if (fd >= 0) {
> +		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST &&
> +		    dev_xdp_query(dev, mode)) {
>  			NL_SET_ERR_MSG(extack, "XDP program already
> attached");
>  			return -EBUSY;
>  		}
>  
> -		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
> -					     bpf_op == ops->ndo_bpf);
> +		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
> !!bpf_op);
>  		if (IS_ERR(prog))
>  			return PTR_ERR(prog);
>  
> @@ -8125,6 +8157,10 @@ int dev_change_xdp_fd(struct net_device *dev,
> struct netlink_ext_ack *extack,
>  		}
>  	}
>  
> +	if (mode == XDP_FLAGS_SKB_MODE) {
> +		bpf_op = generic_xdp_install;
> +		flags |= XDP_FLAGS_KERN_GENERIC;
> +	}
>  	err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
>  	if (err < 0 && prog)
>  		bpf_prog_put(prog);
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index adcc045952c2..5e396fd01d8b 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -1360,42 +1360,44 @@ static int rtnl_fill_link_ifmap(struct
> sk_buff *skb, struct net_device *dev)
>  	return 0;
>  }
>  
> -static u32 rtnl_xdp_prog_skb(struct net_device *dev)
> +static unsigned int rtnl_xdp_mode_to_flag(u8 tgt_mode)
>  {
> -	const struct bpf_prog *generic_xdp_prog;
> -
> -	ASSERT_RTNL();
> -
> -	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
> -	if (!generic_xdp_prog)
> -		return 0;
> -	return generic_xdp_prog->aux->id;
> -}
> -
> -static u32 rtnl_xdp_prog_drv(struct net_device *dev)
> -{
> -	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
> XDP_QUERY_PROG);
> +	switch (tgt_mode) {
> +	case XDP_ATTACHED_DRV:
> +		return XDP_FLAGS_DRV_MODE;
> +	case XDP_ATTACHED_SKB:
> +		return XDP_FLAGS_SKB_MODE;
> +	case XDP_ATTACHED_HW:
> +		return XDP_FLAGS_HW_MODE;
> +	}
> +	return 0;
>  }
>  
> -static u32 rtnl_xdp_prog_hw(struct net_device *dev)
> +static u32 rtnl_xdp_mode_to_attr(u8 tgt_mode)
>  {
> -	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
> -			       XDP_QUERY_PROG_HW);
> +	switch (tgt_mode) {
> +	case XDP_ATTACHED_DRV:
> +		return IFLA_XDP_DRV_PROG_ID;
> +	case XDP_ATTACHED_SKB:
> +		return IFLA_XDP_SKB_PROG_ID;
> +	case XDP_ATTACHED_HW:
> +		return IFLA_XDP_HW_PROG_ID;
> +	}
> +	return 0;
>  }
>  
>  static int rtnl_xdp_report_one(struct sk_buff *skb, struct
> net_device *dev,
> -			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32
> attr,
> -			       u32 (*get_prog_id)(struct net_device
> *dev))
> +			       u32 *prog_id, u8 *mode, u8 tgt_mode)
>  {
>  	u32 curr_id;
>  	int err;
>  
> -	curr_id = get_prog_id(dev);
> +	curr_id = dev_xdp_query(dev, rtnl_xdp_mode_to_flag(tgt_mode));
>  	if (!curr_id)
>  		return 0;
>  
>  	*prog_id = curr_id;
> -	err = nla_put_u32(skb, attr, curr_id);
> +	err = nla_put_u32(skb, rtnl_xdp_mode_to_attr(tgt_mode),
> curr_id);
>  	if (err)
>  		return err;
>  
> @@ -1420,16 +1422,13 @@ static int rtnl_xdp_fill(struct sk_buff *skb,
> struct net_device *dev)
>  
>  	prog_id = 0;
>  	mode = XDP_ATTACHED_NONE;
> -	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> XDP_ATTACHED_SKB,
> -				  IFLA_XDP_SKB_PROG_ID,
> rtnl_xdp_prog_skb);
> +	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> XDP_ATTACHED_SKB);
>  	if (err)
>  		goto err_cancel;
> -	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> XDP_ATTACHED_DRV,
> -				  IFLA_XDP_DRV_PROG_ID,
> rtnl_xdp_prog_drv);
> +	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> XDP_ATTACHED_DRV);
>  	if (err)
>  		goto err_cancel;
> -	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> XDP_ATTACHED_HW,
> -				  IFLA_XDP_HW_PROG_ID,
> rtnl_xdp_prog_hw);
> +	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> XDP_ATTACHED_HW);
>  	if (err)
>  		goto err_cancel;
>  
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 4b2b194f4f1f..af92c04a58d8 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -388,16 +388,23 @@ int xdp_attachment_query(struct
> xdp_attachment_info *info,
>  }
>  EXPORT_SYMBOL_GPL(xdp_attachment_query);
>  
> -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
> -			     struct netdev_bpf *bpf)
> +bool xdp_prog_flags_ok(u32 old_flags, u32 new_flags,
> +		       struct netlink_ext_ack *extack)
>  {
> -	if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES)
> {
> -		NL_SET_ERR_MSG(bpf->extack,
> -			       "program loaded with different flags");
> +	if ((new_flags ^ old_flags) & XDP_FLAGS_MODES) {
> +		NL_SET_ERR_MSG(extack, "program loaded with different
> flags");
>  		return false;
>  	}
>  	return true;
>  }
> +
> +bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
> +			     struct netdev_bpf *bpf)
> +{
> +	if (info->prog)
> +		return xdp_prog_flags_ok(bpf->flags, info->flags, bpf-
> >extack);
> +	return true;
> +}
>  EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
>  
>  void xdp_attachment_setup(struct xdp_attachment_info *info,
Jonathan Lemon June 1, 2019, 6:12 p.m. UTC | #2
On 31 May 2019, at 2:42, Björn Töpel wrote:

> From: Björn Töpel <bjorn.topel@intel.com>
>
> All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
> command of ndo_bpf. The query code is fairly generic. This commit
> refactors the query code up from the drivers to the netdev level.
>
> The struct net_device has gained two new members: xdp_prog_hw and
> xdp_flags. The former is the offloaded XDP program, if any, and the
> latter tracks the flags that the supplied when attaching the XDP
> program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
>
> The xdp_prog member, previously only used for SKB_MODE, is shared with
> DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
> mutually exclusive. To differentiate between the two modes, a new
> internal flag is introduced as well.

I'm not entirely clear why this new flag is needed - GENERIC seems to
be an alias for SKB_MODE, so why just use SKB_MODE directly?

If the user does not explicitly specify a type (skb|drv|hw), then the
command should choose the correct type and then behave as if this type
was specified.

The logic in dev_change_xdp_fd() is too complicated.  It disallows
setting (drv|skb), but allows (hw|skb), which I'm not sure is 
intentional.

It should be clearer as to which combinations are allowed.
Jakub Kicinski June 1, 2019, 7:42 p.m. UTC | #3
On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> On Fri, 2019-05-31 at 11:42 +0200, Björn Töpel wrote:
> > From: Björn Töpel <bjorn.topel@intel.com>
> > 
> > All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
> > command of ndo_bpf. The query code is fairly generic. This commit
> > refactors the query code up from the drivers to the netdev level.
> > 
> > The struct net_device has gained two new members: xdp_prog_hw and
> > xdp_flags. The former is the offloaded XDP program, if any, and the
> > latter tracks the flags that the supplied when attaching the XDP
> > program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
> > 
> > The xdp_prog member, previously only used for SKB_MODE, is shared
> > with
> > DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
> > mutually exclusive. To differentiate between the two modes, a new
> > internal flag is introduced as well.  
> 
> Just thinking out loud, why can't we allow any combination of
> HW/DRV/SKB modes? they are totally different attach points in a totally
> different checkpoints in a frame life cycle.

FWIW see Message-ID: <20190201080236.446d84d4@redhat.com>

> Down the road i think we will utilize this fact and start introducing
> SKB helpers for SKB mode and driver helpers for DRV mode..

Any reason why we would want the extra complexity?  There is cls_bpf
if someone wants skb features after all..
Jakub Kicinski June 1, 2019, 7:57 p.m. UTC | #4
On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> > +	if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
> > +		mode = XDP_FLAGS_SKB_MODE;
> > +
> > +	curr_mode = dev_xdp_current_mode(dev);
> > +
> > +	if (!offload && curr_mode && (mode ^ curr_mode) &
> > +	    (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {  
> 
> if i am reading this correctly this is equivalent to :
> 
> if (!offload && (curre_mode != mode)) 
> offlad is false then curr_mode and mode must be DRV or GENERIC .. 

Naw, if curr_mode is not set, i.e. nothing installed now, we don't care
about the diff.

> better if you keep bitwise operations for actual bitmasks, mode and
> curr_mode are not bitmask, they can hold one value each .. according to
> your logic.. 

Well, they hold one bit each, whether one bit is a bitmap perhaps is
disputable? :)

I think the logic is fine.

What happened to my request to move the change in behaviour for
disabling to a separate patch, tho, Bjorn? :)
Jakub Kicinski June 1, 2019, 8:02 p.m. UTC | #5
On Fri, 31 May 2019 11:42:14 +0200, Björn Töpel wrote:
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 44b47e9df94a..f3a875a52c6c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1940,6 +1940,9 @@ struct net_device {
>  #endif
>  	struct hlist_node	index_hlist;
>  
> +	struct bpf_prog		*xdp_prog_hw;

IDK if we should pay the cost of this pointer for every netdev on the
system just for the single production driver out there that implements
HW offload :(  I'm on the fence about this..

> +	u32			xdp_flags;
> +
>  /*
>   * Cache lines mostly used on transmit path
>   */

> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index adcc045952c2..5e396fd01d8b 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -1360,42 +1360,44 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
>  	return 0;
>  }
>  
> -static u32 rtnl_xdp_prog_skb(struct net_device *dev)
> +static unsigned int rtnl_xdp_mode_to_flag(u8 tgt_mode)
>  {
> -	const struct bpf_prog *generic_xdp_prog;
> -
> -	ASSERT_RTNL();
> -
> -	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
> -	if (!generic_xdp_prog)
> -		return 0;
> -	return generic_xdp_prog->aux->id;
> -}
> -
> -static u32 rtnl_xdp_prog_drv(struct net_device *dev)
> -{
> -	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
> +	switch (tgt_mode) {
> +	case XDP_ATTACHED_DRV:
> +		return XDP_FLAGS_DRV_MODE;
> +	case XDP_ATTACHED_SKB:
> +		return XDP_FLAGS_SKB_MODE;
> +	case XDP_ATTACHED_HW:
> +		return XDP_FLAGS_HW_MODE;
> +	}
> +	return 0;
>  }
>  
> -static u32 rtnl_xdp_prog_hw(struct net_device *dev)
> +static u32 rtnl_xdp_mode_to_attr(u8 tgt_mode)
>  {
> -	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
> -			       XDP_QUERY_PROG_HW);
> +	switch (tgt_mode) {
> +	case XDP_ATTACHED_DRV:
> +		return IFLA_XDP_DRV_PROG_ID;
> +	case XDP_ATTACHED_SKB:
> +		return IFLA_XDP_SKB_PROG_ID;
> +	case XDP_ATTACHED_HW:
> +		return IFLA_XDP_HW_PROG_ID;
> +	}
> +	return 0;
>  }
>  
>  static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
> -			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
> -			       u32 (*get_prog_id)(struct net_device *dev))
> +			       u32 *prog_id, u8 *mode, u8 tgt_mode)
>  {
>  	u32 curr_id;
>  	int err;
>  
> -	curr_id = get_prog_id(dev);
> +	curr_id = dev_xdp_query(dev, rtnl_xdp_mode_to_flag(tgt_mode));
>  	if (!curr_id)
>  		return 0;
>  
>  	*prog_id = curr_id;
> -	err = nla_put_u32(skb, attr, curr_id);
> +	err = nla_put_u32(skb, rtnl_xdp_mode_to_attr(tgt_mode), curr_id);
>  	if (err)
>  		return err;
>  
> @@ -1420,16 +1422,13 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
>  
>  	prog_id = 0;
>  	mode = XDP_ATTACHED_NONE;
> -	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
> -				  IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
> +	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB);
>  	if (err)
>  		goto err_cancel;
> -	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
> -				  IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
> +	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV);
>  	if (err)
>  		goto err_cancel;
> -	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
> -				  IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
> +	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW);
>  	if (err)
>  		goto err_cancel;
>  

So you remove all the attr and flag params just to add a conversion
helpers to get them based on mode?  Why?  Seems like unnecessary churn,
and questionable change :S

Otherwise looks good to me!
Björn Töpel June 3, 2019, 8:39 a.m. UTC | #6
On Sat, 1 Jun 2019 at 20:12, Jonathan Lemon <jlemon@flugsvamp.com> wrote:
>
> On 31 May 2019, at 2:42, Björn Töpel wrote:
>
> > From: Björn Töpel <bjorn.topel@intel.com>
> >
> > All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
> > command of ndo_bpf. The query code is fairly generic. This commit
> > refactors the query code up from the drivers to the netdev level.
> >
> > The struct net_device has gained two new members: xdp_prog_hw and
> > xdp_flags. The former is the offloaded XDP program, if any, and the
> > latter tracks the flags that the supplied when attaching the XDP
> > program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
> >
> > The xdp_prog member, previously only used for SKB_MODE, is shared with
> > DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
> > mutually exclusive. To differentiate between the two modes, a new
> > internal flag is introduced as well.
>
> I'm not entirely clear why this new flag is needed - GENERIC seems to
> be an alias for SKB_MODE, so why just use SKB_MODE directly?
>
> If the user does not explicitly specify a type (skb|drv|hw), then the
> command should choose the correct type and then behave as if this type
> was specified.
>

Yes, this is kind of hairy.

SKB and DRV are mutually exclusive, but HW is not. IOW, valid options are:
SKB, DRV, HW, SKB+HW DRV+HW.

What complicates things further, is that SKB and DRV can be implicitly
(auto/no flags) or explicitly enabled (flags).

If a user doesn't pass any flags, the "best supported mode" should be
selected. If this "auto mode" is used, it should be seen as a third
mode. E.g.

ip link set dev eth0 xdp on -- OK
ip link set dev eth0 xdp off -- OK

ip link set dev eth0 xdp on -- OK # generic auto selected
ip link set dev eth0 xdpgeneric off -- NOK, bad flags

ip link set dev eth0 xdp on -- OK # drv auto selected
ip link set dev eth0 xdpdrv off -- NOK, bad flags

...and so on. The idea is that a user should use the same set of flags always.

The internal "GENERIC" flag is only to determine if the xdp_prog
represents a DRV version or SKB version. Maybe it would be clearer
just to add an additional xdp_prog_drv to the net_device, instead?

> The logic in dev_change_xdp_fd() is too complicated.  It disallows
> setting (drv|skb), but allows (hw|skb), which I'm not sure is
> intentional.
>
> It should be clearer as to which combinations are allowed.

Fair point. I'll try to clean it up further.


Björn

> --
> Jonathan
>
>
>
> >
> > The program query operations is all done under the rtnl lock. However,
> > the xdp_prog member is __rcu annotated, and used in a lock-less manner
> > for the SKB_MODE. Now that xdp_prog member is shared from a query
> > program perspective, RCU read and assignments functions are still used
> > when altering xdp_prog, even when only for queries in DRV_MODE. This
> > is for sparse/lockdep correctness.
> >
> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> > ---
> >  include/linux/netdevice.h |  15 +++--
> >  include/net/xdp.h         |   4 ++
> >  net/core/dev.c            | 138
> > ++++++++++++++++++++++++--------------
> >  net/core/rtnetlink.c      |  53 +++++++--------
> >  net/core/xdp.c            |  17 +++--
> >  5 files changed, 139 insertions(+), 88 deletions(-)
> >
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 44b47e9df94a..f3a875a52c6c 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1940,6 +1940,9 @@ struct net_device {
> >  #endif
> >       struct hlist_node       index_hlist;
> >
> > +     struct bpf_prog         *xdp_prog_hw;
> > +     u32                     xdp_flags;
> > +
> >  /*
> >   * Cache lines mostly used on transmit path
> >   */
> > @@ -2043,11 +2046,14 @@ struct net_device {
> >  };
> >  #define to_net_dev(d) container_of(d, struct net_device, dev)
> >
> > +/* Reusing the XDP flags space for kernel internal flag */
> > +#define XDP_FLAGS_KERN_GENERIC (1U << 4)
> > +static_assert(!(XDP_FLAGS_KERN_GENERIC & XDP_FLAGS_MASK));
> > +
> >  static inline bool netif_elide_gro(const struct net_device *dev)
> >  {
> > -     if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
> > -             return true;
> > -     return false;
> > +     return !(dev->features & NETIF_F_GRO) ||
> > +             dev->xdp_flags & XDP_FLAGS_KERN_GENERIC;
> >  }
> >
> >  #define      NETDEV_ALIGN            32
> > @@ -3684,8 +3690,7 @@ struct sk_buff *dev_hard_start_xmit(struct
> > sk_buff *skb, struct net_device *dev,
> >  typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf
> > *bpf);
> >  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack
> > *extack,
> >                     int fd, u32 flags);
> > -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
> > -                 enum bpf_netdev_command cmd);
> > +u32 dev_xdp_query(struct net_device *dev, unsigned int mode);
> >  int xdp_umem_query(struct net_device *dev, u16 queue_id);
> >
> >  int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
> > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > index 0f25b3675c5c..3691280c8fc1 100644
> > --- a/include/net/xdp.h
> > +++ b/include/net/xdp.h
> > @@ -51,6 +51,7 @@ struct xdp_mem_info {
> >  };
> >
> >  struct page_pool;
> > +struct netlink_ext_ack;
> >
> >  struct zero_copy_allocator {
> >       void (*free)(struct zero_copy_allocator *zca, unsigned long handle);
> > @@ -166,4 +167,7 @@ bool xdp_attachment_flags_ok(struct
> > xdp_attachment_info *info,
> >  void xdp_attachment_setup(struct xdp_attachment_info *info,
> >                         struct netdev_bpf *bpf);
> >
> > +bool xdp_prog_flags_ok(u32 old_flags, u32 new_flags,
> > +                    struct netlink_ext_ack *extack);
> > +
> >  #endif /* __LINUX_NET_XDP_H__ */
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index b6b8505cfb3e..1a9da508149a 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -8005,21 +8005,43 @@ int dev_change_proto_down_generic(struct
> > net_device *dev, bool proto_down)
> >  }
> >  EXPORT_SYMBOL(dev_change_proto_down_generic);
> >
> > -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
> > -                 enum bpf_netdev_command cmd)
> > +static u32 dev_xdp_query_generic(struct net_device *dev)
> >  {
> > -     struct netdev_bpf xdp;
> > +     struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
> >
> > -     if (!bpf_op)
> > -             return 0;
> > +     return prog && dev->xdp_flags & XDP_FLAGS_KERN_GENERIC ?
> > +             prog->aux->id : 0;
> > +}
> >
> > -     memset(&xdp, 0, sizeof(xdp));
> > -     xdp.command = cmd;
> > +static u32 dev_xdp_query_drv(struct net_device *dev)
> > +{
> > +     struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
> > +
> > +     return prog && !(dev->xdp_flags & XDP_FLAGS_KERN_GENERIC) ?
> > +             prog->aux->id : 0;
> > +}
> > +
> > +static u32 dev_xdp_current_mode(struct net_device *dev)
> > +{
> > +     struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
> >
> > -     /* Query must always succeed. */
> > -     WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
> > +     if (prog)
> > +             return dev_xdp_query_generic(dev) ? XDP_FLAGS_SKB_MODE :
> > +                     XDP_FLAGS_DRV_MODE;
>
>         return xdp->flags & XDP_FLAGS_MODE;
>
>
> > +     return 0;
> > +}
> >
> > -     return xdp.prog_id;
> > +u32 dev_xdp_query(struct net_device *dev, unsigned int mode)
> > +{
> > +     switch (mode) {
> > +     case XDP_FLAGS_DRV_MODE:
> > +             return dev_xdp_query_drv(dev);
> > +     case XDP_FLAGS_SKB_MODE:
> > +             return dev_xdp_query_generic(dev);
> > +     case XDP_FLAGS_HW_MODE:
> > +             return dev->xdp_prog_hw ? dev->xdp_prog_hw->aux->id : 0;
> > +     }
> > +     return 0;
> >  }
> >
> >  static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
> > @@ -8027,45 +8049,47 @@ static int dev_xdp_install(struct net_device
> > *dev, bpf_op_t bpf_op,
> >                          struct bpf_prog *prog)
> >  {
> >       struct netdev_bpf xdp;
> > +     int err;
> >
> >       memset(&xdp, 0, sizeof(xdp));
> > -     if (flags & XDP_FLAGS_HW_MODE)
> > +     if (flags & XDP_FLAGS_HW_MODE) {
> >               xdp.command = XDP_SETUP_PROG_HW;
> > -     else
> > +             xdp.flags = XDP_FLAGS_HW_MODE;
> > +     } else {
> >               xdp.command = XDP_SETUP_PROG;
> > +             xdp.flags = flags;
> > +     }
> >       xdp.extack = extack;
> > -     xdp.flags = flags;
> >       xdp.prog = prog;
> >
> > -     return bpf_op(dev, &xdp);
> > +     err = bpf_op(dev, &xdp);
> > +     if (err)
> > +             return err;
> > +
> > +     if (flags & XDP_FLAGS_HW_MODE) {
> > +             dev->xdp_prog_hw = prog;
> > +             return 0;
> > +     }
> > +
> > +     rcu_assign_pointer(dev->xdp_prog, prog);
> > +     dev->xdp_flags = prog ? flags : 0;
> > +     return 0;
> >  }
> >
> >  static void dev_xdp_uninstall(struct net_device *dev)
> >  {
> > -     struct netdev_bpf xdp;
> > -     bpf_op_t ndo_bpf;
> > -
> > -     /* Remove generic XDP */
> > -     WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
> > -
> > -     /* Remove from the driver */
> > -     ndo_bpf = dev->netdev_ops->ndo_bpf;
> > -     if (!ndo_bpf)
> > -             return;
> > -
> > -     memset(&xdp, 0, sizeof(xdp));
> > -     xdp.command = XDP_QUERY_PROG;
> > -     WARN_ON(ndo_bpf(dev, &xdp));
> > -     if (xdp.prog_id)
> > -             WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
> > -                                     NULL));
> > +     struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
> > +     bpf_op_t bpf_op;
> >
> > -     /* Remove HW offload */
> > -     memset(&xdp, 0, sizeof(xdp));
> > -     xdp.command = XDP_QUERY_PROG_HW;
> > -     if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
> > -             WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
> > +     if (prog) {
> > +             bpf_op = dev_xdp_query_generic(dev) ?
> > +                      generic_xdp_install : dev->netdev_ops->ndo_bpf;
> > +             WARN_ON(dev_xdp_install(dev, bpf_op, NULL, dev->xdp_flags,
> >                                       NULL));
> > +     }
> > +     if (dev_xdp_query(dev, XDP_FLAGS_HW_MODE))
> > +             WARN_ON(dev_xdp_install(dev, dev->netdev_ops->ndo_bpf,
> > +                                     NULL, XDP_FLAGS_HW_MODE, NULL));
> >  }
> >
> >  /**
> > @@ -8080,41 +8104,49 @@ static void dev_xdp_uninstall(struct
> > net_device *dev)
> >  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack
> > *extack,
> >                     int fd, u32 flags)
> >  {
> > -     const struct net_device_ops *ops = dev->netdev_ops;
> > -     enum bpf_netdev_command query;
> >       struct bpf_prog *prog = NULL;
> > -     bpf_op_t bpf_op, bpf_chk;
> > +     u32 mode, curr_mode;
> > +     bpf_op_t bpf_op;
> >       bool offload;
> >       int err;
> >
> >       ASSERT_RTNL();
> >
> >       offload = flags & XDP_FLAGS_HW_MODE;
> > -     query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
> > +     mode = offload ? XDP_FLAGS_HW_MODE : XDP_FLAGS_DRV_MODE;
> >
> > -     bpf_op = bpf_chk = ops->ndo_bpf;
> > +     bpf_op = dev->netdev_ops->ndo_bpf;
> >       if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
> >               NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in
> > native mode");
> >               return -EOPNOTSUPP;
> >       }
> > -     if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
> > -             bpf_op = generic_xdp_install;
> > -     if (bpf_op == bpf_chk)
> > -             bpf_chk = generic_xdp_install;
> >
> > -     if (fd >= 0) {
> > -             if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
> > +     if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
> > +             mode = XDP_FLAGS_SKB_MODE;
> > +
> > +     curr_mode = dev_xdp_current_mode(dev);
> > +
> > +     if (!offload && curr_mode && (mode ^ curr_mode) &
> > +         (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {
> > +             if (fd >= 0) {
> >                       NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at
> > the same time");
> >                       return -EEXIST;
> >               }
> > -             if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
> > -                 __dev_xdp_query(dev, bpf_op, query)) {
> > +             return 0;
> > +     }
> > +
> > +     if (!offload && dev_xdp_query(dev, mode) &&
> > +         !xdp_prog_flags_ok(dev->xdp_flags, flags, extack))
> > +             return -EBUSY;
> > +
> > +     if (fd >= 0) {
> > +             if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST &&
> > +                 dev_xdp_query(dev, mode)) {
> >                       NL_SET_ERR_MSG(extack, "XDP program already attached");
> >                       return -EBUSY;
> >               }
> >
> > -             prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
> > -                                          bpf_op == ops->ndo_bpf);
> > +             prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, !!bpf_op);
> >               if (IS_ERR(prog))
> >                       return PTR_ERR(prog);
> >
> > @@ -8125,6 +8157,10 @@ int dev_change_xdp_fd(struct net_device *dev,
> > struct netlink_ext_ack *extack,
> >               }
> >       }
> >
> > +     if (mode == XDP_FLAGS_SKB_MODE) {
> > +             bpf_op = generic_xdp_install;
> > +             flags |= XDP_FLAGS_KERN_GENERIC;
> > +     }
> >       err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
> >       if (err < 0 && prog)
> >               bpf_prog_put(prog);
> > diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> > index adcc045952c2..5e396fd01d8b 100644
> > --- a/net/core/rtnetlink.c
> > +++ b/net/core/rtnetlink.c
> > @@ -1360,42 +1360,44 @@ static int rtnl_fill_link_ifmap(struct sk_buff
> > *skb, struct net_device *dev)
> >       return 0;
> >  }
> >
> > -static u32 rtnl_xdp_prog_skb(struct net_device *dev)
> > +static unsigned int rtnl_xdp_mode_to_flag(u8 tgt_mode)
> >  {
> > -     const struct bpf_prog *generic_xdp_prog;
> > -
> > -     ASSERT_RTNL();
> > -
> > -     generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
> > -     if (!generic_xdp_prog)
> > -             return 0;
> > -     return generic_xdp_prog->aux->id;
> > -}
> > -
> > -static u32 rtnl_xdp_prog_drv(struct net_device *dev)
> > -{
> > -     return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
> > XDP_QUERY_PROG);
> > +     switch (tgt_mode) {
> > +     case XDP_ATTACHED_DRV:
> > +             return XDP_FLAGS_DRV_MODE;
> > +     case XDP_ATTACHED_SKB:
> > +             return XDP_FLAGS_SKB_MODE;
> > +     case XDP_ATTACHED_HW:
> > +             return XDP_FLAGS_HW_MODE;
> > +     }
> > +     return 0;
> >  }
> >
> > -static u32 rtnl_xdp_prog_hw(struct net_device *dev)
> > +static u32 rtnl_xdp_mode_to_attr(u8 tgt_mode)
> >  {
> > -     return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
> > -                            XDP_QUERY_PROG_HW);
> > +     switch (tgt_mode) {
> > +     case XDP_ATTACHED_DRV:
> > +             return IFLA_XDP_DRV_PROG_ID;
> > +     case XDP_ATTACHED_SKB:
> > +             return IFLA_XDP_SKB_PROG_ID;
> > +     case XDP_ATTACHED_HW:
> > +             return IFLA_XDP_HW_PROG_ID;
> > +     }
> > +     return 0;
> >  }
> >
> >  static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device
> > *dev,
> > -                            u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
> > -                            u32 (*get_prog_id)(struct net_device *dev))
> > +                            u32 *prog_id, u8 *mode, u8 tgt_mode)
> >  {
> >       u32 curr_id;
> >       int err;
> >
> > -     curr_id = get_prog_id(dev);
> > +     curr_id = dev_xdp_query(dev, rtnl_xdp_mode_to_flag(tgt_mode));
> >       if (!curr_id)
> >               return 0;
> >
> >       *prog_id = curr_id;
> > -     err = nla_put_u32(skb, attr, curr_id);
> > +     err = nla_put_u32(skb, rtnl_xdp_mode_to_attr(tgt_mode), curr_id);
> >       if (err)
> >               return err;
> >
> > @@ -1420,16 +1422,13 @@ static int rtnl_xdp_fill(struct sk_buff *skb,
> > struct net_device *dev)
> >
> >       prog_id = 0;
> >       mode = XDP_ATTACHED_NONE;
> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> > XDP_ATTACHED_SKB,
> > -                               IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> > XDP_ATTACHED_SKB);
> >       if (err)
> >               goto err_cancel;
> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> > XDP_ATTACHED_DRV,
> > -                               IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> > XDP_ATTACHED_DRV);
> >       if (err)
> >               goto err_cancel;
> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> > XDP_ATTACHED_HW,
> > -                               IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode,
> > XDP_ATTACHED_HW);
> >       if (err)
> >               goto err_cancel;
> >
> > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > index 4b2b194f4f1f..af92c04a58d8 100644
> > --- a/net/core/xdp.c
> > +++ b/net/core/xdp.c
> > @@ -388,16 +388,23 @@ int xdp_attachment_query(struct
> > xdp_attachment_info *info,
> >  }
> >  EXPORT_SYMBOL_GPL(xdp_attachment_query);
> >
> > -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
> > -                          struct netdev_bpf *bpf)
> > +bool xdp_prog_flags_ok(u32 old_flags, u32 new_flags,
> > +                    struct netlink_ext_ack *extack)
> >  {
> > -     if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
> > -             NL_SET_ERR_MSG(bpf->extack,
> > -                            "program loaded with different flags");
> > +     if ((new_flags ^ old_flags) & XDP_FLAGS_MODES) {
> > +             NL_SET_ERR_MSG(extack, "program loaded with different flags");
> >               return false;
> >       }
> >       return true;
> >  }
> > +
> > +bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
> > +                          struct netdev_bpf *bpf)
> > +{
> > +     if (info->prog)
> > +             return xdp_prog_flags_ok(bpf->flags, info->flags, bpf->extack);
> > +     return true;
> > +}
> >  EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
> >
> >  void xdp_attachment_setup(struct xdp_attachment_info *info,
> > --
> > 2.20.1
Björn Töpel June 3, 2019, 9:04 a.m. UTC | #7
On Sat, 1 Jun 2019 at 21:42, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> > On Fri, 2019-05-31 at 11:42 +0200, Björn Töpel wrote:
> > > From: Björn Töpel <bjorn.topel@intel.com>
> > >
> > > All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
> > > command of ndo_bpf. The query code is fairly generic. This commit
> > > refactors the query code up from the drivers to the netdev level.
> > >
> > > The struct net_device has gained two new members: xdp_prog_hw and
> > > xdp_flags. The former is the offloaded XDP program, if any, and the
> > > latter tracks the flags that the supplied when attaching the XDP
> > > program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
> > >
> > > The xdp_prog member, previously only used for SKB_MODE, is shared
> > > with
> > > DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
> > > mutually exclusive. To differentiate between the two modes, a new
> > > internal flag is introduced as well.
> >
> > Just thinking out loud, why can't we allow any combination of
> > HW/DRV/SKB modes? they are totally different attach points in a totally
> > different checkpoints in a frame life cycle.
>
> FWIW see Message-ID: <20190201080236.446d84d4@redhat.com>
>

I've always seen the SKB-mode as something that will eventually be removed.

Clickable link:
https://lore.kernel.org/netdev/20190201080236.446d84d4@redhat.com/ :-P

> > Down the road i think we will utilize this fact and start introducing
> > SKB helpers for SKB mode and driver helpers for DRV mode..
>
> Any reason why we would want the extra complexity?  There is cls_bpf
> if someone wants skb features after all..
Björn Töpel June 3, 2019, 9:04 a.m. UTC | #8
On Sat, 1 Jun 2019 at 21:57, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> > > +   if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
> > > +           mode = XDP_FLAGS_SKB_MODE;
> > > +
> > > +   curr_mode = dev_xdp_current_mode(dev);
> > > +
> > > +   if (!offload && curr_mode && (mode ^ curr_mode) &
> > > +       (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {
> >
> > if i am reading this correctly this is equivalent to :
> >
> > if (!offload && (curre_mode != mode))
> > offlad is false then curr_mode and mode must be DRV or GENERIC ..
>
> Naw, if curr_mode is not set, i.e. nothing installed now, we don't care
> about the diff.
>
> > better if you keep bitwise operations for actual bitmasks, mode and
> > curr_mode are not bitmask, they can hold one value each .. according to
> > your logic..
>
> Well, they hold one bit each, whether one bit is a bitmap perhaps is
> disputable? :)
>
> I think the logic is fine.
>

Hmm, but changing to:

       if (!offload && curr_mode && mode != curr_mode)

is equal, and to Saeed's point, clearer. I'll go that route in a v3.

> What happened to my request to move the change in behaviour for
> disabling to a separate patch, tho, Bjorn? :)

Actually, I left that out completely. This patch doesn't change the
behavior. After I realized how the flags *should* be used, I don't
think my v1 change makes sense anymore. My v1 patch was to give an
error if you tried to disable, say generic if drv was enabled via
"auto detect/no flags". But this is catched by looking at the flags.

What I did, however, was moving the flags check into change_fd so that
the driver doesn't have to do the check. E.g. the Intel drivers didn't
do correct checking of flags.
Björn Töpel June 3, 2019, 9:07 a.m. UTC | #9
On Sat, 1 Jun 2019 at 22:02, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Fri, 31 May 2019 11:42:14 +0200, Björn Töpel wrote:
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 44b47e9df94a..f3a875a52c6c 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1940,6 +1940,9 @@ struct net_device {
> >  #endif
> >       struct hlist_node       index_hlist;
> >
> > +     struct bpf_prog         *xdp_prog_hw;
>
> IDK if we should pay the cost of this pointer for every netdev on the
> system just for the single production driver out there that implements
> HW offload :(  I'm on the fence about this..
>

Hmm. Adding a config option? Keep the QUERY_PROG_HW?

> > +     u32                     xdp_flags;
> > +
> >  /*
> >   * Cache lines mostly used on transmit path
> >   */
>
> > diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> > index adcc045952c2..5e396fd01d8b 100644
> > --- a/net/core/rtnetlink.c
> > +++ b/net/core/rtnetlink.c
> > @@ -1360,42 +1360,44 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
> >       return 0;
> >  }
> >
> > -static u32 rtnl_xdp_prog_skb(struct net_device *dev)
> > +static unsigned int rtnl_xdp_mode_to_flag(u8 tgt_mode)
> >  {
> > -     const struct bpf_prog *generic_xdp_prog;
> > -
> > -     ASSERT_RTNL();
> > -
> > -     generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
> > -     if (!generic_xdp_prog)
> > -             return 0;
> > -     return generic_xdp_prog->aux->id;
> > -}
> > -
> > -static u32 rtnl_xdp_prog_drv(struct net_device *dev)
> > -{
> > -     return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
> > +     switch (tgt_mode) {
> > +     case XDP_ATTACHED_DRV:
> > +             return XDP_FLAGS_DRV_MODE;
> > +     case XDP_ATTACHED_SKB:
> > +             return XDP_FLAGS_SKB_MODE;
> > +     case XDP_ATTACHED_HW:
> > +             return XDP_FLAGS_HW_MODE;
> > +     }
> > +     return 0;
> >  }
> >
> > -static u32 rtnl_xdp_prog_hw(struct net_device *dev)
> > +static u32 rtnl_xdp_mode_to_attr(u8 tgt_mode)
> >  {
> > -     return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
> > -                            XDP_QUERY_PROG_HW);
> > +     switch (tgt_mode) {
> > +     case XDP_ATTACHED_DRV:
> > +             return IFLA_XDP_DRV_PROG_ID;
> > +     case XDP_ATTACHED_SKB:
> > +             return IFLA_XDP_SKB_PROG_ID;
> > +     case XDP_ATTACHED_HW:
> > +             return IFLA_XDP_HW_PROG_ID;
> > +     }
> > +     return 0;
> >  }
> >
> >  static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
> > -                            u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
> > -                            u32 (*get_prog_id)(struct net_device *dev))
> > +                            u32 *prog_id, u8 *mode, u8 tgt_mode)
> >  {
> >       u32 curr_id;
> >       int err;
> >
> > -     curr_id = get_prog_id(dev);
> > +     curr_id = dev_xdp_query(dev, rtnl_xdp_mode_to_flag(tgt_mode));
> >       if (!curr_id)
> >               return 0;
> >
> >       *prog_id = curr_id;
> > -     err = nla_put_u32(skb, attr, curr_id);
> > +     err = nla_put_u32(skb, rtnl_xdp_mode_to_attr(tgt_mode), curr_id);
> >       if (err)
> >               return err;
> >
> > @@ -1420,16 +1422,13 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
> >
> >       prog_id = 0;
> >       mode = XDP_ATTACHED_NONE;
> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
> > -                               IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB);
> >       if (err)
> >               goto err_cancel;
> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
> > -                               IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV);
> >       if (err)
> >               goto err_cancel;
> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
> > -                               IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW);
> >       if (err)
> >               goto err_cancel;
> >
>
> So you remove all the attr and flag params just to add a conversion
> helpers to get them based on mode?  Why?  Seems like unnecessary churn,
> and questionable change :S
>

Fair enough. I'll address this!

> Otherwise looks good to me!
Toke Høiland-Jørgensen June 3, 2019, 10:56 a.m. UTC | #10
Björn Töpel <bjorn.topel@gmail.com> writes:

> On Sat, 1 Jun 2019 at 22:02, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
>>
>> On Fri, 31 May 2019 11:42:14 +0200, Björn Töpel wrote:
>> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> > index 44b47e9df94a..f3a875a52c6c 100644
>> > --- a/include/linux/netdevice.h
>> > +++ b/include/linux/netdevice.h
>> > @@ -1940,6 +1940,9 @@ struct net_device {
>> >  #endif
>> >       struct hlist_node       index_hlist;
>> >
>> > +     struct bpf_prog         *xdp_prog_hw;
>>
>> IDK if we should pay the cost of this pointer for every netdev on the
>> system just for the single production driver out there that implements
>> HW offload :(  I'm on the fence about this..
>>
>
> Hmm. Adding a config option? Keep the QUERY_PROG_HW?
>
>> > +     u32                     xdp_flags;
>> > +
>> >  /*
>> >   * Cache lines mostly used on transmit path
>> >   */
>>
>> > diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
>> > index adcc045952c2..5e396fd01d8b 100644
>> > --- a/net/core/rtnetlink.c
>> > +++ b/net/core/rtnetlink.c
>> > @@ -1360,42 +1360,44 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
>> >       return 0;
>> >  }
>> >
>> > -static u32 rtnl_xdp_prog_skb(struct net_device *dev)
>> > +static unsigned int rtnl_xdp_mode_to_flag(u8 tgt_mode)
>> >  {
>> > -     const struct bpf_prog *generic_xdp_prog;
>> > -
>> > -     ASSERT_RTNL();
>> > -
>> > -     generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
>> > -     if (!generic_xdp_prog)
>> > -             return 0;
>> > -     return generic_xdp_prog->aux->id;
>> > -}
>> > -
>> > -static u32 rtnl_xdp_prog_drv(struct net_device *dev)
>> > -{
>> > -     return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
>> > +     switch (tgt_mode) {
>> > +     case XDP_ATTACHED_DRV:
>> > +             return XDP_FLAGS_DRV_MODE;
>> > +     case XDP_ATTACHED_SKB:
>> > +             return XDP_FLAGS_SKB_MODE;
>> > +     case XDP_ATTACHED_HW:
>> > +             return XDP_FLAGS_HW_MODE;
>> > +     }
>> > +     return 0;
>> >  }
>> >
>> > -static u32 rtnl_xdp_prog_hw(struct net_device *dev)
>> > +static u32 rtnl_xdp_mode_to_attr(u8 tgt_mode)
>> >  {
>> > -     return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
>> > -                            XDP_QUERY_PROG_HW);
>> > +     switch (tgt_mode) {
>> > +     case XDP_ATTACHED_DRV:
>> > +             return IFLA_XDP_DRV_PROG_ID;
>> > +     case XDP_ATTACHED_SKB:
>> > +             return IFLA_XDP_SKB_PROG_ID;
>> > +     case XDP_ATTACHED_HW:
>> > +             return IFLA_XDP_HW_PROG_ID;
>> > +     }
>> > +     return 0;
>> >  }
>> >
>> >  static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
>> > -                            u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
>> > -                            u32 (*get_prog_id)(struct net_device *dev))
>> > +                            u32 *prog_id, u8 *mode, u8 tgt_mode)
>> >  {
>> >       u32 curr_id;
>> >       int err;
>> >
>> > -     curr_id = get_prog_id(dev);
>> > +     curr_id = dev_xdp_query(dev, rtnl_xdp_mode_to_flag(tgt_mode));
>> >       if (!curr_id)
>> >               return 0;
>> >
>> >       *prog_id = curr_id;
>> > -     err = nla_put_u32(skb, attr, curr_id);
>> > +     err = nla_put_u32(skb, rtnl_xdp_mode_to_attr(tgt_mode), curr_id);
>> >       if (err)
>> >               return err;
>> >
>> > @@ -1420,16 +1422,13 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
>> >
>> >       prog_id = 0;
>> >       mode = XDP_ATTACHED_NONE;
>> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
>> > -                               IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
>> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB);
>> >       if (err)
>> >               goto err_cancel;
>> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
>> > -                               IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
>> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV);
>> >       if (err)
>> >               goto err_cancel;
>> > -     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
>> > -                               IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
>> > +     err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW);
>> >       if (err)
>> >               goto err_cancel;
>> >
>>
>> So you remove all the attr and flag params just to add a conversion
>> helpers to get them based on mode?  Why?  Seems like unnecessary churn,
>> and questionable change :S
>>
>
> Fair enough. I'll address this!

I think this was actually my idea, wasn't it? :)

My thought being that if you just do the minimal change here, we'll end
up with three empty wrapper functions, which we might as well just fold
into the caller...

-Toke
Jonathan Lemon June 3, 2019, 2:58 p.m. UTC | #11
On 3 Jun 2019, at 1:39, Björn Töpel wrote:

> On Sat, 1 Jun 2019 at 20:12, Jonathan Lemon <jlemon@flugsvamp.com> 
> wrote:
>>
>> On 31 May 2019, at 2:42, Björn Töpel wrote:
>>
>>> From: Björn Töpel <bjorn.topel@intel.com>
>>>
>>> All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
>>> command of ndo_bpf. The query code is fairly generic. This commit
>>> refactors the query code up from the drivers to the netdev level.
>>>
>>> The struct net_device has gained two new members: xdp_prog_hw and
>>> xdp_flags. The former is the offloaded XDP program, if any, and the
>>> latter tracks the flags that the supplied when attaching the XDP
>>> program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
>>>
>>> The xdp_prog member, previously only used for SKB_MODE, is shared 
>>> with
>>> DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
>>> mutually exclusive. To differentiate between the two modes, a new
>>> internal flag is introduced as well.
>>
>> I'm not entirely clear why this new flag is needed - GENERIC seems to
>> be an alias for SKB_MODE, so why just use SKB_MODE directly?
>>
>> If the user does not explicitly specify a type (skb|drv|hw), then the
>> command should choose the correct type and then behave as if this 
>> type
>> was specified.
>>
>
> Yes, this is kind of hairy.
>
> SKB and DRV are mutually exclusive, but HW is not. IOW, valid options 
> are:
> SKB, DRV, HW, SKB+HW DRV+HW.

Fair enough, that was the understanding that I had from the code, 
although I'm not sure about the usage of SKB+HW mode.



>
> What complicates things further, is that SKB and DRV can be implicitly
> (auto/no flags) or explicitly enabled (flags).
>
> If a user doesn't pass any flags, the "best supported mode" should be
> selected. If this "auto mode" is used, it should be seen as a third
> mode. E.g.
>
> ip link set dev eth0 xdp on -- OK
> ip link set dev eth0 xdp off -- OK
>
> ip link set dev eth0 xdp on -- OK # generic auto selected
> ip link set dev eth0 xdpgeneric off -- NOK, bad flags
>
> ip link set dev eth0 xdp on -- OK # drv auto selected
> ip link set dev eth0 xdpdrv off -- NOK, bad flags
>
> ...and so on. The idea is that a user should use the same set of flags 
> always.

I'm not sure about this.  The "xdp" mode shouldn't be treated as a 
separate mode, it should be "best supported mode", as indicated above.  
 From my view, it should select the appropriate mode, and then proceed 
as if the user had specified that mode, rather than being treated as an 
independent mode.

ip link set dev eth0 xdp on		- OK
ip link set dev eth0 xdp off	- OK

ip link set dev eth0 xdp on 	- OK, selected dev
ip link set dev eth0 xdpgeneric off - NOK, not running
ip link set dev eth0 xdpdrv off	- OK




> The internal "GENERIC" flag is only to determine if the xdp_prog
> represents a DRV version or SKB version. Maybe it would be clearer
> just to add an additional xdp_prog_drv to the net_device, instead?

I'd go the other way, and remove GENERIC, leaving only SKB, DRV, and HW.
The appropriate mode flag (SKB|DRV) is enough to indicate the type of 
xdp_prog.
Jakub Kicinski June 3, 2019, 5:03 p.m. UTC | #12
On Mon, 3 Jun 2019 11:04:36 +0200, Björn Töpel wrote:
> On Sat, 1 Jun 2019 at 21:57, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
> >
> > On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:  
> > > > +   if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
> > > > +           mode = XDP_FLAGS_SKB_MODE;
> > > > +
> > > > +   curr_mode = dev_xdp_current_mode(dev);
> > > > +
> > > > +   if (!offload && curr_mode && (mode ^ curr_mode) &
> > > > +       (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {  
> > >
> > > if i am reading this correctly this is equivalent to :
> > >
> > > if (!offload && (curre_mode != mode))
> > > offlad is false then curr_mode and mode must be DRV or GENERIC ..  
> >
> > Naw, if curr_mode is not set, i.e. nothing installed now, we don't care
> > about the diff.
> >  
> > > better if you keep bitwise operations for actual bitmasks, mode and
> > > curr_mode are not bitmask, they can hold one value each .. according to
> > > your logic..  
> >
> > Well, they hold one bit each, whether one bit is a bitmap perhaps is
> > disputable? :)
> >
> > I think the logic is fine.
> >  
> 
> Hmm, but changing to:
> 
>        if (!offload && curr_mode && mode != curr_mode)
> 
> is equal, and to Saeed's point, clearer. I'll go that route in a v3.

Sorry, you're right, the flags get mangled before they get here, so
yeah, this condition should work.  Confusingly.

> > What happened to my request to move the change in behaviour for
> > disabling to a separate patch, tho, Bjorn? :)  
> 
> Actually, I left that out completely. This patch doesn't change the
> behavior. After I realized how the flags *should* be used, I don't
> think my v1 change makes sense anymore. My v1 patch was to give an
> error if you tried to disable, say generic if drv was enabled via
> "auto detect/no flags". But this is catched by looking at the flags.
> 
> What I did, however, was moving the flags check into change_fd so that
> the driver doesn't have to do the check. E.g. the Intel drivers didn't
> do correct checking of flags.

Ugh.  Could you please rewrite the conditions to make the fd >= check
consistently the outside if?  Also could you add extack to this:

+	if (!offload && dev_xdp_query(dev, mode) &&
+	    !xdp_prog_flags_ok(dev->xdp_flags, flags, extack))
+		return -EBUSY;

It's unclear what it's doing.
Saeed Mahameed June 3, 2019, 9:20 p.m. UTC | #13
On Mon, 2019-06-03 at 11:04 +0200, Björn Töpel wrote:
> On Sat, 1 Jun 2019 at 21:42, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
> > On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> > > On Fri, 2019-05-31 at 11:42 +0200, Björn Töpel wrote:
> > > > From: Björn Töpel <bjorn.topel@intel.com>
> > > > 
> > > > All XDP capable drivers need to implement the
> > > > XDP_QUERY_PROG{,_HW}
> > > > command of ndo_bpf. The query code is fairly generic. This
> > > > commit
> > > > refactors the query code up from the drivers to the netdev
> > > > level.
> > > > 
> > > > The struct net_device has gained two new members: xdp_prog_hw
> > > > and
> > > > xdp_flags. The former is the offloaded XDP program, if any, and
> > > > the
> > > > latter tracks the flags that the supplied when attaching the
> > > > XDP
> > > > program. The flags only apply to SKB_MODE or DRV_MODE, not
> > > > HW_MODE.
> > > > 
> > > > The xdp_prog member, previously only used for SKB_MODE, is
> > > > shared
> > > > with
> > > > DRV_MODE. This is OK, due to the fact that SKB_MODE and
> > > > DRV_MODE are
> > > > mutually exclusive. To differentiate between the two modes, a
> > > > new
> > > > internal flag is introduced as well.
> > > 
> > > Just thinking out loud, why can't we allow any combination of
> > > HW/DRV/SKB modes? they are totally different attach points in a
> > > totally
> > > different checkpoints in a frame life cycle.
> > 
> > FWIW see Message-ID: <20190201080236.446d84d4@redhat.com>
> > 
> 
> I've always seen the SKB-mode as something that will eventually be
> removed.
> 

I don't think so, we are too deep into SKB-mode.

> Clickable link:
> https://lore.kernel.org/netdev/20190201080236.446d84d4@redhat.com/ :-
> P
> 

So we are all hanging on Jesper's refactoring ideas that are not
getting any priority for now :).


> > > Down the road i think we will utilize this fact and start
> > > introducing
> > > SKB helpers for SKB mode and driver helpers for DRV mode..
> > 
> > Any reason why we would want the extra complexity?  There is
> > cls_bpf
> > if someone wants skb features after all..

Donno, SKB mode is earlier in the stack maybe ..
Daniel Borkmann June 3, 2019, 11:11 p.m. UTC | #14
On 06/03/2019 10:39 AM, Björn Töpel wrote:
> On Sat, 1 Jun 2019 at 20:12, Jonathan Lemon <jlemon@flugsvamp.com> wrote:
>> On 31 May 2019, at 2:42, Björn Töpel wrote:
>>> From: Björn Töpel <bjorn.topel@intel.com>
>>>
>>> All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
>>> command of ndo_bpf. The query code is fairly generic. This commit
>>> refactors the query code up from the drivers to the netdev level.
>>>
>>> The struct net_device has gained two new members: xdp_prog_hw and
>>> xdp_flags. The former is the offloaded XDP program, if any, and the
>>> latter tracks the flags that the supplied when attaching the XDP
>>> program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
>>>
>>> The xdp_prog member, previously only used for SKB_MODE, is shared with
>>> DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
>>> mutually exclusive. To differentiate between the two modes, a new
>>> internal flag is introduced as well.
>>
>> I'm not entirely clear why this new flag is needed - GENERIC seems to
>> be an alias for SKB_MODE, so why just use SKB_MODE directly?
>>
>> If the user does not explicitly specify a type (skb|drv|hw), then the
>> command should choose the correct type and then behave as if this type
>> was specified.
> 
> Yes, this is kind of hairy.
> 
> SKB and DRV are mutually exclusive, but HW is not. IOW, valid options are:
> SKB, DRV, HW, SKB+HW DRV+HW.

Correct, HW is a bit special here in that it helps offloading parts of
the DRV XDP program to NIC, but also do RSS steering in BPF etc, hence
this combo is intentionally allowed (see also git log).

> What complicates things further, is that SKB and DRV can be implicitly
> (auto/no flags) or explicitly enabled (flags).

Mainly out of historic context: originally the fallback to SKB mode was
implicit if the ndo_bpf was missing. But there are use cases where we
want to fail if the driver does not support native XDP to avoid surprises.

> If a user doesn't pass any flags, the "best supported mode" should be
> selected. If this "auto mode" is used, it should be seen as a third
> mode. E.g.
> 
> ip link set dev eth0 xdp on -- OK
> ip link set dev eth0 xdp off -- OK
> 
> ip link set dev eth0 xdp on -- OK # generic auto selected
> ip link set dev eth0 xdpgeneric off -- NOK, bad flags

This would work if the auto selection would have selected XDP generic.

> ip link set dev eth0 xdp on -- OK # drv auto selected
> ip link set dev eth0 xdpdrv off -- NOK, bad flags

This would work if the auto selection chose native XDP previously. Are
you saying it's not the case?

Also, what is the use case in mixing these commands? It should be xdp
on+off, xdpdrv on+off, and so on. Are you saying you would prefer a
xdp{,any} off that uninstalls everything? Isn't this mostly a user space
issue to whatever orchestrates XDP?

> ...and so on. The idea is that a user should use the same set of flags always.
> 
> The internal "GENERIC" flag is only to determine if the xdp_prog
> represents a DRV version or SKB version. Maybe it would be clearer
> just to add an additional xdp_prog_drv to the net_device, instead?
> 
>> The logic in dev_change_xdp_fd() is too complicated.  It disallows
>> setting (drv|skb), but allows (hw|skb), which I'm not sure is
>> intentional.
>>
>> It should be clearer as to which combinations are allowed.
> 
> Fair point. I'll try to clean it up further.
>
Björn Töpel June 4, 2019, 5:16 a.m. UTC | #15
On Mon, 3 Jun 2019 at 19:03, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Mon, 3 Jun 2019 11:04:36 +0200, Björn Töpel wrote:
> > On Sat, 1 Jun 2019 at 21:57, Jakub Kicinski
> > <jakub.kicinski@netronome.com> wrote:
> > >
> > > On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> > > > > +   if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
> > > > > +           mode = XDP_FLAGS_SKB_MODE;
> > > > > +
> > > > > +   curr_mode = dev_xdp_current_mode(dev);
> > > > > +
> > > > > +   if (!offload && curr_mode && (mode ^ curr_mode) &
> > > > > +       (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {
> > > >
> > > > if i am reading this correctly this is equivalent to :
> > > >
> > > > if (!offload && (curre_mode != mode))
> > > > offlad is false then curr_mode and mode must be DRV or GENERIC ..
> > >
> > > Naw, if curr_mode is not set, i.e. nothing installed now, we don't care
> > > about the diff.
> > >
> > > > better if you keep bitwise operations for actual bitmasks, mode and
> > > > curr_mode are not bitmask, they can hold one value each .. according to
> > > > your logic..
> > >
> > > Well, they hold one bit each, whether one bit is a bitmap perhaps is
> > > disputable? :)
> > >
> > > I think the logic is fine.
> > >
> >
> > Hmm, but changing to:
> >
> >        if (!offload && curr_mode && mode != curr_mode)
> >
> > is equal, and to Saeed's point, clearer. I'll go that route in a v3.
>
> Sorry, you're right, the flags get mangled before they get here, so
> yeah, this condition should work.  Confusingly.
>
> > > What happened to my request to move the change in behaviour for
> > > disabling to a separate patch, tho, Bjorn? :)
> >
> > Actually, I left that out completely. This patch doesn't change the
> > behavior. After I realized how the flags *should* be used, I don't
> > think my v1 change makes sense anymore. My v1 patch was to give an
> > error if you tried to disable, say generic if drv was enabled via
> > "auto detect/no flags". But this is catched by looking at the flags.
> >
> > What I did, however, was moving the flags check into change_fd so that
> > the driver doesn't have to do the check. E.g. the Intel drivers didn't
> > do correct checking of flags.
>
> Ugh.  Could you please rewrite the conditions to make the fd >= check
> consistently the outside if?  Also could you add extack to this:
>

The reason I moved the if-statement (checking if we're mixing
drv/skb), is because I'd like to catch the no-op (e.g. xdpdrv active
and calling xdpgeneric off) early (the return 0, under the if (fd >=
check).

> +       if (!offload && dev_xdp_query(dev, mode) &&
> +           !xdp_prog_flags_ok(dev->xdp_flags, flags, extack))
> +               return -EBUSY;
>
> It's unclear what it's doing.

This checks whether the flags have changed, pulling out the logic from
the drivers. xdp_prog_flags_ok adds to extack, resuing the flags_ok
function. The xdp_attachment_flags_ok OTOH is not necessary anymore,
and should be further cleaned up. I'll address this and make the this
clause more clear.


Björn
Björn Töpel June 4, 2019, 5:18 a.m. UTC | #16
On Mon, 3 Jun 2019 at 23:20, Saeed Mahameed <saeedm@mellanox.com> wrote:
>
> On Mon, 2019-06-03 at 11:04 +0200, Björn Töpel wrote:
> > On Sat, 1 Jun 2019 at 21:42, Jakub Kicinski
> > <jakub.kicinski@netronome.com> wrote:
> > > On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:
> > > > On Fri, 2019-05-31 at 11:42 +0200, Björn Töpel wrote:
> > > > > From: Björn Töpel <bjorn.topel@intel.com>
> > > > >
> > > > > All XDP capable drivers need to implement the
> > > > > XDP_QUERY_PROG{,_HW}
> > > > > command of ndo_bpf. The query code is fairly generic. This
> > > > > commit
> > > > > refactors the query code up from the drivers to the netdev
> > > > > level.
> > > > >
> > > > > The struct net_device has gained two new members: xdp_prog_hw
> > > > > and
> > > > > xdp_flags. The former is the offloaded XDP program, if any, and
> > > > > the
> > > > > latter tracks the flags that the supplied when attaching the
> > > > > XDP
> > > > > program. The flags only apply to SKB_MODE or DRV_MODE, not
> > > > > HW_MODE.
> > > > >
> > > > > The xdp_prog member, previously only used for SKB_MODE, is
> > > > > shared
> > > > > with
> > > > > DRV_MODE. This is OK, due to the fact that SKB_MODE and
> > > > > DRV_MODE are
> > > > > mutually exclusive. To differentiate between the two modes, a
> > > > > new
> > > > > internal flag is introduced as well.
> > > >
> > > > Just thinking out loud, why can't we allow any combination of
> > > > HW/DRV/SKB modes? they are totally different attach points in a
> > > > totally
> > > > different checkpoints in a frame life cycle.
> > >
> > > FWIW see Message-ID: <20190201080236.446d84d4@redhat.com>
> > >
> >
> > I've always seen the SKB-mode as something that will eventually be
> > removed.
> >
>
> I don't think so, we are too deep into SKB-mode.
>

You might be right. :-(

Are you envisioning sk_buff specfic XDP_SKB functions, that only apply
for XDP_SKB? Ick.

> > Clickable link:
> > https://lore.kernel.org/netdev/20190201080236.446d84d4@redhat.com/ :-
> > P
> >
>
> So we are all hanging on Jesper's refactoring ideas that are not
> getting any priority for now :).
>
>
> > > > Down the road i think we will utilize this fact and start
> > > > introducing
> > > > SKB helpers for SKB mode and driver helpers for DRV mode..
> > >
> > > Any reason why we would want the extra complexity?  There is
> > > cls_bpf
> > > if someone wants skb features after all..
>
> Donno, SKB mode is earlier in the stack maybe ..
>
>
Björn Töpel June 4, 2019, 5:37 a.m. UTC | #17
On Tue, 4 Jun 2019 at 01:11, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 06/03/2019 10:39 AM, Björn Töpel wrote:
> > On Sat, 1 Jun 2019 at 20:12, Jonathan Lemon <jlemon@flugsvamp.com> wrote:
> >> On 31 May 2019, at 2:42, Björn Töpel wrote:
> >>> From: Björn Töpel <bjorn.topel@intel.com>
> >>>
> >>> All XDP capable drivers need to implement the XDP_QUERY_PROG{,_HW}
> >>> command of ndo_bpf. The query code is fairly generic. This commit
> >>> refactors the query code up from the drivers to the netdev level.
> >>>
> >>> The struct net_device has gained two new members: xdp_prog_hw and
> >>> xdp_flags. The former is the offloaded XDP program, if any, and the
> >>> latter tracks the flags that the supplied when attaching the XDP
> >>> program. The flags only apply to SKB_MODE or DRV_MODE, not HW_MODE.
> >>>
> >>> The xdp_prog member, previously only used for SKB_MODE, is shared with
> >>> DRV_MODE. This is OK, due to the fact that SKB_MODE and DRV_MODE are
> >>> mutually exclusive. To differentiate between the two modes, a new
> >>> internal flag is introduced as well.
> >>
> >> I'm not entirely clear why this new flag is needed - GENERIC seems to
> >> be an alias for SKB_MODE, so why just use SKB_MODE directly?
> >>
> >> If the user does not explicitly specify a type (skb|drv|hw), then the
> >> command should choose the correct type and then behave as if this type
> >> was specified.
> >
> > Yes, this is kind of hairy.
> >
> > SKB and DRV are mutually exclusive, but HW is not. IOW, valid options are:
> > SKB, DRV, HW, SKB+HW DRV+HW.
>
> Correct, HW is a bit special here in that it helps offloading parts of
> the DRV XDP program to NIC, but also do RSS steering in BPF etc, hence
> this combo is intentionally allowed (see also git log).
>
> > What complicates things further, is that SKB and DRV can be implicitly
> > (auto/no flags) or explicitly enabled (flags).
>
> Mainly out of historic context: originally the fallback to SKB mode was
> implicit if the ndo_bpf was missing. But there are use cases where we
> want to fail if the driver does not support native XDP to avoid surprises.
>
> > If a user doesn't pass any flags, the "best supported mode" should be
> > selected. If this "auto mode" is used, it should be seen as a third
> > mode. E.g.
> >
> > ip link set dev eth0 xdp on -- OK
> > ip link set dev eth0 xdp off -- OK
> >
> > ip link set dev eth0 xdp on -- OK # generic auto selected
> > ip link set dev eth0 xdpgeneric off -- NOK, bad flags
>
> This would work if the auto selection would have selected XDP generic.
>
> > ip link set dev eth0 xdp on -- OK # drv auto selected
> > ip link set dev eth0 xdpdrv off -- NOK, bad flags
>
> This would work if the auto selection chose native XDP previously. Are
> you saying it's not the case?
>

Yes, that is *not* the case for some drivers. With the Intel drivers
we didn't check the flags at all at XDP attachment (check out the
usage of xdp_attachment_flags_ok), but e.g. nfp and netdevsim does.
Grep for 'program loaded with different flags' in the test_offload.py
selftest. I like this approach, and my patch does this flag check in
dev_change_xdp_fd.

> Also, what is the use case in mixing these commands? It should be xdp
> on+off, xdpdrv on+off, and so on. Are you saying you would prefer a
> xdp{,any} off that uninstalls everything? Isn't this mostly a user space
> issue to whatever orchestrates XDP?
>

No, I'm not suggesting a change. There is no use-case mixing them.
What the flags ok checks do is returning an error (like nfp and
netdevsim does) if a user tries to mix, say,  "xdp" and explicit
xdpdrv/xdpgeneric". This patch moves this check to the generic
function dev_change_xdp_fd.

There seems to be a confusion about how this is supposed to be used.
It was for me, e.g. I though using "enable with xdp and disable with
xdpdrv" was OK. This was the reason why I added an error on "disable
with xdpgeneric off, if xdpdrv is active" in my first revision of the
series. I removed this in v2, after Jakub pointed out the
test_offload.py test, which is a great showcase/test of what should be
allowed and what shouldn't in terms of flags.

TL;DR: Let's stick to what test_offload.py asserts, for all XDP.


> > ...and so on. The idea is that a user should use the same set of flags always.
> >
> > The internal "GENERIC" flag is only to determine if the xdp_prog
> > represents a DRV version or SKB version. Maybe it would be clearer
> > just to add an additional xdp_prog_drv to the net_device, instead?
> >
> >> The logic in dev_change_xdp_fd() is too complicated.  It disallows
> >> setting (drv|skb), but allows (hw|skb), which I'm not sure is
> >> intentional.
> >>
> >> It should be clearer as to which combinations are allowed.
> >
> > Fair point. I'll try to clean it up further.
> >
Jesper Dangaard Brouer June 4, 2019, 10:17 a.m. UTC | #18
On Mon, 3 Jun 2019 21:20:30 +0000
Saeed Mahameed <saeedm@mellanox.com> wrote:

> On Mon, 2019-06-03 at 11:04 +0200, Björn Töpel wrote:
> > On Sat, 1 Jun 2019 at 21:42, Jakub Kicinski
> > <jakub.kicinski@netronome.com> wrote:  
> > > On Fri, 31 May 2019 19:18:17 +0000, Saeed Mahameed wrote:  
> > > > On Fri, 2019-05-31 at 11:42 +0200, Björn Töpel wrote:  
> > > > > From: Björn Töpel <bjorn.topel@intel.com>
> > > > > 
> > > > > All XDP capable drivers need to implement the
> > > > > XDP_QUERY_PROG{,_HW} command of ndo_bpf. The query code is
> > > > > fairly generic. This commit refactors the query code up from
> > > > > the drivers to the netdev level.
> > > > > 
> > > > > The struct net_device has gained two new members: xdp_prog_hw
> > > > > and xdp_flags. The former is the offloaded XDP program, if
> > > > > any, and the latter tracks the flags that the supplied when
> > > > > attaching the XDP  program. The flags only apply to SKB_MODE
> > > > > or DRV_MODE, not HW_MODE.
> > > > > 
> > > > > The xdp_prog member, previously only used for SKB_MODE, is
> > > > > shared with DRV_MODE. This is OK, due to the fact that
> > > > > SKB_MODE and DRV_MODE are mutually exclusive. To
> > > > > differentiate between the two modes, a new internal flag is
> > > > > introduced as well.  
> > > > 
> > > > Just thinking out loud, why can't we allow any combination of
> > > > HW/DRV/SKB modes? they are totally different attach points in a
> > > > totally different checkpoints in a frame life cycle.  

The DRV_MODE and SKB_MODE is tricky to run concurrently. The XDP-redirect
scheme (designed by John) use a BPF helper (bpf_xdp_redirect_map) that
set global per-CPU state (this_cpu_ptr(&bpf_redirect_info)).

The tricky part (which I warned about, and we already have some fixes
for) is that the XDP/BPF-prog can call bpf_redirect_map, which update
the per-CPU state, but it can still choose not to return XDP_REDIRECT,
which then miss an invocation of xdp_do_redirect().  
 Later, your SKB_MODE XDP/BPF-prog can return XDP_REDIRECT without
calling the helper, and then use/reach to the per-CPU info set by the
DRV_MODE program, which is NOT something I want us to "support".


> > > FWIW see Message-ID: <20190201080236.446d84d4@redhat.com>
> > 
> > I've always seen the SKB-mode as something that will eventually be
> > removed.
> 
> I don't think so, we are too deep into SKB-mode.

I wish we could remove it.  After we got native XDP support in veth,
then its original purpose is gone, which were making it easier for
developers to get something working on their laptop, without having to
deploy it to the server with physical hardware all the time.


> > Clickable link:
> >  https://lore.kernel.org/netdev/20190201080236.446d84d4@redhat.com/
> > 
> 
> So we are all hanging on Jesper's refactoring ideas that are not
> getting any priority for now :).

Well, that is not true.  This patchset _is_ the refactor idea that
Bjørn is taking over and working on.  Specifically [2] from above link.

[2] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp_per_rxq01.org#refactor-idea-xdpbpf_prog-into-netdev_rx_queuenet_device
 
> > > > Down the road i think we will utilize this fact and start
> > > > introducing SKB helpers for SKB mode and driver helpers for DRV
> > > > mode..  
> > > 
> > > Any reason why we would want the extra complexity?  There is
> > > cls_bpf if someone wants skb features after all..  
> 
> Donno, SKB mode is earlier in the stack maybe .. 

From a BPF perspective you cannot introduce SKB helpers for SKB mode.
An XDP-prog have bpf prog type XDP (BPF_PROG_TYPE_XDP), and the program
itself is identical regardless of attaching for DRV_MODE or SKB_MODE.
You cannot detect this at attach time, due to tail-calls (which have
painted us into a corner).
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 44b47e9df94a..f3a875a52c6c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1940,6 +1940,9 @@  struct net_device {
 #endif
 	struct hlist_node	index_hlist;
 
+	struct bpf_prog		*xdp_prog_hw;
+	u32			xdp_flags;
+
 /*
  * Cache lines mostly used on transmit path
  */
@@ -2043,11 +2046,14 @@  struct net_device {
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
+/* Reusing the XDP flags space for kernel internal flag */
+#define XDP_FLAGS_KERN_GENERIC (1U << 4)
+static_assert(!(XDP_FLAGS_KERN_GENERIC & XDP_FLAGS_MASK));
+
 static inline bool netif_elide_gro(const struct net_device *dev)
 {
-	if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
-		return true;
-	return false;
+	return !(dev->features & NETIF_F_GRO) ||
+		dev->xdp_flags & XDP_FLAGS_KERN_GENERIC;
 }
 
 #define	NETDEV_ALIGN		32
@@ -3684,8 +3690,7 @@  struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
-u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
-		    enum bpf_netdev_command cmd);
+u32 dev_xdp_query(struct net_device *dev, unsigned int mode);
 int xdp_umem_query(struct net_device *dev, u16 queue_id);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 0f25b3675c5c..3691280c8fc1 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -51,6 +51,7 @@  struct xdp_mem_info {
 };
 
 struct page_pool;
+struct netlink_ext_ack;
 
 struct zero_copy_allocator {
 	void (*free)(struct zero_copy_allocator *zca, unsigned long handle);
@@ -166,4 +167,7 @@  bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf);
 
+bool xdp_prog_flags_ok(u32 old_flags, u32 new_flags,
+		       struct netlink_ext_ack *extack);
+
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/net/core/dev.c b/net/core/dev.c
index b6b8505cfb3e..1a9da508149a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8005,21 +8005,43 @@  int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down_generic);
 
-u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
-		    enum bpf_netdev_command cmd)
+static u32 dev_xdp_query_generic(struct net_device *dev)
 {
-	struct netdev_bpf xdp;
+	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
 
-	if (!bpf_op)
-		return 0;
+	return prog && dev->xdp_flags & XDP_FLAGS_KERN_GENERIC ?
+		prog->aux->id : 0;
+}
 
-	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = cmd;
+static u32 dev_xdp_query_drv(struct net_device *dev)
+{
+	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
+
+	return prog && !(dev->xdp_flags & XDP_FLAGS_KERN_GENERIC) ?
+		prog->aux->id : 0;
+}
+
+static u32 dev_xdp_current_mode(struct net_device *dev)
+{
+	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
 
-	/* Query must always succeed. */
-	WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
+	if (prog)
+		return dev_xdp_query_generic(dev) ? XDP_FLAGS_SKB_MODE :
+			XDP_FLAGS_DRV_MODE;
+	return 0;
+}
 
-	return xdp.prog_id;
+u32 dev_xdp_query(struct net_device *dev, unsigned int mode)
+{
+	switch (mode) {
+	case XDP_FLAGS_DRV_MODE:
+		return dev_xdp_query_drv(dev);
+	case XDP_FLAGS_SKB_MODE:
+		return dev_xdp_query_generic(dev);
+	case XDP_FLAGS_HW_MODE:
+		return dev->xdp_prog_hw ? dev->xdp_prog_hw->aux->id : 0;
+	}
+	return 0;
 }
 
 static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
@@ -8027,45 +8049,47 @@  static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
 			   struct bpf_prog *prog)
 {
 	struct netdev_bpf xdp;
+	int err;
 
 	memset(&xdp, 0, sizeof(xdp));
-	if (flags & XDP_FLAGS_HW_MODE)
+	if (flags & XDP_FLAGS_HW_MODE) {
 		xdp.command = XDP_SETUP_PROG_HW;
-	else
+		xdp.flags = XDP_FLAGS_HW_MODE;
+	} else {
 		xdp.command = XDP_SETUP_PROG;
+		xdp.flags = flags;
+	}
 	xdp.extack = extack;
-	xdp.flags = flags;
 	xdp.prog = prog;
 
-	return bpf_op(dev, &xdp);
+	err = bpf_op(dev, &xdp);
+	if (err)
+		return err;
+
+	if (flags & XDP_FLAGS_HW_MODE) {
+		dev->xdp_prog_hw = prog;
+		return 0;
+	}
+
+	rcu_assign_pointer(dev->xdp_prog, prog);
+	dev->xdp_flags = prog ? flags : 0;
+	return 0;
 }
 
 static void dev_xdp_uninstall(struct net_device *dev)
 {
-	struct netdev_bpf xdp;
-	bpf_op_t ndo_bpf;
-
-	/* Remove generic XDP */
-	WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
-
-	/* Remove from the driver */
-	ndo_bpf = dev->netdev_ops->ndo_bpf;
-	if (!ndo_bpf)
-		return;
-
-	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = XDP_QUERY_PROG;
-	WARN_ON(ndo_bpf(dev, &xdp));
-	if (xdp.prog_id)
-		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
-					NULL));
+	struct bpf_prog *prog = rtnl_dereference(dev->xdp_prog);
+	bpf_op_t bpf_op;
 
-	/* Remove HW offload */
-	memset(&xdp, 0, sizeof(xdp));
-	xdp.command = XDP_QUERY_PROG_HW;
-	if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
-		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+	if (prog) {
+		bpf_op = dev_xdp_query_generic(dev) ?
+			 generic_xdp_install : dev->netdev_ops->ndo_bpf;
+		WARN_ON(dev_xdp_install(dev, bpf_op, NULL, dev->xdp_flags,
 					NULL));
+	}
+	if (dev_xdp_query(dev, XDP_FLAGS_HW_MODE))
+		WARN_ON(dev_xdp_install(dev, dev->netdev_ops->ndo_bpf,
+					NULL, XDP_FLAGS_HW_MODE, NULL));
 }
 
 /**
@@ -8080,41 +8104,49 @@  static void dev_xdp_uninstall(struct net_device *dev)
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	enum bpf_netdev_command query;
 	struct bpf_prog *prog = NULL;
-	bpf_op_t bpf_op, bpf_chk;
+	u32 mode, curr_mode;
+	bpf_op_t bpf_op;
 	bool offload;
 	int err;
 
 	ASSERT_RTNL();
 
 	offload = flags & XDP_FLAGS_HW_MODE;
-	query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+	mode = offload ? XDP_FLAGS_HW_MODE : XDP_FLAGS_DRV_MODE;
 
-	bpf_op = bpf_chk = ops->ndo_bpf;
+	bpf_op = dev->netdev_ops->ndo_bpf;
 	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
 		NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
 		return -EOPNOTSUPP;
 	}
-	if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
-		bpf_op = generic_xdp_install;
-	if (bpf_op == bpf_chk)
-		bpf_chk = generic_xdp_install;
 
-	if (fd >= 0) {
-		if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
+	if (!bpf_op || flags & XDP_FLAGS_SKB_MODE)
+		mode = XDP_FLAGS_SKB_MODE;
+
+	curr_mode = dev_xdp_current_mode(dev);
+
+	if (!offload && curr_mode && (mode ^ curr_mode) &
+	    (XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE)) {
+		if (fd >= 0) {
 			NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
 			return -EEXIST;
 		}
-		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_query(dev, bpf_op, query)) {
+		return 0;
+	}
+
+	if (!offload && dev_xdp_query(dev, mode) &&
+	    !xdp_prog_flags_ok(dev->xdp_flags, flags, extack))
+		return -EBUSY;
+
+	if (fd >= 0) {
+		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST &&
+		    dev_xdp_query(dev, mode)) {
 			NL_SET_ERR_MSG(extack, "XDP program already attached");
 			return -EBUSY;
 		}
 
-		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
-					     bpf_op == ops->ndo_bpf);
+		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, !!bpf_op);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 
@@ -8125,6 +8157,10 @@  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		}
 	}
 
+	if (mode == XDP_FLAGS_SKB_MODE) {
+		bpf_op = generic_xdp_install;
+		flags |= XDP_FLAGS_KERN_GENERIC;
+	}
 	err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index adcc045952c2..5e396fd01d8b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1360,42 +1360,44 @@  static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static u32 rtnl_xdp_prog_skb(struct net_device *dev)
+static unsigned int rtnl_xdp_mode_to_flag(u8 tgt_mode)
 {
-	const struct bpf_prog *generic_xdp_prog;
-
-	ASSERT_RTNL();
-
-	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
-	if (!generic_xdp_prog)
-		return 0;
-	return generic_xdp_prog->aux->id;
-}
-
-static u32 rtnl_xdp_prog_drv(struct net_device *dev)
-{
-	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+	switch (tgt_mode) {
+	case XDP_ATTACHED_DRV:
+		return XDP_FLAGS_DRV_MODE;
+	case XDP_ATTACHED_SKB:
+		return XDP_FLAGS_SKB_MODE;
+	case XDP_ATTACHED_HW:
+		return XDP_FLAGS_HW_MODE;
+	}
+	return 0;
 }
 
-static u32 rtnl_xdp_prog_hw(struct net_device *dev)
+static u32 rtnl_xdp_mode_to_attr(u8 tgt_mode)
 {
-	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
-			       XDP_QUERY_PROG_HW);
+	switch (tgt_mode) {
+	case XDP_ATTACHED_DRV:
+		return IFLA_XDP_DRV_PROG_ID;
+	case XDP_ATTACHED_SKB:
+		return IFLA_XDP_SKB_PROG_ID;
+	case XDP_ATTACHED_HW:
+		return IFLA_XDP_HW_PROG_ID;
+	}
+	return 0;
 }
 
 static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
-			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
-			       u32 (*get_prog_id)(struct net_device *dev))
+			       u32 *prog_id, u8 *mode, u8 tgt_mode)
 {
 	u32 curr_id;
 	int err;
 
-	curr_id = get_prog_id(dev);
+	curr_id = dev_xdp_query(dev, rtnl_xdp_mode_to_flag(tgt_mode));
 	if (!curr_id)
 		return 0;
 
 	*prog_id = curr_id;
-	err = nla_put_u32(skb, attr, curr_id);
+	err = nla_put_u32(skb, rtnl_xdp_mode_to_attr(tgt_mode), curr_id);
 	if (err)
 		return err;
 
@@ -1420,16 +1422,13 @@  static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 
 	prog_id = 0;
 	mode = XDP_ATTACHED_NONE;
-	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
-				  IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
+	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB);
 	if (err)
 		goto err_cancel;
-	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
-				  IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
+	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV);
 	if (err)
 		goto err_cancel;
-	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
-				  IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
+	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW);
 	if (err)
 		goto err_cancel;
 
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4b2b194f4f1f..af92c04a58d8 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -388,16 +388,23 @@  int xdp_attachment_query(struct xdp_attachment_info *info,
 }
 EXPORT_SYMBOL_GPL(xdp_attachment_query);
 
-bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
-			     struct netdev_bpf *bpf)
+bool xdp_prog_flags_ok(u32 old_flags, u32 new_flags,
+		       struct netlink_ext_ack *extack)
 {
-	if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
-		NL_SET_ERR_MSG(bpf->extack,
-			       "program loaded with different flags");
+	if ((new_flags ^ old_flags) & XDP_FLAGS_MODES) {
+		NL_SET_ERR_MSG(extack, "program loaded with different flags");
 		return false;
 	}
 	return true;
 }
+
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+			     struct netdev_bpf *bpf)
+{
+	if (info->prog)
+		return xdp_prog_flags_ok(bpf->flags, info->flags, bpf->extack);
+	return true;
+}
 EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
 
 void xdp_attachment_setup(struct xdp_attachment_info *info,