
[bpf-next,V5,10/15] xdp: rhashtable with allocator ID to pointer mapping

Message ID 152180752969.20167.10855856167552164450.stgit@firesoul
State Changes Requested, archived
Delegated to: BPF Maintainers
Series: XDP redirect memory return API

Commit Message

Jesper Dangaard Brouer March 23, 2018, 12:18 p.m. UTC
Use the IDA infrastructure to get a cyclically increasing ID number,
which is used to keep track of each registered allocator per RX-queue
xdp_rxq_info.  Instead of the IDR infrastructure, which uses a radix
tree, use a dynamic rhashtable for the ID-to-pointer lookup table,
because it is faster.

The problem being solved here is that the xdp_rxq_info pointer
(stored in xdp_buff) cannot be used directly, as its guaranteed
lifetime is too short.  The info is needed on a (potentially) remote
CPU at DMA-TX completion time.  When an xdp_buff is converted into an
xdp_frame, the xdp_mem_info is stored in the frame, which is
sufficient for the simple page-refcnt based recycle schemes.

For more advanced allocators there is a need to store a pointer to
the registered allocator.  Thus, there is a need to guard the lifetime
or validity of the allocator pointer, which is done through this
rhashtable mapping of ID to pointer.  The removal and validity of the
allocator and the helper struct xdp_mem_allocator are guarded by RCU.
The allocator is created by the driver and registered with
xdp_rxq_info_reg_mem_model().
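
For reference, a minimal sketch of the driver-side registration pattern
this API expects (the helper name and variables below are placeholders,
not part of the patch; the error handling mirrors the ixgbe/tun/virtio_net
hunks further down):

#include <linux/netdevice.h>
#include <net/xdp.h>

/* Illustrative only: register an RX-queue and its memory model.
 * "my_rxq_setup" is a hypothetical driver helper, not part of this patch.
 */
static int my_rxq_setup(struct net_device *dev, struct xdp_rxq_info *xdp_rxq,
			u32 queue_index)
{
	int err;

	err = xdp_rxq_info_reg(xdp_rxq, dev, queue_index);
	if (err < 0)
		return err;

	/* allocator == NULL is valid for MEM_TYPE_PAGE_SHARED; mem.id stays
	 * zero and no rhashtable entry is created.
	 */
	err = xdp_rxq_info_reg_mem_model(xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL);
	if (err < 0) {
		xdp_rxq_info_unreg(xdp_rxq);
		return err;
	}

	return 0;
}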

It is up for debate who is responsible for freeing the allocator
pointer or invoking the allocator destructor function.  In any case,
this must happen via RCU freeing.

V4: Per request of Jason Wang
- Use xdp_rxq_info_reg_mem_model() in all drivers implementing
  XDP_REDIRECT, even though it's not strictly necessary when
  allocator==NULL for type MEM_TYPE_PAGE_SHARED (given it's zero).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    9 +
 drivers/net/tun.c                             |    6 +
 drivers/net/virtio_net.c                      |    7 +
 include/net/xdp.h                             |   15 --
 net/core/xdp.c                                |  230 ++++++++++++++++++++++++-
 5 files changed, 248 insertions(+), 19 deletions(-)

Comments

Alexander H Duyck March 23, 2018, 4:56 p.m. UTC | #1
On Fri, Mar 23, 2018 at 5:18 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> Use the IDA infrastructure for getting a cyclic increasing ID number,
> that is used for keeping track of each registered allocator per
> RX-queue xdp_rxq_info.  Instead of using the IDR infrastructure, which
> uses a radix tree, use a dynamic rhashtable, for creating ID to
> pointer lookup table, because this is faster.
>
> The problem that is being solved here is that, the xdp_rxq_info
> pointer (stored in xdp_buff) cannot be used directly, as the
> guaranteed lifetime is too short.  The info is needed on a
> (potentially) remote CPU during DMA-TX completion time . In an
> xdp_frame the xdp_mem_info is stored, when it got converted from an
> xdp_buff, which is sufficient for the simple page refcnt based recycle
> schemes.
>
> For more advanced allocators there is a need to store a pointer to the
> registered allocator.  Thus, there is a need to guard the lifetime or
> validity of the allocator pointer, which is done through this
> rhashtable ID map to pointer. The removal and validity of of the
> allocator and helper struct xdp_mem_allocator is guarded by RCU.  The
> allocator will be created by the driver, and registered with
> xdp_rxq_info_reg_mem_model().
>
> It is up-to debate who is responsible for freeing the allocator
> pointer or invoking the allocator destructor function.  In any case,
> this must happen via RCU freeing.
>
> Use the IDA infrastructure for getting a cyclic increasing ID number,
> that is used for keeping track of each registered allocator per
> RX-queue xdp_rxq_info.
>
> V4: Per req of Jason Wang
> - Use xdp_rxq_info_reg_mem_model() in all drivers implementing
>   XDP_REDIRECT, even-though it's not strictly necessary when
>   allocator==NULL for type MEM_TYPE_PAGE_SHARED (given it's zero).
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    9 +
>  drivers/net/tun.c                             |    6 +
>  drivers/net/virtio_net.c                      |    7 +
>  include/net/xdp.h                             |   15 --
>  net/core/xdp.c                                |  230 ++++++++++++++++++++++++-
>  5 files changed, 248 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index 45520eb503ee..ff069597fccf 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -6360,7 +6360,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
>         struct device *dev = rx_ring->dev;
>         int orig_node = dev_to_node(dev);
>         int ring_node = -1;
> -       int size;
> +       int size, err;
>
>         size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
>
> @@ -6397,6 +6397,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
>                              rx_ring->queue_index) < 0)
>                 goto err;
>
> +       err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
> +                                        MEM_TYPE_PAGE_SHARED, NULL);
> +       if (err) {
> +               xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
> +               goto err;
> +       }
> +
>         rx_ring->xdp_prog = adapter->xdp_prog;
>
>         return 0;
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 6750980d9f30..81fddf9cc58f 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -846,6 +846,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
>                                        tun->dev, tfile->queue_index);
>                 if (err < 0)
>                         goto out;
> +               err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
> +                                                MEM_TYPE_PAGE_SHARED, NULL);
> +               if (err < 0) {
> +                       xdp_rxq_info_unreg(&tfile->xdp_rxq);
> +                       goto out;
> +               }
>                 err = 0;
>         }
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 6c4220450506..48c86accd3b8 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1312,6 +1312,13 @@ static int virtnet_open(struct net_device *dev)
>                 if (err < 0)
>                         return err;
>
> +               err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
> +                                                MEM_TYPE_PAGE_SHARED, NULL);
> +               if (err < 0) {
> +                       xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
> +                       return err;
> +               }
> +
>                 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
>                 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
>         }
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index bc0cb97e20dc..859aa9b737fe 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -41,7 +41,7 @@ enum mem_type {
>
>  struct xdp_mem_info {
>         u32 type; /* enum mem_type, but known size type */
> -       /* u32 id; will be added later in this patchset */
> +       u32 id;
>  };
>
>  struct xdp_rxq_info {
> @@ -100,18 +100,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>         return xdp_frame;
>  }
>
> -static inline
> -void xdp_return_frame(void *data, struct xdp_mem_info *mem)
> -{
> -       if (mem->type == MEM_TYPE_PAGE_SHARED)
> -               page_frag_free(data);
> -
> -       if (mem->type == MEM_TYPE_PAGE_ORDER0) {
> -               struct page *page = virt_to_page(data); /* Assumes order0 page*/
> -
> -               put_page(page);
> -       }
> -}
> +void xdp_return_frame(void *data, struct xdp_mem_info *mem);
>
>  int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
>                      struct net_device *dev, u32 queue_index);
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 9eee0c431126..06a5b39491ad 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -5,6 +5,9 @@
>   */
>  #include <linux/types.h>
>  #include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/idr.h>
> +#include <linux/rhashtable.h>
>
>  #include <net/xdp.h>
>
> @@ -13,6 +16,99 @@
>  #define REG_STATE_UNREGISTERED 0x2
>  #define REG_STATE_UNUSED       0x3
>
> +DEFINE_IDA(mem_id_pool);
> +static DEFINE_MUTEX(mem_id_lock);
> +#define MEM_ID_MAX 0xFFFE
> +#define MEM_ID_MIN 1
> +static int mem_id_next = MEM_ID_MIN;
> +
> +static bool mem_id_init; /* false */
> +static struct rhashtable *mem_id_ht;
> +
> +struct xdp_mem_allocator {
> +       struct xdp_mem_info mem;
> +       void *allocator;
> +       struct rhash_head node;
> +       struct rcu_head rcu;
> +};
> +
> +static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
> +{
> +       const u32 *k = data;
> +       const u32 key = *k;
> +
> +       BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
> +                    != sizeof(u32));
> +
> +       /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
> +       return key << RHT_HASH_RESERVED_SPACE;
> +}
> +
> +static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
> +                         const void *ptr)
> +{
> +       const struct xdp_mem_allocator *xa = ptr;
> +       u32 mem_id = *(u32 *)arg->key;
> +
> +       return xa->mem.id != mem_id;
> +}
> +
> +static const struct rhashtable_params mem_id_rht_params = {
> +       .nelem_hint = 64,
> +       .head_offset = offsetof(struct xdp_mem_allocator, node),
> +       .key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
> +       .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
> +       .max_size = MEM_ID_MAX,
> +       .min_size = 8,
> +       .automatic_shrinking = true,
> +       .hashfn    = xdp_mem_id_hashfn,
> +       .obj_cmpfn = xdp_mem_id_cmp,
> +};
> +
> +void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
> +{
> +       struct xdp_mem_allocator *xa;
> +
> +       xa = container_of(rcu, struct xdp_mem_allocator, rcu);
> +
> +       /* Allow this ID to be reused */
> +       ida_simple_remove(&mem_id_pool, xa->mem.id);
> +
> +       /* TODO: Depending on allocator type/pointer free resources */
> +
> +       /* Poison memory */
> +       xa->mem.id = 0xFFFF;
> +       xa->mem.type = 0xF0F0;
> +       xa->allocator = (void *)0xDEAD9001;
> +
> +       kfree(xa);
> +}
> +
> +void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
> +{
> +       struct xdp_mem_allocator *xa;
> +       int id = xdp_rxq->mem.id;
> +       int err;
> +
> +       if (id == 0)
> +               return;
> +
> +       mutex_lock(&mem_id_lock);
> +
> +       xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
> +       if (!xa) {
> +               mutex_unlock(&mem_id_lock);
> +               return;
> +       }
> +
> +       err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
> +       WARN_ON(err);
> +
> +       call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
> +
> +       mutex_unlock(&mem_id_lock);
> +}
> +
>  void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
>  {
>         /* Simplify driver cleanup code paths, allow unreg "unused" */
> @@ -21,8 +117,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
>
>         WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
>
> +       __xdp_rxq_info_unreg_mem_model(xdp_rxq);
> +
>         xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
>         xdp_rxq->dev = NULL;
> +
> +       /* Reset mem info to defaults */
> +       xdp_rxq->mem.id = 0;
> +       xdp_rxq->mem.type = 0;
>  }
>  EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
>
> @@ -72,20 +174,138 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
>  }
>  EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
>
> +int __mem_id_init_hash_table(void)
> +{
> +       struct rhashtable *rht;
> +       int ret;
> +
> +       if (unlikely(mem_id_init))
> +               return 0;
> +
> +       rht = kzalloc(sizeof(*rht), GFP_KERNEL);
> +       if (!rht)
> +               return -ENOMEM;
> +
> +       ret = rhashtable_init(rht, &mem_id_rht_params);
> +       if (ret < 0) {
> +               kfree(rht);
> +               return ret;
> +       }
> +       mem_id_ht = rht;
> +       smp_mb(); /* mutex lock should provide enough pairing */
> +       mem_id_init = true;
> +
> +       return 0;
> +}
> +
> +/* Allocate a cyclic ID that maps to allocator pointer.
> + * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
> + *
> + * Caller must lock mem_id_lock.
> + */
> +static int __mem_id_cyclic_get(gfp_t gfp)
> +{
> +       int retries = 1;
> +       int id;
> +
> +again:
> +       id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
> +       if (id < 0) {
> +               if (id == -ENOSPC) {
> +                       /* Cyclic allocator, reset next id */
> +                       if (retries--) {
> +                               mem_id_next = MEM_ID_MIN;
> +                               goto again;
> +                       }
> +               }
> +               return id; /* errno */
> +       }
> +       mem_id_next = id + 1;
> +
> +       return id;
> +}
> +
>  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>                                enum mem_type type, void *allocator)
>  {
> +       struct xdp_mem_allocator *xdp_alloc;
> +       gfp_t gfp = GFP_KERNEL;
> +       int id, errno, ret;
> +       void *ptr;
> +
> +       if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
> +               WARN(1, "Missing register, driver bug");
> +               return -EFAULT;
> +       }
> +
>         if (type >= MEM_TYPE_MAX)
>                 return -EINVAL;
>
>         xdp_rxq->mem.type = type;
>
> -       if (allocator)
> -               return -EOPNOTSUPP;
> +       if (!allocator)
> +               return 0;
> +
> +       /* Delay init of rhashtable to save memory if feature isn't used */
> +       if (!mem_id_init) {
> +               mutex_lock(&mem_id_lock);
> +               ret = __mem_id_init_hash_table();
> +               mutex_unlock(&mem_id_lock);
> +               if (ret < 0) {
> +                       WARN_ON(1);
> +                       return ret;
> +               }
> +       }
> +
> +       xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
> +       if (!xdp_alloc)
> +               return -ENOMEM;
> +
> +       mutex_lock(&mem_id_lock);
> +       id = __mem_id_cyclic_get(gfp);
> +       if (id < 0) {
> +               errno = id;
> +               goto err;
> +       }
> +       xdp_rxq->mem.id = id;
> +       xdp_alloc->mem  = xdp_rxq->mem;
> +       xdp_alloc->allocator = allocator;
> +
> +       /* Insert allocator into ID lookup table */
> +       ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
> +       if (IS_ERR(ptr)) {
> +               errno = PTR_ERR(ptr);
> +               goto err;
> +       }
> +
> +       mutex_unlock(&mem_id_lock);
>
> -       /* TODO: Allocate an ID that maps to allocator pointer
> -        * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
> -        */
>         return 0;
> +err:
> +       mutex_unlock(&mem_id_lock);
> +       kfree(xdp_alloc);
> +       return errno;
>  }
>  EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
> +
> +void xdp_return_frame(void *data, struct xdp_mem_info *mem)
> +{
> +       struct xdp_mem_allocator *xa;
> +
> +       rcu_read_lock();
> +       if (mem->id)
> +               xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> +       rcu_read_unlock();
> +
> +       if (mem->type == MEM_TYPE_PAGE_SHARED) {
> +               page_frag_free(data);
> +               return;
> +       }
> +
> +       if (mem->type == MEM_TYPE_PAGE_ORDER0) {
> +               struct page *page = virt_to_page(data); /* Assumes order0 page*/
> +
> +               put_page(page);
> +       }
> +}
> +EXPORT_SYMBOL_GPL(xdp_return_frame);
>

I'm not sure what the point is of getting the xa value if it is not
going to be used. Also I would assume there are types that won't even
need the hash table lookup. I would prefer to see this bit held off on
until you have something that actually needs it.
Jesper Dangaard Brouer March 23, 2018, 6:15 p.m. UTC | #2
On Fri, 23 Mar 2018 09:56:50 -0700
Alexander Duyck <alexander.duyck@gmail.com> wrote:

> On Fri, Mar 23, 2018 at 5:18 AM, Jesper Dangaard Brouer
> <brouer@redhat.com> wrote:
> > Use the IDA infrastructure for getting a cyclic increasing ID number,
> > that is used for keeping track of each registered allocator per
> > RX-queue xdp_rxq_info.  Instead of using the IDR infrastructure, which
> > uses a radix tree, use a dynamic rhashtable, for creating ID to
> > pointer lookup table, because this is faster.
> >
> > The problem that is being solved here is that, the xdp_rxq_info
> > pointer (stored in xdp_buff) cannot be used directly, as the
> > guaranteed lifetime is too short.  The info is needed on a
> > (potentially) remote CPU during DMA-TX completion time . In an
> > xdp_frame the xdp_mem_info is stored, when it got converted from an
> > xdp_buff, which is sufficient for the simple page refcnt based recycle
> > schemes.
> >
> > For more advanced allocators there is a need to store a pointer to the
> > registered allocator.  Thus, there is a need to guard the lifetime or
> > validity of the allocator pointer, which is done through this
> > rhashtable ID map to pointer. The removal and validity of of the
> > allocator and helper struct xdp_mem_allocator is guarded by RCU.  The
> > allocator will be created by the driver, and registered with
> > xdp_rxq_info_reg_mem_model().
> >
> > It is up-to debate who is responsible for freeing the allocator
> > pointer or invoking the allocator destructor function.  In any case,
> > this must happen via RCU freeing.
> >
> > Use the IDA infrastructure for getting a cyclic increasing ID number,
> > that is used for keeping track of each registered allocator per
> > RX-queue xdp_rxq_info.
> >
> > V4: Per req of Jason Wang
> > - Use xdp_rxq_info_reg_mem_model() in all drivers implementing
> >   XDP_REDIRECT, even-though it's not strictly necessary when
> >   allocator==NULL for type MEM_TYPE_PAGE_SHARED (given it's zero).
> >
> > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> > ---
> >  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    9 +
> >  drivers/net/tun.c                             |    6 +
> >  drivers/net/virtio_net.c                      |    7 +
> >  include/net/xdp.h                             |   15 --
> >  net/core/xdp.c                                |  230 ++++++++++++++++++++++++-
> >  5 files changed, 248 insertions(+), 19 deletions(-)
> >
[...]
> >  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
> >                                enum mem_type type, void *allocator)
> >  {
> > +       struct xdp_mem_allocator *xdp_alloc;
> > +       gfp_t gfp = GFP_KERNEL;
> > +       int id, errno, ret;
> > +       void *ptr;
> > +
> > +       if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
> > +               WARN(1, "Missing register, driver bug");
> > +               return -EFAULT;
> > +       }
> > +
> >         if (type >= MEM_TYPE_MAX)
> >                 return -EINVAL;
> >
> >         xdp_rxq->mem.type = type;
> >
> > -       if (allocator)
> > -               return -EOPNOTSUPP;
> > +       if (!allocator)
> > +               return 0;
> > +
> > +       /* Delay init of rhashtable to save memory if feature isn't used */
> > +       if (!mem_id_init) {
> > +               mutex_lock(&mem_id_lock);
> > +               ret = __mem_id_init_hash_table();
> > +               mutex_unlock(&mem_id_lock);
> > +               if (ret < 0) {
> > +                       WARN_ON(1);
> > +                       return ret;
> > +               }
> > +       }
> > +
> > +       xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
> > +       if (!xdp_alloc)
> > +               return -ENOMEM;
> > +
> > +       mutex_lock(&mem_id_lock);
> > +       id = __mem_id_cyclic_get(gfp);
> > +       if (id < 0) {
> > +               errno = id;
> > +               goto err;
> > +       }
> > +       xdp_rxq->mem.id = id;
> > +       xdp_alloc->mem  = xdp_rxq->mem;
> > +       xdp_alloc->allocator = allocator;
> > +
> > +       /* Insert allocator into ID lookup table */
> > +       ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
> > +       if (IS_ERR(ptr)) {
> > +               errno = PTR_ERR(ptr);
> > +               goto err;
> > +       }
> > +
> > +       mutex_unlock(&mem_id_lock);
> >
> > -       /* TODO: Allocate an ID that maps to allocator pointer
> > -        * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
> > -        */
> >         return 0;
> > +err:
> > +       mutex_unlock(&mem_id_lock);
> > +       kfree(xdp_alloc);
> > +       return errno;
> >  }
> >  EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
> > +
> > +void xdp_return_frame(void *data, struct xdp_mem_info *mem)
> > +{
> > +       struct xdp_mem_allocator *xa;
> > +
> > +       rcu_read_lock();
> > +       if (mem->id)
> > +               xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> > +       rcu_read_unlock();
> > +
> > +       if (mem->type == MEM_TYPE_PAGE_SHARED) {
> > +               page_frag_free(data);
> > +               return;
> > +       }
> > +
> > +       if (mem->type == MEM_TYPE_PAGE_ORDER0) {
> > +               struct page *page = virt_to_page(data); /* Assumes order0 page*/
> > +
> > +               put_page(page);
> > +       }
> > +}
> > +EXPORT_SYMBOL_GPL(xdp_return_frame);
> >  
> 
> I'm not sure what the point is of getting the xa value if it is not
> going to be used. Also I would assume there are types that won't even
> need the hash table lookup. I would prefer to see this bit held off on
> until you have something that actually needs it.

I think, you misread the patch. The lookup is NOT going to be performed
when mem->id is zero, which is the case that you are interested in for
your ixgbe driver.
Alexander H Duyck March 23, 2018, 6:22 p.m. UTC | #3
On Fri, Mar 23, 2018 at 11:15 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> On Fri, 23 Mar 2018 09:56:50 -0700
> Alexander Duyck <alexander.duyck@gmail.com> wrote:
>
>> On Fri, Mar 23, 2018 at 5:18 AM, Jesper Dangaard Brouer
>> <brouer@redhat.com> wrote:
>> > Use the IDA infrastructure for getting a cyclic increasing ID number,
>> > that is used for keeping track of each registered allocator per
>> > RX-queue xdp_rxq_info.  Instead of using the IDR infrastructure, which
>> > uses a radix tree, use a dynamic rhashtable, for creating ID to
>> > pointer lookup table, because this is faster.
>> >
>> > The problem that is being solved here is that, the xdp_rxq_info
>> > pointer (stored in xdp_buff) cannot be used directly, as the
>> > guaranteed lifetime is too short.  The info is needed on a
>> > (potentially) remote CPU during DMA-TX completion time . In an
>> > xdp_frame the xdp_mem_info is stored, when it got converted from an
>> > xdp_buff, which is sufficient for the simple page refcnt based recycle
>> > schemes.
>> >
>> > For more advanced allocators there is a need to store a pointer to the
>> > registered allocator.  Thus, there is a need to guard the lifetime or
>> > validity of the allocator pointer, which is done through this
>> > rhashtable ID map to pointer. The removal and validity of of the
>> > allocator and helper struct xdp_mem_allocator is guarded by RCU.  The
>> > allocator will be created by the driver, and registered with
>> > xdp_rxq_info_reg_mem_model().
>> >
>> > It is up-to debate who is responsible for freeing the allocator
>> > pointer or invoking the allocator destructor function.  In any case,
>> > this must happen via RCU freeing.
>> >
>> > Use the IDA infrastructure for getting a cyclic increasing ID number,
>> > that is used for keeping track of each registered allocator per
>> > RX-queue xdp_rxq_info.
>> >
>> > V4: Per req of Jason Wang
>> > - Use xdp_rxq_info_reg_mem_model() in all drivers implementing
>> >   XDP_REDIRECT, even-though it's not strictly necessary when
>> >   allocator==NULL for type MEM_TYPE_PAGE_SHARED (given it's zero).
>> >
>> > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
>> > ---
>> >  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    9 +
>> >  drivers/net/tun.c                             |    6 +
>> >  drivers/net/virtio_net.c                      |    7 +
>> >  include/net/xdp.h                             |   15 --
>> >  net/core/xdp.c                                |  230 ++++++++++++++++++++++++-
>> >  5 files changed, 248 insertions(+), 19 deletions(-)
>> >
> [...]
>> >  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>> >                                enum mem_type type, void *allocator)
>> >  {
>> > +       struct xdp_mem_allocator *xdp_alloc;
>> > +       gfp_t gfp = GFP_KERNEL;
>> > +       int id, errno, ret;
>> > +       void *ptr;
>> > +
>> > +       if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
>> > +               WARN(1, "Missing register, driver bug");
>> > +               return -EFAULT;
>> > +       }
>> > +
>> >         if (type >= MEM_TYPE_MAX)
>> >                 return -EINVAL;
>> >
>> >         xdp_rxq->mem.type = type;
>> >
>> > -       if (allocator)
>> > -               return -EOPNOTSUPP;
>> > +       if (!allocator)
>> > +               return 0;
>> > +
>> > +       /* Delay init of rhashtable to save memory if feature isn't used */
>> > +       if (!mem_id_init) {
>> > +               mutex_lock(&mem_id_lock);
>> > +               ret = __mem_id_init_hash_table();
>> > +               mutex_unlock(&mem_id_lock);
>> > +               if (ret < 0) {
>> > +                       WARN_ON(1);
>> > +                       return ret;
>> > +               }
>> > +       }
>> > +
>> > +       xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
>> > +       if (!xdp_alloc)
>> > +               return -ENOMEM;
>> > +
>> > +       mutex_lock(&mem_id_lock);
>> > +       id = __mem_id_cyclic_get(gfp);
>> > +       if (id < 0) {
>> > +               errno = id;
>> > +               goto err;
>> > +       }
>> > +       xdp_rxq->mem.id = id;
>> > +       xdp_alloc->mem  = xdp_rxq->mem;
>> > +       xdp_alloc->allocator = allocator;
>> > +
>> > +       /* Insert allocator into ID lookup table */
>> > +       ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
>> > +       if (IS_ERR(ptr)) {
>> > +               errno = PTR_ERR(ptr);
>> > +               goto err;
>> > +       }
>> > +
>> > +       mutex_unlock(&mem_id_lock);
>> >
>> > -       /* TODO: Allocate an ID that maps to allocator pointer
>> > -        * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
>> > -        */
>> >         return 0;
>> > +err:
>> > +       mutex_unlock(&mem_id_lock);
>> > +       kfree(xdp_alloc);
>> > +       return errno;
>> >  }
>> >  EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
>> > +
>> > +void xdp_return_frame(void *data, struct xdp_mem_info *mem)
>> > +{
>> > +       struct xdp_mem_allocator *xa;
>> > +
>> > +       rcu_read_lock();
>> > +       if (mem->id)
>> > +               xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
>> > +       rcu_read_unlock();
>> > +
>> > +       if (mem->type == MEM_TYPE_PAGE_SHARED) {
>> > +               page_frag_free(data);
>> > +               return;
>> > +       }
>> > +
>> > +       if (mem->type == MEM_TYPE_PAGE_ORDER0) {
>> > +               struct page *page = virt_to_page(data); /* Assumes order0 page*/
>> > +
>> > +               put_page(page);
>> > +       }
>> > +}
>> > +EXPORT_SYMBOL_GPL(xdp_return_frame);
>> >
>>
>> I'm not sure what the point is of getting the xa value if it is not
>> going to be used. Also I would assume there are types that won't even
>> need the hash table lookup. I would prefer to see this bit held off on
>> until you have something that actually needs it.
>
> I think, you misread the patch. The lookup is NOT going to be performed
> when mem->id is zero, which is the case that you are interested in for
> your ixgbe driver.

Sorry, to clarify. Why do I have to take rcu_read_lock and
rcu_read_unlock if I am not doing an RCU read? Why even bother doing a
conditional check for mem->id if the lookup using it is not used?

Basically if I am not using it why should I take any of the overhead
for it. I would much rather have this code reduced to be as small and
fast as possible instead of wasting cycles on the RCU acquire/release,
reading mem->id, testing mem->id, and then jumping over the code that
is not used in either case even if mem->id isn't 0.

What I don't get is why you aren't getting warnings about variables
being assigned but never used in the case of xa.

- Alex
Jesper Dangaard Brouer March 26, 2018, 9:04 p.m. UTC | #4
On Fri, 23 Mar 2018 09:56:50 -0700
Alexander Duyck <alexander.duyck@gmail.com> wrote:

> > +void xdp_return_frame(void *data, struct xdp_mem_info *mem)
> > +{
> > +       struct xdp_mem_allocator *xa;
> > +
> > +       rcu_read_lock();
> > +       if (mem->id)
> > +               xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> > +       rcu_read_unlock();
> > +
> > +       if (mem->type == MEM_TYPE_PAGE_SHARED) {
> > +               page_frag_free(data);
> > +               return;
> > +       }
> > +
> > +       if (mem->type == MEM_TYPE_PAGE_ORDER0) {
> > +               struct page *page = virt_to_page(data); /* Assumes order0 page*/
> > +
> > +               put_page(page);
> > +       }
> > +}
> > +EXPORT_SYMBOL_GPL(xdp_return_frame);
> >  
> 
> I'm not sure what the point is of getting the xa value if it is not
> going to be used. Also I would assume there are types that won't even
> need the hash table lookup. I would prefer to see this bit held off on
> until you have something that actually needs it.

I agree, fixed.
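
For context, this is roughly the shape the function takes once the unused
lookup is dropped — a sketch of the agreed-upon change, not necessarily the
exact code that landed in the next revision:

void xdp_return_frame(void *data, struct xdp_mem_info *mem)
{
	/* No RCU section or rhashtable lookup is needed for the simple
	 * page-based memory types; just branch on mem->type.
	 */
	if (mem->type == MEM_TYPE_PAGE_SHARED) {
		page_frag_free(data);
		return;
	}

	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
		struct page *page = virt_to_page(data); /* Assumes order0 page */

		put_page(page);
	}
}
EXPORT_SYMBOL_GPL(xdp_return_frame);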

Patch

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 45520eb503ee..ff069597fccf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6360,7 +6360,7 @@  int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 	struct device *dev = rx_ring->dev;
 	int orig_node = dev_to_node(dev);
 	int ring_node = -1;
-	int size;
+	int size, err;
 
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 
@@ -6397,6 +6397,13 @@  int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 			     rx_ring->queue_index) < 0)
 		goto err;
 
+	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
+					 MEM_TYPE_PAGE_SHARED, NULL);
+	if (err) {
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+		goto err;
+	}
+
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 6750980d9f30..81fddf9cc58f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -846,6 +846,12 @@  static int tun_attach(struct tun_struct *tun, struct file *file,
 				       tun->dev, tfile->queue_index);
 		if (err < 0)
 			goto out;
+		err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err < 0) {
+			xdp_rxq_info_unreg(&tfile->xdp_rxq);
+			goto out;
+		}
 		err = 0;
 	}
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6c4220450506..48c86accd3b8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1312,6 +1312,13 @@  static int virtnet_open(struct net_device *dev)
 		if (err < 0)
 			return err;
 
+		err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err < 0) {
+			xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
+			return err;
+		}
+
 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
 	}
diff --git a/include/net/xdp.h b/include/net/xdp.h
index bc0cb97e20dc..859aa9b737fe 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -41,7 +41,7 @@  enum mem_type {
 
 struct xdp_mem_info {
 	u32 type; /* enum mem_type, but known size type */
-	/* u32 id; will be added later in this patchset */
+	u32 id;
 };
 
 struct xdp_rxq_info {
@@ -100,18 +100,7 @@  struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	return xdp_frame;
 }
 
-static inline
-void xdp_return_frame(void *data, struct xdp_mem_info *mem)
-{
-	if (mem->type == MEM_TYPE_PAGE_SHARED)
-		page_frag_free(data);
-
-	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
-		struct page *page = virt_to_page(data); /* Assumes order0 page*/
-
-		put_page(page);
-	}
-}
+void xdp_return_frame(void *data, struct xdp_mem_info *mem);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9eee0c431126..06a5b39491ad 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -5,6 +5,9 @@ 
  */
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/rhashtable.h>
 
 #include <net/xdp.h>
 
@@ -13,6 +16,99 @@ 
 #define REG_STATE_UNREGISTERED	0x2
 #define REG_STATE_UNUSED	0x3
 
+DEFINE_IDA(mem_id_pool);
+static DEFINE_MUTEX(mem_id_lock);
+#define MEM_ID_MAX 0xFFFE
+#define MEM_ID_MIN 1
+static int mem_id_next = MEM_ID_MIN;
+
+static bool mem_id_init; /* false */
+static struct rhashtable *mem_id_ht;
+
+struct xdp_mem_allocator {
+	struct xdp_mem_info mem;
+	void *allocator;
+	struct rhash_head node;
+	struct rcu_head rcu;
+};
+
+static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
+{
+	const u32 *k = data;
+	const u32 key = *k;
+
+	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+		     != sizeof(u32));
+
+	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
+	return key << RHT_HASH_RESERVED_SPACE;
+}
+
+static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
+			  const void *ptr)
+{
+	const struct xdp_mem_allocator *xa = ptr;
+	u32 mem_id = *(u32 *)arg->key;
+
+	return xa->mem.id != mem_id;
+}
+
+static const struct rhashtable_params mem_id_rht_params = {
+	.nelem_hint = 64,
+	.head_offset = offsetof(struct xdp_mem_allocator, node),
+	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
+	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+	.max_size = MEM_ID_MAX,
+	.min_size = 8,
+	.automatic_shrinking = true,
+	.hashfn    = xdp_mem_id_hashfn,
+	.obj_cmpfn = xdp_mem_id_cmp,
+};
+
+void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
+{
+	struct xdp_mem_allocator *xa;
+
+	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
+
+	/* Allow this ID to be reused */
+	ida_simple_remove(&mem_id_pool, xa->mem.id);
+
+	/* TODO: Depending on allocator type/pointer free resources */
+
+	/* Poison memory */
+	xa->mem.id = 0xFFFF;
+	xa->mem.type = 0xF0F0;
+	xa->allocator = (void *)0xDEAD9001;
+
+	kfree(xa);
+}
+
+void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+	struct xdp_mem_allocator *xa;
+	int id = xdp_rxq->mem.id;
+	int err;
+
+	if (id == 0)
+		return;
+
+	mutex_lock(&mem_id_lock);
+
+	xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+	if (!xa) {
+		mutex_unlock(&mem_id_lock);
+		return;
+	}
+
+	err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
+	WARN_ON(err);
+
+	call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+
+	mutex_unlock(&mem_id_lock);
+}
+
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 {
 	/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -21,8 +117,14 @@  void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 
 	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
 
+	__xdp_rxq_info_unreg_mem_model(xdp_rxq);
+
 	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
 	xdp_rxq->dev = NULL;
+
+	/* Reset mem info to defaults */
+	xdp_rxq->mem.id = 0;
+	xdp_rxq->mem.type = 0;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
 
@@ -72,20 +174,138 @@  bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
 
+int __mem_id_init_hash_table(void)
+{
+	struct rhashtable *rht;
+	int ret;
+
+	if (unlikely(mem_id_init))
+		return 0;
+
+	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
+	if (!rht)
+		return -ENOMEM;
+
+	ret = rhashtable_init(rht, &mem_id_rht_params);
+	if (ret < 0) {
+		kfree(rht);
+		return ret;
+	}
+	mem_id_ht = rht;
+	smp_mb(); /* mutex lock should provide enough pairing */
+	mem_id_init = true;
+
+	return 0;
+}
+
+/* Allocate a cyclic ID that maps to allocator pointer.
+ * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
+ *
+ * Caller must lock mem_id_lock.
+ */
+static int __mem_id_cyclic_get(gfp_t gfp)
+{
+	int retries = 1;
+	int id;
+
+again:
+	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+	if (id < 0) {
+		if (id == -ENOSPC) {
+			/* Cyclic allocator, reset next id */
+			if (retries--) {
+				mem_id_next = MEM_ID_MIN;
+				goto again;
+			}
+		}
+		return id; /* errno */
+	}
+	mem_id_next = id + 1;
+
+	return id;
+}
+
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum mem_type type, void *allocator)
 {
+	struct xdp_mem_allocator *xdp_alloc;
+	gfp_t gfp = GFP_KERNEL;
+	int id, errno, ret;
+	void *ptr;
+
+	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+		WARN(1, "Missing register, driver bug");
+		return -EFAULT;
+	}
+
 	if (type >= MEM_TYPE_MAX)
 		return -EINVAL;
 
 	xdp_rxq->mem.type = type;
 
-	if (allocator)
-		return -EOPNOTSUPP;
+	if (!allocator)
+		return 0;
+
+	/* Delay init of rhashtable to save memory if feature isn't used */
+	if (!mem_id_init) {
+		mutex_lock(&mem_id_lock);
+		ret = __mem_id_init_hash_table();
+		mutex_unlock(&mem_id_lock);
+		if (ret < 0) {
+			WARN_ON(1);
+			return ret;
+		}
+	}
+
+	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
+	if (!xdp_alloc)
+		return -ENOMEM;
+
+	mutex_lock(&mem_id_lock);
+	id = __mem_id_cyclic_get(gfp);
+	if (id < 0) {
+		errno = id;
+		goto err;
+	}
+	xdp_rxq->mem.id = id;
+	xdp_alloc->mem  = xdp_rxq->mem;
+	xdp_alloc->allocator = allocator;
+
+	/* Insert allocator into ID lookup table */
+	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
+	if (IS_ERR(ptr)) {
+		errno = PTR_ERR(ptr);
+		goto err;
+	}
+
+	mutex_unlock(&mem_id_lock);
 
-	/* TODO: Allocate an ID that maps to allocator pointer
-	 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
-	 */
 	return 0;
+err:
+	mutex_unlock(&mem_id_lock);
+	kfree(xdp_alloc);
+	return errno;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+
+void xdp_return_frame(void *data, struct xdp_mem_info *mem)
+{
+	struct xdp_mem_allocator *xa;
+
+	rcu_read_lock();
+	if (mem->id)
+		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+	rcu_read_unlock();
+
+	if (mem->type == MEM_TYPE_PAGE_SHARED) {
+		page_frag_free(data);
+		return;
+	}
+
+	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
+		struct page *page = virt_to_page(data); /* Assumes order0 page*/
+
+		put_page(page);
+	}
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame);