[ovs-dev,v8,06/13] dp-packet: Handle multi-seg mbufs in helper funcs.

Message ID 1528734090-220990-7-git-send-email-tiago.lam@intel.com
State Superseded
Headers show
Series
  • Support multi-segment mbufs
Related show

Commit Message

Lam, Tiago June 11, 2018, 4:21 p.m.
Most helper functions in dp-packet assume that the data held by a
dp_packet is contiguous, and perform operations such as pointer
arithmetic under that assumption. However, with the introduction of
multi-segment mbufs, where data is non-contiguous, such assumptions are
no longer possible. Some examples of Such helper functions are
dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
dp_packet_get_allocated() and dp_packet_at().

Thus, instead of assuming contiguous data in dp_packet, they  now
iterate over the (non-contiguous) data in mbufs to perform their
calculations.

Finally, dp_packet_use__() has also been modified to perform the
initialisation of the packet (and setting the source) before continuing
to set its size and data length, which now depends on the type of
packet.

Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com>

Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
Signed-off-by: Tiago Lam <tiago.lam@intel.com>
---
 lib/dp-packet.c |   4 +-
 lib/dp-packet.h | 242 +++++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 197 insertions(+), 49 deletions(-)

Comments

Eelco Chaudron June 18, 2018, 11:50 a.m. | #1
On 11 Jun 2018, at 18:21, Tiago Lam wrote:

> Most helper functions in dp-packet assume that the data held by a
> dp_packet is contiguous, and perform operations such as pointer
> arithmetic under that assumption. However, with the introduction of
> multi-segment mbufs, where data is non-contiguous, such assumptions 
> are
> no longer possible. Some examples of Such helper functions are
> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
> dp_packet_get_allocated() and dp_packet_at().
>
> Thus, instead of assuming contiguous data in dp_packet, they  now
> iterate over the (non-contiguous) data in mbufs to perform their
> calculations.
>
> Finally, dp_packet_use__() has also been modified to perform the
> initialisation of the packet (and setting the source) before 
> continuing
> to set its size and data length, which now depends on the type of
> packet.
>
> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>
> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
> ---
>  lib/dp-packet.c |   4 +-
>  lib/dp-packet.h | 242 
> +++++++++++++++++++++++++++++++++++++++++++++-----------
>  2 files changed, 197 insertions(+), 49 deletions(-)
>
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index 782e7c2..2aaeaae 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -41,11 +41,11 @@ static void
>  dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
>               enum dp_packet_source source)
>  {
> +    dp_packet_init__(b, allocated, source);
> +
>      dp_packet_set_base(b, base);
>      dp_packet_set_data(b, base);
>      dp_packet_set_size(b, 0);
> -
> -    dp_packet_init__(b, allocated, source);
>  }
>
>  /* Initializes 'b' as an empty dp_packet that contains the 
> 'allocated' bytes of
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index c301ed5..272597f 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct 
> dp_packet *, size_t offset,
>                                   size_t size);
>  static inline void *dp_packet_at_assert(const struct dp_packet *,
>                                          size_t offset, size_t size);
> +#ifdef DPDK_NETDEV
> +static inline void * dp_packet_at_offset(const struct dp_packet *b,
> +                                         size_t offset);
> +#endif
>  static inline void *dp_packet_tail(const struct dp_packet *);
>  static inline void *dp_packet_end(const struct dp_packet *);
>
> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b)
>      }
>  }
>
> -/* If 'b' contains at least 'offset + size' bytes of data, returns a 
> pointer to
> - * byte 'offset'.  Otherwise, returns a null pointer. */
> -static inline void *
> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
> -{
> -    return offset + size <= dp_packet_size(b)
> -           ? (char *) dp_packet_data(b) + offset
> -           : NULL;
> -}
> -
> -/* Returns a pointer to byte 'offset' in 'b', which must contain at 
> least
> - * 'offset + size' bytes of data. */
> -static inline void *
> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
> size)
> -{
> -    ovs_assert(offset + size <= dp_packet_size(b));
> -    return ((char *) dp_packet_data(b)) + offset;
> -}
> -
> -/* Returns a pointer to byte following the last byte of data in use 
> in 'b'. */
> -static inline void *
> -dp_packet_tail(const struct dp_packet *b)
> -{
> -    return (char *) dp_packet_data(b) + dp_packet_size(b);
> -}
> -
> -/* Returns a pointer to byte following the last byte allocated for 
> use (but
> - * not necessarily in use) in 'b'. */
> -static inline void *
> -dp_packet_end(const struct dp_packet *b)
> -{
> -    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
> -}
> -
>  /* Returns the number of bytes of headroom in 'b', that is, the 
> number of bytes
>   * of unused space in dp_packet 'b' before the data that is in use.  
> (Most
>   * commonly, the data in a dp_packet is at its beginning, and thus 
> the
> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b)
>  static inline size_t
>  dp_packet_tailroom(const struct dp_packet *b)
>  {
> +#ifdef DPDK_NETDEV
> +    if (b->source == DPBUF_DPDK) {
> +        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, 
> &b->mbuf);
> +        tmbuf = rte_pktmbuf_lastseg(tmbuf);
> +

What if the last two chains contain no user data, we have more 
tailroom?!? Should we not call rte_pktmbuf_tailroom(tmbuf)?

> +        return rte_pktmbuf_tailroom(tmbuf);
> +    }
> +#endif
>      return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
>  }
>
> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b)
>  static inline void
>  dp_packet_clear(struct dp_packet *b)
>  {
> +#ifdef DPDK_NETDEV
> +    dp_packet_set_size(b, 0);
> +    rte_pktmbuf_reset(&b->mbuf);

This would probably assert on none DPBUF_DPDK, do we need a “if 
(b->source == DPBUF_DPDK)” here also?

> +#else
>      dp_packet_set_data(b, dp_packet_base(b));
>      dp_packet_set_size(b, 0);
> +#endif
>  }
>
>  /* Removes 'size' bytes from the head end of 'b', which must contain 
> at least
> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t size)
>      return data;
>  }
>
> +#ifdef DPDK_NETDEV
> +/* Similar to dp_packet_try_pull() but doesn't actually pull any 
> code, only
> + * returns true or false if it would be possible to actually pull any 
> code.
> + * Valid for dp_packets carrying mbufs only. */
> +static inline bool
> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
> +    if (size > b->mbuf.data_len) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +#endif
> +
>  /* If 'b' has at least 'size' bytes of data, removes that many bytes 
> from the
>   * head end of 'b' and returns the first byte removed.  Otherwise, 
> returns a
>   * null pointer without modifying 'b'. */
>  static inline void *
>  dp_packet_try_pull(struct dp_packet *b, size_t size)
>  {
> +#ifdef DPDK_NETDEV
> +    if (!dp_packet_mbuf_may_pull(b, size)) {
> +        return NULL;
> +    }
> +#endif
> +
>      return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
>          ? dp_packet_pull(b, size) : NULL;
>  }
> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, 
> uint8_t pad_size)
>  static inline void *
>  dp_packet_l2_5(const struct dp_packet *b)
>  {
> +#ifdef DPDK_NETDEV
> +    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
> +        return NULL;
> +    }
> +#endif
> +
>      return b->l2_5_ofs != UINT16_MAX
>             ? (char *) dp_packet_data(b) + b->l2_5_ofs
>             : NULL;
> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void 
> *l2_5)
>  static inline void *
>  dp_packet_l3(const struct dp_packet *b)
>  {
> +#ifdef DPDK_NETDEV
> +    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
> +        return NULL;
> +    }
> +#endif
> +

Can we not use the dp_packet_at_offset() function here (and the similar 
functions)? And include the above and below checks there?

>      return b->l3_ofs != UINT16_MAX
>             ? (char *) dp_packet_data(b) + b->l3_ofs
>             : NULL;
> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>  static inline void *
>  dp_packet_l4(const struct dp_packet *b)
>  {
> +#ifdef DPDK_NETDEV
> +    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
> +        return NULL;
> +    }
> +#endif
> +
>      return b->l4_ofs != UINT16_MAX
>             ? (char *) dp_packet_data(b) + b->l4_ofs
>             : NULL;
> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void *l4)
>  static inline size_t
>  dp_packet_l4_size(const struct dp_packet *b)
>  {
> +#ifdef DPDK_NETDEV
> +    if (b->l4_ofs == UINT16_MAX) {
> +        return 0;
> +    }
> +
> +    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
> +    struct rte_mbuf *tmbuf = dp_packet_tail(b);
> +
> +    size_t l4_size = 0;
> +    if (hmbuf == tmbuf) {

Do not think this optimisation help much, as the dp_packet_tail(b) does 
the same walk as in the "while (hmbuf)" clause below, so why not just 
keet the else case?	

> +        l4_size = hmbuf->data_len - b->l4_ofs;
> +    } else {
> +        l4_size = hmbuf->data_len - b->l4_ofs;
> +        hmbuf = hmbuf->next;
> +
> +        while (hmbuf) {
> +            l4_size += tmbuf->data_len;
> +
> +            hmbuf = hmbuf->next;
> +        }
> +    }
> +
> +    l4_size -= dp_packet_l2_pad_size(b);
> +
> +    return l4_size;
> +#else
>      return b->l4_ofs != UINT16_MAX
>          ? (const char *)dp_packet_tail(b) - (const char 
> *)dp_packet_l4(b)
>          - dp_packet_l2_pad_size(b)
>          : 0;
> +#endif
>  }
>
>  static inline const void *
> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t v)
>  static inline uint16_t
>  dp_packet_get_allocated(const struct dp_packet *b)
>  {
> -    return b->mbuf.buf_len;
> +    return b->mbuf.nb_segs * b->mbuf.buf_len;
>  }
>
>  static inline void
> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, 
> uint16_t s)
>  {
>      b->mbuf.buf_len = s;
>  }
> +
> +static inline void *
> +dp_packet_at_offset(const struct dp_packet *b, size_t offset)
> +{
> +    if (b->source == DPBUF_DPDK) {
> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, 
> &b->mbuf);
> +
> +        while (buf && offset > buf->data_len) {
> +            offset -= buf->data_len;
> +
> +            buf = buf->next;
> +        }
> +        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : 
> NULL;
> +    } else {
> +        return (char *) dp_packet_data(b) + offset;
> +    }
> +}
> +
> +/* If 'b' contains at least 'offset + size' bytes of data, returns a 
> pointer to
> + * byte 'offset'.  Otherwise, returns a null pointer. */
> +static inline void *
> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
> +{
> +    return offset + size <= dp_packet_size(b)
> +        ? dp_packet_at_offset(b, offset)
> +        : NULL;
> +}
> +
> +/* Returns a pointer to byte 'offset' in 'b', which must contain at 
> least
> + * 'offset + size' bytes of data. */
> +static inline void *
> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
> size)
> +{
> +    ovs_assert(offset + size <= dp_packet_size(b));
> +    return dp_packet_at_offset(b, offset);
> +}
> +
> +/* Returns a pointer to byte following the last byte of data in use 
> in 'b'. */
> +static inline void *
> +dp_packet_tail(const struct dp_packet *b)
> +{
> +    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
> +    if (b->source == DPBUF_DPDK) {
> +        /* Find last segment where data ends, meaning the tail of the 
> chained
> +           mbufs is there */
> +        buf = rte_pktmbuf_lastseg(buf);
> +    }
> +

What if the last two chains contains no user data, we should find the 
last buffer with data.
Looks like we assume only the last in the chain contains room, and we 
achieve this by stripping off buffers (but we never add them again when 
needed, so something is wrong in this implementation).

> +    return rte_pktmbuf_mtod_offset(buf, void *, buf->data_len);
> +}
> +
> +/* Returns a pointer to byte following the last byte allocated for 
> use (but
> + * not necessarily in use) in 'b'. */
> +static inline void *
> +dp_packet_end(const struct dp_packet *b)
> +{
> +    if (b->source == DPBUF_DPDK) {
> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, 
> &(b->mbuf));
> +
> +        buf = rte_pktmbuf_lastseg(buf);
> +

See Above

> +        return (char *) buf->buf_addr + buf->buf_len;
> +    } else {
> +        return (char *) dp_packet_base(b) + 
> dp_packet_get_allocated(b);
> +    }
> +}
>  #else
> +/* If 'b' contains at least 'offset + size' bytes of data, returns a 
> pointer to
> + * byte 'offset'.  Otherwise, returns a null pointer. */
> +static inline void *
> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
> +{
> +    return offset + size <= dp_packet_size(b)
> +           ? (char *) dp_packet_data(b) + offset
> +           : NULL;
> +}
> +
> +/* Returns a pointer to byte 'offset' in 'b', which must contain at 
> least
> + * 'offset + size' bytes of data. */
> +static inline void *
> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
> size)
> +{
> +    ovs_assert(offset + size <= dp_packet_size(b));
> +    return ((char *) dp_packet_data(b)) + offset;
> +}
> +
> +/* Returns a pointer to byte following the last byte of data in use 
> in 'b'. */
> +static inline void *
> +dp_packet_tail(const struct dp_packet *b)
> +{
> +    return (char *) dp_packet_data(b) + dp_packet_size(b);
> +}
> +
> +/* Returns a pointer to byte following the last byte allocated for 
> use (but
> + * not necessarily in use) in 'b'. */
> +static inline void *
> +dp_packet_end(const struct dp_packet *b)
> +{
> +    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
> +}
> +
>  static inline void *
>  dp_packet_base(const struct dp_packet *b)
>  {
> @@ -518,34 +666,34 @@ dp_packet_size(const struct dp_packet *b)
>      return b->size_;
>  }
>
> -static inline void
> -dp_packet_set_size(struct dp_packet *b, uint32_t v)
> +static inline uint16_t
> +dp_packet_get_allocated(const struct dp_packet *b)
>  {
> -    b->size_ = v;
> +    return b->allocated_;
>  }
>
> -static inline uint16_t
> -__packet_data(const struct dp_packet *b)
> +static inline void
> +dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>  {
> -    return b->data_ofs;
> +    b->allocated_ = s;
>  }
>
>  static inline void
> -__packet_set_data(struct dp_packet *b, uint16_t v)
> +dp_packet_set_size(struct dp_packet *b, uint32_t v)
>  {
> -    b->data_ofs = v;
> +    b->size_ = v;
>  }
>
>  static inline uint16_t
> -dp_packet_get_allocated(const struct dp_packet *b)
> +__packet_data(const struct dp_packet *b)
>  {
> -    return b->allocated_;
> +    return b->data_ofs;
>  }
>
>  static inline void
> -dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
> +__packet_set_data(struct dp_packet *b, uint16_t v)
>  {
> -    b->allocated_ = s;
> +    b->data_ofs = v;
>  }
>  #endif
>
> -- 
> 2.7.4
Lam, Tiago June 22, 2018, 7:04 p.m. | #2
On 18/06/2018 12:50, Eelco Chaudron wrote:
> 
> 
> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
> 
>> Most helper functions in dp-packet assume that the data held by a
>> dp_packet is contiguous, and perform operations such as pointer
>> arithmetic under that assumption. However, with the introduction of
>> multi-segment mbufs, where data is non-contiguous, such assumptions 
>> are
>> no longer possible. Some examples of Such helper functions are
>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
>> dp_packet_get_allocated() and dp_packet_at().
>>
>> Thus, instead of assuming contiguous data in dp_packet, they  now
>> iterate over the (non-contiguous) data in mbufs to perform their
>> calculations.
>>
>> Finally, dp_packet_use__() has also been modified to perform the
>> initialisation of the packet (and setting the source) before 
>> continuing
>> to set its size and data length, which now depends on the type of
>> packet.
>>
>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>
>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>> ---
>>  lib/dp-packet.c |   4 +-
>>  lib/dp-packet.h | 242 
>> +++++++++++++++++++++++++++++++++++++++++++++-----------
>>  2 files changed, 197 insertions(+), 49 deletions(-)
>>
>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>> index 782e7c2..2aaeaae 100644
>> --- a/lib/dp-packet.c
>> +++ b/lib/dp-packet.c
>> @@ -41,11 +41,11 @@ static void
>>  dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
>>               enum dp_packet_source source)
>>  {
>> +    dp_packet_init__(b, allocated, source);
>> +
>>      dp_packet_set_base(b, base);
>>      dp_packet_set_data(b, base);
>>      dp_packet_set_size(b, 0);
>> -
>> -    dp_packet_init__(b, allocated, source);
>>  }
>>
>>  /* Initializes 'b' as an empty dp_packet that contains the 
>> 'allocated' bytes of
>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>> index c301ed5..272597f 100644
>> --- a/lib/dp-packet.h
>> +++ b/lib/dp-packet.h
>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct 
>> dp_packet *, size_t offset,
>>                                   size_t size);
>>  static inline void *dp_packet_at_assert(const struct dp_packet *,
>>                                          size_t offset, size_t size);
>> +#ifdef DPDK_NETDEV
>> +static inline void * dp_packet_at_offset(const struct dp_packet *b,
>> +                                         size_t offset);
>> +#endif
>>  static inline void *dp_packet_tail(const struct dp_packet *);
>>  static inline void *dp_packet_end(const struct dp_packet *);
>>
>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b)
>>      }
>>  }
>>
>> -/* If 'b' contains at least 'offset + size' bytes of data, returns a 
>> pointer to
>> - * byte 'offset'.  Otherwise, returns a null pointer. */
>> -static inline void *
>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>> -{
>> -    return offset + size <= dp_packet_size(b)
>> -           ? (char *) dp_packet_data(b) + offset
>> -           : NULL;
>> -}
>> -
>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at 
>> least
>> - * 'offset + size' bytes of data. */
>> -static inline void *
>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
>> size)
>> -{
>> -    ovs_assert(offset + size <= dp_packet_size(b));
>> -    return ((char *) dp_packet_data(b)) + offset;
>> -}
>> -
>> -/* Returns a pointer to byte following the last byte of data in use 
>> in 'b'. */
>> -static inline void *
>> -dp_packet_tail(const struct dp_packet *b)
>> -{
>> -    return (char *) dp_packet_data(b) + dp_packet_size(b);
>> -}
>> -
>> -/* Returns a pointer to byte following the last byte allocated for 
>> use (but
>> - * not necessarily in use) in 'b'. */
>> -static inline void *
>> -dp_packet_end(const struct dp_packet *b)
>> -{
>> -    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
>> -}
>> -
>>  /* Returns the number of bytes of headroom in 'b', that is, the 
>> number of bytes
>>   * of unused space in dp_packet 'b' before the data that is in use.  
>> (Most
>>   * commonly, the data in a dp_packet is at its beginning, and thus 
>> the
>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b)
>>  static inline size_t
>>  dp_packet_tailroom(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (b->source == DPBUF_DPDK) {
>> +        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, 
>> &b->mbuf);
>> +        tmbuf = rte_pktmbuf_lastseg(tmbuf);
>> +
> 
> What if the last two chains contain no user data, we have more 
> tailroom?!? Should we not call rte_pktmbuf_tailroom(tmbuf)?

I've covered this in my reply to the cover letter, hopefully it is more
clear now. This shouldn't be a possibility in the implementation. Also,
we do call rte_pktmbuf_tailroom(), just below.

> 
>> +        return rte_pktmbuf_tailroom(tmbuf);
>> +    }
>> +#endif
>>      return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
>>  }
>>
>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b)
>>  static inline void
>>  dp_packet_clear(struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    dp_packet_set_size(b, 0);
>> +    rte_pktmbuf_reset(&b->mbuf);
> 
> This would probably assert on none DPBUF_DPDK, do we need a “if 
> (b->source == DPBUF_DPDK)” here also?
> 

We do, I'll add it to next iteration. It only asserts when using DPDK
with `RTE_LIBRTE_MBUF_DEBUG`, but still...

>> +#else
>>      dp_packet_set_data(b, dp_packet_base(b));
>>      dp_packet_set_size(b, 0);
>> +#endif
>>  }
>>
>>  /* Removes 'size' bytes from the head end of 'b', which must contain 
>> at least
>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t size)
>>      return data;
>>  }
>>
>> +#ifdef DPDK_NETDEV
>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any 
>> code, only
>> + * returns true or false if it would be possible to actually pull any 
>> code.
>> + * Valid for dp_packets carrying mbufs only. */
>> +static inline bool
>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>> +    if (size > b->mbuf.data_len) {
>> +        return false;
>> +    }
>> +
>> +    return true;
>> +}
>> +#endif
>> +
>>  /* If 'b' has at least 'size' bytes of data, removes that many bytes 
>> from the
>>   * head end of 'b' and returns the first byte removed.  Otherwise, 
>> returns a
>>   * null pointer without modifying 'b'. */
>>  static inline void *
>>  dp_packet_try_pull(struct dp_packet *b, size_t size)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, size)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
>>      return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
>>          ? dp_packet_pull(b, size) : NULL;
>>  }
>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, 
>> uint8_t pad_size)
>>  static inline void *
>>  dp_packet_l2_5(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
>>      return b->l2_5_ofs != UINT16_MAX
>>             ? (char *) dp_packet_data(b) + b->l2_5_ofs
>>             : NULL;
>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void 
>> *l2_5)
>>  static inline void *
>>  dp_packet_l3(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
> 
> Can we not use the dp_packet_at_offset() function here (and the similar 
> functions)? And include the above and below checks there?
> 

They are aimed at different purposes. dp_packet_at_offset() should walk
the chain of mbufs and return a pointer to the offset, while here we
actually want to know if the offset is within the first mbuf. If these
headers are not within the first mbuf, we don't return it as it crosses
mbufs.

>>      return b->l3_ofs != UINT16_MAX
>>             ? (char *) dp_packet_data(b) + b->l3_ofs
>>             : NULL;
>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>>  static inline void *
>>  dp_packet_l4(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
>>      return b->l4_ofs != UINT16_MAX
>>             ? (char *) dp_packet_data(b) + b->l4_ofs
>>             : NULL;
>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void *l4)
>>  static inline size_t
>>  dp_packet_l4_size(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (b->l4_ofs == UINT16_MAX) {
>> +        return 0;
>> +    }
>> +
>> +    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>> +    struct rte_mbuf *tmbuf = dp_packet_tail(b);
>> +
>> +    size_t l4_size = 0;
>> +    if (hmbuf == tmbuf) {
> 
> Do not think this optimisation help much, as the dp_packet_tail(b) does 
> the same walk as in the "while (hmbuf)" clause below, so why not just 
> keet the else case?	
> 

Yeah, good point. I'll change that in the next iteration.

>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>> +    } else {
>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>> +        hmbuf = hmbuf->next;
>> +
>> +        while (hmbuf) {
>> +            l4_size += tmbuf->data_len;
>> +
>> +            hmbuf = hmbuf->next;
>> +        }
>> +    }
>> +
>> +    l4_size -= dp_packet_l2_pad_size(b);
>> +
>> +    return l4_size;
>> +#else
>>      return b->l4_ofs != UINT16_MAX
>>          ? (const char *)dp_packet_tail(b) - (const char 
>> *)dp_packet_l4(b)
>>          - dp_packet_l2_pad_size(b)
>>          : 0;
>> +#endif
>>  }
>>
>>  static inline const void *
>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t v)
>>  static inline uint16_t
>>  dp_packet_get_allocated(const struct dp_packet *b)
>>  {
>> -    return b->mbuf.buf_len;
>> +    return b->mbuf.nb_segs * b->mbuf.buf_len;
>>  }
>>
>>  static inline void
>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, 
>> uint16_t s)
>>  {
>>      b->mbuf.buf_len = s;
>>  }
>> +
>> +static inline void *
>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset)
>> +{
>> +    if (b->source == DPBUF_DPDK) {
>> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, 
>> &b->mbuf);
>> +
>> +        while (buf && offset > buf->data_len) {
>> +            offset -= buf->data_len;
>> +
>> +            buf = buf->next;
>> +        }
>> +        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : 
>> NULL;
>> +    } else {
>> +        return (char *) dp_packet_data(b) + offset;
>> +    }
>> +}
>> +
>> +/* If 'b' contains at least 'offset + size' bytes of data, returns a 
>> pointer to
>> + * byte 'offset'.  Otherwise, returns a null pointer. */
>> +static inline void *
>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>> +{
>> +    return offset + size <= dp_packet_size(b)
>> +        ? dp_packet_at_offset(b, offset)
>> +        : NULL;
>> +}
>> +
>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at 
>> least
>> + * 'offset + size' bytes of data. */
>> +static inline void *
>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
>> size)
>> +{
>> +    ovs_assert(offset + size <= dp_packet_size(b));
>> +    return dp_packet_at_offset(b, offset);
>> +}
>> +
>> +/* Returns a pointer to byte following the last byte of data in use 
>> in 'b'. */
>> +static inline void *
>> +dp_packet_tail(const struct dp_packet *b)
>> +{
>> +    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>> +    if (b->source == DPBUF_DPDK) {
>> +        /* Find last segment where data ends, meaning the tail of the 
>> chained
>> +           mbufs is there */
>> +        buf = rte_pktmbuf_lastseg(buf);
>> +    }
>> +
> 
> What if the last two chains contains no user data, we should find the 
> last buffer with data.
> Looks like we assume only the last in the chain contains room, and we 
> achieve this by stripping off buffers (but we never add them again when 
> needed, so something is wrong in this implementation).
> 

Does my reply to the cover letter help here as well? That's a valid
point, that if the size is decreased and mbufs are freed then mbufs are
not allocated again if size needs to be incremented. But as explained,
this is to enforce that data is contiguous and so the tail points to the
last mbuf. I haven't yet encountered cases where this would lead to
complications, but maybe you have something in mind?

Tiago.
Eelco Chaudron June 26, 2018, 10:31 a.m. | #3
On 22 Jun 2018, at 21:04, Lam, Tiago wrote:

> On 18/06/2018 12:50, Eelco Chaudron wrote:
>>
>>
>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>
>>> Most helper functions in dp-packet assume that the data held by a
>>> dp_packet is contiguous, and perform operations such as pointer
>>> arithmetic under that assumption. However, with the introduction of
>>> multi-segment mbufs, where data is non-contiguous, such assumptions
>>> are
>>> no longer possible. Some examples of Such helper functions are
>>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
>>> dp_packet_get_allocated() and dp_packet_at().
>>>
>>> Thus, instead of assuming contiguous data in dp_packet, they  now
>>> iterate over the (non-contiguous) data in mbufs to perform their
>>> calculations.
>>>
>>> Finally, dp_packet_use__() has also been modified to perform the
>>> initialisation of the packet (and setting the source) before
>>> continuing
>>> to set its size and data length, which now depends on the type of
>>> packet.
>>>
>>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>>
>>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>> ---
>>>  lib/dp-packet.c |   4 +-
>>>  lib/dp-packet.h | 242
>>> +++++++++++++++++++++++++++++++++++++++++++++-----------
>>>  2 files changed, 197 insertions(+), 49 deletions(-)
>>>
>>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>>> index 782e7c2..2aaeaae 100644
>>> --- a/lib/dp-packet.c
>>> +++ b/lib/dp-packet.c
>>> @@ -41,11 +41,11 @@ static void
>>>  dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
>>>               enum dp_packet_source source)
>>>  {
>>> +    dp_packet_init__(b, allocated, source);
>>> +
>>>      dp_packet_set_base(b, base);
>>>      dp_packet_set_data(b, base);
>>>      dp_packet_set_size(b, 0);
>>> -
>>> -    dp_packet_init__(b, allocated, source);
>>>  }
>>>
>>>  /* Initializes 'b' as an empty dp_packet that contains the
>>> 'allocated' bytes of
>>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>>> index c301ed5..272597f 100644
>>> --- a/lib/dp-packet.h
>>> +++ b/lib/dp-packet.h
>>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct
>>> dp_packet *, size_t offset,
>>>                                   size_t size);
>>>  static inline void *dp_packet_at_assert(const struct dp_packet *,
>>>                                          size_t offset, size_t 
>>> size);
>>> +#ifdef DPDK_NETDEV
>>> +static inline void * dp_packet_at_offset(const struct dp_packet *b,
>>> +                                         size_t offset);
>>> +#endif
>>>  static inline void *dp_packet_tail(const struct dp_packet *);
>>>  static inline void *dp_packet_end(const struct dp_packet *);
>>>
>>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b)
>>>      }
>>>  }
>>>
>>> -/* If 'b' contains at least 'offset + size' bytes of data, returns 
>>> a
>>> pointer to
>>> - * byte 'offset'.  Otherwise, returns a null pointer. */
>>> -static inline void *
>>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>>> -{
>>> -    return offset + size <= dp_packet_size(b)
>>> -           ? (char *) dp_packet_data(b) + offset
>>> -           : NULL;
>>> -}
>>> -
>>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at
>>> least
>>> - * 'offset + size' bytes of data. */
>>> -static inline void *
>>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, 
>>> size_t
>>> size)
>>> -{
>>> -    ovs_assert(offset + size <= dp_packet_size(b));
>>> -    return ((char *) dp_packet_data(b)) + offset;
>>> -}
>>> -
>>> -/* Returns a pointer to byte following the last byte of data in use
>>> in 'b'. */
>>> -static inline void *
>>> -dp_packet_tail(const struct dp_packet *b)
>>> -{
>>> -    return (char *) dp_packet_data(b) + dp_packet_size(b);
>>> -}
>>> -
>>> -/* Returns a pointer to byte following the last byte allocated for
>>> use (but
>>> - * not necessarily in use) in 'b'. */
>>> -static inline void *
>>> -dp_packet_end(const struct dp_packet *b)
>>> -{
>>> -    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
>>> -}
>>> -
>>>  /* Returns the number of bytes of headroom in 'b', that is, the
>>> number of bytes
>>>   * of unused space in dp_packet 'b' before the data that is in use.
>>> (Most
>>>   * commonly, the data in a dp_packet is at its beginning, and thus
>>> the
>>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b)
>>>  static inline size_t
>>>  dp_packet_tailroom(const struct dp_packet *b)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    if (b->source == DPBUF_DPDK) {
>>> +        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *,
>>> &b->mbuf);
>>> +        tmbuf = rte_pktmbuf_lastseg(tmbuf);
>>> +
>>
>> What if the last two chains contain no user data, we have more
>> tailroom?!? Should we not call 	(tmbuf)?
>
> I've covered this in my reply to the cover letter, hopefully it is 
> more
> clear now. This shouldn't be a possibility in the implementation. 
> Also,
> we do call rte_pktmbuf_tailroom(), just below.

I know but I do not see the reason for not calling 
rte_pktmbuf_tailroom() on the mbuf directly. This will work for all 
cases, add’s less complexity, even in the future if we move to a 
design where we will support multiple buffers. Trying to “limit” the 
usual mbuf cases does not sound right, and it will confuse people.

>>
>>> +        return rte_pktmbuf_tailroom(tmbuf);
>>> +    }
>>> +#endif
>>>      return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
>>>  }
>>>
>>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b)
>>>  static inline void
>>>  dp_packet_clear(struct dp_packet *b)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    dp_packet_set_size(b, 0);
>>> +    rte_pktmbuf_reset(&b->mbuf);
>>
>> This would probably assert on none DPBUF_DPDK, do we need a “if
>> (b->source == DPBUF_DPDK)” here also?
>>
>
> We do, I'll add it to next iteration. It only asserts when using DPDK
> with `RTE_LIBRTE_MBUF_DEBUG`, but still...

Yes, but you do not want this assert when you are trying to debug 
something else ;)

>>> +#else
>>>      dp_packet_set_data(b, dp_packet_base(b));
>>>      dp_packet_set_size(b, 0);
>>> +#endif
>>>  }
>>>
>>>  /* Removes 'size' bytes from the head end of 'b', which must 
>>> contain
>>> at least
>>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t 
>>> size)
>>>      return data;
>>>  }
>>>
>>> +#ifdef DPDK_NETDEV
>>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any
>>> code, only
>>> + * returns true or false if it would be possible to actually pull 
>>> any
>>> code.
>>> + * Valid for dp_packets carrying mbufs only. */
>>> +static inline bool
>>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>>> +    if (size > b->mbuf.data_len) {
>>> +        return false;
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +#endif
>>> +
>>>  /* If 'b' has at least 'size' bytes of data, removes that many 
>>> bytes
>>> from the
>>>   * head end of 'b' and returns the first byte removed.  Otherwise,
>>> returns a
>>>   * null pointer without modifying 'b'. */
>>>  static inline void *
>>>  dp_packet_try_pull(struct dp_packet *b, size_t size)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    if (!dp_packet_mbuf_may_pull(b, size)) {
>>> +        return NULL;
>>> +    }
>>> +#endif
>>> +
>>>      return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
>>>          ? dp_packet_pull(b, size) : NULL;
>>>  }
>>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b,
>>> uint8_t pad_size)
>>>  static inline void *
>>>  dp_packet_l2_5(const struct dp_packet *b)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>>> +        return NULL;
>>> +    }
>>> +#endif
>>> +
>>>      return b->l2_5_ofs != UINT16_MAX
>>>             ? (char *) dp_packet_data(b) + b->l2_5_ofs
>>>             : NULL;
>>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void
>>> *l2_5)
>>>  static inline void *
>>>  dp_packet_l3(const struct dp_packet *b)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>>> +        return NULL;
>>> +    }
>>> +#endif
>>> +
>>
>> Can we not use the dp_packet_at_offset() function here (and the 
>> similar
>> functions)? And include the above and below checks there?
>>
>
> They are aimed at different purposes. dp_packet_at_offset() should 
> walk
> the chain of mbufs and return a pointer to the offset, while here we
> actually want to know if the offset is within the first mbuf. If these
> headers are not within the first mbuf, we don't return it as it 
> crosses
> mbufs.

My bad, I was referring to dp_packet_at(), which probably also need the 
may_pull() if there are control protocols with jumbo packets?


>>>      return b->l3_ofs != UINT16_MAX
>>>             ? (char *) dp_packet_data(b) + b->l3_ofs
>>>             : NULL;
>>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>>>  static inline void *
>>>  dp_packet_l4(const struct dp_packet *b)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>>> +        return NULL;
>>> +    }
>>> +#endif
>>> +
>>>      return b->l4_ofs != UINT16_MAX
>>>             ? (char *) dp_packet_data(b) + b->l4_ofs
>>>             : NULL;
>>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void 
>>> *l4)
>>>  static inline size_t
>>>  dp_packet_l4_size(const struct dp_packet *b)
>>>  {
>>> +#ifdef DPDK_NETDEV
>>> +    if (b->l4_ofs == UINT16_MAX) {
>>> +        return 0;
>>> +    }
>>> +
>>> +    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, 
>>> &b->mbuf);
>>> +    struct rte_mbuf *tmbuf = dp_packet_tail(b);
>>> +
>>> +    size_t l4_size = 0;
>>> +    if (hmbuf == tmbuf) {
>>
>> Do not think this optimisation help much, as the dp_packet_tail(b) 
>> does
>> the same walk as in the "while (hmbuf)" clause below, so why not just
>> keet the else case?	
>>
>
> Yeah, good point. I'll change that in the next iteration.
>
>>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>>> +    } else {
>>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>>> +        hmbuf = hmbuf->next;
>>> +
>>> +        while (hmbuf) {
>>> +            l4_size += tmbuf->data_len;
>>> +
>>> +            hmbuf = hmbuf->next;
>>> +        }
>>> +    }
>>> +
>>> +    l4_size -= dp_packet_l2_pad_size(b);
>>> +
>>> +    return l4_size;
>>> +#else
>>>      return b->l4_ofs != UINT16_MAX
>>>          ? (const char *)dp_packet_tail(b) - (const char
>>> *)dp_packet_l4(b)
>>>          - dp_packet_l2_pad_size(b)
>>>          : 0;
>>> +#endif
>>>  }
>>>
>>>  static inline const void *
>>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t 
>>> v)
>>>  static inline uint16_t
>>>  dp_packet_get_allocated(const struct dp_packet *b)
>>>  {
>>> -    return b->mbuf.buf_len;
>>> +    return b->mbuf.nb_segs * b->mbuf.buf_len;
>>>  }
>>>
>>>  static inline void
>>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b,
>>> uint16_t s)
>>>  {
>>>      b->mbuf.buf_len = s;
>>>  }
>>> +
>>> +static inline void *
>>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset)
>>> +{
>>> +    if (b->source == DPBUF_DPDK) {
>>> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *,
>>> &b->mbuf);
>>> +
>>> +        while (buf && offset > buf->data_len) {
>>> +            offset -= buf->data_len;
>>> +
>>> +            buf = buf->next;
>>> +        }
>>> +        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) :
>>> NULL;
>>> +    } else {
>>> +        return (char *) dp_packet_data(b) + offset;
>>> +    }
>>> +}
>>> +
>>> +/* If 'b' contains at least 'offset + size' bytes of data, returns 
>>> a
>>> pointer to
>>> + * byte 'offset'.  Otherwise, returns a null pointer. */
>>> +static inline void *
>>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>>> +{
>>> +    return offset + size <= dp_packet_size(b)
>>> +        ? dp_packet_at_offset(b, offset)
>>> +        : NULL;
>>> +}
>>> +
>>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at
>>> least
>>> + * 'offset + size' bytes of data. */
>>> +static inline void *
>>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, 
>>> size_t
>>> size)
>>> +{
>>> +    ovs_assert(offset + size <= dp_packet_size(b));
>>> +    return dp_packet_at_offset(b, offset);
>>> +}
>>> +
>>> +/* Returns a pointer to byte following the last byte of data in use
>>> in 'b'. */
>>> +static inline void *
>>> +dp_packet_tail(const struct dp_packet *b)
>>> +{
>>> +    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>>> +    if (b->source == DPBUF_DPDK) {
>>> +        /* Find last segment where data ends, meaning the tail of 
>>> the
>>> chained
>>> +           mbufs is there */
>>> +        buf = rte_pktmbuf_lastseg(buf);
>>> +    }
>>> +
>>
>> What if the last two chains contains no user data, we should find the
>> last buffer with data.
>> Looks like we assume only the last in the chain contains room, and we
>> achieve this by stripping off buffers (but we never add them again 
>> when
>> needed, so something is wrong in this implementation).
>>
>
> Does my reply to the cover letter help here as well? That's a valid
> point, that if the size is decreased and mbufs are freed then mbufs 
> are
> not allocated again if size needs to be incremented. But as explained,
> this is to enforce that data is contiguous and so the tail points to 
> the
> last mbuf. I haven't yet encountered cases where this would lead to
> complications, but maybe you have something in mind?

I feel like we should not design the general APIs to force the current 
assumed limitation.
As rte_pktmbuf_lastseg() is already walking the three, we could do it 
here instead and checking for the mailroom.
Lam, Tiago June 27, 2018, 10:09 a.m. | #4
On 26/06/2018 11:31, Eelco Chaudron wrote:
> 
> 
> On 22 Jun 2018, at 21:04, Lam, Tiago wrote:
> 
>> On 18/06/2018 12:50, Eelco Chaudron wrote:
>>>
>>>
>>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>>
>>>> Most helper functions in dp-packet assume that the data held by a
>>>> dp_packet is contiguous, and perform operations such as pointer
>>>> arithmetic under that assumption. However, with the introduction of
>>>> multi-segment mbufs, where data is non-contiguous, such assumptions
>>>> are
>>>> no longer possible. Some examples of Such helper functions are
>>>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
>>>> dp_packet_get_allocated() and dp_packet_at().
>>>>
>>>> Thus, instead of assuming contiguous data in dp_packet, they  now
>>>> iterate over the (non-contiguous) data in mbufs to perform their
>>>> calculations.
>>>>
>>>> Finally, dp_packet_use__() has also been modified to perform the
>>>> initialisation of the packet (and setting the source) before
>>>> continuing
>>>> to set its size and data length, which now depends on the type of
>>>> packet.
>>>>
>>>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>>>
>>>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>>> ---
>>>>  lib/dp-packet.c |   4 +-
>>>>  lib/dp-packet.h | 242
>>>> +++++++++++++++++++++++++++++++++++++++++++++-----------
>>>>  2 files changed, 197 insertions(+), 49 deletions(-)
>>>>
>>>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>>>> index 782e7c2..2aaeaae 100644
>>>> --- a/lib/dp-packet.c
>>>> +++ b/lib/dp-packet.c
>>>> @@ -41,11 +41,11 @@ static void
>>>>  dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
>>>>               enum dp_packet_source source)
>>>>  {
>>>> +    dp_packet_init__(b, allocated, source);
>>>> +
>>>>      dp_packet_set_base(b, base);
>>>>      dp_packet_set_data(b, base);
>>>>      dp_packet_set_size(b, 0);
>>>> -
>>>> -    dp_packet_init__(b, allocated, source);
>>>>  }
>>>>
>>>>  /* Initializes 'b' as an empty dp_packet that contains the
>>>> 'allocated' bytes of
>>>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>>>> index c301ed5..272597f 100644
>>>> --- a/lib/dp-packet.h
>>>> +++ b/lib/dp-packet.h
>>>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct
>>>> dp_packet *, size_t offset,
>>>>                                   size_t size);
>>>>  static inline void *dp_packet_at_assert(const struct dp_packet *,
>>>>                                          size_t offset, size_t 
>>>> size);
>>>> +#ifdef DPDK_NETDEV
>>>> +static inline void * dp_packet_at_offset(const struct dp_packet *b,
>>>> +                                         size_t offset);
>>>> +#endif
>>>>  static inline void *dp_packet_tail(const struct dp_packet *);
>>>>  static inline void *dp_packet_end(const struct dp_packet *);
>>>>
>>>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b)
>>>>      }
>>>>  }
>>>>
>>>> -/* If 'b' contains at least 'offset + size' bytes of data, returns 
>>>> a
>>>> pointer to
>>>> - * byte 'offset'.  Otherwise, returns a null pointer. */
>>>> -static inline void *
>>>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>>>> -{
>>>> -    return offset + size <= dp_packet_size(b)
>>>> -           ? (char *) dp_packet_data(b) + offset
>>>> -           : NULL;
>>>> -}
>>>> -
>>>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at
>>>> least
>>>> - * 'offset + size' bytes of data. */
>>>> -static inline void *
>>>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, 
>>>> size_t
>>>> size)
>>>> -{
>>>> -    ovs_assert(offset + size <= dp_packet_size(b));
>>>> -    return ((char *) dp_packet_data(b)) + offset;
>>>> -}
>>>> -
>>>> -/* Returns a pointer to byte following the last byte of data in use
>>>> in 'b'. */
>>>> -static inline void *
>>>> -dp_packet_tail(const struct dp_packet *b)
>>>> -{
>>>> -    return (char *) dp_packet_data(b) + dp_packet_size(b);
>>>> -}
>>>> -
>>>> -/* Returns a pointer to byte following the last byte allocated for
>>>> use (but
>>>> - * not necessarily in use) in 'b'. */
>>>> -static inline void *
>>>> -dp_packet_end(const struct dp_packet *b)
>>>> -{
>>>> -    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
>>>> -}
>>>> -
>>>>  /* Returns the number of bytes of headroom in 'b', that is, the
>>>> number of bytes
>>>>   * of unused space in dp_packet 'b' before the data that is in use.
>>>> (Most
>>>>   * commonly, the data in a dp_packet is at its beginning, and thus
>>>> the
>>>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b)
>>>>  static inline size_t
>>>>  dp_packet_tailroom(const struct dp_packet *b)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    if (b->source == DPBUF_DPDK) {
>>>> +        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *,
>>>> &b->mbuf);
>>>> +        tmbuf = rte_pktmbuf_lastseg(tmbuf);
>>>> +
>>>
>>> What if the last two chains contain no user data, we have more
>>> tailroom?!? Should we not call 	(tmbuf)?
>>
>> I've covered this in my reply to the cover letter, hopefully it is 
>> more
>> clear now. This shouldn't be a possibility in the implementation. 
>> Also,
>> we do call rte_pktmbuf_tailroom(), just below.
> 
> I know but I do not see the reason for not calling 
> rte_pktmbuf_tailroom() on the mbuf directly. This will work for all 
> cases, add’s less complexity, even in the future if we move to a 
> design where we will support multiple buffers. Trying to “limit” the 
> usual mbuf cases does not sound right, and it will confuse people.
>

I think there's some confusion here. rte_pktmbuf_tailroom() doesn't
calculate the case above where "the last two chains contain no user
data". It only calculates the tailroom for the passed mbuf.

In fact, most of the rte_pktmbuf_* functions does not deal with such
cases. They only do the calculations for the passed mbuf, and it is up
to the user of the library to add logic on top (using helper functions
such as rte_pktmbuf_lastseg() to traverse the chain). Since this series
doesn't allow for an mbuf in the chain to be free of data after the
tail, I didn't add the extra logic.

Hopefully this clarifies why we are calling rte_pktmbuf_tailroom() in
the tail mbuf only.

Tiago.

>>>
>>>> +        return rte_pktmbuf_tailroom(tmbuf);
>>>> +    }
>>>> +#endif
>>>>      return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
>>>>  }
>>>>
>>>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b)
>>>>  static inline void
>>>>  dp_packet_clear(struct dp_packet *b)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    dp_packet_set_size(b, 0);
>>>> +    rte_pktmbuf_reset(&b->mbuf);
>>>
>>> This would probably assert on none DPBUF_DPDK, do we need a “if
>>> (b->source == DPBUF_DPDK)” here also?
>>>
>>
>> We do, I'll add it to next iteration. It only asserts when using DPDK
>> with `RTE_LIBRTE_MBUF_DEBUG`, but still...
> 
> Yes, but you do not want this assert when you are trying to debug 
> something else ;)
> 
>>>> +#else
>>>>      dp_packet_set_data(b, dp_packet_base(b));
>>>>      dp_packet_set_size(b, 0);
>>>> +#endif
>>>>  }
>>>>
>>>>  /* Removes 'size' bytes from the head end of 'b', which must 
>>>> contain
>>>> at least
>>>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t 
>>>> size)
>>>>      return data;
>>>>  }
>>>>
>>>> +#ifdef DPDK_NETDEV
>>>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any
>>>> code, only
>>>> + * returns true or false if it would be possible to actually pull 
>>>> any
>>>> code.
>>>> + * Valid for dp_packets carrying mbufs only. */
>>>> +static inline bool
>>>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>>>> +    if (size > b->mbuf.data_len) {
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>> +#endif
>>>> +
>>>>  /* If 'b' has at least 'size' bytes of data, removes that many 
>>>> bytes
>>>> from the
>>>>   * head end of 'b' and returns the first byte removed.  Otherwise,
>>>> returns a
>>>>   * null pointer without modifying 'b'. */
>>>>  static inline void *
>>>>  dp_packet_try_pull(struct dp_packet *b, size_t size)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    if (!dp_packet_mbuf_may_pull(b, size)) {
>>>> +        return NULL;
>>>> +    }
>>>> +#endif
>>>> +
>>>>      return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
>>>>          ? dp_packet_pull(b, size) : NULL;
>>>>  }
>>>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b,
>>>> uint8_t pad_size)
>>>>  static inline void *
>>>>  dp_packet_l2_5(const struct dp_packet *b)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>>>> +        return NULL;
>>>> +    }
>>>> +#endif
>>>> +
>>>>      return b->l2_5_ofs != UINT16_MAX
>>>>             ? (char *) dp_packet_data(b) + b->l2_5_ofs
>>>>             : NULL;
>>>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void
>>>> *l2_5)
>>>>  static inline void *
>>>>  dp_packet_l3(const struct dp_packet *b)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>>>> +        return NULL;
>>>> +    }
>>>> +#endif
>>>> +
>>>
>>> Can we not use the dp_packet_at_offset() function here (and the 
>>> similar
>>> functions)? And include the above and below checks there?
>>>
>>
>> They are aimed at different purposes. dp_packet_at_offset() should 
>> walk
>> the chain of mbufs and return a pointer to the offset, while here we
>> actually want to know if the offset is within the first mbuf. If these
>> headers are not within the first mbuf, we don't return it as it 
>> crosses
>> mbufs.
> 
> My bad, I was referring to dp_packet_at(), which probably also need the 
> may_pull() if there are control protocols with jumbo packets?
> 
> 
>>>>      return b->l3_ofs != UINT16_MAX
>>>>             ? (char *) dp_packet_data(b) + b->l3_ofs
>>>>             : NULL;
>>>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>>>>  static inline void *
>>>>  dp_packet_l4(const struct dp_packet *b)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>>>> +        return NULL;
>>>> +    }
>>>> +#endif
>>>> +
>>>>      return b->l4_ofs != UINT16_MAX
>>>>             ? (char *) dp_packet_data(b) + b->l4_ofs
>>>>             : NULL;
>>>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void 
>>>> *l4)
>>>>  static inline size_t
>>>>  dp_packet_l4_size(const struct dp_packet *b)
>>>>  {
>>>> +#ifdef DPDK_NETDEV
>>>> +    if (b->l4_ofs == UINT16_MAX) {
>>>> +        return 0;
>>>> +    }
>>>> +
>>>> +    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, 
>>>> &b->mbuf);
>>>> +    struct rte_mbuf *tmbuf = dp_packet_tail(b);
>>>> +
>>>> +    size_t l4_size = 0;
>>>> +    if (hmbuf == tmbuf) {
>>>
>>> Do not think this optimisation help much, as the dp_packet_tail(b) 
>>> does
>>> the same walk as in the "while (hmbuf)" clause below, so why not just
>>> keet the else case?	
>>>
>>
>> Yeah, good point. I'll change that in the next iteration.
>>
>>>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>>>> +    } else {
>>>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>>>> +        hmbuf = hmbuf->next;
>>>> +
>>>> +        while (hmbuf) {
>>>> +            l4_size += tmbuf->data_len;
>>>> +
>>>> +            hmbuf = hmbuf->next;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    l4_size -= dp_packet_l2_pad_size(b);
>>>> +
>>>> +    return l4_size;
>>>> +#else
>>>>      return b->l4_ofs != UINT16_MAX
>>>>          ? (const char *)dp_packet_tail(b) - (const char
>>>> *)dp_packet_l4(b)
>>>>          - dp_packet_l2_pad_size(b)
>>>>          : 0;
>>>> +#endif
>>>>  }
>>>>
>>>>  static inline const void *
>>>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t 
>>>> v)
>>>>  static inline uint16_t
>>>>  dp_packet_get_allocated(const struct dp_packet *b)
>>>>  {
>>>> -    return b->mbuf.buf_len;
>>>> +    return b->mbuf.nb_segs * b->mbuf.buf_len;
>>>>  }
>>>>
>>>>  static inline void
>>>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b,
>>>> uint16_t s)
>>>>  {
>>>>      b->mbuf.buf_len = s;
>>>>  }
>>>> +
>>>> +static inline void *
>>>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset)
>>>> +{
>>>> +    if (b->source == DPBUF_DPDK) {
>>>> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *,
>>>> &b->mbuf);
>>>> +
>>>> +        while (buf && offset > buf->data_len) {
>>>> +            offset -= buf->data_len;
>>>> +
>>>> +            buf = buf->next;
>>>> +        }
>>>> +        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) :
>>>> NULL;
>>>> +    } else {
>>>> +        return (char *) dp_packet_data(b) + offset;
>>>> +    }
>>>> +}
>>>> +
>>>> +/* If 'b' contains at least 'offset + size' bytes of data, returns 
>>>> a
>>>> pointer to
>>>> + * byte 'offset'.  Otherwise, returns a null pointer. */
>>>> +static inline void *
>>>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>>>> +{
>>>> +    return offset + size <= dp_packet_size(b)
>>>> +        ? dp_packet_at_offset(b, offset)
>>>> +        : NULL;
>>>> +}
>>>> +
>>>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at
>>>> least
>>>> + * 'offset + size' bytes of data. */
>>>> +static inline void *
>>>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, 
>>>> size_t
>>>> size)
>>>> +{
>>>> +    ovs_assert(offset + size <= dp_packet_size(b));
>>>> +    return dp_packet_at_offset(b, offset);
>>>> +}
>>>> +
>>>> +/* Returns a pointer to byte following the last byte of data in use
>>>> in 'b'. */
>>>> +static inline void *
>>>> +dp_packet_tail(const struct dp_packet *b)
>>>> +{
>>>> +    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>>>> +    if (b->source == DPBUF_DPDK) {
>>>> +        /* Find last segment where data ends, meaning the tail of 
>>>> the
>>>> chained
>>>> +           mbufs is there */
>>>> +        buf = rte_pktmbuf_lastseg(buf);
>>>> +    }
>>>> +
>>>
>>> What if the last two chains contains no user data, we should find the
>>> last buffer with data.
>>> Looks like we assume only the last in the chain contains room, and we
>>> achieve this by stripping off buffers (but we never add them again 
>>> when
>>> needed, so something is wrong in this implementation).
>>>
>>
>> Does my reply to the cover letter help here as well? That's a valid
>> point, that if the size is decreased and mbufs are freed then mbufs 
>> are
>> not allocated again if size needs to be incremented. But as explained,
>> this is to enforce that data is contiguous and so the tail points to 
>> the
>> last mbuf. I haven't yet encountered cases where this would lead to
>> complications, but maybe you have something in mind?
> 
> I feel like we should not design the general APIs to force the current 
> assumed limitation.
> As rte_pktmbuf_lastseg() is already walking the three, we could do it 
> here instead and checking for the mailroom.
>
Eelco Chaudron June 27, 2018, 1:05 p.m. | #5
On 27 Jun 2018, at 12:09, Lam, Tiago wrote:

> On 26/06/2018 11:31, Eelco Chaudron wrote:
>>
>>
>> On 22 Jun 2018, at 21:04, Lam, Tiago wrote:
>>
>>> On 18/06/2018 12:50, Eelco Chaudron wrote:
>>>>
>>>>
>>>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>>>
>>>>> Most helper functions in dp-packet assume that the data held by a
>>>>> dp_packet is contiguous, and perform operations such as pointer
>>>>> arithmetic under that assumption. However, with the introduction of
>>>>> multi-segment mbufs, where data is non-contiguous, such assumptions
>>>>> are
>>>>> no longer possible. Some examples of Such helper functions are
>>>>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
>>>>> dp_packet_get_allocated() and dp_packet_at().
>>>>>
>>>>> Thus, instead of assuming contiguous data in dp_packet, they  now
>>>>> iterate over the (non-contiguous) data in mbufs to perform their
>>>>> calculations.
>>>>>
>>>>> Finally, dp_packet_use__() has also been modified to perform the
>>>>> initialisation of the packet (and setting the source) before
>>>>> continuing
>>>>> to set its size and data length, which now depends on the type of
>>>>> packet.
>>>>>
>>>>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>>>>
>>>>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>>>> ---
>>>>>  lib/dp-packet.c |   4 +-
>>>>>  lib/dp-packet.h | 242
>>>>> +++++++++++++++++++++++++++++++++++++++++++++-----------
>>>>>  2 files changed, 197 insertions(+), 49 deletions(-)
>>>>>
>>>>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>>>>> index 782e7c2..2aaeaae 100644
>>>>> --- a/lib/dp-packet.c
>>>>> +++ b/lib/dp-packet.c
>>>>> @@ -41,11 +41,11 @@ static void
>>>>>  dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
>>>>>               enum dp_packet_source source)
>>>>>  {
>>>>> +    dp_packet_init__(b, allocated, source);
>>>>> +
>>>>>      dp_packet_set_base(b, base);
>>>>>      dp_packet_set_data(b, base);
>>>>>      dp_packet_set_size(b, 0);
>>>>> -
>>>>> -    dp_packet_init__(b, allocated, source);
>>>>>  }
>>>>>
>>>>>  /* Initializes 'b' as an empty dp_packet that contains the
>>>>> 'allocated' bytes of
>>>>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>>>>> index c301ed5..272597f 100644
>>>>> --- a/lib/dp-packet.h
>>>>> +++ b/lib/dp-packet.h
>>>>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct
>>>>> dp_packet *, size_t offset,
>>>>>                                   size_t size);
>>>>>  static inline void *dp_packet_at_assert(const struct dp_packet *,
>>>>>                                          size_t offset, size_t
>>>>> size);
>>>>> +#ifdef DPDK_NETDEV
>>>>> +static inline void * dp_packet_at_offset(const struct dp_packet *b,
>>>>> +                                         size_t offset);
>>>>> +#endif
>>>>>  static inline void *dp_packet_tail(const struct dp_packet *);
>>>>>  static inline void *dp_packet_end(const struct dp_packet *);
>>>>>
>>>>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b)
>>>>>      }
>>>>>  }
>>>>>
>>>>> -/* If 'b' contains at least 'offset + size' bytes of data, returns
>>>>> a
>>>>> pointer to
>>>>> - * byte 'offset'.  Otherwise, returns a null pointer. */
>>>>> -static inline void *
>>>>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>>>>> -{
>>>>> -    return offset + size <= dp_packet_size(b)
>>>>> -           ? (char *) dp_packet_data(b) + offset
>>>>> -           : NULL;
>>>>> -}
>>>>> -
>>>>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at
>>>>> least
>>>>> - * 'offset + size' bytes of data. */
>>>>> -static inline void *
>>>>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset,
>>>>> size_t
>>>>> size)
>>>>> -{
>>>>> -    ovs_assert(offset + size <= dp_packet_size(b));
>>>>> -    return ((char *) dp_packet_data(b)) + offset;
>>>>> -}
>>>>> -
>>>>> -/* Returns a pointer to byte following the last byte of data in use
>>>>> in 'b'. */
>>>>> -static inline void *
>>>>> -dp_packet_tail(const struct dp_packet *b)
>>>>> -{
>>>>> -    return (char *) dp_packet_data(b) + dp_packet_size(b);
>>>>> -}
>>>>> -
>>>>> -/* Returns a pointer to byte following the last byte allocated for
>>>>> use (but
>>>>> - * not necessarily in use) in 'b'. */
>>>>> -static inline void *
>>>>> -dp_packet_end(const struct dp_packet *b)
>>>>> -{
>>>>> -    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
>>>>> -}
>>>>> -
>>>>>  /* Returns the number of bytes of headroom in 'b', that is, the
>>>>> number of bytes
>>>>>   * of unused space in dp_packet 'b' before the data that is in use.
>>>>> (Most
>>>>>   * commonly, the data in a dp_packet is at its beginning, and thus
>>>>> the
>>>>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b)
>>>>>  static inline size_t
>>>>>  dp_packet_tailroom(const struct dp_packet *b)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    if (b->source == DPBUF_DPDK) {
>>>>> +        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *,
>>>>> &b->mbuf);
>>>>> +        tmbuf = rte_pktmbuf_lastseg(tmbuf);
>>>>> +
>>>>
>>>> What if the last two chains contain no user data, we have more
>>>> tailroom?!? Should we not call 	(tmbuf)?
>>>
>>> I've covered this in my reply to the cover letter, hopefully it is
>>> more
>>> clear now. This shouldn't be a possibility in the implementation.
>>> Also,
>>> we do call rte_pktmbuf_tailroom(), just below.
>>
>> I know but I do not see the reason for not calling
>> rte_pktmbuf_tailroom() on the mbuf directly. This will work for all
>> cases, add’s less complexity, even in the future if we move to a
>> design where we will support multiple buffers. Trying to “limit” the
>> usual mbuf cases does not sound right, and it will confuse people.
>>
>
> I think there's some confusion here. rte_pktmbuf_tailroom() doesn't
> calculate the case above where "the last two chains contain no user
> data". It only calculates the tailroom for the passed mbuf.
>
> In fact, most of the rte_pktmbuf_* functions does not deal with such
> cases. They only do the calculations for the passed mbuf, and it is up
> to the user of the library to add logic on top (using helper functions
> such as rte_pktmbuf_lastseg() to traverse the chain). Since this series
> doesn't allow for an mbuf in the chain to be free of data after the
> tail, I didn't add the extra logic.
>
> Hopefully this clarifies why we are calling rte_pktmbuf_tailroom() in
> the tail mbuf only.
>
It does, thanks.

> Tiago.
>
>>>>
>>>>> +        return rte_pktmbuf_tailroom(tmbuf);
>>>>> +    }
>>>>> +#endif
>>>>>      return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
>>>>>  }
>>>>>
>>>>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b)
>>>>>  static inline void
>>>>>  dp_packet_clear(struct dp_packet *b)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    dp_packet_set_size(b, 0);
>>>>> +    rte_pktmbuf_reset(&b->mbuf);
>>>>
>>>> This would probably assert on none DPBUF_DPDK, do we need a “if
>>>> (b->source == DPBUF_DPDK)” here also?
>>>>
>>>
>>> We do, I'll add it to next iteration. It only asserts when using DPDK
>>> with `RTE_LIBRTE_MBUF_DEBUG`, but still...
>>
>> Yes, but you do not want this assert when you are trying to debug
>> something else ;)
>>
>>>>> +#else
>>>>>      dp_packet_set_data(b, dp_packet_base(b));
>>>>>      dp_packet_set_size(b, 0);
>>>>> +#endif
>>>>>  }
>>>>>
>>>>>  /* Removes 'size' bytes from the head end of 'b', which must
>>>>> contain
>>>>> at least
>>>>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t
>>>>> size)
>>>>>      return data;
>>>>>  }
>>>>>
>>>>> +#ifdef DPDK_NETDEV
>>>>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any
>>>>> code, only
>>>>> + * returns true or false if it would be possible to actually pull
>>>>> any
>>>>> code.
>>>>> + * Valid for dp_packets carrying mbufs only. */
>>>>> +static inline bool
>>>>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>>>>> +    if (size > b->mbuf.data_len) {
>>>>> +        return false;
>>>>> +    }
>>>>> +
>>>>> +    return true;
>>>>> +}
>>>>> +#endif
>>>>> +
>>>>>  /* If 'b' has at least 'size' bytes of data, removes that many
>>>>> bytes
>>>>> from the
>>>>>   * head end of 'b' and returns the first byte removed.  Otherwise,
>>>>> returns a
>>>>>   * null pointer without modifying 'b'. */
>>>>>  static inline void *
>>>>>  dp_packet_try_pull(struct dp_packet *b, size_t size)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    if (!dp_packet_mbuf_may_pull(b, size)) {
>>>>> +        return NULL;
>>>>> +    }
>>>>> +#endif
>>>>> +
>>>>>      return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
>>>>>          ? dp_packet_pull(b, size) : NULL;
>>>>>  }
>>>>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b,
>>>>> uint8_t pad_size)
>>>>>  static inline void *
>>>>>  dp_packet_l2_5(const struct dp_packet *b)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>>>>> +        return NULL;
>>>>> +    }
>>>>> +#endif
>>>>> +
>>>>>      return b->l2_5_ofs != UINT16_MAX
>>>>>             ? (char *) dp_packet_data(b) + b->l2_5_ofs
>>>>>             : NULL;
>>>>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void
>>>>> *l2_5)
>>>>>  static inline void *
>>>>>  dp_packet_l3(const struct dp_packet *b)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>>>>> +        return NULL;
>>>>> +    }
>>>>> +#endif
>>>>> +
>>>>
>>>> Can we not use the dp_packet_at_offset() function here (and the
>>>> similar
>>>> functions)? And include the above and below checks there?
>>>>
>>>
>>> They are aimed at different purposes. dp_packet_at_offset() should
>>> walk
>>> the chain of mbufs and return a pointer to the offset, while here we
>>> actually want to know if the offset is within the first mbuf. If these
>>> headers are not within the first mbuf, we don't return it as it
>>> crosses
>>> mbufs.
>>
>> My bad, I was referring to dp_packet_at(), which probably also need the
>> may_pull() if there are control protocols with jumbo packets?
>>
>>
>>>>>      return b->l3_ofs != UINT16_MAX
>>>>>             ? (char *) dp_packet_data(b) + b->l3_ofs
>>>>>             : NULL;
>>>>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>>>>>  static inline void *
>>>>>  dp_packet_l4(const struct dp_packet *b)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>>>>> +        return NULL;
>>>>> +    }
>>>>> +#endif
>>>>> +
>>>>>      return b->l4_ofs != UINT16_MAX
>>>>>             ? (char *) dp_packet_data(b) + b->l4_ofs
>>>>>             : NULL;
>>>>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void
>>>>> *l4)
>>>>>  static inline size_t
>>>>>  dp_packet_l4_size(const struct dp_packet *b)
>>>>>  {
>>>>> +#ifdef DPDK_NETDEV
>>>>> +    if (b->l4_ofs == UINT16_MAX) {
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *,
>>>>> &b->mbuf);
>>>>> +    struct rte_mbuf *tmbuf = dp_packet_tail(b);
>>>>> +
>>>>> +    size_t l4_size = 0;
>>>>> +    if (hmbuf == tmbuf) {
>>>>
>>>> Do not think this optimisation help much, as the dp_packet_tail(b)
>>>> does
>>>> the same walk as in the "while (hmbuf)" clause below, so why not just
>>>> keet the else case?	
>>>>
>>>
>>> Yeah, good point. I'll change that in the next iteration.
>>>
>>>>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>>>>> +    } else {
>>>>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>>>>> +        hmbuf = hmbuf->next;
>>>>> +
>>>>> +        while (hmbuf) {
>>>>> +            l4_size += tmbuf->data_len;
>>>>> +
>>>>> +            hmbuf = hmbuf->next;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    l4_size -= dp_packet_l2_pad_size(b);
>>>>> +
>>>>> +    return l4_size;
>>>>> +#else
>>>>>      return b->l4_ofs != UINT16_MAX
>>>>>          ? (const char *)dp_packet_tail(b) - (const char
>>>>> *)dp_packet_l4(b)
>>>>>          - dp_packet_l2_pad_size(b)
>>>>>          : 0;
>>>>> +#endif
>>>>>  }
>>>>>
>>>>>  static inline const void *
>>>>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t
>>>>> v)
>>>>>  static inline uint16_t
>>>>>  dp_packet_get_allocated(const struct dp_packet *b)
>>>>>  {
>>>>> -    return b->mbuf.buf_len;
>>>>> +    return b->mbuf.nb_segs * b->mbuf.buf_len;
>>>>>  }
>>>>>
>>>>>  static inline void
>>>>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b,
>>>>> uint16_t s)
>>>>>  {
>>>>>      b->mbuf.buf_len = s;
>>>>>  }
>>>>> +
>>>>> +static inline void *
>>>>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset)
>>>>> +{
>>>>> +    if (b->source == DPBUF_DPDK) {
>>>>> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *,
>>>>> &b->mbuf);
>>>>> +
>>>>> +        while (buf && offset > buf->data_len) {
>>>>> +            offset -= buf->data_len;
>>>>> +
>>>>> +            buf = buf->next;
>>>>> +        }
>>>>> +        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) :
>>>>> NULL;
>>>>> +    } else {
>>>>> +        return (char *) dp_packet_data(b) + offset;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* If 'b' contains at least 'offset + size' bytes of data, returns
>>>>> a
>>>>> pointer to
>>>>> + * byte 'offset'.  Otherwise, returns a null pointer. */
>>>>> +static inline void *
>>>>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>>>>> +{
>>>>> +    return offset + size <= dp_packet_size(b)
>>>>> +        ? dp_packet_at_offset(b, offset)
>>>>> +        : NULL;
>>>>> +}
>>>>> +
>>>>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at
>>>>> least
>>>>> + * 'offset + size' bytes of data. */
>>>>> +static inline void *
>>>>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset,
>>>>> size_t
>>>>> size)
>>>>> +{
>>>>> +    ovs_assert(offset + size <= dp_packet_size(b));
>>>>> +    return dp_packet_at_offset(b, offset);
>>>>> +}
>>>>> +
>>>>> +/* Returns a pointer to byte following the last byte of data in use
>>>>> in 'b'. */
>>>>> +static inline void *
>>>>> +dp_packet_tail(const struct dp_packet *b)
>>>>> +{
>>>>> +    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>>>>> +    if (b->source == DPBUF_DPDK) {
>>>>> +        /* Find last segment where data ends, meaning the tail of
>>>>> the
>>>>> chained
>>>>> +           mbufs is there */
>>>>> +        buf = rte_pktmbuf_lastseg(buf);
>>>>> +    }
>>>>> +
>>>>
>>>> What if the last two chains contains no user data, we should find the
>>>> last buffer with data.
>>>> Looks like we assume only the last in the chain contains room, and we
>>>> achieve this by stripping off buffers (but we never add them again
>>>> when
>>>> needed, so something is wrong in this implementation).
>>>>
>>>
>>> Does my reply to the cover letter help here as well? That's a valid
>>> point, that if the size is decreased and mbufs are freed then mbufs
>>> are
>>> not allocated again if size needs to be incremented. But as explained,
>>> this is to enforce that data is contiguous and so the tail points to
>>> the
>>> last mbuf. I haven't yet encountered cases where this would lead to
>>> complications, but maybe you have something in mind?
>>
>> I feel like we should not design the general APIs to force the current
>> assumed limitation.
>> As rte_pktmbuf_lastseg() is already walking the three, we could do it
>> here instead and checking for the mailroom.
>>

Patch

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 782e7c2..2aaeaae 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -41,11 +41,11 @@  static void
 dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
              enum dp_packet_source source)
 {
+    dp_packet_init__(b, allocated, source);
+
     dp_packet_set_base(b, base);
     dp_packet_set_data(b, base);
     dp_packet_set_size(b, 0);
-
-    dp_packet_init__(b, allocated, source);
 }
 
 /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index c301ed5..272597f 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -133,6 +133,10 @@  static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
                                  size_t size);
 static inline void *dp_packet_at_assert(const struct dp_packet *,
                                         size_t offset, size_t size);
+#ifdef DPDK_NETDEV
+static inline void * dp_packet_at_offset(const struct dp_packet *b,
+                                         size_t offset);
+#endif
 static inline void *dp_packet_tail(const struct dp_packet *);
 static inline void *dp_packet_end(const struct dp_packet *);
 
@@ -180,40 +184,6 @@  dp_packet_delete(struct dp_packet *b)
     }
 }
 
-/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
- * byte 'offset'.  Otherwise, returns a null pointer. */
-static inline void *
-dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
-{
-    return offset + size <= dp_packet_size(b)
-           ? (char *) dp_packet_data(b) + offset
-           : NULL;
-}
-
-/* Returns a pointer to byte 'offset' in 'b', which must contain at least
- * 'offset + size' bytes of data. */
-static inline void *
-dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
-{
-    ovs_assert(offset + size <= dp_packet_size(b));
-    return ((char *) dp_packet_data(b)) + offset;
-}
-
-/* Returns a pointer to byte following the last byte of data in use in 'b'. */
-static inline void *
-dp_packet_tail(const struct dp_packet *b)
-{
-    return (char *) dp_packet_data(b) + dp_packet_size(b);
-}
-
-/* Returns a pointer to byte following the last byte allocated for use (but
- * not necessarily in use) in 'b'. */
-static inline void *
-dp_packet_end(const struct dp_packet *b)
-{
-    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
-}
-
 /* Returns the number of bytes of headroom in 'b', that is, the number of bytes
  * of unused space in dp_packet 'b' before the data that is in use.  (Most
  * commonly, the data in a dp_packet is at its beginning, and thus the
@@ -229,6 +199,14 @@  dp_packet_headroom(const struct dp_packet *b)
 static inline size_t
 dp_packet_tailroom(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+    if (b->source == DPBUF_DPDK) {
+        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+        tmbuf = rte_pktmbuf_lastseg(tmbuf);
+
+        return rte_pktmbuf_tailroom(tmbuf);
+    }
+#endif
     return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
 }
 
@@ -236,8 +214,13 @@  dp_packet_tailroom(const struct dp_packet *b)
 static inline void
 dp_packet_clear(struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+    dp_packet_set_size(b, 0);
+    rte_pktmbuf_reset(&b->mbuf);
+#else
     dp_packet_set_data(b, dp_packet_base(b));
     dp_packet_set_size(b, 0);
+#endif
 }
 
 /* Removes 'size' bytes from the head end of 'b', which must contain at least
@@ -252,12 +235,32 @@  dp_packet_pull(struct dp_packet *b, size_t size)
     return data;
 }
 
+#ifdef DPDK_NETDEV
+/* Similar to dp_packet_try_pull() but doesn't actually pull any code, only
+ * returns true or false if it would be possible to actually pull any code.
+ * Valid for dp_packets carrying mbufs only. */
+static inline bool
+dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
+    if (size > b->mbuf.data_len) {
+        return false;
+    }
+
+    return true;
+}
+#endif
+
 /* If 'b' has at least 'size' bytes of data, removes that many bytes from the
  * head end of 'b' and returns the first byte removed.  Otherwise, returns a
  * null pointer without modifying 'b'. */
 static inline void *
 dp_packet_try_pull(struct dp_packet *b, size_t size)
 {
+#ifdef DPDK_NETDEV
+    if (!dp_packet_mbuf_may_pull(b, size)) {
+        return NULL;
+    }
+#endif
+
     return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
         ? dp_packet_pull(b, size) : NULL;
 }
@@ -311,6 +314,12 @@  dp_packet_set_l2_pad_size(struct dp_packet *b, uint8_t pad_size)
 static inline void *
 dp_packet_l2_5(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
+        return NULL;
+    }
+#endif
+
     return b->l2_5_ofs != UINT16_MAX
            ? (char *) dp_packet_data(b) + b->l2_5_ofs
            : NULL;
@@ -327,6 +336,12 @@  dp_packet_set_l2_5(struct dp_packet *b, void *l2_5)
 static inline void *
 dp_packet_l3(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
+        return NULL;
+    }
+#endif
+
     return b->l3_ofs != UINT16_MAX
            ? (char *) dp_packet_data(b) + b->l3_ofs
            : NULL;
@@ -341,6 +356,12 @@  dp_packet_set_l3(struct dp_packet *b, void *l3)
 static inline void *
 dp_packet_l4(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
+        return NULL;
+    }
+#endif
+
     return b->l4_ofs != UINT16_MAX
            ? (char *) dp_packet_data(b) + b->l4_ofs
            : NULL;
@@ -355,10 +376,37 @@  dp_packet_set_l4(struct dp_packet *b, void *l4)
 static inline size_t
 dp_packet_l4_size(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+    if (b->l4_ofs == UINT16_MAX) {
+        return 0;
+    }
+
+    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+    struct rte_mbuf *tmbuf = dp_packet_tail(b);
+
+    size_t l4_size = 0;
+    if (hmbuf == tmbuf) {
+        l4_size = hmbuf->data_len - b->l4_ofs;
+    } else {
+        l4_size = hmbuf->data_len - b->l4_ofs;
+        hmbuf = hmbuf->next;
+
+        while (hmbuf) {
+            l4_size += tmbuf->data_len;
+
+            hmbuf = hmbuf->next;
+        }
+    }
+
+    l4_size -= dp_packet_l2_pad_size(b);
+
+    return l4_size;
+#else
     return b->l4_ofs != UINT16_MAX
         ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l4(b)
         - dp_packet_l2_pad_size(b)
         : 0;
+#endif
 }
 
 static inline const void *
@@ -491,7 +539,7 @@  __packet_set_data(struct dp_packet *b, uint16_t v)
 static inline uint16_t
 dp_packet_get_allocated(const struct dp_packet *b)
 {
-    return b->mbuf.buf_len;
+    return b->mbuf.nb_segs * b->mbuf.buf_len;
 }
 
 static inline void
@@ -499,7 +547,107 @@  dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
 {
     b->mbuf.buf_len = s;
 }
+
+static inline void *
+dp_packet_at_offset(const struct dp_packet *b, size_t offset)
+{
+    if (b->source == DPBUF_DPDK) {
+        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+
+        while (buf && offset > buf->data_len) {
+            offset -= buf->data_len;
+
+            buf = buf->next;
+        }
+        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : NULL;
+    } else {
+        return (char *) dp_packet_data(b) + offset;
+    }
+}
+
+/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
+ * byte 'offset'.  Otherwise, returns a null pointer. */
+static inline void *
+dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
+{
+    return offset + size <= dp_packet_size(b)
+        ? dp_packet_at_offset(b, offset)
+        : NULL;
+}
+
+/* Returns a pointer to byte 'offset' in 'b', which must contain at least
+ * 'offset + size' bytes of data. */
+static inline void *
+dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
+{
+    ovs_assert(offset + size <= dp_packet_size(b));
+    return dp_packet_at_offset(b, offset);
+}
+
+/* Returns a pointer to byte following the last byte of data in use in 'b'. */
+static inline void *
+dp_packet_tail(const struct dp_packet *b)
+{
+    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+    if (b->source == DPBUF_DPDK) {
+        /* Find last segment where data ends, meaning the tail of the chained
+           mbufs is there */
+        buf = rte_pktmbuf_lastseg(buf);
+    }
+
+    return rte_pktmbuf_mtod_offset(buf, void *, buf->data_len);
+}
+
+/* Returns a pointer to byte following the last byte allocated for use (but
+ * not necessarily in use) in 'b'. */
+static inline void *
+dp_packet_end(const struct dp_packet *b)
+{
+    if (b->source == DPBUF_DPDK) {
+        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &(b->mbuf));
+
+        buf = rte_pktmbuf_lastseg(buf);
+
+        return (char *) buf->buf_addr + buf->buf_len;
+    } else {
+        return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
+    }
+}
 #else
+/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
+ * byte 'offset'.  Otherwise, returns a null pointer. */
+static inline void *
+dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
+{
+    return offset + size <= dp_packet_size(b)
+           ? (char *) dp_packet_data(b) + offset
+           : NULL;
+}
+
+/* Returns a pointer to byte 'offset' in 'b', which must contain at least
+ * 'offset + size' bytes of data. */
+static inline void *
+dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
+{
+    ovs_assert(offset + size <= dp_packet_size(b));
+    return ((char *) dp_packet_data(b)) + offset;
+}
+
+/* Returns a pointer to byte following the last byte of data in use in 'b'. */
+static inline void *
+dp_packet_tail(const struct dp_packet *b)
+{
+    return (char *) dp_packet_data(b) + dp_packet_size(b);
+}
+
+/* Returns a pointer to byte following the last byte allocated for use (but
+ * not necessarily in use) in 'b'. */
+static inline void *
+dp_packet_end(const struct dp_packet *b)
+{
+    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
+}
+
 static inline void *
 dp_packet_base(const struct dp_packet *b)
 {
@@ -518,34 +666,34 @@  dp_packet_size(const struct dp_packet *b)
     return b->size_;
 }
 
-static inline void
-dp_packet_set_size(struct dp_packet *b, uint32_t v)
+static inline uint16_t
+dp_packet_get_allocated(const struct dp_packet *b)
 {
-    b->size_ = v;
+    return b->allocated_;
 }
 
-static inline uint16_t
-__packet_data(const struct dp_packet *b)
+static inline void
+dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
 {
-    return b->data_ofs;
+    b->allocated_ = s;
 }
 
 static inline void
-__packet_set_data(struct dp_packet *b, uint16_t v)
+dp_packet_set_size(struct dp_packet *b, uint32_t v)
 {
-    b->data_ofs = v;
+    b->size_ = v;
 }
 
 static inline uint16_t
-dp_packet_get_allocated(const struct dp_packet *b)
+__packet_data(const struct dp_packet *b)
 {
-    return b->allocated_;
+    return b->data_ofs;
 }
 
 static inline void
-dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
+__packet_set_data(struct dp_packet *b, uint16_t v)
 {
-    b->allocated_ = s;
+    b->data_ofs = v;
 }
 #endif