Message ID | 1528734090-220990-7-git-send-email-tiago.lam@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | Support multi-segment mbufs | expand |
On 11 Jun 2018, at 18:21, Tiago Lam wrote: > Most helper functions in dp-packet assume that the data held by a > dp_packet is contiguous, and perform operations such as pointer > arithmetic under that assumption. However, with the introduction of > multi-segment mbufs, where data is non-contiguous, such assumptions > are > no longer possible. Some examples of Such helper functions are > dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(), > dp_packet_get_allocated() and dp_packet_at(). > > Thus, instead of assuming contiguous data in dp_packet, they now > iterate over the (non-contiguous) data in mbufs to perform their > calculations. > > Finally, dp_packet_use__() has also been modified to perform the > initialisation of the packet (and setting the source) before > continuing > to set its size and data length, which now depends on the type of > packet. > > Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com> > > Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> > Signed-off-by: Tiago Lam <tiago.lam@intel.com> > --- > lib/dp-packet.c | 4 +- > lib/dp-packet.h | 242 > +++++++++++++++++++++++++++++++++++++++++++++----------- > 2 files changed, 197 insertions(+), 49 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index 782e7c2..2aaeaae 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -41,11 +41,11 @@ static void > dp_packet_use__(struct dp_packet *b, void *base, size_t allocated, > enum dp_packet_source source) > { > + dp_packet_init__(b, allocated, source); > + > dp_packet_set_base(b, base); > dp_packet_set_data(b, base); > dp_packet_set_size(b, 0); > - > - dp_packet_init__(b, allocated, source); > } > > /* Initializes 'b' as an empty dp_packet that contains the > 'allocated' bytes of > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index c301ed5..272597f 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct > dp_packet *, size_t offset, > size_t size); > static inline void *dp_packet_at_assert(const struct dp_packet *, > size_t offset, size_t size); > +#ifdef DPDK_NETDEV > +static inline void * dp_packet_at_offset(const struct dp_packet *b, > + size_t offset); > +#endif > static inline void *dp_packet_tail(const struct dp_packet *); > static inline void *dp_packet_end(const struct dp_packet *); > > @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b) > } > } > > -/* If 'b' contains at least 'offset + size' bytes of data, returns a > pointer to > - * byte 'offset'. Otherwise, returns a null pointer. */ > -static inline void * > -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) > -{ > - return offset + size <= dp_packet_size(b) > - ? (char *) dp_packet_data(b) + offset > - : NULL; > -} > - > -/* Returns a pointer to byte 'offset' in 'b', which must contain at > least > - * 'offset + size' bytes of data. */ > -static inline void * > -dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t > size) > -{ > - ovs_assert(offset + size <= dp_packet_size(b)); > - return ((char *) dp_packet_data(b)) + offset; > -} > - > -/* Returns a pointer to byte following the last byte of data in use > in 'b'. */ > -static inline void * > -dp_packet_tail(const struct dp_packet *b) > -{ > - return (char *) dp_packet_data(b) + dp_packet_size(b); > -} > - > -/* Returns a pointer to byte following the last byte allocated for > use (but > - * not necessarily in use) in 'b'. */ > -static inline void * > -dp_packet_end(const struct dp_packet *b) > -{ > - return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); > -} > - > /* Returns the number of bytes of headroom in 'b', that is, the > number of bytes > * of unused space in dp_packet 'b' before the data that is in use. > (Most > * commonly, the data in a dp_packet is at its beginning, and thus > the > @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b) > static inline size_t > dp_packet_tailroom(const struct dp_packet *b) > { > +#ifdef DPDK_NETDEV > + if (b->source == DPBUF_DPDK) { > + struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, > &b->mbuf); > + tmbuf = rte_pktmbuf_lastseg(tmbuf); > + What if the last two chains contain no user data, we have more tailroom?!? Should we not call rte_pktmbuf_tailroom(tmbuf)? > + return rte_pktmbuf_tailroom(tmbuf); > + } > +#endif > return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); > } > > @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b) > static inline void > dp_packet_clear(struct dp_packet *b) > { > +#ifdef DPDK_NETDEV > + dp_packet_set_size(b, 0); > + rte_pktmbuf_reset(&b->mbuf); This would probably assert on none DPBUF_DPDK, do we need a “if (b->source == DPBUF_DPDK)” here also? > +#else > dp_packet_set_data(b, dp_packet_base(b)); > dp_packet_set_size(b, 0); > +#endif > } > > /* Removes 'size' bytes from the head end of 'b', which must contain > at least > @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t size) > return data; > } > > +#ifdef DPDK_NETDEV > +/* Similar to dp_packet_try_pull() but doesn't actually pull any > code, only > + * returns true or false if it would be possible to actually pull any > code. > + * Valid for dp_packets carrying mbufs only. */ > +static inline bool > +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) { > + if (size > b->mbuf.data_len) { > + return false; > + } > + > + return true; > +} > +#endif > + > /* If 'b' has at least 'size' bytes of data, removes that many bytes > from the > * head end of 'b' and returns the first byte removed. Otherwise, > returns a > * null pointer without modifying 'b'. */ > static inline void * > dp_packet_try_pull(struct dp_packet *b, size_t size) > { > +#ifdef DPDK_NETDEV > + if (!dp_packet_mbuf_may_pull(b, size)) { > + return NULL; > + } > +#endif > + > return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size > ? dp_packet_pull(b, size) : NULL; > } > @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, > uint8_t pad_size) > static inline void * > dp_packet_l2_5(const struct dp_packet *b) > { > +#ifdef DPDK_NETDEV > + if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) { > + return NULL; > + } > +#endif > + > return b->l2_5_ofs != UINT16_MAX > ? (char *) dp_packet_data(b) + b->l2_5_ofs > : NULL; > @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void > *l2_5) > static inline void * > dp_packet_l3(const struct dp_packet *b) > { > +#ifdef DPDK_NETDEV > + if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) { > + return NULL; > + } > +#endif > + Can we not use the dp_packet_at_offset() function here (and the similar functions)? And include the above and below checks there? > return b->l3_ofs != UINT16_MAX > ? (char *) dp_packet_data(b) + b->l3_ofs > : NULL; > @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3) > static inline void * > dp_packet_l4(const struct dp_packet *b) > { > +#ifdef DPDK_NETDEV > + if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) { > + return NULL; > + } > +#endif > + > return b->l4_ofs != UINT16_MAX > ? (char *) dp_packet_data(b) + b->l4_ofs > : NULL; > @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void *l4) > static inline size_t > dp_packet_l4_size(const struct dp_packet *b) > { > +#ifdef DPDK_NETDEV > + if (b->l4_ofs == UINT16_MAX) { > + return 0; > + } > + > + struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf); > + struct rte_mbuf *tmbuf = dp_packet_tail(b); > + > + size_t l4_size = 0; > + if (hmbuf == tmbuf) { Do not think this optimisation help much, as the dp_packet_tail(b) does the same walk as in the "while (hmbuf)" clause below, so why not just keet the else case? > + l4_size = hmbuf->data_len - b->l4_ofs; > + } else { > + l4_size = hmbuf->data_len - b->l4_ofs; > + hmbuf = hmbuf->next; > + > + while (hmbuf) { > + l4_size += tmbuf->data_len; > + > + hmbuf = hmbuf->next; > + } > + } > + > + l4_size -= dp_packet_l2_pad_size(b); > + > + return l4_size; > +#else > return b->l4_ofs != UINT16_MAX > ? (const char *)dp_packet_tail(b) - (const char > *)dp_packet_l4(b) > - dp_packet_l2_pad_size(b) > : 0; > +#endif > } > > static inline const void * > @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t v) > static inline uint16_t > dp_packet_get_allocated(const struct dp_packet *b) > { > - return b->mbuf.buf_len; > + return b->mbuf.nb_segs * b->mbuf.buf_len; > } > > static inline void > @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, > uint16_t s) > { > b->mbuf.buf_len = s; > } > + > +static inline void * > +dp_packet_at_offset(const struct dp_packet *b, size_t offset) > +{ > + if (b->source == DPBUF_DPDK) { > + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, > &b->mbuf); > + > + while (buf && offset > buf->data_len) { > + offset -= buf->data_len; > + > + buf = buf->next; > + } > + return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : > NULL; > + } else { > + return (char *) dp_packet_data(b) + offset; > + } > +} > + > +/* If 'b' contains at least 'offset + size' bytes of data, returns a > pointer to > + * byte 'offset'. Otherwise, returns a null pointer. */ > +static inline void * > +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) > +{ > + return offset + size <= dp_packet_size(b) > + ? dp_packet_at_offset(b, offset) > + : NULL; > +} > + > +/* Returns a pointer to byte 'offset' in 'b', which must contain at > least > + * 'offset + size' bytes of data. */ > +static inline void * > +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t > size) > +{ > + ovs_assert(offset + size <= dp_packet_size(b)); > + return dp_packet_at_offset(b, offset); > +} > + > +/* Returns a pointer to byte following the last byte of data in use > in 'b'. */ > +static inline void * > +dp_packet_tail(const struct dp_packet *b) > +{ > + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); > + if (b->source == DPBUF_DPDK) { > + /* Find last segment where data ends, meaning the tail of the > chained > + mbufs is there */ > + buf = rte_pktmbuf_lastseg(buf); > + } > + What if the last two chains contains no user data, we should find the last buffer with data. Looks like we assume only the last in the chain contains room, and we achieve this by stripping off buffers (but we never add them again when needed, so something is wrong in this implementation). > + return rte_pktmbuf_mtod_offset(buf, void *, buf->data_len); > +} > + > +/* Returns a pointer to byte following the last byte allocated for > use (but > + * not necessarily in use) in 'b'. */ > +static inline void * > +dp_packet_end(const struct dp_packet *b) > +{ > + if (b->source == DPBUF_DPDK) { > + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, > &(b->mbuf)); > + > + buf = rte_pktmbuf_lastseg(buf); > + See Above > + return (char *) buf->buf_addr + buf->buf_len; > + } else { > + return (char *) dp_packet_base(b) + > dp_packet_get_allocated(b); > + } > +} > #else > +/* If 'b' contains at least 'offset + size' bytes of data, returns a > pointer to > + * byte 'offset'. Otherwise, returns a null pointer. */ > +static inline void * > +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) > +{ > + return offset + size <= dp_packet_size(b) > + ? (char *) dp_packet_data(b) + offset > + : NULL; > +} > + > +/* Returns a pointer to byte 'offset' in 'b', which must contain at > least > + * 'offset + size' bytes of data. */ > +static inline void * > +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t > size) > +{ > + ovs_assert(offset + size <= dp_packet_size(b)); > + return ((char *) dp_packet_data(b)) + offset; > +} > + > +/* Returns a pointer to byte following the last byte of data in use > in 'b'. */ > +static inline void * > +dp_packet_tail(const struct dp_packet *b) > +{ > + return (char *) dp_packet_data(b) + dp_packet_size(b); > +} > + > +/* Returns a pointer to byte following the last byte allocated for > use (but > + * not necessarily in use) in 'b'. */ > +static inline void * > +dp_packet_end(const struct dp_packet *b) > +{ > + return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); > +} > + > static inline void * > dp_packet_base(const struct dp_packet *b) > { > @@ -518,34 +666,34 @@ dp_packet_size(const struct dp_packet *b) > return b->size_; > } > > -static inline void > -dp_packet_set_size(struct dp_packet *b, uint32_t v) > +static inline uint16_t > +dp_packet_get_allocated(const struct dp_packet *b) > { > - b->size_ = v; > + return b->allocated_; > } > > -static inline uint16_t > -__packet_data(const struct dp_packet *b) > +static inline void > +dp_packet_set_allocated(struct dp_packet *b, uint16_t s) > { > - return b->data_ofs; > + b->allocated_ = s; > } > > static inline void > -__packet_set_data(struct dp_packet *b, uint16_t v) > +dp_packet_set_size(struct dp_packet *b, uint32_t v) > { > - b->data_ofs = v; > + b->size_ = v; > } > > static inline uint16_t > -dp_packet_get_allocated(const struct dp_packet *b) > +__packet_data(const struct dp_packet *b) > { > - return b->allocated_; > + return b->data_ofs; > } > > static inline void > -dp_packet_set_allocated(struct dp_packet *b, uint16_t s) > +__packet_set_data(struct dp_packet *b, uint16_t v) > { > - b->allocated_ = s; > + b->data_ofs = v; > } > #endif > > -- > 2.7.4
On 18/06/2018 12:50, Eelco Chaudron wrote: > > > On 11 Jun 2018, at 18:21, Tiago Lam wrote: > >> Most helper functions in dp-packet assume that the data held by a >> dp_packet is contiguous, and perform operations such as pointer >> arithmetic under that assumption. However, with the introduction of >> multi-segment mbufs, where data is non-contiguous, such assumptions >> are >> no longer possible. Some examples of Such helper functions are >> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(), >> dp_packet_get_allocated() and dp_packet_at(). >> >> Thus, instead of assuming contiguous data in dp_packet, they now >> iterate over the (non-contiguous) data in mbufs to perform their >> calculations. >> >> Finally, dp_packet_use__() has also been modified to perform the >> initialisation of the packet (and setting the source) before >> continuing >> to set its size and data length, which now depends on the type of >> packet. >> >> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >> >> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >> Signed-off-by: Tiago Lam <tiago.lam@intel.com> >> --- >> lib/dp-packet.c | 4 +- >> lib/dp-packet.h | 242 >> +++++++++++++++++++++++++++++++++++++++++++++----------- >> 2 files changed, 197 insertions(+), 49 deletions(-) >> >> diff --git a/lib/dp-packet.c b/lib/dp-packet.c >> index 782e7c2..2aaeaae 100644 >> --- a/lib/dp-packet.c >> +++ b/lib/dp-packet.c >> @@ -41,11 +41,11 @@ static void >> dp_packet_use__(struct dp_packet *b, void *base, size_t allocated, >> enum dp_packet_source source) >> { >> + dp_packet_init__(b, allocated, source); >> + >> dp_packet_set_base(b, base); >> dp_packet_set_data(b, base); >> dp_packet_set_size(b, 0); >> - >> - dp_packet_init__(b, allocated, source); >> } >> >> /* Initializes 'b' as an empty dp_packet that contains the >> 'allocated' bytes of >> diff --git a/lib/dp-packet.h b/lib/dp-packet.h >> index c301ed5..272597f 100644 >> --- a/lib/dp-packet.h >> +++ b/lib/dp-packet.h >> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct >> dp_packet *, size_t offset, >> size_t size); >> static inline void *dp_packet_at_assert(const struct dp_packet *, >> size_t offset, size_t size); >> +#ifdef DPDK_NETDEV >> +static inline void * dp_packet_at_offset(const struct dp_packet *b, >> + size_t offset); >> +#endif >> static inline void *dp_packet_tail(const struct dp_packet *); >> static inline void *dp_packet_end(const struct dp_packet *); >> >> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b) >> } >> } >> >> -/* If 'b' contains at least 'offset + size' bytes of data, returns a >> pointer to >> - * byte 'offset'. Otherwise, returns a null pointer. */ >> -static inline void * >> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >> -{ >> - return offset + size <= dp_packet_size(b) >> - ? (char *) dp_packet_data(b) + offset >> - : NULL; >> -} >> - >> -/* Returns a pointer to byte 'offset' in 'b', which must contain at >> least >> - * 'offset + size' bytes of data. */ >> -static inline void * >> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t >> size) >> -{ >> - ovs_assert(offset + size <= dp_packet_size(b)); >> - return ((char *) dp_packet_data(b)) + offset; >> -} >> - >> -/* Returns a pointer to byte following the last byte of data in use >> in 'b'. */ >> -static inline void * >> -dp_packet_tail(const struct dp_packet *b) >> -{ >> - return (char *) dp_packet_data(b) + dp_packet_size(b); >> -} >> - >> -/* Returns a pointer to byte following the last byte allocated for >> use (but >> - * not necessarily in use) in 'b'. */ >> -static inline void * >> -dp_packet_end(const struct dp_packet *b) >> -{ >> - return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); >> -} >> - >> /* Returns the number of bytes of headroom in 'b', that is, the >> number of bytes >> * of unused space in dp_packet 'b' before the data that is in use. >> (Most >> * commonly, the data in a dp_packet is at its beginning, and thus >> the >> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b) >> static inline size_t >> dp_packet_tailroom(const struct dp_packet *b) >> { >> +#ifdef DPDK_NETDEV >> + if (b->source == DPBUF_DPDK) { >> + struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, >> &b->mbuf); >> + tmbuf = rte_pktmbuf_lastseg(tmbuf); >> + > > What if the last two chains contain no user data, we have more > tailroom?!? Should we not call rte_pktmbuf_tailroom(tmbuf)? I've covered this in my reply to the cover letter, hopefully it is more clear now. This shouldn't be a possibility in the implementation. Also, we do call rte_pktmbuf_tailroom(), just below. > >> + return rte_pktmbuf_tailroom(tmbuf); >> + } >> +#endif >> return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); >> } >> >> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b) >> static inline void >> dp_packet_clear(struct dp_packet *b) >> { >> +#ifdef DPDK_NETDEV >> + dp_packet_set_size(b, 0); >> + rte_pktmbuf_reset(&b->mbuf); > > This would probably assert on none DPBUF_DPDK, do we need a “if > (b->source == DPBUF_DPDK)” here also? > We do, I'll add it to next iteration. It only asserts when using DPDK with `RTE_LIBRTE_MBUF_DEBUG`, but still... >> +#else >> dp_packet_set_data(b, dp_packet_base(b)); >> dp_packet_set_size(b, 0); >> +#endif >> } >> >> /* Removes 'size' bytes from the head end of 'b', which must contain >> at least >> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t size) >> return data; >> } >> >> +#ifdef DPDK_NETDEV >> +/* Similar to dp_packet_try_pull() but doesn't actually pull any >> code, only >> + * returns true or false if it would be possible to actually pull any >> code. >> + * Valid for dp_packets carrying mbufs only. */ >> +static inline bool >> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) { >> + if (size > b->mbuf.data_len) { >> + return false; >> + } >> + >> + return true; >> +} >> +#endif >> + >> /* If 'b' has at least 'size' bytes of data, removes that many bytes >> from the >> * head end of 'b' and returns the first byte removed. Otherwise, >> returns a >> * null pointer without modifying 'b'. */ >> static inline void * >> dp_packet_try_pull(struct dp_packet *b, size_t size) >> { >> +#ifdef DPDK_NETDEV >> + if (!dp_packet_mbuf_may_pull(b, size)) { >> + return NULL; >> + } >> +#endif >> + >> return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size >> ? dp_packet_pull(b, size) : NULL; >> } >> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, >> uint8_t pad_size) >> static inline void * >> dp_packet_l2_5(const struct dp_packet *b) >> { >> +#ifdef DPDK_NETDEV >> + if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) { >> + return NULL; >> + } >> +#endif >> + >> return b->l2_5_ofs != UINT16_MAX >> ? (char *) dp_packet_data(b) + b->l2_5_ofs >> : NULL; >> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void >> *l2_5) >> static inline void * >> dp_packet_l3(const struct dp_packet *b) >> { >> +#ifdef DPDK_NETDEV >> + if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) { >> + return NULL; >> + } >> +#endif >> + > > Can we not use the dp_packet_at_offset() function here (and the similar > functions)? And include the above and below checks there? > They are aimed at different purposes. dp_packet_at_offset() should walk the chain of mbufs and return a pointer to the offset, while here we actually want to know if the offset is within the first mbuf. If these headers are not within the first mbuf, we don't return it as it crosses mbufs. >> return b->l3_ofs != UINT16_MAX >> ? (char *) dp_packet_data(b) + b->l3_ofs >> : NULL; >> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3) >> static inline void * >> dp_packet_l4(const struct dp_packet *b) >> { >> +#ifdef DPDK_NETDEV >> + if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) { >> + return NULL; >> + } >> +#endif >> + >> return b->l4_ofs != UINT16_MAX >> ? (char *) dp_packet_data(b) + b->l4_ofs >> : NULL; >> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void *l4) >> static inline size_t >> dp_packet_l4_size(const struct dp_packet *b) >> { >> +#ifdef DPDK_NETDEV >> + if (b->l4_ofs == UINT16_MAX) { >> + return 0; >> + } >> + >> + struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf); >> + struct rte_mbuf *tmbuf = dp_packet_tail(b); >> + >> + size_t l4_size = 0; >> + if (hmbuf == tmbuf) { > > Do not think this optimisation help much, as the dp_packet_tail(b) does > the same walk as in the "while (hmbuf)" clause below, so why not just > keet the else case? > Yeah, good point. I'll change that in the next iteration. >> + l4_size = hmbuf->data_len - b->l4_ofs; >> + } else { >> + l4_size = hmbuf->data_len - b->l4_ofs; >> + hmbuf = hmbuf->next; >> + >> + while (hmbuf) { >> + l4_size += tmbuf->data_len; >> + >> + hmbuf = hmbuf->next; >> + } >> + } >> + >> + l4_size -= dp_packet_l2_pad_size(b); >> + >> + return l4_size; >> +#else >> return b->l4_ofs != UINT16_MAX >> ? (const char *)dp_packet_tail(b) - (const char >> *)dp_packet_l4(b) >> - dp_packet_l2_pad_size(b) >> : 0; >> +#endif >> } >> >> static inline const void * >> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t v) >> static inline uint16_t >> dp_packet_get_allocated(const struct dp_packet *b) >> { >> - return b->mbuf.buf_len; >> + return b->mbuf.nb_segs * b->mbuf.buf_len; >> } >> >> static inline void >> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, >> uint16_t s) >> { >> b->mbuf.buf_len = s; >> } >> + >> +static inline void * >> +dp_packet_at_offset(const struct dp_packet *b, size_t offset) >> +{ >> + if (b->source == DPBUF_DPDK) { >> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >> &b->mbuf); >> + >> + while (buf && offset > buf->data_len) { >> + offset -= buf->data_len; >> + >> + buf = buf->next; >> + } >> + return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : >> NULL; >> + } else { >> + return (char *) dp_packet_data(b) + offset; >> + } >> +} >> + >> +/* If 'b' contains at least 'offset + size' bytes of data, returns a >> pointer to >> + * byte 'offset'. Otherwise, returns a null pointer. */ >> +static inline void * >> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >> +{ >> + return offset + size <= dp_packet_size(b) >> + ? dp_packet_at_offset(b, offset) >> + : NULL; >> +} >> + >> +/* Returns a pointer to byte 'offset' in 'b', which must contain at >> least >> + * 'offset + size' bytes of data. */ >> +static inline void * >> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t >> size) >> +{ >> + ovs_assert(offset + size <= dp_packet_size(b)); >> + return dp_packet_at_offset(b, offset); >> +} >> + >> +/* Returns a pointer to byte following the last byte of data in use >> in 'b'. */ >> +static inline void * >> +dp_packet_tail(const struct dp_packet *b) >> +{ >> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); >> + if (b->source == DPBUF_DPDK) { >> + /* Find last segment where data ends, meaning the tail of the >> chained >> + mbufs is there */ >> + buf = rte_pktmbuf_lastseg(buf); >> + } >> + > > What if the last two chains contains no user data, we should find the > last buffer with data. > Looks like we assume only the last in the chain contains room, and we > achieve this by stripping off buffers (but we never add them again when > needed, so something is wrong in this implementation). > Does my reply to the cover letter help here as well? That's a valid point, that if the size is decreased and mbufs are freed then mbufs are not allocated again if size needs to be incremented. But as explained, this is to enforce that data is contiguous and so the tail points to the last mbuf. I haven't yet encountered cases where this would lead to complications, but maybe you have something in mind? Tiago.
On 22 Jun 2018, at 21:04, Lam, Tiago wrote: > On 18/06/2018 12:50, Eelco Chaudron wrote: >> >> >> On 11 Jun 2018, at 18:21, Tiago Lam wrote: >> >>> Most helper functions in dp-packet assume that the data held by a >>> dp_packet is contiguous, and perform operations such as pointer >>> arithmetic under that assumption. However, with the introduction of >>> multi-segment mbufs, where data is non-contiguous, such assumptions >>> are >>> no longer possible. Some examples of Such helper functions are >>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(), >>> dp_packet_get_allocated() and dp_packet_at(). >>> >>> Thus, instead of assuming contiguous data in dp_packet, they now >>> iterate over the (non-contiguous) data in mbufs to perform their >>> calculations. >>> >>> Finally, dp_packet_use__() has also been modified to perform the >>> initialisation of the packet (and setting the source) before >>> continuing >>> to set its size and data length, which now depends on the type of >>> packet. >>> >>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >>> >>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >>> Signed-off-by: Tiago Lam <tiago.lam@intel.com> >>> --- >>> lib/dp-packet.c | 4 +- >>> lib/dp-packet.h | 242 >>> +++++++++++++++++++++++++++++++++++++++++++++----------- >>> 2 files changed, 197 insertions(+), 49 deletions(-) >>> >>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c >>> index 782e7c2..2aaeaae 100644 >>> --- a/lib/dp-packet.c >>> +++ b/lib/dp-packet.c >>> @@ -41,11 +41,11 @@ static void >>> dp_packet_use__(struct dp_packet *b, void *base, size_t allocated, >>> enum dp_packet_source source) >>> { >>> + dp_packet_init__(b, allocated, source); >>> + >>> dp_packet_set_base(b, base); >>> dp_packet_set_data(b, base); >>> dp_packet_set_size(b, 0); >>> - >>> - dp_packet_init__(b, allocated, source); >>> } >>> >>> /* Initializes 'b' as an empty dp_packet that contains the >>> 'allocated' bytes of >>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h >>> index c301ed5..272597f 100644 >>> --- a/lib/dp-packet.h >>> +++ b/lib/dp-packet.h >>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct >>> dp_packet *, size_t offset, >>> size_t size); >>> static inline void *dp_packet_at_assert(const struct dp_packet *, >>> size_t offset, size_t >>> size); >>> +#ifdef DPDK_NETDEV >>> +static inline void * dp_packet_at_offset(const struct dp_packet *b, >>> + size_t offset); >>> +#endif >>> static inline void *dp_packet_tail(const struct dp_packet *); >>> static inline void *dp_packet_end(const struct dp_packet *); >>> >>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b) >>> } >>> } >>> >>> -/* If 'b' contains at least 'offset + size' bytes of data, returns >>> a >>> pointer to >>> - * byte 'offset'. Otherwise, returns a null pointer. */ >>> -static inline void * >>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >>> -{ >>> - return offset + size <= dp_packet_size(b) >>> - ? (char *) dp_packet_data(b) + offset >>> - : NULL; >>> -} >>> - >>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at >>> least >>> - * 'offset + size' bytes of data. */ >>> -static inline void * >>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, >>> size_t >>> size) >>> -{ >>> - ovs_assert(offset + size <= dp_packet_size(b)); >>> - return ((char *) dp_packet_data(b)) + offset; >>> -} >>> - >>> -/* Returns a pointer to byte following the last byte of data in use >>> in 'b'. */ >>> -static inline void * >>> -dp_packet_tail(const struct dp_packet *b) >>> -{ >>> - return (char *) dp_packet_data(b) + dp_packet_size(b); >>> -} >>> - >>> -/* Returns a pointer to byte following the last byte allocated for >>> use (but >>> - * not necessarily in use) in 'b'. */ >>> -static inline void * >>> -dp_packet_end(const struct dp_packet *b) >>> -{ >>> - return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); >>> -} >>> - >>> /* Returns the number of bytes of headroom in 'b', that is, the >>> number of bytes >>> * of unused space in dp_packet 'b' before the data that is in use. >>> (Most >>> * commonly, the data in a dp_packet is at its beginning, and thus >>> the >>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b) >>> static inline size_t >>> dp_packet_tailroom(const struct dp_packet *b) >>> { >>> +#ifdef DPDK_NETDEV >>> + if (b->source == DPBUF_DPDK) { >>> + struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, >>> &b->mbuf); >>> + tmbuf = rte_pktmbuf_lastseg(tmbuf); >>> + >> >> What if the last two chains contain no user data, we have more >> tailroom?!? Should we not call (tmbuf)? > > I've covered this in my reply to the cover letter, hopefully it is > more > clear now. This shouldn't be a possibility in the implementation. > Also, > we do call rte_pktmbuf_tailroom(), just below. I know but I do not see the reason for not calling rte_pktmbuf_tailroom() on the mbuf directly. This will work for all cases, add’s less complexity, even in the future if we move to a design where we will support multiple buffers. Trying to “limit” the usual mbuf cases does not sound right, and it will confuse people. >> >>> + return rte_pktmbuf_tailroom(tmbuf); >>> + } >>> +#endif >>> return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); >>> } >>> >>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b) >>> static inline void >>> dp_packet_clear(struct dp_packet *b) >>> { >>> +#ifdef DPDK_NETDEV >>> + dp_packet_set_size(b, 0); >>> + rte_pktmbuf_reset(&b->mbuf); >> >> This would probably assert on none DPBUF_DPDK, do we need a “if >> (b->source == DPBUF_DPDK)” here also? >> > > We do, I'll add it to next iteration. It only asserts when using DPDK > with `RTE_LIBRTE_MBUF_DEBUG`, but still... Yes, but you do not want this assert when you are trying to debug something else ;) >>> +#else >>> dp_packet_set_data(b, dp_packet_base(b)); >>> dp_packet_set_size(b, 0); >>> +#endif >>> } >>> >>> /* Removes 'size' bytes from the head end of 'b', which must >>> contain >>> at least >>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t >>> size) >>> return data; >>> } >>> >>> +#ifdef DPDK_NETDEV >>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any >>> code, only >>> + * returns true or false if it would be possible to actually pull >>> any >>> code. >>> + * Valid for dp_packets carrying mbufs only. */ >>> +static inline bool >>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) { >>> + if (size > b->mbuf.data_len) { >>> + return false; >>> + } >>> + >>> + return true; >>> +} >>> +#endif >>> + >>> /* If 'b' has at least 'size' bytes of data, removes that many >>> bytes >>> from the >>> * head end of 'b' and returns the first byte removed. Otherwise, >>> returns a >>> * null pointer without modifying 'b'. */ >>> static inline void * >>> dp_packet_try_pull(struct dp_packet *b, size_t size) >>> { >>> +#ifdef DPDK_NETDEV >>> + if (!dp_packet_mbuf_may_pull(b, size)) { >>> + return NULL; >>> + } >>> +#endif >>> + >>> return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size >>> ? dp_packet_pull(b, size) : NULL; >>> } >>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, >>> uint8_t pad_size) >>> static inline void * >>> dp_packet_l2_5(const struct dp_packet *b) >>> { >>> +#ifdef DPDK_NETDEV >>> + if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) { >>> + return NULL; >>> + } >>> +#endif >>> + >>> return b->l2_5_ofs != UINT16_MAX >>> ? (char *) dp_packet_data(b) + b->l2_5_ofs >>> : NULL; >>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void >>> *l2_5) >>> static inline void * >>> dp_packet_l3(const struct dp_packet *b) >>> { >>> +#ifdef DPDK_NETDEV >>> + if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) { >>> + return NULL; >>> + } >>> +#endif >>> + >> >> Can we not use the dp_packet_at_offset() function here (and the >> similar >> functions)? And include the above and below checks there? >> > > They are aimed at different purposes. dp_packet_at_offset() should > walk > the chain of mbufs and return a pointer to the offset, while here we > actually want to know if the offset is within the first mbuf. If these > headers are not within the first mbuf, we don't return it as it > crosses > mbufs. My bad, I was referring to dp_packet_at(), which probably also need the may_pull() if there are control protocols with jumbo packets? >>> return b->l3_ofs != UINT16_MAX >>> ? (char *) dp_packet_data(b) + b->l3_ofs >>> : NULL; >>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3) >>> static inline void * >>> dp_packet_l4(const struct dp_packet *b) >>> { >>> +#ifdef DPDK_NETDEV >>> + if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) { >>> + return NULL; >>> + } >>> +#endif >>> + >>> return b->l4_ofs != UINT16_MAX >>> ? (char *) dp_packet_data(b) + b->l4_ofs >>> : NULL; >>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void >>> *l4) >>> static inline size_t >>> dp_packet_l4_size(const struct dp_packet *b) >>> { >>> +#ifdef DPDK_NETDEV >>> + if (b->l4_ofs == UINT16_MAX) { >>> + return 0; >>> + } >>> + >>> + struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, >>> &b->mbuf); >>> + struct rte_mbuf *tmbuf = dp_packet_tail(b); >>> + >>> + size_t l4_size = 0; >>> + if (hmbuf == tmbuf) { >> >> Do not think this optimisation help much, as the dp_packet_tail(b) >> does >> the same walk as in the "while (hmbuf)" clause below, so why not just >> keet the else case? >> > > Yeah, good point. I'll change that in the next iteration. > >>> + l4_size = hmbuf->data_len - b->l4_ofs; >>> + } else { >>> + l4_size = hmbuf->data_len - b->l4_ofs; >>> + hmbuf = hmbuf->next; >>> + >>> + while (hmbuf) { >>> + l4_size += tmbuf->data_len; >>> + >>> + hmbuf = hmbuf->next; >>> + } >>> + } >>> + >>> + l4_size -= dp_packet_l2_pad_size(b); >>> + >>> + return l4_size; >>> +#else >>> return b->l4_ofs != UINT16_MAX >>> ? (const char *)dp_packet_tail(b) - (const char >>> *)dp_packet_l4(b) >>> - dp_packet_l2_pad_size(b) >>> : 0; >>> +#endif >>> } >>> >>> static inline const void * >>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t >>> v) >>> static inline uint16_t >>> dp_packet_get_allocated(const struct dp_packet *b) >>> { >>> - return b->mbuf.buf_len; >>> + return b->mbuf.nb_segs * b->mbuf.buf_len; >>> } >>> >>> static inline void >>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, >>> uint16_t s) >>> { >>> b->mbuf.buf_len = s; >>> } >>> + >>> +static inline void * >>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset) >>> +{ >>> + if (b->source == DPBUF_DPDK) { >>> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >>> &b->mbuf); >>> + >>> + while (buf && offset > buf->data_len) { >>> + offset -= buf->data_len; >>> + >>> + buf = buf->next; >>> + } >>> + return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : >>> NULL; >>> + } else { >>> + return (char *) dp_packet_data(b) + offset; >>> + } >>> +} >>> + >>> +/* If 'b' contains at least 'offset + size' bytes of data, returns >>> a >>> pointer to >>> + * byte 'offset'. Otherwise, returns a null pointer. */ >>> +static inline void * >>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >>> +{ >>> + return offset + size <= dp_packet_size(b) >>> + ? dp_packet_at_offset(b, offset) >>> + : NULL; >>> +} >>> + >>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at >>> least >>> + * 'offset + size' bytes of data. */ >>> +static inline void * >>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, >>> size_t >>> size) >>> +{ >>> + ovs_assert(offset + size <= dp_packet_size(b)); >>> + return dp_packet_at_offset(b, offset); >>> +} >>> + >>> +/* Returns a pointer to byte following the last byte of data in use >>> in 'b'. */ >>> +static inline void * >>> +dp_packet_tail(const struct dp_packet *b) >>> +{ >>> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); >>> + if (b->source == DPBUF_DPDK) { >>> + /* Find last segment where data ends, meaning the tail of >>> the >>> chained >>> + mbufs is there */ >>> + buf = rte_pktmbuf_lastseg(buf); >>> + } >>> + >> >> What if the last two chains contains no user data, we should find the >> last buffer with data. >> Looks like we assume only the last in the chain contains room, and we >> achieve this by stripping off buffers (but we never add them again >> when >> needed, so something is wrong in this implementation). >> > > Does my reply to the cover letter help here as well? That's a valid > point, that if the size is decreased and mbufs are freed then mbufs > are > not allocated again if size needs to be incremented. But as explained, > this is to enforce that data is contiguous and so the tail points to > the > last mbuf. I haven't yet encountered cases where this would lead to > complications, but maybe you have something in mind? I feel like we should not design the general APIs to force the current assumed limitation. As rte_pktmbuf_lastseg() is already walking the three, we could do it here instead and checking for the mailroom.
On 26/06/2018 11:31, Eelco Chaudron wrote: > > > On 22 Jun 2018, at 21:04, Lam, Tiago wrote: > >> On 18/06/2018 12:50, Eelco Chaudron wrote: >>> >>> >>> On 11 Jun 2018, at 18:21, Tiago Lam wrote: >>> >>>> Most helper functions in dp-packet assume that the data held by a >>>> dp_packet is contiguous, and perform operations such as pointer >>>> arithmetic under that assumption. However, with the introduction of >>>> multi-segment mbufs, where data is non-contiguous, such assumptions >>>> are >>>> no longer possible. Some examples of Such helper functions are >>>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(), >>>> dp_packet_get_allocated() and dp_packet_at(). >>>> >>>> Thus, instead of assuming contiguous data in dp_packet, they now >>>> iterate over the (non-contiguous) data in mbufs to perform their >>>> calculations. >>>> >>>> Finally, dp_packet_use__() has also been modified to perform the >>>> initialisation of the packet (and setting the source) before >>>> continuing >>>> to set its size and data length, which now depends on the type of >>>> packet. >>>> >>>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >>>> >>>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com> >>>> --- >>>> lib/dp-packet.c | 4 +- >>>> lib/dp-packet.h | 242 >>>> +++++++++++++++++++++++++++++++++++++++++++++----------- >>>> 2 files changed, 197 insertions(+), 49 deletions(-) >>>> >>>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c >>>> index 782e7c2..2aaeaae 100644 >>>> --- a/lib/dp-packet.c >>>> +++ b/lib/dp-packet.c >>>> @@ -41,11 +41,11 @@ static void >>>> dp_packet_use__(struct dp_packet *b, void *base, size_t allocated, >>>> enum dp_packet_source source) >>>> { >>>> + dp_packet_init__(b, allocated, source); >>>> + >>>> dp_packet_set_base(b, base); >>>> dp_packet_set_data(b, base); >>>> dp_packet_set_size(b, 0); >>>> - >>>> - dp_packet_init__(b, allocated, source); >>>> } >>>> >>>> /* Initializes 'b' as an empty dp_packet that contains the >>>> 'allocated' bytes of >>>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h >>>> index c301ed5..272597f 100644 >>>> --- a/lib/dp-packet.h >>>> +++ b/lib/dp-packet.h >>>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct >>>> dp_packet *, size_t offset, >>>> size_t size); >>>> static inline void *dp_packet_at_assert(const struct dp_packet *, >>>> size_t offset, size_t >>>> size); >>>> +#ifdef DPDK_NETDEV >>>> +static inline void * dp_packet_at_offset(const struct dp_packet *b, >>>> + size_t offset); >>>> +#endif >>>> static inline void *dp_packet_tail(const struct dp_packet *); >>>> static inline void *dp_packet_end(const struct dp_packet *); >>>> >>>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b) >>>> } >>>> } >>>> >>>> -/* If 'b' contains at least 'offset + size' bytes of data, returns >>>> a >>>> pointer to >>>> - * byte 'offset'. Otherwise, returns a null pointer. */ >>>> -static inline void * >>>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >>>> -{ >>>> - return offset + size <= dp_packet_size(b) >>>> - ? (char *) dp_packet_data(b) + offset >>>> - : NULL; >>>> -} >>>> - >>>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at >>>> least >>>> - * 'offset + size' bytes of data. */ >>>> -static inline void * >>>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, >>>> size_t >>>> size) >>>> -{ >>>> - ovs_assert(offset + size <= dp_packet_size(b)); >>>> - return ((char *) dp_packet_data(b)) + offset; >>>> -} >>>> - >>>> -/* Returns a pointer to byte following the last byte of data in use >>>> in 'b'. */ >>>> -static inline void * >>>> -dp_packet_tail(const struct dp_packet *b) >>>> -{ >>>> - return (char *) dp_packet_data(b) + dp_packet_size(b); >>>> -} >>>> - >>>> -/* Returns a pointer to byte following the last byte allocated for >>>> use (but >>>> - * not necessarily in use) in 'b'. */ >>>> -static inline void * >>>> -dp_packet_end(const struct dp_packet *b) >>>> -{ >>>> - return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); >>>> -} >>>> - >>>> /* Returns the number of bytes of headroom in 'b', that is, the >>>> number of bytes >>>> * of unused space in dp_packet 'b' before the data that is in use. >>>> (Most >>>> * commonly, the data in a dp_packet is at its beginning, and thus >>>> the >>>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b) >>>> static inline size_t >>>> dp_packet_tailroom(const struct dp_packet *b) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + if (b->source == DPBUF_DPDK) { >>>> + struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, >>>> &b->mbuf); >>>> + tmbuf = rte_pktmbuf_lastseg(tmbuf); >>>> + >>> >>> What if the last two chains contain no user data, we have more >>> tailroom?!? Should we not call (tmbuf)? >> >> I've covered this in my reply to the cover letter, hopefully it is >> more >> clear now. This shouldn't be a possibility in the implementation. >> Also, >> we do call rte_pktmbuf_tailroom(), just below. > > I know but I do not see the reason for not calling > rte_pktmbuf_tailroom() on the mbuf directly. This will work for all > cases, add’s less complexity, even in the future if we move to a > design where we will support multiple buffers. Trying to “limit” the > usual mbuf cases does not sound right, and it will confuse people. > I think there's some confusion here. rte_pktmbuf_tailroom() doesn't calculate the case above where "the last two chains contain no user data". It only calculates the tailroom for the passed mbuf. In fact, most of the rte_pktmbuf_* functions does not deal with such cases. They only do the calculations for the passed mbuf, and it is up to the user of the library to add logic on top (using helper functions such as rte_pktmbuf_lastseg() to traverse the chain). Since this series doesn't allow for an mbuf in the chain to be free of data after the tail, I didn't add the extra logic. Hopefully this clarifies why we are calling rte_pktmbuf_tailroom() in the tail mbuf only. Tiago. >>> >>>> + return rte_pktmbuf_tailroom(tmbuf); >>>> + } >>>> +#endif >>>> return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); >>>> } >>>> >>>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b) >>>> static inline void >>>> dp_packet_clear(struct dp_packet *b) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + dp_packet_set_size(b, 0); >>>> + rte_pktmbuf_reset(&b->mbuf); >>> >>> This would probably assert on none DPBUF_DPDK, do we need a “if >>> (b->source == DPBUF_DPDK)” here also? >>> >> >> We do, I'll add it to next iteration. It only asserts when using DPDK >> with `RTE_LIBRTE_MBUF_DEBUG`, but still... > > Yes, but you do not want this assert when you are trying to debug > something else ;) > >>>> +#else >>>> dp_packet_set_data(b, dp_packet_base(b)); >>>> dp_packet_set_size(b, 0); >>>> +#endif >>>> } >>>> >>>> /* Removes 'size' bytes from the head end of 'b', which must >>>> contain >>>> at least >>>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t >>>> size) >>>> return data; >>>> } >>>> >>>> +#ifdef DPDK_NETDEV >>>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any >>>> code, only >>>> + * returns true or false if it would be possible to actually pull >>>> any >>>> code. >>>> + * Valid for dp_packets carrying mbufs only. */ >>>> +static inline bool >>>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) { >>>> + if (size > b->mbuf.data_len) { >>>> + return false; >>>> + } >>>> + >>>> + return true; >>>> +} >>>> +#endif >>>> + >>>> /* If 'b' has at least 'size' bytes of data, removes that many >>>> bytes >>>> from the >>>> * head end of 'b' and returns the first byte removed. Otherwise, >>>> returns a >>>> * null pointer without modifying 'b'. */ >>>> static inline void * >>>> dp_packet_try_pull(struct dp_packet *b, size_t size) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + if (!dp_packet_mbuf_may_pull(b, size)) { >>>> + return NULL; >>>> + } >>>> +#endif >>>> + >>>> return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size >>>> ? dp_packet_pull(b, size) : NULL; >>>> } >>>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, >>>> uint8_t pad_size) >>>> static inline void * >>>> dp_packet_l2_5(const struct dp_packet *b) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) { >>>> + return NULL; >>>> + } >>>> +#endif >>>> + >>>> return b->l2_5_ofs != UINT16_MAX >>>> ? (char *) dp_packet_data(b) + b->l2_5_ofs >>>> : NULL; >>>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void >>>> *l2_5) >>>> static inline void * >>>> dp_packet_l3(const struct dp_packet *b) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) { >>>> + return NULL; >>>> + } >>>> +#endif >>>> + >>> >>> Can we not use the dp_packet_at_offset() function here (and the >>> similar >>> functions)? And include the above and below checks there? >>> >> >> They are aimed at different purposes. dp_packet_at_offset() should >> walk >> the chain of mbufs and return a pointer to the offset, while here we >> actually want to know if the offset is within the first mbuf. If these >> headers are not within the first mbuf, we don't return it as it >> crosses >> mbufs. > > My bad, I was referring to dp_packet_at(), which probably also need the > may_pull() if there are control protocols with jumbo packets? > > >>>> return b->l3_ofs != UINT16_MAX >>>> ? (char *) dp_packet_data(b) + b->l3_ofs >>>> : NULL; >>>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3) >>>> static inline void * >>>> dp_packet_l4(const struct dp_packet *b) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) { >>>> + return NULL; >>>> + } >>>> +#endif >>>> + >>>> return b->l4_ofs != UINT16_MAX >>>> ? (char *) dp_packet_data(b) + b->l4_ofs >>>> : NULL; >>>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void >>>> *l4) >>>> static inline size_t >>>> dp_packet_l4_size(const struct dp_packet *b) >>>> { >>>> +#ifdef DPDK_NETDEV >>>> + if (b->l4_ofs == UINT16_MAX) { >>>> + return 0; >>>> + } >>>> + >>>> + struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, >>>> &b->mbuf); >>>> + struct rte_mbuf *tmbuf = dp_packet_tail(b); >>>> + >>>> + size_t l4_size = 0; >>>> + if (hmbuf == tmbuf) { >>> >>> Do not think this optimisation help much, as the dp_packet_tail(b) >>> does >>> the same walk as in the "while (hmbuf)" clause below, so why not just >>> keet the else case? >>> >> >> Yeah, good point. I'll change that in the next iteration. >> >>>> + l4_size = hmbuf->data_len - b->l4_ofs; >>>> + } else { >>>> + l4_size = hmbuf->data_len - b->l4_ofs; >>>> + hmbuf = hmbuf->next; >>>> + >>>> + while (hmbuf) { >>>> + l4_size += tmbuf->data_len; >>>> + >>>> + hmbuf = hmbuf->next; >>>> + } >>>> + } >>>> + >>>> + l4_size -= dp_packet_l2_pad_size(b); >>>> + >>>> + return l4_size; >>>> +#else >>>> return b->l4_ofs != UINT16_MAX >>>> ? (const char *)dp_packet_tail(b) - (const char >>>> *)dp_packet_l4(b) >>>> - dp_packet_l2_pad_size(b) >>>> : 0; >>>> +#endif >>>> } >>>> >>>> static inline const void * >>>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t >>>> v) >>>> static inline uint16_t >>>> dp_packet_get_allocated(const struct dp_packet *b) >>>> { >>>> - return b->mbuf.buf_len; >>>> + return b->mbuf.nb_segs * b->mbuf.buf_len; >>>> } >>>> >>>> static inline void >>>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, >>>> uint16_t s) >>>> { >>>> b->mbuf.buf_len = s; >>>> } >>>> + >>>> +static inline void * >>>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset) >>>> +{ >>>> + if (b->source == DPBUF_DPDK) { >>>> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >>>> &b->mbuf); >>>> + >>>> + while (buf && offset > buf->data_len) { >>>> + offset -= buf->data_len; >>>> + >>>> + buf = buf->next; >>>> + } >>>> + return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : >>>> NULL; >>>> + } else { >>>> + return (char *) dp_packet_data(b) + offset; >>>> + } >>>> +} >>>> + >>>> +/* If 'b' contains at least 'offset + size' bytes of data, returns >>>> a >>>> pointer to >>>> + * byte 'offset'. Otherwise, returns a null pointer. */ >>>> +static inline void * >>>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >>>> +{ >>>> + return offset + size <= dp_packet_size(b) >>>> + ? dp_packet_at_offset(b, offset) >>>> + : NULL; >>>> +} >>>> + >>>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at >>>> least >>>> + * 'offset + size' bytes of data. */ >>>> +static inline void * >>>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, >>>> size_t >>>> size) >>>> +{ >>>> + ovs_assert(offset + size <= dp_packet_size(b)); >>>> + return dp_packet_at_offset(b, offset); >>>> +} >>>> + >>>> +/* Returns a pointer to byte following the last byte of data in use >>>> in 'b'. */ >>>> +static inline void * >>>> +dp_packet_tail(const struct dp_packet *b) >>>> +{ >>>> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); >>>> + if (b->source == DPBUF_DPDK) { >>>> + /* Find last segment where data ends, meaning the tail of >>>> the >>>> chained >>>> + mbufs is there */ >>>> + buf = rte_pktmbuf_lastseg(buf); >>>> + } >>>> + >>> >>> What if the last two chains contains no user data, we should find the >>> last buffer with data. >>> Looks like we assume only the last in the chain contains room, and we >>> achieve this by stripping off buffers (but we never add them again >>> when >>> needed, so something is wrong in this implementation). >>> >> >> Does my reply to the cover letter help here as well? That's a valid >> point, that if the size is decreased and mbufs are freed then mbufs >> are >> not allocated again if size needs to be incremented. But as explained, >> this is to enforce that data is contiguous and so the tail points to >> the >> last mbuf. I haven't yet encountered cases where this would lead to >> complications, but maybe you have something in mind? > > I feel like we should not design the general APIs to force the current > assumed limitation. > As rte_pktmbuf_lastseg() is already walking the three, we could do it > here instead and checking for the mailroom. >
On 27 Jun 2018, at 12:09, Lam, Tiago wrote: > On 26/06/2018 11:31, Eelco Chaudron wrote: >> >> >> On 22 Jun 2018, at 21:04, Lam, Tiago wrote: >> >>> On 18/06/2018 12:50, Eelco Chaudron wrote: >>>> >>>> >>>> On 11 Jun 2018, at 18:21, Tiago Lam wrote: >>>> >>>>> Most helper functions in dp-packet assume that the data held by a >>>>> dp_packet is contiguous, and perform operations such as pointer >>>>> arithmetic under that assumption. However, with the introduction of >>>>> multi-segment mbufs, where data is non-contiguous, such assumptions >>>>> are >>>>> no longer possible. Some examples of Such helper functions are >>>>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(), >>>>> dp_packet_get_allocated() and dp_packet_at(). >>>>> >>>>> Thus, instead of assuming contiguous data in dp_packet, they now >>>>> iterate over the (non-contiguous) data in mbufs to perform their >>>>> calculations. >>>>> >>>>> Finally, dp_packet_use__() has also been modified to perform the >>>>> initialisation of the packet (and setting the source) before >>>>> continuing >>>>> to set its size and data length, which now depends on the type of >>>>> packet. >>>>> >>>>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >>>>> >>>>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >>>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com> >>>>> --- >>>>> lib/dp-packet.c | 4 +- >>>>> lib/dp-packet.h | 242 >>>>> +++++++++++++++++++++++++++++++++++++++++++++----------- >>>>> 2 files changed, 197 insertions(+), 49 deletions(-) >>>>> >>>>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c >>>>> index 782e7c2..2aaeaae 100644 >>>>> --- a/lib/dp-packet.c >>>>> +++ b/lib/dp-packet.c >>>>> @@ -41,11 +41,11 @@ static void >>>>> dp_packet_use__(struct dp_packet *b, void *base, size_t allocated, >>>>> enum dp_packet_source source) >>>>> { >>>>> + dp_packet_init__(b, allocated, source); >>>>> + >>>>> dp_packet_set_base(b, base); >>>>> dp_packet_set_data(b, base); >>>>> dp_packet_set_size(b, 0); >>>>> - >>>>> - dp_packet_init__(b, allocated, source); >>>>> } >>>>> >>>>> /* Initializes 'b' as an empty dp_packet that contains the >>>>> 'allocated' bytes of >>>>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h >>>>> index c301ed5..272597f 100644 >>>>> --- a/lib/dp-packet.h >>>>> +++ b/lib/dp-packet.h >>>>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct >>>>> dp_packet *, size_t offset, >>>>> size_t size); >>>>> static inline void *dp_packet_at_assert(const struct dp_packet *, >>>>> size_t offset, size_t >>>>> size); >>>>> +#ifdef DPDK_NETDEV >>>>> +static inline void * dp_packet_at_offset(const struct dp_packet *b, >>>>> + size_t offset); >>>>> +#endif >>>>> static inline void *dp_packet_tail(const struct dp_packet *); >>>>> static inline void *dp_packet_end(const struct dp_packet *); >>>>> >>>>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b) >>>>> } >>>>> } >>>>> >>>>> -/* If 'b' contains at least 'offset + size' bytes of data, returns >>>>> a >>>>> pointer to >>>>> - * byte 'offset'. Otherwise, returns a null pointer. */ >>>>> -static inline void * >>>>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >>>>> -{ >>>>> - return offset + size <= dp_packet_size(b) >>>>> - ? (char *) dp_packet_data(b) + offset >>>>> - : NULL; >>>>> -} >>>>> - >>>>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at >>>>> least >>>>> - * 'offset + size' bytes of data. */ >>>>> -static inline void * >>>>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, >>>>> size_t >>>>> size) >>>>> -{ >>>>> - ovs_assert(offset + size <= dp_packet_size(b)); >>>>> - return ((char *) dp_packet_data(b)) + offset; >>>>> -} >>>>> - >>>>> -/* Returns a pointer to byte following the last byte of data in use >>>>> in 'b'. */ >>>>> -static inline void * >>>>> -dp_packet_tail(const struct dp_packet *b) >>>>> -{ >>>>> - return (char *) dp_packet_data(b) + dp_packet_size(b); >>>>> -} >>>>> - >>>>> -/* Returns a pointer to byte following the last byte allocated for >>>>> use (but >>>>> - * not necessarily in use) in 'b'. */ >>>>> -static inline void * >>>>> -dp_packet_end(const struct dp_packet *b) >>>>> -{ >>>>> - return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); >>>>> -} >>>>> - >>>>> /* Returns the number of bytes of headroom in 'b', that is, the >>>>> number of bytes >>>>> * of unused space in dp_packet 'b' before the data that is in use. >>>>> (Most >>>>> * commonly, the data in a dp_packet is at its beginning, and thus >>>>> the >>>>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b) >>>>> static inline size_t >>>>> dp_packet_tailroom(const struct dp_packet *b) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + if (b->source == DPBUF_DPDK) { >>>>> + struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, >>>>> &b->mbuf); >>>>> + tmbuf = rte_pktmbuf_lastseg(tmbuf); >>>>> + >>>> >>>> What if the last two chains contain no user data, we have more >>>> tailroom?!? Should we not call (tmbuf)? >>> >>> I've covered this in my reply to the cover letter, hopefully it is >>> more >>> clear now. This shouldn't be a possibility in the implementation. >>> Also, >>> we do call rte_pktmbuf_tailroom(), just below. >> >> I know but I do not see the reason for not calling >> rte_pktmbuf_tailroom() on the mbuf directly. This will work for all >> cases, add’s less complexity, even in the future if we move to a >> design where we will support multiple buffers. Trying to “limit” the >> usual mbuf cases does not sound right, and it will confuse people. >> > > I think there's some confusion here. rte_pktmbuf_tailroom() doesn't > calculate the case above where "the last two chains contain no user > data". It only calculates the tailroom for the passed mbuf. > > In fact, most of the rte_pktmbuf_* functions does not deal with such > cases. They only do the calculations for the passed mbuf, and it is up > to the user of the library to add logic on top (using helper functions > such as rte_pktmbuf_lastseg() to traverse the chain). Since this series > doesn't allow for an mbuf in the chain to be free of data after the > tail, I didn't add the extra logic. > > Hopefully this clarifies why we are calling rte_pktmbuf_tailroom() in > the tail mbuf only. > It does, thanks. > Tiago. > >>>> >>>>> + return rte_pktmbuf_tailroom(tmbuf); >>>>> + } >>>>> +#endif >>>>> return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); >>>>> } >>>>> >>>>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b) >>>>> static inline void >>>>> dp_packet_clear(struct dp_packet *b) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + dp_packet_set_size(b, 0); >>>>> + rte_pktmbuf_reset(&b->mbuf); >>>> >>>> This would probably assert on none DPBUF_DPDK, do we need a “if >>>> (b->source == DPBUF_DPDK)” here also? >>>> >>> >>> We do, I'll add it to next iteration. It only asserts when using DPDK >>> with `RTE_LIBRTE_MBUF_DEBUG`, but still... >> >> Yes, but you do not want this assert when you are trying to debug >> something else ;) >> >>>>> +#else >>>>> dp_packet_set_data(b, dp_packet_base(b)); >>>>> dp_packet_set_size(b, 0); >>>>> +#endif >>>>> } >>>>> >>>>> /* Removes 'size' bytes from the head end of 'b', which must >>>>> contain >>>>> at least >>>>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t >>>>> size) >>>>> return data; >>>>> } >>>>> >>>>> +#ifdef DPDK_NETDEV >>>>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any >>>>> code, only >>>>> + * returns true or false if it would be possible to actually pull >>>>> any >>>>> code. >>>>> + * Valid for dp_packets carrying mbufs only. */ >>>>> +static inline bool >>>>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) { >>>>> + if (size > b->mbuf.data_len) { >>>>> + return false; >>>>> + } >>>>> + >>>>> + return true; >>>>> +} >>>>> +#endif >>>>> + >>>>> /* If 'b' has at least 'size' bytes of data, removes that many >>>>> bytes >>>>> from the >>>>> * head end of 'b' and returns the first byte removed. Otherwise, >>>>> returns a >>>>> * null pointer without modifying 'b'. */ >>>>> static inline void * >>>>> dp_packet_try_pull(struct dp_packet *b, size_t size) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + if (!dp_packet_mbuf_may_pull(b, size)) { >>>>> + return NULL; >>>>> + } >>>>> +#endif >>>>> + >>>>> return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size >>>>> ? dp_packet_pull(b, size) : NULL; >>>>> } >>>>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, >>>>> uint8_t pad_size) >>>>> static inline void * >>>>> dp_packet_l2_5(const struct dp_packet *b) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) { >>>>> + return NULL; >>>>> + } >>>>> +#endif >>>>> + >>>>> return b->l2_5_ofs != UINT16_MAX >>>>> ? (char *) dp_packet_data(b) + b->l2_5_ofs >>>>> : NULL; >>>>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void >>>>> *l2_5) >>>>> static inline void * >>>>> dp_packet_l3(const struct dp_packet *b) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) { >>>>> + return NULL; >>>>> + } >>>>> +#endif >>>>> + >>>> >>>> Can we not use the dp_packet_at_offset() function here (and the >>>> similar >>>> functions)? And include the above and below checks there? >>>> >>> >>> They are aimed at different purposes. dp_packet_at_offset() should >>> walk >>> the chain of mbufs and return a pointer to the offset, while here we >>> actually want to know if the offset is within the first mbuf. If these >>> headers are not within the first mbuf, we don't return it as it >>> crosses >>> mbufs. >> >> My bad, I was referring to dp_packet_at(), which probably also need the >> may_pull() if there are control protocols with jumbo packets? >> >> >>>>> return b->l3_ofs != UINT16_MAX >>>>> ? (char *) dp_packet_data(b) + b->l3_ofs >>>>> : NULL; >>>>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3) >>>>> static inline void * >>>>> dp_packet_l4(const struct dp_packet *b) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) { >>>>> + return NULL; >>>>> + } >>>>> +#endif >>>>> + >>>>> return b->l4_ofs != UINT16_MAX >>>>> ? (char *) dp_packet_data(b) + b->l4_ofs >>>>> : NULL; >>>>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void >>>>> *l4) >>>>> static inline size_t >>>>> dp_packet_l4_size(const struct dp_packet *b) >>>>> { >>>>> +#ifdef DPDK_NETDEV >>>>> + if (b->l4_ofs == UINT16_MAX) { >>>>> + return 0; >>>>> + } >>>>> + >>>>> + struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, >>>>> &b->mbuf); >>>>> + struct rte_mbuf *tmbuf = dp_packet_tail(b); >>>>> + >>>>> + size_t l4_size = 0; >>>>> + if (hmbuf == tmbuf) { >>>> >>>> Do not think this optimisation help much, as the dp_packet_tail(b) >>>> does >>>> the same walk as in the "while (hmbuf)" clause below, so why not just >>>> keet the else case? >>>> >>> >>> Yeah, good point. I'll change that in the next iteration. >>> >>>>> + l4_size = hmbuf->data_len - b->l4_ofs; >>>>> + } else { >>>>> + l4_size = hmbuf->data_len - b->l4_ofs; >>>>> + hmbuf = hmbuf->next; >>>>> + >>>>> + while (hmbuf) { >>>>> + l4_size += tmbuf->data_len; >>>>> + >>>>> + hmbuf = hmbuf->next; >>>>> + } >>>>> + } >>>>> + >>>>> + l4_size -= dp_packet_l2_pad_size(b); >>>>> + >>>>> + return l4_size; >>>>> +#else >>>>> return b->l4_ofs != UINT16_MAX >>>>> ? (const char *)dp_packet_tail(b) - (const char >>>>> *)dp_packet_l4(b) >>>>> - dp_packet_l2_pad_size(b) >>>>> : 0; >>>>> +#endif >>>>> } >>>>> >>>>> static inline const void * >>>>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t >>>>> v) >>>>> static inline uint16_t >>>>> dp_packet_get_allocated(const struct dp_packet *b) >>>>> { >>>>> - return b->mbuf.buf_len; >>>>> + return b->mbuf.nb_segs * b->mbuf.buf_len; >>>>> } >>>>> >>>>> static inline void >>>>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, >>>>> uint16_t s) >>>>> { >>>>> b->mbuf.buf_len = s; >>>>> } >>>>> + >>>>> +static inline void * >>>>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset) >>>>> +{ >>>>> + if (b->source == DPBUF_DPDK) { >>>>> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >>>>> &b->mbuf); >>>>> + >>>>> + while (buf && offset > buf->data_len) { >>>>> + offset -= buf->data_len; >>>>> + >>>>> + buf = buf->next; >>>>> + } >>>>> + return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : >>>>> NULL; >>>>> + } else { >>>>> + return (char *) dp_packet_data(b) + offset; >>>>> + } >>>>> +} >>>>> + >>>>> +/* If 'b' contains at least 'offset + size' bytes of data, returns >>>>> a >>>>> pointer to >>>>> + * byte 'offset'. Otherwise, returns a null pointer. */ >>>>> +static inline void * >>>>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) >>>>> +{ >>>>> + return offset + size <= dp_packet_size(b) >>>>> + ? dp_packet_at_offset(b, offset) >>>>> + : NULL; >>>>> +} >>>>> + >>>>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at >>>>> least >>>>> + * 'offset + size' bytes of data. */ >>>>> +static inline void * >>>>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, >>>>> size_t >>>>> size) >>>>> +{ >>>>> + ovs_assert(offset + size <= dp_packet_size(b)); >>>>> + return dp_packet_at_offset(b, offset); >>>>> +} >>>>> + >>>>> +/* Returns a pointer to byte following the last byte of data in use >>>>> in 'b'. */ >>>>> +static inline void * >>>>> +dp_packet_tail(const struct dp_packet *b) >>>>> +{ >>>>> + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); >>>>> + if (b->source == DPBUF_DPDK) { >>>>> + /* Find last segment where data ends, meaning the tail of >>>>> the >>>>> chained >>>>> + mbufs is there */ >>>>> + buf = rte_pktmbuf_lastseg(buf); >>>>> + } >>>>> + >>>> >>>> What if the last two chains contains no user data, we should find the >>>> last buffer with data. >>>> Looks like we assume only the last in the chain contains room, and we >>>> achieve this by stripping off buffers (but we never add them again >>>> when >>>> needed, so something is wrong in this implementation). >>>> >>> >>> Does my reply to the cover letter help here as well? That's a valid >>> point, that if the size is decreased and mbufs are freed then mbufs >>> are >>> not allocated again if size needs to be incremented. But as explained, >>> this is to enforce that data is contiguous and so the tail points to >>> the >>> last mbuf. I haven't yet encountered cases where this would lead to >>> complications, but maybe you have something in mind? >> >> I feel like we should not design the general APIs to force the current >> assumed limitation. >> As rte_pktmbuf_lastseg() is already walking the three, we could do it >> here instead and checking for the mailroom. >>
diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 782e7c2..2aaeaae 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -41,11 +41,11 @@ static void dp_packet_use__(struct dp_packet *b, void *base, size_t allocated, enum dp_packet_source source) { + dp_packet_init__(b, allocated, source); + dp_packet_set_base(b, base); dp_packet_set_data(b, base); dp_packet_set_size(b, 0); - - dp_packet_init__(b, allocated, source); } /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of diff --git a/lib/dp-packet.h b/lib/dp-packet.h index c301ed5..272597f 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct dp_packet *, size_t offset, size_t size); static inline void *dp_packet_at_assert(const struct dp_packet *, size_t offset, size_t size); +#ifdef DPDK_NETDEV +static inline void * dp_packet_at_offset(const struct dp_packet *b, + size_t offset); +#endif static inline void *dp_packet_tail(const struct dp_packet *); static inline void *dp_packet_end(const struct dp_packet *); @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b) } } -/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to - * byte 'offset'. Otherwise, returns a null pointer. */ -static inline void * -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) -{ - return offset + size <= dp_packet_size(b) - ? (char *) dp_packet_data(b) + offset - : NULL; -} - -/* Returns a pointer to byte 'offset' in 'b', which must contain at least - * 'offset + size' bytes of data. */ -static inline void * -dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size) -{ - ovs_assert(offset + size <= dp_packet_size(b)); - return ((char *) dp_packet_data(b)) + offset; -} - -/* Returns a pointer to byte following the last byte of data in use in 'b'. */ -static inline void * -dp_packet_tail(const struct dp_packet *b) -{ - return (char *) dp_packet_data(b) + dp_packet_size(b); -} - -/* Returns a pointer to byte following the last byte allocated for use (but - * not necessarily in use) in 'b'. */ -static inline void * -dp_packet_end(const struct dp_packet *b) -{ - return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); -} - /* Returns the number of bytes of headroom in 'b', that is, the number of bytes * of unused space in dp_packet 'b' before the data that is in use. (Most * commonly, the data in a dp_packet is at its beginning, and thus the @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b) static inline size_t dp_packet_tailroom(const struct dp_packet *b) { +#ifdef DPDK_NETDEV + if (b->source == DPBUF_DPDK) { + struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf); + tmbuf = rte_pktmbuf_lastseg(tmbuf); + + return rte_pktmbuf_tailroom(tmbuf); + } +#endif return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); } @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b) static inline void dp_packet_clear(struct dp_packet *b) { +#ifdef DPDK_NETDEV + dp_packet_set_size(b, 0); + rte_pktmbuf_reset(&b->mbuf); +#else dp_packet_set_data(b, dp_packet_base(b)); dp_packet_set_size(b, 0); +#endif } /* Removes 'size' bytes from the head end of 'b', which must contain at least @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t size) return data; } +#ifdef DPDK_NETDEV +/* Similar to dp_packet_try_pull() but doesn't actually pull any code, only + * returns true or false if it would be possible to actually pull any code. + * Valid for dp_packets carrying mbufs only. */ +static inline bool +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) { + if (size > b->mbuf.data_len) { + return false; + } + + return true; +} +#endif + /* If 'b' has at least 'size' bytes of data, removes that many bytes from the * head end of 'b' and returns the first byte removed. Otherwise, returns a * null pointer without modifying 'b'. */ static inline void * dp_packet_try_pull(struct dp_packet *b, size_t size) { +#ifdef DPDK_NETDEV + if (!dp_packet_mbuf_may_pull(b, size)) { + return NULL; + } +#endif + return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size ? dp_packet_pull(b, size) : NULL; } @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, uint8_t pad_size) static inline void * dp_packet_l2_5(const struct dp_packet *b) { +#ifdef DPDK_NETDEV + if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) { + return NULL; + } +#endif + return b->l2_5_ofs != UINT16_MAX ? (char *) dp_packet_data(b) + b->l2_5_ofs : NULL; @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void *l2_5) static inline void * dp_packet_l3(const struct dp_packet *b) { +#ifdef DPDK_NETDEV + if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) { + return NULL; + } +#endif + return b->l3_ofs != UINT16_MAX ? (char *) dp_packet_data(b) + b->l3_ofs : NULL; @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3) static inline void * dp_packet_l4(const struct dp_packet *b) { +#ifdef DPDK_NETDEV + if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) { + return NULL; + } +#endif + return b->l4_ofs != UINT16_MAX ? (char *) dp_packet_data(b) + b->l4_ofs : NULL; @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void *l4) static inline size_t dp_packet_l4_size(const struct dp_packet *b) { +#ifdef DPDK_NETDEV + if (b->l4_ofs == UINT16_MAX) { + return 0; + } + + struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf); + struct rte_mbuf *tmbuf = dp_packet_tail(b); + + size_t l4_size = 0; + if (hmbuf == tmbuf) { + l4_size = hmbuf->data_len - b->l4_ofs; + } else { + l4_size = hmbuf->data_len - b->l4_ofs; + hmbuf = hmbuf->next; + + while (hmbuf) { + l4_size += tmbuf->data_len; + + hmbuf = hmbuf->next; + } + } + + l4_size -= dp_packet_l2_pad_size(b); + + return l4_size; +#else return b->l4_ofs != UINT16_MAX ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l4(b) - dp_packet_l2_pad_size(b) : 0; +#endif } static inline const void * @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t v) static inline uint16_t dp_packet_get_allocated(const struct dp_packet *b) { - return b->mbuf.buf_len; + return b->mbuf.nb_segs * b->mbuf.buf_len; } static inline void @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) { b->mbuf.buf_len = s; } + +static inline void * +dp_packet_at_offset(const struct dp_packet *b, size_t offset) +{ + if (b->source == DPBUF_DPDK) { + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); + + while (buf && offset > buf->data_len) { + offset -= buf->data_len; + + buf = buf->next; + } + return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : NULL; + } else { + return (char *) dp_packet_data(b) + offset; + } +} + +/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to + * byte 'offset'. Otherwise, returns a null pointer. */ +static inline void * +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) +{ + return offset + size <= dp_packet_size(b) + ? dp_packet_at_offset(b, offset) + : NULL; +} + +/* Returns a pointer to byte 'offset' in 'b', which must contain at least + * 'offset + size' bytes of data. */ +static inline void * +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size) +{ + ovs_assert(offset + size <= dp_packet_size(b)); + return dp_packet_at_offset(b, offset); +} + +/* Returns a pointer to byte following the last byte of data in use in 'b'. */ +static inline void * +dp_packet_tail(const struct dp_packet *b) +{ + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf); + if (b->source == DPBUF_DPDK) { + /* Find last segment where data ends, meaning the tail of the chained + mbufs is there */ + buf = rte_pktmbuf_lastseg(buf); + } + + return rte_pktmbuf_mtod_offset(buf, void *, buf->data_len); +} + +/* Returns a pointer to byte following the last byte allocated for use (but + * not necessarily in use) in 'b'. */ +static inline void * +dp_packet_end(const struct dp_packet *b) +{ + if (b->source == DPBUF_DPDK) { + struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &(b->mbuf)); + + buf = rte_pktmbuf_lastseg(buf); + + return (char *) buf->buf_addr + buf->buf_len; + } else { + return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); + } +} #else +/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to + * byte 'offset'. Otherwise, returns a null pointer. */ +static inline void * +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) +{ + return offset + size <= dp_packet_size(b) + ? (char *) dp_packet_data(b) + offset + : NULL; +} + +/* Returns a pointer to byte 'offset' in 'b', which must contain at least + * 'offset + size' bytes of data. */ +static inline void * +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size) +{ + ovs_assert(offset + size <= dp_packet_size(b)); + return ((char *) dp_packet_data(b)) + offset; +} + +/* Returns a pointer to byte following the last byte of data in use in 'b'. */ +static inline void * +dp_packet_tail(const struct dp_packet *b) +{ + return (char *) dp_packet_data(b) + dp_packet_size(b); +} + +/* Returns a pointer to byte following the last byte allocated for use (but + * not necessarily in use) in 'b'. */ +static inline void * +dp_packet_end(const struct dp_packet *b) +{ + return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); +} + static inline void * dp_packet_base(const struct dp_packet *b) { @@ -518,34 +666,34 @@ dp_packet_size(const struct dp_packet *b) return b->size_; } -static inline void -dp_packet_set_size(struct dp_packet *b, uint32_t v) +static inline uint16_t +dp_packet_get_allocated(const struct dp_packet *b) { - b->size_ = v; + return b->allocated_; } -static inline uint16_t -__packet_data(const struct dp_packet *b) +static inline void +dp_packet_set_allocated(struct dp_packet *b, uint16_t s) { - return b->data_ofs; + b->allocated_ = s; } static inline void -__packet_set_data(struct dp_packet *b, uint16_t v) +dp_packet_set_size(struct dp_packet *b, uint32_t v) { - b->data_ofs = v; + b->size_ = v; } static inline uint16_t -dp_packet_get_allocated(const struct dp_packet *b) +__packet_data(const struct dp_packet *b) { - return b->allocated_; + return b->data_ofs; } static inline void -dp_packet_set_allocated(struct dp_packet *b, uint16_t s) +__packet_set_data(struct dp_packet *b, uint16_t v) { - b->allocated_ = s; + b->data_ofs = v; } #endif