[ovs-dev,v3,4/4] dp-packet: Use memcpy on dp_packet elements.

Message ID 1502466766-17370-5-git-send-email-antonio.fischetti@intel.com
State Accepted
Delegated to: Darrell Ball
Headers show

Commit Message

Fischetti, Antonio Aug. 11, 2017, 3:52 p.m.
memcpy replaces the several single copies inside
dp_packet_clone_with_headroom().

Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
---
I tested this change by comparing the CPU Time over a 60 sec analysis
with VTune.

In original ovs:
dp_packet_clone_with_headroom    4.530s

+ this changes:
dp_packet_clone_with_headroom    3.920s

Further details were reported in this reply for v1
https://mail.openvswitch.org/pipermail/ovs-dev/2017-June/334536.html
---
 lib/dp-packet.c | 18 +++++++++---------
 lib/dp-packet.h |  6 +++++-
 2 files changed, 14 insertions(+), 10 deletions(-)

Comments

Darrell Ball Aug. 14, 2017, 6:34 a.m. | #1
Could you show some throughput results for a particular tests ?

-----Original Message-----
From: <ovs-dev-bounces@openvswitch.org> on behalf of "antonio.fischetti@intel.com" <antonio.fischetti@intel.com>
Date: Friday, August 11, 2017 at 8:52 AM
To: "dev@openvswitch.org" <dev@openvswitch.org>
Subject: [ovs-dev] [PATCH v3 4/4] dp-packet: Use memcpy on dp_packet	elements.

    memcpy replaces the several single copies inside
    dp_packet_clone_with_headroom().
    
    Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
    ---
    I tested this change by comparing the CPU Time over a 60 sec analysis
    with VTune.
    
    In original ovs:
    dp_packet_clone_with_headroom    4.530s
    
    + this changes:
    dp_packet_clone_with_headroom    3.920s
    
    Further details were reported in this reply for v1
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_pipermail_ovs-2Ddev_2017-2DJune_334536.html&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=6qGelsac4wBCA5cHYalnmiB0QySdkGCItzNJ_-twiJg&s=Wc0BH6ff_CG15qMnkGq8VDI5YmNby5Eo3JxRdZslOsI&e= 
    ---
     lib/dp-packet.c | 18 +++++++++---------
     lib/dp-packet.h |  6 +++++-
     2 files changed, 14 insertions(+), 10 deletions(-)
    
    diff --git a/lib/dp-packet.c b/lib/dp-packet.c
    index 67aa406..f4dbcb7 100644
    --- a/lib/dp-packet.c
    +++ b/lib/dp-packet.c
    @@ -157,8 +157,9 @@ dp_packet_clone(const struct dp_packet *buffer)
         return dp_packet_clone_with_headroom(buffer, 0);
     }
     
    -/* Creates and returns a new dp_packet whose data are copied from 'buffer'.   The
    - * returned dp_packet will additionally have 'headroom' bytes of headroom. */
    +/* Creates and returns a new dp_packet whose data are copied from 'buffer'.
    + * The returned dp_packet will additionally have 'headroom' bytes of
    + * headroom. */
     struct dp_packet *
     dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
     {
    @@ -167,13 +168,12 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
         new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
                                                      dp_packet_size(buffer),
                                                      headroom);
    -    new_buffer->l2_pad_size = buffer->l2_pad_size;
    -    new_buffer->l2_5_ofs = buffer->l2_5_ofs;
    -    new_buffer->l3_ofs = buffer->l3_ofs;
    -    new_buffer->l4_ofs = buffer->l4_ofs;
    -    new_buffer->md = buffer->md;
    -    new_buffer->cutlen = buffer->cutlen;
    -    new_buffer->packet_type = buffer->packet_type;
    +    /* Copy the following fields into the returned buffer: l2_pad_size,
    +     * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
    +    memcpy(&new_buffer->l2_pad_size, &buffer->l2_pad_size,
    +            sizeof(struct dp_packet) -
    +            offsetof(struct dp_packet, l2_pad_size));
    +
     #ifdef DPDK_NETDEV
         new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
     #else
    diff --git a/lib/dp-packet.h b/lib/dp-packet.h
    index 9dbb611..9cab7c7 100644
    --- a/lib/dp-packet.h
    +++ b/lib/dp-packet.h
    @@ -40,7 +40,8 @@ enum OVS_PACKED_ENUM dp_packet_source {
         DPBUF_STACK,               /* Un-movable stack space or static buffer. */
         DPBUF_STUB,                /* Starts on stack, may expand into heap. */
         DPBUF_DPDK,                /* buffer data is from DPDK allocated memory.
    -                                * ref to dp_packet_init_dpdk() in dp-packet.c. */
    +                                * ref to dp_packet_init_dpdk() in dp-packet.c.
    +                                */
     };
     
     #define DP_PACKET_CONTEXT_SIZE 64
    @@ -61,6 +62,9 @@ struct dp_packet {
         bool rss_hash_valid;        /* Is the 'rss_hash' valid? */
     #endif
         enum dp_packet_source source;  /* Source of memory allocated as 'base'. */
    +
    +    /* All the following elements of this struct are copied by a single call
    +     * to memcpy in dp_packet_clone_with_headroom. */
         uint8_t l2_pad_size;           /* Detected l2 padding size.
                                         * Padding is non-pullable. */
         uint16_t l2_5_ofs;             /* MPLS label stack offset, or UINT16_MAX */
    -- 
    2.4.11
    
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=6qGelsac4wBCA5cHYalnmiB0QySdkGCItzNJ_-twiJg&s=LREI681tchx40jdHLb94j5YDPgqXZnbI9_Yv4QNnqyA&e=
Fischetti, Antonio Aug. 17, 2017, 10:44 a.m. | #2
> -----Original Message-----
> From: Darrell Ball [mailto:dball@vmware.com]
> Sent: Monday, August 14, 2017 7:34 AM
> To: Fischetti, Antonio <antonio.fischetti@intel.com>; dev@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH v3 4/4] dp-packet: Use memcpy on dp_packet
> elements.
> 
> Could you show some throughput results for a particular tests ?

[Antonio]
The only way I could test this change is with a setup that I was 
initially using on a modified firewall for learning purposes. 
I had added a normal action to the 1st rule below, this can't be 
a real scenario. I couldn't find a better test to involve the 
dp_packet_clone_with_headroom function, if anyone has some
suggestion is welcome.
Below some continuous tests results, ie bidir test with packets
sent at the line-rate. The Rx pkt rate is measured, without 
considering the packet loss.

	Test setup
	----------

table=0, priority=100,ct_state=-trk,ip actions=ct(table=1),NORMAL  <----
table=0, priority=10,arp actions=NORMAL
table=0, priority=1 actions=drop
table=1, ct_state=+new+trk,ip,in_port=dpdk0 actions=ct(commit),output:dpdk1
table=1, ct_state=+new+trk,ip,in_port=dpdk1 actions=drop
table=1, ct_state=+est+trk,ip,in_port=dpdk0 actions=output:dpdk1
table=1, ct_state=+est+trk,ip,in_port=dpdk1 actions=output:dpdk0

2 PMDs, bidir test, 64B UDP packets.

	Throughput results
	------------------

Original OvS-DPDK means at Commit ID:
    6b1babacc3ca0488e07596bf822fe356c9bab646

              +----------------------+-----------------------+
              |  Original OvS-DPDK + |   Original OvS-DPDK + |
              |  patches 1,2,3       |   patches 1,2,3 +     |
              |                      |      this patch       |
     ---------+----------------------+-----------------------+
      Traffic |          Rx          |          Rx           |
      Streams |        [Mpps]        |        [Mpps]         |
     ---------+----------------------+-----------------------+
         100  |      3.03, 3.02      |      3.04, 3.04       |
         500  |      2.87, 2.86      |      2.90, 2.87       |
       1,000  |      2.72, 2.70      |      2.80, 2.76       |
       2,000  |      2.57, 2.59      |      2.59, 2.58       |
       5,000  |      2.54, 2.47      |      2.53, 2.48       |
      10,000  |      2.43, 2.39      |      2.43, 2.39       |
     ---------+----------------------+-----------------------+


	Perf comparison with 500 UDP connections
	----------------------------------------

With perf top I am seeing the following difference on 
dp_packet_clone_with_headroom: from 0.95% -> 0.77%, 
below some details.

Orig + patches 1,2,3:	
---------------------
  11.09%  ovs-vswitchd        [.] dp_netdev_input__
   9.42%  libc-2.21.so        [.] malloc
   9.33%  libpthread-2.21.so  [.] pthread_mutex_unlock
   7.49%  libc-2.21.so        [.] free
   6.27%  ovs-vswitchd        [.] miniflow_extract
   5.94%  libc-2.21.so        [.] __memcpy_avx_unaligned
   4.43%  ovs-vswitchd        [.] ovs_mutex_lock_at
   4.37%  libc-2.21.so        [.] __memcmp_sse4_1
   4.30%  libc-2.21.so        [.] _int_malloc
   4.20%  libpthread-2.21.so  [.] pthread_mutex_lock
   3.34%  ovs-vswitchd        [.] dpdk_do_tx_copy
   3.34%  ovs-vswitchd        [.] conn_key_lookup
   3.20%  ovs-vswitchd        [.] ixgbe_xmit_fixed_burst_vec
   2.58%  ovs-vswitchd        [.] conntrack_execute
   2.49%  ovs-vswitchd        [.] conn_key_hash
   2.25%  ovs-vswitchd        [.] process_one
   2.19%  ovs-vswitchd        [.] other_conn_update
   1.77%  ovs-vswitchd        [.] ixgbe_recv_pkts_vec
   0.95%  ovs-vswitchd        [.] dp_packet_clone_with_headroom     <<<<<<<<<<<<
   0.83%  ovs-vswitchd        [.] write_ct_md
   0.79%  ovs-vswitchd        [.] dp_execute_cb
   0.74%  ovs-vswitchd        [.] conn_update_state.isra.15.part.16
   0.63%  ovs-vswitchd        [.] conn_key_cmp
   0.42%  ovs-vswitchd        [.] dp_packet_put

Orig + patches 1,2,3 + this patch:
----------------------------------
  11.72%  ovs-vswitchd        [.] dp_netdev_input__
   9.30%  libpthread-2.21.so  [.] pthread_mutex_unlock
   9.09%  libc-2.21.so        [.] malloc
   7.52%  libc-2.21.so        [.] free
   6.40%  libc-2.21.so        [.] __memcpy_avx_unaligned
   6.04%  ovs-vswitchd        [.] miniflow_extract
   4.55%  libc-2.21.so        [.] _int_malloc
   4.45%  libc-2.21.so        [.] __memcmp_sse4_1
   4.31%  ovs-vswitchd        [.] ovs_mutex_lock_at
   3.88%  libpthread-2.21.so  [.] pthread_mutex_lock
   3.62%  ovs-vswitchd        [.] dpdk_do_tx_copy
   3.62%  ovs-vswitchd        [.] conn_key_lookup
   3.00%  ovs-vswitchd        [.] ixgbe_xmit_fixed_burst_vec
   2.60%  ovs-vswitchd        [.] conntrack_execute
   2.42%  ovs-vswitchd        [.] conn_key_hash
   2.39%  ovs-vswitchd        [.] other_conn_update
   2.30%  ovs-vswitchd        [.] process_one
   1.65%  ovs-vswitchd        [.] ixgbe_recv_pkts_vec
   0.78%  ovs-vswitchd        [.] dp_execute_cb
   0.77%  ovs-vswitchd        [.] dp_packet_clone_with_headroom     <<<<<<<<<<<<
   0.71%  ovs-vswitchd        [.] write_ct_md
   0.71%  ovs-vswitchd        [.] conn_update_state.isra.15.part.16
   0.63%  ovs-vswitchd        [.] conn_key_cmp


	VTune analysis
	--------------
The CPU Time over a 60 sec analysis goes from 4.530s -> 3.920s.
Please refer to results reported in previous email.



> 
> -----Original Message-----
> From: <ovs-dev-bounces@openvswitch.org> on behalf of
> "antonio.fischetti@intel.com" <antonio.fischetti@intel.com>
> Date: Friday, August 11, 2017 at 8:52 AM
> To: "dev@openvswitch.org" <dev@openvswitch.org>
> Subject: [ovs-dev] [PATCH v3 4/4] dp-packet: Use memcpy on dp_packet
> 	elements.
> 
>     memcpy replaces the several single copies inside
>     dp_packet_clone_with_headroom().
> 
>     Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
>     ---
>     I tested this change by comparing the CPU Time over a 60 sec analysis
>     with VTune.
> 
>     In original ovs:
>     dp_packet_clone_with_headroom    4.530s
> 
>     + this changes:
>     dp_packet_clone_with_headroom    3.920s
> 
>     Further details were reported in this reply for v1
>     https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_pipermail_ovs-2Ddev_2017-
> 2DJune_334536.html&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
> uZnsw&m=6qGelsac4wBCA5cHYalnmiB0QySdkGCItzNJ_-
> twiJg&s=Wc0BH6ff_CG15qMnkGq8VDI5YmNby5Eo3JxRdZslOsI&e=
>     ---
>      lib/dp-packet.c | 18 +++++++++---------
>      lib/dp-packet.h |  6 +++++-
>      2 files changed, 14 insertions(+), 10 deletions(-)
> 
>     diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>     index 67aa406..f4dbcb7 100644
>     --- a/lib/dp-packet.c
>     +++ b/lib/dp-packet.c
>     @@ -157,8 +157,9 @@ dp_packet_clone(const struct dp_packet *buffer)
>          return dp_packet_clone_with_headroom(buffer, 0);
>      }
> 
>     -/* Creates and returns a new dp_packet whose data are copied from
> 'buffer'.   The
>     - * returned dp_packet will additionally have 'headroom' bytes of headroom.
> */
>     +/* Creates and returns a new dp_packet whose data are copied from
> 'buffer'.
>     + * The returned dp_packet will additionally have 'headroom' bytes of
>     + * headroom. */
>      struct dp_packet *
>      dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t
> headroom)
>      {
>     @@ -167,13 +168,12 @@ dp_packet_clone_with_headroom(const struct dp_packet
> *buffer, size_t headroom)
>          new_buffer =
> dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
>                                                       dp_packet_size(buffer),
>                                                       headroom);
>     -    new_buffer->l2_pad_size = buffer->l2_pad_size;
>     -    new_buffer->l2_5_ofs = buffer->l2_5_ofs;
>     -    new_buffer->l3_ofs = buffer->l3_ofs;
>     -    new_buffer->l4_ofs = buffer->l4_ofs;
>     -    new_buffer->md = buffer->md;
>     -    new_buffer->cutlen = buffer->cutlen;
>     -    new_buffer->packet_type = buffer->packet_type;
>     +    /* Copy the following fields into the returned buffer: l2_pad_size,
>     +     * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
>     +    memcpy(&new_buffer->l2_pad_size, &buffer->l2_pad_size,
>     +            sizeof(struct dp_packet) -
>     +            offsetof(struct dp_packet, l2_pad_size));
>     +
>      #ifdef DPDK_NETDEV
>          new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
>      #else
>     diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>     index 9dbb611..9cab7c7 100644
>     --- a/lib/dp-packet.h
>     +++ b/lib/dp-packet.h
>     @@ -40,7 +40,8 @@ enum OVS_PACKED_ENUM dp_packet_source {
>          DPBUF_STACK,               /* Un-movable stack space or static buffer.
> */
>          DPBUF_STUB,                /* Starts on stack, may expand into heap.
> */
>          DPBUF_DPDK,                /* buffer data is from DPDK allocated
> memory.
>     -                                * ref to dp_packet_init_dpdk() in dp-
> packet.c. */
>     +                                * ref to dp_packet_init_dpdk() in dp-
> packet.c.
>     +                                */
>      };
> 
>      #define DP_PACKET_CONTEXT_SIZE 64
>     @@ -61,6 +62,9 @@ struct dp_packet {
>          bool rss_hash_valid;        /* Is the 'rss_hash' valid? */
>      #endif
>          enum dp_packet_source source;  /* Source of memory allocated as
> 'base'. */
>     +
>     +    /* All the following elements of this struct are copied by a single
> call
>     +     * to memcpy in dp_packet_clone_with_headroom. */
>          uint8_t l2_pad_size;           /* Detected l2 padding size.
>                                          * Padding is non-pullable. */
>          uint16_t l2_5_ofs;             /* MPLS label stack offset, or
> UINT16_MAX */
>     --
>     2.4.11
> 
>     _______________________________________________
>     dev mailing list
>     dev@openvswitch.org
>     https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_mailman_listinfo_ovs-
> 2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
> uZnsw&m=6qGelsac4wBCA5cHYalnmiB0QySdkGCItzNJ_-
> twiJg&s=LREI681tchx40jdHLb94j5YDPgqXZnbI9_Yv4QNnqyA&e=
>
Darrell Ball Aug. 22, 2017, 11:24 p.m. | #3
On 8/17/17, 3:44 AM, "Fischetti, Antonio" <antonio.fischetti@intel.com> wrote:

    
    > -----Original Message-----
    > From: Darrell Ball [mailto:dball@vmware.com]
    > Sent: Monday, August 14, 2017 7:34 AM
    > To: Fischetti, Antonio <antonio.fischetti@intel.com>; dev@openvswitch.org
    > Subject: Re: [ovs-dev] [PATCH v3 4/4] dp-packet: Use memcpy on dp_packet
    > elements.
    > 
    > Could you show some throughput results for a particular tests ?
    
    [Antonio]
    The only way I could test this change is with a setup that I was 
    initially using on a modified firewall for learning purposes. 
    I had added a normal action to the 1st rule below, this can't be 
    a real scenario. I couldn't find a better test to involve the 
    dp_packet_clone_with_headroom function, if anyone has some
    suggestion is welcome.


[Darrell] I had the same issue; that is why I sent the e-mail (
                 This code change is an improvement regardless.
                 We should get patches 2-4 in.
                 I realize patch 1 still has discussion going on.


    Below some continuous tests results, ie bidir test with packets
    sent at the line-rate. The Rx pkt rate is measured, without 
    considering the packet loss.
    
    	Test setup
    	----------
    
    table=0, priority=100,ct_state=-trk,ip actions=ct(table=1),NORMAL  <----
    table=0, priority=10,arp actions=NORMAL
    table=0, priority=1 actions=drop
    table=1, ct_state=+new+trk,ip,in_port=dpdk0 actions=ct(commit),output:dpdk1
    table=1, ct_state=+new+trk,ip,in_port=dpdk1 actions=drop
    table=1, ct_state=+est+trk,ip,in_port=dpdk0 actions=output:dpdk1
    table=1, ct_state=+est+trk,ip,in_port=dpdk1 actions=output:dpdk0
    
    2 PMDs, bidir test, 64B UDP packets.
    
    	Throughput results
    	------------------
    
    Original OvS-DPDK means at Commit ID:
        6b1babacc3ca0488e07596bf822fe356c9bab646
    
                  +----------------------+-----------------------+
                  |  Original OvS-DPDK + |   Original OvS-DPDK + |
                  |  patches 1,2,3       |   patches 1,2,3 +     |
                  |                      |      this patch       |
         ---------+----------------------+-----------------------+
          Traffic |          Rx          |          Rx           |
          Streams |        [Mpps]        |        [Mpps]         |
         ---------+----------------------+-----------------------+
             100  |      3.03, 3.02      |      3.04, 3.04       |
             500  |      2.87, 2.86      |      2.90, 2.87       |
           1,000  |      2.72, 2.70      |      2.80, 2.76       |
           2,000  |      2.57, 2.59      |      2.59, 2.58       |
           5,000  |      2.54, 2.47      |      2.53, 2.48       |
          10,000  |      2.43, 2.39      |      2.43, 2.39       |
         ---------+----------------------+-----------------------+
    
    
    	Perf comparison with 500 UDP connections
    	----------------------------------------
    
    With perf top I am seeing the following difference on 
    dp_packet_clone_with_headroom: from 0.95% -> 0.77%, 
    below some details.
    
    Orig + patches 1,2,3:	
    ---------------------
      11.09%  ovs-vswitchd        [.] dp_netdev_input__
       9.42%  libc-2.21.so        [.] malloc
       9.33%  libpthread-2.21.so  [.] pthread_mutex_unlock
       7.49%  libc-2.21.so        [.] free
       6.27%  ovs-vswitchd        [.] miniflow_extract
       5.94%  libc-2.21.so        [.] __memcpy_avx_unaligned
       4.43%  ovs-vswitchd        [.] ovs_mutex_lock_at
       4.37%  libc-2.21.so        [.] __memcmp_sse4_1
       4.30%  libc-2.21.so        [.] _int_malloc
       4.20%  libpthread-2.21.so  [.] pthread_mutex_lock
       3.34%  ovs-vswitchd        [.] dpdk_do_tx_copy
       3.34%  ovs-vswitchd        [.] conn_key_lookup
       3.20%  ovs-vswitchd        [.] ixgbe_xmit_fixed_burst_vec
       2.58%  ovs-vswitchd        [.] conntrack_execute
       2.49%  ovs-vswitchd        [.] conn_key_hash
       2.25%  ovs-vswitchd        [.] process_one
       2.19%  ovs-vswitchd        [.] other_conn_update
       1.77%  ovs-vswitchd        [.] ixgbe_recv_pkts_vec
       0.95%  ovs-vswitchd        [.] dp_packet_clone_with_headroom     <<<<<<<<<<<<
       0.83%  ovs-vswitchd        [.] write_ct_md
       0.79%  ovs-vswitchd        [.] dp_execute_cb
       0.74%  ovs-vswitchd        [.] conn_update_state.isra.15.part.16
       0.63%  ovs-vswitchd        [.] conn_key_cmp
       0.42%  ovs-vswitchd        [.] dp_packet_put
    
    Orig + patches 1,2,3 + this patch:
    ----------------------------------
      11.72%  ovs-vswitchd        [.] dp_netdev_input__
       9.30%  libpthread-2.21.so  [.] pthread_mutex_unlock
       9.09%  libc-2.21.so        [.] malloc
       7.52%  libc-2.21.so        [.] free
       6.40%  libc-2.21.so        [.] __memcpy_avx_unaligned
       6.04%  ovs-vswitchd        [.] miniflow_extract
       4.55%  libc-2.21.so        [.] _int_malloc
       4.45%  libc-2.21.so        [.] __memcmp_sse4_1
       4.31%  ovs-vswitchd        [.] ovs_mutex_lock_at
       3.88%  libpthread-2.21.so  [.] pthread_mutex_lock
       3.62%  ovs-vswitchd        [.] dpdk_do_tx_copy
       3.62%  ovs-vswitchd        [.] conn_key_lookup
       3.00%  ovs-vswitchd        [.] ixgbe_xmit_fixed_burst_vec
       2.60%  ovs-vswitchd        [.] conntrack_execute
       2.42%  ovs-vswitchd        [.] conn_key_hash
       2.39%  ovs-vswitchd        [.] other_conn_update
       2.30%  ovs-vswitchd        [.] process_one
       1.65%  ovs-vswitchd        [.] ixgbe_recv_pkts_vec
       0.78%  ovs-vswitchd        [.] dp_execute_cb
       0.77%  ovs-vswitchd        [.] dp_packet_clone_with_headroom     <<<<<<<<<<<<
       0.71%  ovs-vswitchd        [.] write_ct_md
       0.71%  ovs-vswitchd        [.] conn_update_state.isra.15.part.16
       0.63%  ovs-vswitchd        [.] conn_key_cmp
    
    
    	VTune analysis
    	--------------
    The CPU Time over a 60 sec analysis goes from 4.530s -> 3.920s.
    Please refer to results reported in previous email.
    
    
    
    > 
    > -----Original Message-----
    > From: <ovs-dev-bounces@openvswitch.org> on behalf of
    > "antonio.fischetti@intel.com" <antonio.fischetti@intel.com>
    > Date: Friday, August 11, 2017 at 8:52 AM
    > To: "dev@openvswitch.org" <dev@openvswitch.org>
    > Subject: [ovs-dev] [PATCH v3 4/4] dp-packet: Use memcpy on dp_packet
    > 	elements.
    > 
    >     memcpy replaces the several single copies inside
    >     dp_packet_clone_with_headroom().
    > 
    >     Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
    >     ---
    >     I tested this change by comparing the CPU Time over a 60 sec analysis
    >     with VTune.
    > 
    >     In original ovs:
    >     dp_packet_clone_with_headroom    4.530s
    > 
    >     + this changes:
    >     dp_packet_clone_with_headroom    3.920s
    > 
    >     Further details were reported in this reply for v1
    >     https://urldefense.proofpoint.com/v2/url?u=https-
    > 3A__mail.openvswitch.org_pipermail_ovs-2Ddev_2017-
    > 2DJune_334536.html&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
    > uZnsw&m=6qGelsac4wBCA5cHYalnmiB0QySdkGCItzNJ_-
    > twiJg&s=Wc0BH6ff_CG15qMnkGq8VDI5YmNby5Eo3JxRdZslOsI&e=
    >     ---
    >      lib/dp-packet.c | 18 +++++++++---------
    >      lib/dp-packet.h |  6 +++++-
    >      2 files changed, 14 insertions(+), 10 deletions(-)
    > 
    >     diff --git a/lib/dp-packet.c b/lib/dp-packet.c
    >     index 67aa406..f4dbcb7 100644
    >     --- a/lib/dp-packet.c
    >     +++ b/lib/dp-packet.c
    >     @@ -157,8 +157,9 @@ dp_packet_clone(const struct dp_packet *buffer)
    >          return dp_packet_clone_with_headroom(buffer, 0);
    >      }
    > 
    >     -/* Creates and returns a new dp_packet whose data are copied from
    > 'buffer'.   The
    >     - * returned dp_packet will additionally have 'headroom' bytes of headroom.
    > */
    >     +/* Creates and returns a new dp_packet whose data are copied from
    > 'buffer'.
    >     + * The returned dp_packet will additionally have 'headroom' bytes of
    >     + * headroom. */
    >      struct dp_packet *
    >      dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t
    > headroom)
    >      {
    >     @@ -167,13 +168,12 @@ dp_packet_clone_with_headroom(const struct dp_packet
    > *buffer, size_t headroom)
    >          new_buffer =
    > dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
    >                                                       dp_packet_size(buffer),
    >                                                       headroom);
    >     -    new_buffer->l2_pad_size = buffer->l2_pad_size;
    >     -    new_buffer->l2_5_ofs = buffer->l2_5_ofs;
    >     -    new_buffer->l3_ofs = buffer->l3_ofs;
    >     -    new_buffer->l4_ofs = buffer->l4_ofs;
    >     -    new_buffer->md = buffer->md;
    >     -    new_buffer->cutlen = buffer->cutlen;
    >     -    new_buffer->packet_type = buffer->packet_type;
    >     +    /* Copy the following fields into the returned buffer: l2_pad_size,
    >     +     * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
    >     +    memcpy(&new_buffer->l2_pad_size, &buffer->l2_pad_size,
    >     +            sizeof(struct dp_packet) -
    >     +            offsetof(struct dp_packet, l2_pad_size));
    >     +
    >      #ifdef DPDK_NETDEV
    >          new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
    >      #else
    >     diff --git a/lib/dp-packet.h b/lib/dp-packet.h
    >     index 9dbb611..9cab7c7 100644
    >     --- a/lib/dp-packet.h
    >     +++ b/lib/dp-packet.h
    >     @@ -40,7 +40,8 @@ enum OVS_PACKED_ENUM dp_packet_source {
    >          DPBUF_STACK,               /* Un-movable stack space or static buffer.
    > */
    >          DPBUF_STUB,                /* Starts on stack, may expand into heap.
    > */
    >          DPBUF_DPDK,                /* buffer data is from DPDK allocated
    > memory.
    >     -                                * ref to dp_packet_init_dpdk() in dp-
    > packet.c. */
    >     +                                * ref to dp_packet_init_dpdk() in dp-
    > packet.c.
    >     +                                */
    >      };
    > 
    >      #define DP_PACKET_CONTEXT_SIZE 64
    >     @@ -61,6 +62,9 @@ struct dp_packet {
    >          bool rss_hash_valid;        /* Is the 'rss_hash' valid? */
    >      #endif
    >          enum dp_packet_source source;  /* Source of memory allocated as
    > 'base'. */
    >     +
    >     +    /* All the following elements of this struct are copied by a single
    > call
    >     +     * to memcpy in dp_packet_clone_with_headroom. */
    >          uint8_t l2_pad_size;           /* Detected l2 padding size.
    >                                          * Padding is non-pullable. */
    >          uint16_t l2_5_ofs;             /* MPLS label stack offset, or
    > UINT16_MAX */
    >     --
    >     2.4.11
    > 
    >     _______________________________________________
    >     dev mailing list
    >     dev@openvswitch.org
    >     https://urldefense.proofpoint.com/v2/url?u=https-
    > 3A__mail.openvswitch.org_mailman_listinfo_ovs-
    > 2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
    > uZnsw&m=6qGelsac4wBCA5cHYalnmiB0QySdkGCItzNJ_-
    > twiJg&s=LREI681tchx40jdHLb94j5YDPgqXZnbI9_Yv4QNnqyA&e=
    >

Patch

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 67aa406..f4dbcb7 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -157,8 +157,9 @@  dp_packet_clone(const struct dp_packet *buffer)
     return dp_packet_clone_with_headroom(buffer, 0);
 }
 
-/* Creates and returns a new dp_packet whose data are copied from 'buffer'.   The
- * returned dp_packet will additionally have 'headroom' bytes of headroom. */
+/* Creates and returns a new dp_packet whose data are copied from 'buffer'.
+ * The returned dp_packet will additionally have 'headroom' bytes of
+ * headroom. */
 struct dp_packet *
 dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
 {
@@ -167,13 +168,12 @@  dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
     new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
                                                  dp_packet_size(buffer),
                                                  headroom);
-    new_buffer->l2_pad_size = buffer->l2_pad_size;
-    new_buffer->l2_5_ofs = buffer->l2_5_ofs;
-    new_buffer->l3_ofs = buffer->l3_ofs;
-    new_buffer->l4_ofs = buffer->l4_ofs;
-    new_buffer->md = buffer->md;
-    new_buffer->cutlen = buffer->cutlen;
-    new_buffer->packet_type = buffer->packet_type;
+    /* Copy the following fields into the returned buffer: l2_pad_size,
+     * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
+    memcpy(&new_buffer->l2_pad_size, &buffer->l2_pad_size,
+            sizeof(struct dp_packet) -
+            offsetof(struct dp_packet, l2_pad_size));
+
 #ifdef DPDK_NETDEV
     new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
 #else
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 9dbb611..9cab7c7 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -40,7 +40,8 @@  enum OVS_PACKED_ENUM dp_packet_source {
     DPBUF_STACK,               /* Un-movable stack space or static buffer. */
     DPBUF_STUB,                /* Starts on stack, may expand into heap. */
     DPBUF_DPDK,                /* buffer data is from DPDK allocated memory.
-                                * ref to dp_packet_init_dpdk() in dp-packet.c. */
+                                * ref to dp_packet_init_dpdk() in dp-packet.c.
+                                */
 };
 
 #define DP_PACKET_CONTEXT_SIZE 64
@@ -61,6 +62,9 @@  struct dp_packet {
     bool rss_hash_valid;        /* Is the 'rss_hash' valid? */
 #endif
     enum dp_packet_source source;  /* Source of memory allocated as 'base'. */
+
+    /* All the following elements of this struct are copied by a single call
+     * to memcpy in dp_packet_clone_with_headroom. */
     uint8_t l2_pad_size;           /* Detected l2 padding size.
                                     * Padding is non-pullable. */
     uint16_t l2_5_ofs;             /* MPLS label stack offset, or UINT16_MAX */