[ovs-dev,v6,1/7] dpif-netdev: Refactor PMD thread structure for further extension.

Message ID 1512143073-22347-2-git-send-email-i.maximets@samsung.com
State Superseded
Delegated to: Ian Stokes
Headers show
Series
  • Output packet batching.
Related show

Commit Message

Ilya Maximets Dec. 1, 2017, 3:44 p.m.
This is preparation for 'struct dp_netdev_pmd_thread' modification
in upcoming commits. Needed to avoid reordering and regrouping while
replacing old and adding new members.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
 lib/dpif-netdev.c | 65 ++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 25 deletions(-)

Comments

Eelco Chaudron Dec. 7, 2017, 12:49 p.m. | #1
On 01/12/17 16:44, Ilya Maximets wrote:
> This is preparation for 'struct dp_netdev_pmd_thread' modification
> in upcoming commits. Needed to avoid reordering and regrouping while
> replacing old and adding new members.
>
Should this be part of the TX batching set? Anyway, I'm ok if it's not 
stalling the approval :)

See some comments below on the use of remaining padding...
> Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
> Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> ---
>   lib/dpif-netdev.c | 65 ++++++++++++++++++++++++++++++++++---------------------
>   1 file changed, 40 insertions(+), 25 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 0a62630..7a7c6ce 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -549,29 +549,22 @@ struct tx_port {
>   struct dp_netdev_pmd_thread {
>       PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>           struct dp_netdev *dp;
> -        struct cmap_node node;          /* In 'dp->poll_threads'. */
> -        pthread_cond_t cond;            /* For synchronizing pmd thread
> -                                           reload. */
> +        struct cmap_node node;     /* In 'dp->poll_threads'. */
> +        pthread_cond_t cond;       /* For synchronizing pmd thread reload. */
>       );
>   
>       PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
>           struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>           pthread_t thread;
> -        unsigned core_id;               /* CPU core id of this pmd thread. */
> -        int numa_id;                    /* numa node id of this pmd thread. */
>       );
>   
>       /* Per thread exact-match cache.  Note, the instance for cpu core
>        * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>        * need to be protected by 'non_pmd_mutex'.  Every other instance
>        * will only be accessed by its own pmd thread. */
> -    OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
> -    struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
> -
> -    /* Queue id used by this pmd thread to send packets on all netdevs if
> -     * XPS disabled for this netdev. All static_tx_qid's are unique and less
> -     * than 'cmap_count(dp->poll_threads)'. */
> -    uint32_t static_tx_qid;
> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
> +        OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
> +    );
>   
>       /* Flow-Table and classifiers
>        *
> @@ -579,7 +572,10 @@ struct dp_netdev_pmd_thread {
>        * changes to 'classifiers' must be made while still holding the
>        * 'flow_mutex'.
>        */
> -    struct ovs_mutex flow_mutex;
> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
> +        struct ovs_mutex flow_mutex;
> +        /* 8 pad bytes. */
Do we really want to add pad bytes left in this structure? They can 
easily be wrong is something changes elsewhere?
Guessing from some the differences you might have other patches applied?
Using the pahole tool I think the 8 here should be 16?

     union {
         struct {
             struct ovs_mutex flow_mutex;     /*     0    48 */
         }; /* 48 */
         uint8_t            pad12[64];        /*          64 */
     };                                       /*     0    64 */

> +    );
>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>           struct cmap flow_table OVS_GUARDED; /* Flow table. */
>   
> @@ -596,35 +592,50 @@ struct dp_netdev_pmd_thread {
>   
>           /* Used to count cycles. See 'cycles_counter_end()'. */
>           unsigned long long last_cycles;
> -        struct latch exit_latch;        /* For terminating the pmd thread. */
> -    );
>   
> +        /* 8 pad bytes. */
> +    );
>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>           /* Statistics. */
>           struct dp_netdev_pmd_stats stats;
> +        /* 8 pad bytes. */
Should this not be 24?

     union {
         struct {
             struct dp_netdev_pmd_stats stats; /*     0 40 */
         };                                    /* 40 */
         uint8_t            pad14[64];         /* 64 */
     };                                        /*     0 64 */

> +    );
>   
> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
> +        struct latch exit_latch;     /* For terminating the pmd thread. */
>           struct seq *reload_seq;
>           uint64_t last_reload_seq;
> -        atomic_bool reload;             /* Do we need to reload ports? */
> -        bool isolated;
> -
> +        atomic_bool reload;          /* Do we need to reload ports? */
>           /* Set to true if the pmd thread needs to be reloaded. */
>           bool need_reload;
> -        /* 5 pad bytes. */
> +        bool isolated;
> +
> +        struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
> +
> +        /* Queue id used by this pmd thread to send packets on all netdevs if
> +         * XPS disabled for this netdev. All static_tx_qid's are unique and
> +         * less than 'cmap_count(dp->poll_threads)'. */
> +        uint32_t static_tx_qid;
> +
> +        unsigned core_id;            /* CPU core id of this pmd thread. */
> +        int numa_id;                 /* numa node id of this pmd thread. */
> +
> +        /* 20 pad bytes. */
>       );
>   
>       PADDED_MEMBERS(CACHE_LINE_SIZE,
> -        struct ovs_mutex port_mutex;    /* Mutex for 'poll_list'
> -                                           and 'tx_ports'. */
> -        /* 16 pad bytes. */
> +        /* Mutex for 'poll_list' and 'tx_ports'. */
> +        struct ovs_mutex port_mutex;
>       );
> +
>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>           /* List of rx queues to poll. */
>           struct hmap poll_list OVS_GUARDED;
> -        /* Map of 'tx_port's used for transmission.  Written by the main
> -         * thread, read by the pmd thread. */
> +        /* Map of 'tx_port's used for transmission.
> +         * Written by the main thread, read by the pmd thread. */
>           struct hmap tx_ports OVS_GUARDED;
>       );
> +
>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>           /* These are thread-local copies of 'tx_ports'.  One contains only
>            * tunnel ports (that support push_tunnel/pop_tunnel), the other
> @@ -648,9 +659,13 @@ struct dp_netdev_pmd_thread {
>            * values and subtracts them from 'stats' and 'cycles' before
>            * reporting to the user */
>           unsigned long long stats_zero[DP_N_STATS];
> -        uint64_t cycles_zero[PMD_N_CYCLES];
>           /* 8 pad bytes. */
>       );
> +
> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
> +        uint64_t cycles_zero[PMD_N_CYCLES];
> +        /* 48 pad bytes. */
> +    );
>   };
>   
>   /* Interface to netdev-based datapath. */
Ilya Maximets Dec. 7, 2017, 1:28 p.m. | #2
Thanks for review, comments inline.

On 07.12.2017 15:49, Eelco Chaudron wrote:
> On 01/12/17 16:44, Ilya Maximets wrote:
>> This is preparation for 'struct dp_netdev_pmd_thread' modification
>> in upcoming commits. Needed to avoid reordering and regrouping while
>> replacing old and adding new members.
>>
> Should this be part of the TX batching set? Anyway, I'm ok if it's not stalling the approval :)

Unfortunately yes, because members reordered and regrouped just to include
new members: pmd->ctx and pmd->n_output_batches. This could not be a standalone
change because adding of different members will require different regrouping/
reordering. I moved this change to a separate patch to not do this twice while
adding each member in patches 2/7 and 6/7.

Anyway, as I mentioned in cover letter, I still prefer reverting of the padding
at all by this patch:
	https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html

> 
> See some comments below on the use of remaining padding...
>> Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
>> Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>> ---
>>   lib/dpif-netdev.c | 65 ++++++++++++++++++++++++++++++++++---------------------
>>   1 file changed, 40 insertions(+), 25 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 0a62630..7a7c6ce 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -549,29 +549,22 @@ struct tx_port {
>>   struct dp_netdev_pmd_thread {
>>       PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>>           struct dp_netdev *dp;
>> -        struct cmap_node node;          /* In 'dp->poll_threads'. */
>> -        pthread_cond_t cond;            /* For synchronizing pmd thread
>> -                                           reload. */
>> +        struct cmap_node node;     /* In 'dp->poll_threads'. */
>> +        pthread_cond_t cond;       /* For synchronizing pmd thread reload. */
>>       );
>>         PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
>>           struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>>           pthread_t thread;
>> -        unsigned core_id;               /* CPU core id of this pmd thread. */
>> -        int numa_id;                    /* numa node id of this pmd thread. */
>>       );
>>         /* Per thread exact-match cache.  Note, the instance for cpu core
>>        * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>>        * need to be protected by 'non_pmd_mutex'.  Every other instance
>>        * will only be accessed by its own pmd thread. */
>> -    OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
>> -    struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
>> -
>> -    /* Queue id used by this pmd thread to send packets on all netdevs if
>> -     * XPS disabled for this netdev. All static_tx_qid's are unique and less
>> -     * than 'cmap_count(dp->poll_threads)'. */
>> -    uint32_t static_tx_qid;
>> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>> +        OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
>> +    );
>>         /* Flow-Table and classifiers
>>        *
>> @@ -579,7 +572,10 @@ struct dp_netdev_pmd_thread {
>>        * changes to 'classifiers' must be made while still holding the
>>        * 'flow_mutex'.
>>        */
>> -    struct ovs_mutex flow_mutex;
>> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>> +        struct ovs_mutex flow_mutex;
>> +        /* 8 pad bytes. */
> Do we really want to add pad bytes left in this structure? They can easily be wrong is something changes elsewhere?
> Guessing from some the differences you might have other patches applied?
> Using the pahole tool I think the 8 here should be 16?
> 
>     union {
>         struct {
>             struct ovs_mutex flow_mutex;     /*     0    48 */
>         }; /* 48 */
>         uint8_t            pad12[64];        /*          64 */
>     };                                       /*     0    64 */

I left 8 bytes here because we may have different size of ovs_mutex on different
systems. The biggest value I saw was 56 bytes on my ARMv8 platform.

> 
>> +    );
>>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>>           struct cmap flow_table OVS_GUARDED; /* Flow table. */
>>   @@ -596,35 +592,50 @@ struct dp_netdev_pmd_thread {
>>             /* Used to count cycles. See 'cycles_counter_end()'. */
>>           unsigned long long last_cycles;
>> -        struct latch exit_latch;        /* For terminating the pmd thread. */
>> -    );
>>   +        /* 8 pad bytes. */
>> +    );
>>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>>           /* Statistics. */
>>           struct dp_netdev_pmd_stats stats;
>> +        /* 8 pad bytes. */
> Should this not be 24?

Yes, you're right. It'll be 8 after applying of the last patch of the series.
I can keep 24 in this patch and change this value to 8 at the last patch if
it really matters. Do you think I should?

> 
>     union {
>         struct {
>             struct dp_netdev_pmd_stats stats; /*     0 40 */
>         };                                    /* 40 */
>         uint8_t            pad14[64];         /* 64 */
>     };                                        /*     0 64 
> 
>> +    );
>>   +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>> +        struct latch exit_latch;     /* For terminating the pmd thread. */
>>           struct seq *reload_seq;
>>           uint64_t last_reload_seq;
>> -        atomic_bool reload;             /* Do we need to reload ports? */
>> -        bool isolated;
>> -
>> +        atomic_bool reload;          /* Do we need to reload ports? */
>>           /* Set to true if the pmd thread needs to be reloaded. */
>>           bool need_reload;
>> -        /* 5 pad bytes. */
>> +        bool isolated;
>> +
>> +        struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
>> +
>> +        /* Queue id used by this pmd thread to send packets on all netdevs if
>> +         * XPS disabled for this netdev. All static_tx_qid's are unique and
>> +         * less than 'cmap_count(dp->poll_threads)'. */
>> +        uint32_t static_tx_qid;
>> +
>> +        unsigned core_id;            /* CPU core id of this pmd thread. */
>> +        int numa_id;                 /* numa node id of this pmd thread. */
>> +
>> +        /* 20 pad bytes. */
>>       );
>>         PADDED_MEMBERS(CACHE_LINE_SIZE,
>> -        struct ovs_mutex port_mutex;    /* Mutex for 'poll_list'
>> -                                           and 'tx_ports'. */
>> -        /* 16 pad bytes. */
>> +        /* Mutex for 'poll_list' and 'tx_ports'. */
>> +        struct ovs_mutex port_mutex;
>>       );
>> +
>>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>>           /* List of rx queues to poll. */
>>           struct hmap poll_list OVS_GUARDED;
>> -        /* Map of 'tx_port's used for transmission.  Written by the main
>> -         * thread, read by the pmd thread. */
>> +        /* Map of 'tx_port's used for transmission.
>> +         * Written by the main thread, read by the pmd thread. */
>>           struct hmap tx_ports OVS_GUARDED;
>>       );
>> +
>>       PADDED_MEMBERS(CACHE_LINE_SIZE,
>>           /* These are thread-local copies of 'tx_ports'.  One contains only
>>            * tunnel ports (that support push_tunnel/pop_tunnel), the other
>> @@ -648,9 +659,13 @@ struct dp_netdev_pmd_thread {
>>            * values and subtracts them from 'stats' and 'cycles' before
>>            * reporting to the user */
>>           unsigned long long stats_zero[DP_N_STATS];
>> -        uint64_t cycles_zero[PMD_N_CYCLES];
>>           /* 8 pad bytes. */
>>       );
>> +
>> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>> +        uint64_t cycles_zero[PMD_N_CYCLES];
>> +        /* 48 pad bytes. */
>> +    );
>>   };
>>     /* Interface to netdev-based datapath. */
> 
> 
> 
> 
>
Jan Scheurich Dec. 7, 2017, 1:48 p.m. | #3
> -----Original Message-----
> From: Ilya Maximets [mailto:i.maximets@samsung.com]
> Sent: Thursday, 07 December, 2017 14:28

> >> This is preparation for 'struct dp_netdev_pmd_thread' modification
> >> in upcoming commits. Needed to avoid reordering and regrouping while
> >> replacing old and adding new members.
> >>
> > Should this be part of the TX batching set? Anyway, I'm ok if it's not stalling the approval :)
> 
> Unfortunately yes, because members reordered and regrouped just to include
> new members: pmd->ctx and pmd->n_output_batches. This could not be a standalone
> change because adding of different members will require different regrouping/
> reordering. I moved this change to a separate patch to not do this twice while
> adding each member in patches 2/7 and 6/7.
> 
> Anyway, as I mentioned in cover letter, I still prefer reverting of the padding
> at all by this patch:
> 	https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html
> 

I think we should not spent time designing and reviewing these kind of patches that are made necessary by the introduction of commit  a807c15796ddc43ba1ffb2a6b0bd2ad4e2b73941.

As far as I can see there was never a single review on the original patch:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-October/339402.html
I wonder how it got merged into master in the first place.

I strongly support Ilya's revert patch for that commit:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html

Let's do that quickly to remove some of the obstacles to merging important patches in time for OVS 2.9.

BR, Jan
Eelco Chaudron Dec. 7, 2017, 3:58 p.m. | #4
On 07/12/17 14:48, Jan Scheurich wrote:
>> -----Original Message-----
>> From: Ilya Maximets [mailto:i.maximets@samsung.com]
>> Sent: Thursday, 07 December, 2017 14:28
>>>> This is preparation for 'struct dp_netdev_pmd_thread' modification
>>>> in upcoming commits. Needed to avoid reordering and regrouping while
>>>> replacing old and adding new members.
>>>>
>>> Should this be part of the TX batching set? Anyway, I'm ok if it's not stalling the approval :)
>> Unfortunately yes, because members reordered and regrouped just to include
>> new members: pmd->ctx and pmd->n_output_batches. This could not be a standalone
>> change because adding of different members will require different regrouping/
>> reordering. I moved this change to a separate patch to not do this twice while
>> adding each member in patches 2/7 and 6/7.
>>
>> Anyway, as I mentioned in cover letter, I still prefer reverting of the padding
>> at all by this patch:
>> 	https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html
>>
> I think we should not spent time designing and reviewing these kind of patches that are made necessary by the introduction of commit  a807c15796ddc43ba1ffb2a6b0bd2ad4e2b73941.
>
> As far as I can see there was never a single review on the original patch:
> https://mail.openvswitch.org/pipermail/ovs-dev/2017-October/339402.html
> I wonder how it got merged into master in the first place.
>
> I strongly support Ilya's revert patch for that commit:
> https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html
>
> Let's do that quickly to remove some of the obstacles to merging important patches in time for OVS 2.9.
>
> BR, Jan

Looking at the above, and going over the patches, I also agree undoing 
the commit is the right thing to do.
Eelco Chaudron Dec. 7, 2017, 4:01 p.m. | #5
On 07/12/17 14:28, Ilya Maximets wrote:
> Thanks for review, comments inline.
>
> On 07.12.2017 15:49, Eelco Chaudron wrote:
>> On 01/12/17 16:44, Ilya Maximets wrote:
>>> This is preparation for 'struct dp_netdev_pmd_thread' modification
>>> in upcoming commits. Needed to avoid reordering and regrouping while
>>> replacing old and adding new members.
>>>
>> Should this be part of the TX batching set? Anyway, I'm ok if it's not stalling the approval :)
> Unfortunately yes, because members reordered and regrouped just to include
> new members: pmd->ctx and pmd->n_output_batches. This could not be a standalone
> change because adding of different members will require different regrouping/
> reordering. I moved this change to a separate patch to not do this twice while
> adding each member in patches 2/7 and 6/7.
>
> Anyway, as I mentioned in cover letter, I still prefer reverting of the padding
> at all by this patch:
> 	https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html
Agree, see reply to Jan's email.
>> See some comments below on the use of remaining padding...
>>> Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
>>> Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
>>> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>>> ---
>>>    lib/dpif-netdev.c | 65 ++++++++++++++++++++++++++++++++++---------------------
>>>    1 file changed, 40 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>>> index 0a62630..7a7c6ce 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -549,29 +549,22 @@ struct tx_port {
>>>    struct dp_netdev_pmd_thread {
>>>        PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
>>>            struct dp_netdev *dp;
>>> -        struct cmap_node node;          /* In 'dp->poll_threads'. */
>>> -        pthread_cond_t cond;            /* For synchronizing pmd thread
>>> -                                           reload. */
>>> +        struct cmap_node node;     /* In 'dp->poll_threads'. */
>>> +        pthread_cond_t cond;       /* For synchronizing pmd thread reload. */
>>>        );
>>>          PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
>>>            struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>>>            pthread_t thread;
>>> -        unsigned core_id;               /* CPU core id of this pmd thread. */
>>> -        int numa_id;                    /* numa node id of this pmd thread. */
>>>        );
>>>          /* Per thread exact-match cache.  Note, the instance for cpu core
>>>         * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>>>         * need to be protected by 'non_pmd_mutex'.  Every other instance
>>>         * will only be accessed by its own pmd thread. */
>>> -    OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
>>> -    struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
>>> -
>>> -    /* Queue id used by this pmd thread to send packets on all netdevs if
>>> -     * XPS disabled for this netdev. All static_tx_qid's are unique and less
>>> -     * than 'cmap_count(dp->poll_threads)'. */
>>> -    uint32_t static_tx_qid;
>>> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>>> +        OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
>>> +    );
>>>          /* Flow-Table and classifiers
>>>         *
>>> @@ -579,7 +572,10 @@ struct dp_netdev_pmd_thread {
>>>         * changes to 'classifiers' must be made while still holding the
>>>         * 'flow_mutex'.
>>>         */
>>> -    struct ovs_mutex flow_mutex;
>>> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>>> +        struct ovs_mutex flow_mutex;
>>> +        /* 8 pad bytes. */
>> Do we really want to add pad bytes left in this structure? They can easily be wrong is something changes elsewhere?
>> Guessing from some the differences you might have other patches applied?
>> Using the pahole tool I think the 8 here should be 16?
>>
>>      union {
>>          struct {
>>              struct ovs_mutex flow_mutex;     /*     0    48 */
>>          }; /* 48 */
>>          uint8_t            pad12[64];        /*          64 */
>>      };                                       /*     0    64 */
> I left 8 bytes here because we may have different size of ovs_mutex on different
> systems. The biggest value I saw was 56 bytes on my ARMv8 platform.
>
>>> +    );
>>>        PADDED_MEMBERS(CACHE_LINE_SIZE,
>>>            struct cmap flow_table OVS_GUARDED; /* Flow table. */
>>>    @@ -596,35 +592,50 @@ struct dp_netdev_pmd_thread {
>>>              /* Used to count cycles. See 'cycles_counter_end()'. */
>>>            unsigned long long last_cycles;
>>> -        struct latch exit_latch;        /* For terminating the pmd thread. */
>>> -    );
>>>    +        /* 8 pad bytes. */
>>> +    );
>>>        PADDED_MEMBERS(CACHE_LINE_SIZE,
>>>            /* Statistics. */
>>>            struct dp_netdev_pmd_stats stats;
>>> +        /* 8 pad bytes. */
>> Should this not be 24?
> Yes, you're right. It'll be 8 after applying of the last patch of the series.
> I can keep 24 in this patch and change this value to 8 at the last patch if
> it really matters. Do you think I should?
Don't think it should. I feel more like removing it at all as it assumes 
CACHE_LINE_SIZE is always 64
>
>>      union {
>>          struct {
>>              struct dp_netdev_pmd_stats stats; /*     0 40 */
>>          };                                    /* 40 */
>>          uint8_t            pad14[64];         /* 64 */
>>      };                                        /*     0 64
>>
>>> +    );
>>>    +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>>> +        struct latch exit_latch;     /* For terminating the pmd thread. */
>>>            struct seq *reload_seq;
>>>            uint64_t last_reload_seq;
>>> -        atomic_bool reload;             /* Do we need to reload ports? */
>>> -        bool isolated;
>>> -
>>> +        atomic_bool reload;          /* Do we need to reload ports? */
>>>            /* Set to true if the pmd thread needs to be reloaded. */
>>>            bool need_reload;
>>> -        /* 5 pad bytes. */
>>> +        bool isolated;
>>> +
>>> +        struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
>>> +
>>> +        /* Queue id used by this pmd thread to send packets on all netdevs if
>>> +         * XPS disabled for this netdev. All static_tx_qid's are unique and
>>> +         * less than 'cmap_count(dp->poll_threads)'. */
>>> +        uint32_t static_tx_qid;
>>> +
>>> +        unsigned core_id;            /* CPU core id of this pmd thread. */
>>> +        int numa_id;                 /* numa node id of this pmd thread. */
>>> +
>>> +        /* 20 pad bytes. */
>>>        );
>>>          PADDED_MEMBERS(CACHE_LINE_SIZE,
>>> -        struct ovs_mutex port_mutex;    /* Mutex for 'poll_list'
>>> -                                           and 'tx_ports'. */
>>> -        /* 16 pad bytes. */
>>> +        /* Mutex for 'poll_list' and 'tx_ports'. */
>>> +        struct ovs_mutex port_mutex;
>>>        );
>>> +
>>>        PADDED_MEMBERS(CACHE_LINE_SIZE,
>>>            /* List of rx queues to poll. */
>>>            struct hmap poll_list OVS_GUARDED;
>>> -        /* Map of 'tx_port's used for transmission.  Written by the main
>>> -         * thread, read by the pmd thread. */
>>> +        /* Map of 'tx_port's used for transmission.
>>> +         * Written by the main thread, read by the pmd thread. */
>>>            struct hmap tx_ports OVS_GUARDED;
>>>        );
>>> +
>>>        PADDED_MEMBERS(CACHE_LINE_SIZE,
>>>            /* These are thread-local copies of 'tx_ports'.  One contains only
>>>             * tunnel ports (that support push_tunnel/pop_tunnel), the other
>>> @@ -648,9 +659,13 @@ struct dp_netdev_pmd_thread {
>>>             * values and subtracts them from 'stats' and 'cycles' before
>>>             * reporting to the user */
>>>            unsigned long long stats_zero[DP_N_STATS];
>>> -        uint64_t cycles_zero[PMD_N_CYCLES];
>>>            /* 8 pad bytes. */
>>>        );
>>> +
>>> +    PADDED_MEMBERS(CACHE_LINE_SIZE,
>>> +        uint64_t cycles_zero[PMD_N_CYCLES];
>>> +        /* 48 pad bytes. */
>>> +    );
>>>    };
>>>      /* Interface to netdev-based datapath. */
>>
>>
>>
>>
Bodireddy, Bhanuprakash Dec. 7, 2017, 8:22 p.m. | #6
>On 07/12/17 14:28, Ilya Maximets wrote:

>> Thanks for review, comments inline.

>>

>> On 07.12.2017 15:49, Eelco Chaudron wrote:

>>> On 01/12/17 16:44, Ilya Maximets wrote:

>>>> This is preparation for 'struct dp_netdev_pmd_thread' modification

>>>> in upcoming commits. Needed to avoid reordering and regrouping while

>>>> replacing old and adding new members.

>>>>

>>> Should this be part of the TX batching set? Anyway, I'm ok if it's

>>> not stalling the approval :)

>> Unfortunately yes, because members reordered and regrouped just to

>> include new members: pmd->ctx and pmd->n_output_batches. This could

>> not be a standalone change because adding of different members will

>> require different regrouping/ reordering. I moved this change to a

>> separate patch to not do this twice while adding each member in patches

>2/7 and 6/7.

>>

>> Anyway, as I mentioned in cover letter, I still prefer reverting of

>> the padding at all by this patch:

>>

>> https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html


I understand that with PADDED_MEMBERS macro it was slightly tricky to extend or reorganize the structure and so suggested 'pahole'.
But I see that the problem hasn't gone and still there are some strong opinions on reverting the earlier effort.

I  don’t mind reverting the patch but would be nice if the changes to this structure are made keeping alignment in mind.

- Bhanuprakash.
Jan Scheurich Dec. 8, 2017, 9:26 a.m. | #7
Hi Bhanu,

Perhaps we can aim at fine-tuning the layout of performance-critical structs once at the end of a release cycle when all development is completed. That way we don't hinder productivity during the release cycle and still won't unnecessarily loose performance.

And we should definitely apply the now corrected xmalloc_cacheline() for DPDK datapath structs of size 64B or larger to have a well-defined cache alignment base.

BR, Jan

> -----Original Message-----

> From: Bodireddy, Bhanuprakash [mailto:bhanuprakash.bodireddy@intel.com]

> Sent: Thursday, 07 December, 2017 21:22

> To: echaudro@redhat.com; Ilya Maximets <i.maximets@samsung.com>; ovs-dev@openvswitch.org

> Cc: Heetae Ahn <heetae82.ahn@samsung.com>; Fischetti, Antonio <antonio.fischetti@intel.com>; Loftus, Ciara <ciara.loftus@intel.com>;

> Kevin Traynor <ktraynor@redhat.com>; Jan Scheurich <jan.scheurich@ericsson.com>; Stokes, Ian <ian.stokes@intel.com>

> Subject: RE: [PATCH v6 1/7] dpif-netdev: Refactor PMD thread structure for further extension.

> 

> >On 07/12/17 14:28, Ilya Maximets wrote:

> >> Thanks for review, comments inline.

> >>

> >> On 07.12.2017 15:49, Eelco Chaudron wrote:

> >>> On 01/12/17 16:44, Ilya Maximets wrote:

> >>>> This is preparation for 'struct dp_netdev_pmd_thread' modification

> >>>> in upcoming commits. Needed to avoid reordering and regrouping while

> >>>> replacing old and adding new members.

> >>>>

> >>> Should this be part of the TX batching set? Anyway, I'm ok if it's

> >>> not stalling the approval :)

> >> Unfortunately yes, because members reordered and regrouped just to

> >> include new members: pmd->ctx and pmd->n_output_batches. This could

> >> not be a standalone change because adding of different members will

> >> require different regrouping/ reordering. I moved this change to a

> >> separate patch to not do this twice while adding each member in patches

> >2/7 and 6/7.

> >>

> >> Anyway, as I mentioned in cover letter, I still prefer reverting of

> >> the padding at all by this patch:

> >>

> >> https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.html

> 

> I understand that with PADDED_MEMBERS macro it was slightly tricky to extend or reorganize the structure and so suggested 'pahole'.

> But I see that the problem hasn't gone and still there are some strong opinions on reverting the earlier effort.

> 

> I  don’t mind reverting the patch but would be nice if the changes to this structure are made keeping alignment in mind.

> 

> - Bhanuprakash.
Stokes, Ian Dec. 8, 2017, 4:44 p.m. | #8
> >On 07/12/17 14:28, Ilya Maximets wrote:

> >> Thanks for review, comments inline.

> >>

> >> On 07.12.2017 15:49, Eelco Chaudron wrote:

> >>> On 01/12/17 16:44, Ilya Maximets wrote:

> >>>> This is preparation for 'struct dp_netdev_pmd_thread' modification

> >>>> in upcoming commits. Needed to avoid reordering and regrouping

> >>>> while replacing old and adding new members.

> >>>>

> >>> Should this be part of the TX batching set? Anyway, I'm ok if it's

> >>> not stalling the approval :)

> >> Unfortunately yes, because members reordered and regrouped just to

> >> include new members: pmd->ctx and pmd->n_output_batches. This could

> >> not be a standalone change because adding of different members will

> >> require different regrouping/ reordering. I moved this change to a

> >> separate patch to not do this twice while adding each member in

> >> patches

> >2/7 and 6/7.

> >>

> >> Anyway, as I mentioned in cover letter, I still prefer reverting of

> >> the padding at all by this patch:

> >>

> >> https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341153.h

> >> tml

> 

> I understand that with PADDED_MEMBERS macro it was slightly tricky to

> extend or reorganize the structure and so suggested 'pahole'.

> But I see that the problem hasn't gone and still there are some strong

> opinions on reverting the earlier effort.

> 

> I  don’t mind reverting the patch but would be nice if the changes to this

> structure are made keeping alignment in mind.


OK, it sounds like reverting the dpif-netdev padding is the preferred approach to simplify future development. I'll look at reviewing and validating.

I agree as Jan has pointed out, it will make life easier for future large features when rebasing over the next few weeks.

Ian

> 

> - Bhanuprakash.

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0a62630..7a7c6ce 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -549,29 +549,22 @@  struct tx_port {
 struct dp_netdev_pmd_thread {
     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
         struct dp_netdev *dp;
-        struct cmap_node node;          /* In 'dp->poll_threads'. */
-        pthread_cond_t cond;            /* For synchronizing pmd thread
-                                           reload. */
+        struct cmap_node node;     /* In 'dp->poll_threads'. */
+        pthread_cond_t cond;       /* For synchronizing pmd thread reload. */
     );
 
     PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
         struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
         pthread_t thread;
-        unsigned core_id;               /* CPU core id of this pmd thread. */
-        int numa_id;                    /* numa node id of this pmd thread. */
     );
 
     /* Per thread exact-match cache.  Note, the instance for cpu core
      * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
      * need to be protected by 'non_pmd_mutex'.  Every other instance
      * will only be accessed by its own pmd thread. */
-    OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
-    struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
-
-    /* Queue id used by this pmd thread to send packets on all netdevs if
-     * XPS disabled for this netdev. All static_tx_qid's are unique and less
-     * than 'cmap_count(dp->poll_threads)'. */
-    uint32_t static_tx_qid;
+    PADDED_MEMBERS(CACHE_LINE_SIZE,
+        OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
+    );
 
     /* Flow-Table and classifiers
      *
@@ -579,7 +572,10 @@  struct dp_netdev_pmd_thread {
      * changes to 'classifiers' must be made while still holding the
      * 'flow_mutex'.
      */
-    struct ovs_mutex flow_mutex;
+    PADDED_MEMBERS(CACHE_LINE_SIZE,
+        struct ovs_mutex flow_mutex;
+        /* 8 pad bytes. */
+    );
     PADDED_MEMBERS(CACHE_LINE_SIZE,
         struct cmap flow_table OVS_GUARDED; /* Flow table. */
 
@@ -596,35 +592,50 @@  struct dp_netdev_pmd_thread {
 
         /* Used to count cycles. See 'cycles_counter_end()'. */
         unsigned long long last_cycles;
-        struct latch exit_latch;        /* For terminating the pmd thread. */
-    );
 
+        /* 8 pad bytes. */
+    );
     PADDED_MEMBERS(CACHE_LINE_SIZE,
         /* Statistics. */
         struct dp_netdev_pmd_stats stats;
+        /* 8 pad bytes. */
+    );
 
+    PADDED_MEMBERS(CACHE_LINE_SIZE,
+        struct latch exit_latch;     /* For terminating the pmd thread. */
         struct seq *reload_seq;
         uint64_t last_reload_seq;
-        atomic_bool reload;             /* Do we need to reload ports? */
-        bool isolated;
-
+        atomic_bool reload;          /* Do we need to reload ports? */
         /* Set to true if the pmd thread needs to be reloaded. */
         bool need_reload;
-        /* 5 pad bytes. */
+        bool isolated;
+
+        struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
+
+        /* Queue id used by this pmd thread to send packets on all netdevs if
+         * XPS disabled for this netdev. All static_tx_qid's are unique and
+         * less than 'cmap_count(dp->poll_threads)'. */
+        uint32_t static_tx_qid;
+
+        unsigned core_id;            /* CPU core id of this pmd thread. */
+        int numa_id;                 /* numa node id of this pmd thread. */
+
+        /* 20 pad bytes. */
     );
 
     PADDED_MEMBERS(CACHE_LINE_SIZE,
-        struct ovs_mutex port_mutex;    /* Mutex for 'poll_list'
-                                           and 'tx_ports'. */
-        /* 16 pad bytes. */
+        /* Mutex for 'poll_list' and 'tx_ports'. */
+        struct ovs_mutex port_mutex;
     );
+
     PADDED_MEMBERS(CACHE_LINE_SIZE,
         /* List of rx queues to poll. */
         struct hmap poll_list OVS_GUARDED;
-        /* Map of 'tx_port's used for transmission.  Written by the main
-         * thread, read by the pmd thread. */
+        /* Map of 'tx_port's used for transmission.
+         * Written by the main thread, read by the pmd thread. */
         struct hmap tx_ports OVS_GUARDED;
     );
+
     PADDED_MEMBERS(CACHE_LINE_SIZE,
         /* These are thread-local copies of 'tx_ports'.  One contains only
          * tunnel ports (that support push_tunnel/pop_tunnel), the other
@@ -648,9 +659,13 @@  struct dp_netdev_pmd_thread {
          * values and subtracts them from 'stats' and 'cycles' before
          * reporting to the user */
         unsigned long long stats_zero[DP_N_STATS];
-        uint64_t cycles_zero[PMD_N_CYCLES];
         /* 8 pad bytes. */
     );
+
+    PADDED_MEMBERS(CACHE_LINE_SIZE,
+        uint64_t cycles_zero[PMD_N_CYCLES];
+        /* 48 pad bytes. */
+    );
 };
 
 /* Interface to netdev-based datapath. */