Message ID | 20220825233058.2697002-9-kumar.amber@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | DPIF + MFEX Inner AVX512 | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
Hi Amber, My comments on the patch are below inline. Thanks, Cian > -----Original Message----- > From: Amber, Kumar <kumar.amber@intel.com> > Sent: Friday 26 August 2022 00:31 > To: ovs-dev@openvswitch.org > Cc: echaudro@redhat.com; i.maximets@ovn.org; Ferriter, Cian <cian.ferriter@intel.com>; Stokes, Ian > <ian.stokes@intel.com>; fbl@sysclose.org; Van Haaren, Harry <harry.van.haaren@intel.com>; Amber, Kumar > <kumar.amber@intel.com> > Subject: [PATCH v5 8/9] mfex-study: Modify study func to select outer and inner mfex funcs. > > The Mfex study function is split into outer and inner to allow > for independent selection and studying of packets in outer and inner > flows to different ISA optimized Mfexs. For the last line how about: > flows to different ISA optimized miniflow extract implementations. > > Signed-off-by: Kumar Amber <kumar.amber@intel.com> > --- > lib/dpif-netdev-extract-study.c | 126 +++++++++++++++++++++----------- > 1 file changed, 83 insertions(+), 43 deletions(-) > > diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c > index 71354cc4c..03d97c64e 100644 > --- a/lib/dpif-netdev-extract-study.c > +++ b/lib/dpif-netdev-extract-study.c > @@ -30,7 +30,9 @@ static atomic_uint32_t mfex_study_pkts_count = MFEX_MAX_PKT_COUNT; > /* Struct to hold miniflow study stats. */ > struct study_stats { > uint32_t pkt_count; > + uint32_t pkt_inner_count; > uint32_t impl_hitcount[MFEX_IMPL_MAX]; > + uint32_t impl_inner_hitcount[MFEX_IMPL_MAX]; > }; > > /* Define per thread data to hold the study stats. */ > @@ -67,6 +69,58 @@ mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char *name) > return -EINVAL; > } > > + Remove the extra newline here. > +static inline void > +mfex_reset_stats(uint32_t *impls_hitcount, uint32_t *pkt_cnt) { > + /* Reset stats so that study function can be called again > + * for next traffic type and optimal function ptr can be > + * chosen. > + */ I prefer having this comment above the function rather than inside since it describes what the whole function does. I've reworded the comment to add 'the's and 'an'. How about this: Reset stats so that the study function can be called again for the next traffic type and an optimal function pointer can be chosen. > + memset(impls_hitcount, 0, sizeof(uint32_t) * MFEX_IMPL_MAX); > + *pkt_cnt = 0; > +} > + > +static inline void > +mfex_study_select_best_impls(struct dpif_miniflow_extract_impl *mfex_funcs, > + uint32_t pkt_cnt, uint32_t *impls_arr, > + atomic_uintptr_t *pmd_func, char *name) > +{ > + > + uint32_t best_func_index = MFEX_IMPL_START_IDX; > + uint32_t max_hits = 0; > + > + for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > + if (impls_arr[i] > max_hits) { > + max_hits = impls_arr[i]; > + best_func_index = i; > + } > + } These 2 braces are indented incorrectly. It should be like this: > + } > + } > + > + /* If 50% of the packets hit, enable the function. */ How about this reword: If at least 50% of the packets hit the implementation, enable that implementation. > + if (max_hits >= (mfex_study_pkts_count / 2)) { > + atomic_store_relaxed(pmd_func, > + (uintptr_t) mfex_funcs[best_func_index].extract_func); > + VLOG_INFO("MFEX %s study chose impl %s: (hits %u/%u pkts)", > + name, mfex_funcs[best_func_index].name, max_hits, > + pkt_cnt); The 'pkt_cnt' doesn't have to be wrapped to the next line. > + } else { > + /* Set the implementation to null for default miniflow. */ > + atomic_store_relaxed(pmd_func, > + (uintptr_t) mfex_funcs[MFEX_IMPL_SCALAR].extract_func); > + VLOG_INFO("Not enough packets matched (%u/%u), disabling" > + " optimized MFEX.", max_hits, pkt_cnt); > + } > + > + /* In debug mode show stats for all the counters. */ > + if (VLOG_IS_DBG_ENABLED()) { > + for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > + VLOG_DBG("MFEX study results for implementation %s:" > + " (hits %u/%u pkts)", mfex_funcs[i].name, > + impls_arr[i], pkt_cnt); > + } > + } > +} > + > uint32_t > mfex_study_traffic(struct dp_packet_batch *packets, > struct netdev_flow_key *keys, > @@ -76,10 +130,12 @@ mfex_study_traffic(struct dp_packet_batch *packets, > { > uint32_t hitmask = 0; > uint32_t mask = 0; > + uint32_t study_cnt_pkts; > struct dp_netdev_pmd_thread *pmd = pmd_handle; > struct dpif_miniflow_extract_impl *miniflow_funcs; > struct study_stats *stats = mfex_study_get_study_stats_ptr(); > miniflow_funcs = dpif_mfex_impl_info_get(); > + atomic_read_relaxed(&mfex_study_pkts_count, &study_cnt_pkts); > > /* Run traffic optimized miniflow_extract to collect the hitmask > * to be compared after certain packets have been hit to choose > @@ -93,7 +149,11 @@ mfex_study_traffic(struct dp_packet_batch *packets, > hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size, > in_port, pmd_handle, > md_is_valid); > - stats->impl_hitcount[i] += count_1bits(hitmask); > + if (!md_is_valid) { > + stats->impl_hitcount[i] += count_1bits(hitmask); > + } else { > + stats->impl_inner_hitcount[i] += count_1bits(hitmask); > + } > > /* If traffic is not classified then we dont overwrite the keys > * array in minfiflow implementations so its safe to create a > @@ -102,54 +162,34 @@ mfex_study_traffic(struct dp_packet_batch *packets, > mask |= hitmask; > } > > - stats->pkt_count += dp_packet_batch_size(packets); > - > /* Choose the best implementation after a minimum packets have been > * processed. > */ How about updating the comment to: Choose the best miniflow extract implementation to use for inner and outer packets separately. > - uint32_t study_cnt_pkts; > - atomic_read_relaxed(&mfex_study_pkts_count, &study_cnt_pkts); > - > - if (stats->pkt_count >= study_cnt_pkts) { > - uint32_t best_func_index = MFEX_IMPL_START_IDX; > - uint32_t max_hits = 0; > - for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > - if (stats->impl_hitcount[i] > max_hits) { > - max_hits = stats->impl_hitcount[i]; > - best_func_index = i; > - } > + if (!md_is_valid) { > + stats->pkt_count += dp_packet_batch_size(packets); > + > + if (stats->pkt_count >= study_cnt_pkts) { > + char name[] = "outer"; > + mfex_study_select_best_impls(miniflow_funcs, stats->pkt_count, > + stats->impl_hitcount, > + (void *)&pmd->miniflow_extract_opt, > + name); > + mfex_reset_stats(stats->impl_hitcount, &stats->pkt_count); > } > > - /* If 50% of the packets hit, enable the function. */ > - if (max_hits >= (mfex_study_pkts_count / 2)) { > - atomic_store_relaxed(&pmd->miniflow_extract_opt, > - miniflow_funcs[best_func_index].extract_func); > - VLOG_INFO("MFEX study chose impl %s: (hits %u/%u pkts)", > - miniflow_funcs[best_func_index].name, max_hits, > - stats->pkt_count); > - } else { > - /* Set the implementation to null for default miniflow. */ > - atomic_store_relaxed(&pmd->miniflow_extract_opt, > - miniflow_funcs[MFEX_IMPL_SCALAR].extract_func); > - VLOG_INFO("Not enough packets matched (%u/%u), disabling" > - " optimized MFEX.", max_hits, stats->pkt_count); > + } else { > + stats->pkt_inner_count += dp_packet_batch_size(packets); > + > + if (stats->pkt_inner_count >= study_cnt_pkts) { > + char name[] = "inner"; > + mfex_study_select_best_impls(miniflow_funcs, > + stats->pkt_inner_count, > + stats->impl_inner_hitcount, > + (void *)&pmd->miniflow_extract_inner_opt, > + name); > + mfex_reset_stats(stats->impl_inner_hitcount, > + &stats->pkt_inner_count); > } > - > - /* In debug mode show stats for all the counters. */ > - if (VLOG_IS_DBG_ENABLED()) { > - > - for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > - VLOG_DBG("MFEX study results for implementation %s:" > - " (hits %u/%u pkts)", miniflow_funcs[i].name, > - stats->impl_hitcount[i], stats->pkt_count); > - } > - } > - > - /* Reset stats so that study function can be called again > - * for next traffic type and optimal function ptr can be > - * chosen. > - */ > - memset(stats, 0, sizeof(struct study_stats)); > } > return mask; > } > -- > 2.25.1 Overall, I like the reuse of code. And I think the patch looks good. The above are all smaller comments. The only major thing is what I mentioned in patch 7/9 about seeing whether we can get the same behaviour without using the 'md_is_valid' bool at all.
Hi Cian, Please find the comments inline. > -----Original Message----- > From: Ferriter, Cian <cian.ferriter@intel.com> > Sent: Thursday, September 29, 2022 9:01 PM > To: Amber, Kumar <kumar.amber@intel.com>; ovs-dev@openvswitch.org > Cc: echaudro@redhat.com; i.maximets@ovn.org; Stokes, Ian > <ian.stokes@intel.com>; fbl@sysclose.org; Van Haaren, Harry > <harry.van.haaren@intel.com> > Subject: RE: [PATCH v5 8/9] mfex-study: Modify study func to select outer > and inner mfex funcs. > > Hi Amber, > > My comments on the patch are below inline. > > Thanks, > Cian > > > -----Original Message----- > > From: Amber, Kumar <kumar.amber@intel.com> > > Sent: Friday 26 August 2022 00:31 > > To: ovs-dev@openvswitch.org > > Cc: echaudro@redhat.com; i.maximets@ovn.org; Ferriter, Cian > > <cian.ferriter@intel.com>; Stokes, Ian <ian.stokes@intel.com>; > > fbl@sysclose.org; Van Haaren, Harry <harry.van.haaren@intel.com>; > > Amber, Kumar <kumar.amber@intel.com> > > Subject: [PATCH v5 8/9] mfex-study: Modify study func to select outer and > inner mfex funcs. > > > > The Mfex study function is split into outer and inner to allow for > > independent selection and studying of packets in outer and inner flows > > to different ISA optimized Mfexs. > > For the last line how about: > > flows to different ISA optimized miniflow extract implementations. > Fixed. > > > > Signed-off-by: Kumar Amber <kumar.amber@intel.com> > > --- > > lib/dpif-netdev-extract-study.c | 126 > > +++++++++++++++++++++----------- > > 1 file changed, 83 insertions(+), 43 deletions(-) > > > > diff --git a/lib/dpif-netdev-extract-study.c > > b/lib/dpif-netdev-extract-study.c index 71354cc4c..03d97c64e 100644 > > --- a/lib/dpif-netdev-extract-study.c > > +++ b/lib/dpif-netdev-extract-study.c > > @@ -30,7 +30,9 @@ static atomic_uint32_t mfex_study_pkts_count = > > MFEX_MAX_PKT_COUNT; > > /* Struct to hold miniflow study stats. */ struct study_stats { > > uint32_t pkt_count; > > + uint32_t pkt_inner_count; > > uint32_t impl_hitcount[MFEX_IMPL_MAX]; > > + uint32_t impl_inner_hitcount[MFEX_IMPL_MAX]; > > }; > > > > /* Define per thread data to hold the study stats. */ @@ -67,6 +69,58 > > @@ mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char *name) > > return -EINVAL; > > } > > > > + > > Remove the extra newline here. > Fixed. > > +static inline void > > +mfex_reset_stats(uint32_t *impls_hitcount, uint32_t *pkt_cnt) { > > + /* Reset stats so that study function can be called again > > + * for next traffic type and optimal function ptr can be > > + * chosen. > > + */ > > I prefer having this comment above the function rather than inside since it > describes what the whole function does. > I've reworded the comment to add 'the's and 'an'. How about this: > Reset stats so that the study function can be called again for the next traffic > type and an optimal function pointer can be chosen. > Fixed. > > + memset(impls_hitcount, 0, sizeof(uint32_t) * MFEX_IMPL_MAX); > > + *pkt_cnt = 0; > > +} > > + > > +static inline void > > +mfex_study_select_best_impls(struct dpif_miniflow_extract_impl > *mfex_funcs, > > + uint32_t pkt_cnt, uint32_t *impls_arr, > > + atomic_uintptr_t *pmd_func, char *name) > > +{ > > + > > + uint32_t best_func_index = MFEX_IMPL_START_IDX; > > + uint32_t max_hits = 0; > > + > > + for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > > + if (impls_arr[i] > max_hits) { > > + max_hits = impls_arr[i]; > > + best_func_index = i; > > + } > > + } > > These 2 braces are indented incorrectly. It should be like this: Fixed. > > + } > > + } > > > > + > > + /* If 50% of the packets hit, enable the function. */ > > How about this reword: > If at least 50% of the packets hit the implementation, enable that > implementation. > Fixed. > > + if (max_hits >= (mfex_study_pkts_count / 2)) { > > + atomic_store_relaxed(pmd_func, > > + (uintptr_t) mfex_funcs[best_func_index].extract_func); > > + VLOG_INFO("MFEX %s study chose impl %s: (hits %u/%u pkts)", > > + name, mfex_funcs[best_func_index].name, max_hits, > > + pkt_cnt); > > The 'pkt_cnt' doesn't have to be wrapped to the next line. > Fixed. > > + } else { > > + /* Set the implementation to null for default miniflow. */ > > + atomic_store_relaxed(pmd_func, > > + (uintptr_t) mfex_funcs[MFEX_IMPL_SCALAR].extract_func); > > + VLOG_INFO("Not enough packets matched (%u/%u), disabling" > > + " optimized MFEX.", max_hits, pkt_cnt); > > + } > > + > > + /* In debug mode show stats for all the counters. */ > > + if (VLOG_IS_DBG_ENABLED()) { > > + for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > > + VLOG_DBG("MFEX study results for implementation %s:" > > + " (hits %u/%u pkts)", mfex_funcs[i].name, > > + impls_arr[i], pkt_cnt); > > + } > > + } > > +} > > + > > uint32_t > > mfex_study_traffic(struct dp_packet_batch *packets, > > struct netdev_flow_key *keys, @@ -76,10 +130,12 @@ > > mfex_study_traffic(struct dp_packet_batch *packets, { > > uint32_t hitmask = 0; > > uint32_t mask = 0; > > + uint32_t study_cnt_pkts; > > struct dp_netdev_pmd_thread *pmd = pmd_handle; > > struct dpif_miniflow_extract_impl *miniflow_funcs; > > struct study_stats *stats = mfex_study_get_study_stats_ptr(); > > miniflow_funcs = dpif_mfex_impl_info_get(); > > + atomic_read_relaxed(&mfex_study_pkts_count, &study_cnt_pkts); > > > > /* Run traffic optimized miniflow_extract to collect the hitmask > > * to be compared after certain packets have been hit to choose > > @@ -93,7 +149,11 @@ mfex_study_traffic(struct dp_packet_batch > *packets, > > hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size, > > in_port, pmd_handle, > > md_is_valid); > > - stats->impl_hitcount[i] += count_1bits(hitmask); > > + if (!md_is_valid) { > > + stats->impl_hitcount[i] += count_1bits(hitmask); > > + } else { > > + stats->impl_inner_hitcount[i] += count_1bits(hitmask); > > + } > > > > /* If traffic is not classified then we dont overwrite the keys > > * array in minfiflow implementations so its safe to create a > > @@ -102,54 +162,34 @@ mfex_study_traffic(struct dp_packet_batch > *packets, > > mask |= hitmask; > > } > > > > - stats->pkt_count += dp_packet_batch_size(packets); > > - > > /* Choose the best implementation after a minimum packets have been > > * processed. > > */ > > How about updating the comment to: > Choose the best miniflow extract implementation to use for inner and outer > packets separately. > Fixed. > > - uint32_t study_cnt_pkts; > > - atomic_read_relaxed(&mfex_study_pkts_count, &study_cnt_pkts); > > - > > - if (stats->pkt_count >= study_cnt_pkts) { > > - uint32_t best_func_index = MFEX_IMPL_START_IDX; > > - uint32_t max_hits = 0; > > - for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > > - if (stats->impl_hitcount[i] > max_hits) { > > - max_hits = stats->impl_hitcount[i]; > > - best_func_index = i; > > - } > > + if (!md_is_valid) { > > + stats->pkt_count += dp_packet_batch_size(packets); > > + > > + if (stats->pkt_count >= study_cnt_pkts) { > > + char name[] = "outer"; > > + mfex_study_select_best_impls(miniflow_funcs, stats->pkt_count, > > + stats->impl_hitcount, > > + (void *)&pmd->miniflow_extract_opt, > > + name); > > + mfex_reset_stats(stats->impl_hitcount, > > + &stats->pkt_count); > > } > > > > - /* If 50% of the packets hit, enable the function. */ > > - if (max_hits >= (mfex_study_pkts_count / 2)) { > > - atomic_store_relaxed(&pmd->miniflow_extract_opt, > > - miniflow_funcs[best_func_index].extract_func); > > - VLOG_INFO("MFEX study chose impl %s: (hits %u/%u pkts)", > > - miniflow_funcs[best_func_index].name, max_hits, > > - stats->pkt_count); > > - } else { > > - /* Set the implementation to null for default miniflow. */ > > - atomic_store_relaxed(&pmd->miniflow_extract_opt, > > - miniflow_funcs[MFEX_IMPL_SCALAR].extract_func); > > - VLOG_INFO("Not enough packets matched (%u/%u), disabling" > > - " optimized MFEX.", max_hits, stats->pkt_count); > > + } else { > > + stats->pkt_inner_count += dp_packet_batch_size(packets); > > + > > + if (stats->pkt_inner_count >= study_cnt_pkts) { > > + char name[] = "inner"; > > + mfex_study_select_best_impls(miniflow_funcs, > > + stats->pkt_inner_count, > > + stats->impl_inner_hitcount, > > + (void *)&pmd->miniflow_extract_inner_opt, > > + name); > > + mfex_reset_stats(stats->impl_inner_hitcount, > > + &stats->pkt_inner_count); > > } > > - > > - /* In debug mode show stats for all the counters. */ > > - if (VLOG_IS_DBG_ENABLED()) { > > - > > - for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { > > - VLOG_DBG("MFEX study results for implementation %s:" > > - " (hits %u/%u pkts)", miniflow_funcs[i].name, > > - stats->impl_hitcount[i], stats->pkt_count); > > - } > > - } > > - > > - /* Reset stats so that study function can be called again > > - * for next traffic type and optimal function ptr can be > > - * chosen. > > - */ > > - memset(stats, 0, sizeof(struct study_stats)); > > } > > return mask; > > } > > -- > > 2.25.1 > > Overall, I like the reuse of code. And I think the patch looks good. The above > are all smaller comments. The only major thing is what I mentioned in patch > 7/9 about seeing whether we can get the same behaviour without using the > 'md_is_valid' bool at all. Thanks, Md_is_valid is a better solution. Regards Amber
diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c index 71354cc4c..03d97c64e 100644 --- a/lib/dpif-netdev-extract-study.c +++ b/lib/dpif-netdev-extract-study.c @@ -30,7 +30,9 @@ static atomic_uint32_t mfex_study_pkts_count = MFEX_MAX_PKT_COUNT; /* Struct to hold miniflow study stats. */ struct study_stats { uint32_t pkt_count; + uint32_t pkt_inner_count; uint32_t impl_hitcount[MFEX_IMPL_MAX]; + uint32_t impl_inner_hitcount[MFEX_IMPL_MAX]; }; /* Define per thread data to hold the study stats. */ @@ -67,6 +69,58 @@ mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char *name) return -EINVAL; } + +static inline void +mfex_reset_stats(uint32_t *impls_hitcount, uint32_t *pkt_cnt) { + /* Reset stats so that study function can be called again + * for next traffic type and optimal function ptr can be + * chosen. + */ + memset(impls_hitcount, 0, sizeof(uint32_t) * MFEX_IMPL_MAX); + *pkt_cnt = 0; +} + +static inline void +mfex_study_select_best_impls(struct dpif_miniflow_extract_impl *mfex_funcs, + uint32_t pkt_cnt, uint32_t *impls_arr, + atomic_uintptr_t *pmd_func, char *name) +{ + + uint32_t best_func_index = MFEX_IMPL_START_IDX; + uint32_t max_hits = 0; + + for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { + if (impls_arr[i] > max_hits) { + max_hits = impls_arr[i]; + best_func_index = i; + } + } + + /* If 50% of the packets hit, enable the function. */ + if (max_hits >= (mfex_study_pkts_count / 2)) { + atomic_store_relaxed(pmd_func, + (uintptr_t) mfex_funcs[best_func_index].extract_func); + VLOG_INFO("MFEX %s study chose impl %s: (hits %u/%u pkts)", + name, mfex_funcs[best_func_index].name, max_hits, + pkt_cnt); + } else { + /* Set the implementation to null for default miniflow. */ + atomic_store_relaxed(pmd_func, + (uintptr_t) mfex_funcs[MFEX_IMPL_SCALAR].extract_func); + VLOG_INFO("Not enough packets matched (%u/%u), disabling" + " optimized MFEX.", max_hits, pkt_cnt); + } + + /* In debug mode show stats for all the counters. */ + if (VLOG_IS_DBG_ENABLED()) { + for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { + VLOG_DBG("MFEX study results for implementation %s:" + " (hits %u/%u pkts)", mfex_funcs[i].name, + impls_arr[i], pkt_cnt); + } + } +} + uint32_t mfex_study_traffic(struct dp_packet_batch *packets, struct netdev_flow_key *keys, @@ -76,10 +130,12 @@ mfex_study_traffic(struct dp_packet_batch *packets, { uint32_t hitmask = 0; uint32_t mask = 0; + uint32_t study_cnt_pkts; struct dp_netdev_pmd_thread *pmd = pmd_handle; struct dpif_miniflow_extract_impl *miniflow_funcs; struct study_stats *stats = mfex_study_get_study_stats_ptr(); miniflow_funcs = dpif_mfex_impl_info_get(); + atomic_read_relaxed(&mfex_study_pkts_count, &study_cnt_pkts); /* Run traffic optimized miniflow_extract to collect the hitmask * to be compared after certain packets have been hit to choose @@ -93,7 +149,11 @@ mfex_study_traffic(struct dp_packet_batch *packets, hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size, in_port, pmd_handle, md_is_valid); - stats->impl_hitcount[i] += count_1bits(hitmask); + if (!md_is_valid) { + stats->impl_hitcount[i] += count_1bits(hitmask); + } else { + stats->impl_inner_hitcount[i] += count_1bits(hitmask); + } /* If traffic is not classified then we dont overwrite the keys * array in minfiflow implementations so its safe to create a @@ -102,54 +162,34 @@ mfex_study_traffic(struct dp_packet_batch *packets, mask |= hitmask; } - stats->pkt_count += dp_packet_batch_size(packets); - /* Choose the best implementation after a minimum packets have been * processed. */ - uint32_t study_cnt_pkts; - atomic_read_relaxed(&mfex_study_pkts_count, &study_cnt_pkts); - - if (stats->pkt_count >= study_cnt_pkts) { - uint32_t best_func_index = MFEX_IMPL_START_IDX; - uint32_t max_hits = 0; - for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { - if (stats->impl_hitcount[i] > max_hits) { - max_hits = stats->impl_hitcount[i]; - best_func_index = i; - } + if (!md_is_valid) { + stats->pkt_count += dp_packet_batch_size(packets); + + if (stats->pkt_count >= study_cnt_pkts) { + char name[] = "outer"; + mfex_study_select_best_impls(miniflow_funcs, stats->pkt_count, + stats->impl_hitcount, + (void *)&pmd->miniflow_extract_opt, + name); + mfex_reset_stats(stats->impl_hitcount, &stats->pkt_count); } - /* If 50% of the packets hit, enable the function. */ - if (max_hits >= (mfex_study_pkts_count / 2)) { - atomic_store_relaxed(&pmd->miniflow_extract_opt, - miniflow_funcs[best_func_index].extract_func); - VLOG_INFO("MFEX study chose impl %s: (hits %u/%u pkts)", - miniflow_funcs[best_func_index].name, max_hits, - stats->pkt_count); - } else { - /* Set the implementation to null for default miniflow. */ - atomic_store_relaxed(&pmd->miniflow_extract_opt, - miniflow_funcs[MFEX_IMPL_SCALAR].extract_func); - VLOG_INFO("Not enough packets matched (%u/%u), disabling" - " optimized MFEX.", max_hits, stats->pkt_count); + } else { + stats->pkt_inner_count += dp_packet_batch_size(packets); + + if (stats->pkt_inner_count >= study_cnt_pkts) { + char name[] = "inner"; + mfex_study_select_best_impls(miniflow_funcs, + stats->pkt_inner_count, + stats->impl_inner_hitcount, + (void *)&pmd->miniflow_extract_inner_opt, + name); + mfex_reset_stats(stats->impl_inner_hitcount, + &stats->pkt_inner_count); } - - /* In debug mode show stats for all the counters. */ - if (VLOG_IS_DBG_ENABLED()) { - - for (int i = MFEX_IMPL_START_IDX; i < MFEX_IMPL_MAX; i++) { - VLOG_DBG("MFEX study results for implementation %s:" - " (hits %u/%u pkts)", miniflow_funcs[i].name, - stats->impl_hitcount[i], stats->pkt_count); - } - } - - /* Reset stats so that study function can be called again - * for next traffic type and optimal function ptr can be - * chosen. - */ - memset(stats, 0, sizeof(struct study_stats)); } return mask; }
The Mfex study function is split into outer and inner to allow for independent selection and studying of packets in outer and inner flows to different ISA optimized Mfexs. Signed-off-by: Kumar Amber <kumar.amber@intel.com> --- lib/dpif-netdev-extract-study.c | 126 +++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 43 deletions(-)