
[net-next,v3,2/2] net: openvswitch: make masks cache size configurable

Message ID: 159602917888.937753.17718463910615059058.stgit@ebuild
State: Changes Requested
Delegated to: David Miller
Series: net: openvswitch: masks cache enhancements

Commit Message

Eelco Chaudron July 29, 2020, 1:27 p.m. UTC
This patch makes the masks cache size configurable or, with
a size of 0, disables it.

Reviewed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
Changes in v3:
 - Use is_power_of_2() function
 - Use array_size() function
 - Fix remaining sparse errors

Changes in v2:
 - Fix sparse warnings
 - Fix netlink policy items reported by Florian Westphal

 include/uapi/linux/openvswitch.h |    1 
 net/openvswitch/datapath.c       |   14 +++++
 net/openvswitch/flow_table.c     |   97 +++++++++++++++++++++++++++++++++-----
 net/openvswitch/flow_table.h     |   10 ++++
 4 files changed, 108 insertions(+), 14 deletions(-)
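
For context, here is a minimal, hypothetical userspace sketch (not part of
this patch) showing how the new OVS_DP_ATTR_MASKS_CACHE_SIZE attribute could
be set through the generic netlink OVS_DP_CMD_SET command using libnl-3. The
datapath name "ovs-system" and the size value are illustrative only, and
error handling is trimmed for brevity.

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/openvswitch.h>

int set_masks_cache_size(const char *dp_name, uint32_t size)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct ovs_header *ovs_hdr;
	struct nl_msg *msg;
	int family, err;

	if (!sk)
		return -1;
	genl_connect(sk);
	family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);

	msg = nlmsg_alloc();
	ovs_hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
			      sizeof(*ovs_hdr), NLM_F_REQUEST,
			      OVS_DP_CMD_SET, OVS_DATAPATH_VERSION);
	ovs_hdr->dp_ifindex = 0;	/* look the datapath up by name */
	nla_put_string(msg, OVS_DP_ATTR_NAME, dp_name);

	/* Must be 0 (disable) or a power of 2; the kernel policy also
	 * bounds it by PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry).
	 */
	nla_put_u32(msg, OVS_DP_ATTR_MASKS_CACHE_SIZE, size);

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0 ? -1 : 0;
}

Usage would be e.g. set_masks_cache_size("ovs-system", 512).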

Comments

Tonghao Zhang July 30, 2020, 5:07 a.m. UTC | #1
On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com> wrote:
>
> This patch makes the masks cache size configurable or, with
> a size of 0, disables it.
>
> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
> Changes in v3:
>  - Use is_power_of_2() function
>  - Use array_size() function
>  - Fix remaining sparse errors
>
> Changes in v2:
>  - Fix sparse warnings
>  - Fix netlink policy items reported by Florian Westphal
>
>  include/uapi/linux/openvswitch.h |    1
>  net/openvswitch/datapath.c       |   14 +++++
>  net/openvswitch/flow_table.c     |   97 +++++++++++++++++++++++++++++++++-----
>  net/openvswitch/flow_table.h     |   10 ++++
>  4 files changed, 108 insertions(+), 14 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 7cb76e5ca7cf..8300cc29dec8 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct ovs_dp_megaflow_stats */
>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>         OVS_DP_ATTR_PAD,
> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>         __OVS_DP_ATTR_MAX
>  };
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index a54df1fe3ec4..114b2ddb8037 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
>         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
> +       msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
>
>         return msgsize;
>  }
> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
>                 goto nla_put_failure;
>
> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
> +               goto nla_put_failure;
> +
>         genlmsg_end(skb, ovs_header);
>         return 0;
>
> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  #endif
>         }
>
> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
> +               u32 cache_size;
> +
> +               cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> +               ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
Should we return an error code if we can't change the "mask_cache"
size? For example, -EINVAL or -ENOMEM.
> +       }
> +
>         dp->user_features = user_features;
>
>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> @@ -1894,6 +1906,8 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
> +               PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
>  };
>
>  static const struct genl_ops dp_datapath_genl_ops[] = {
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index a5912ea05352..5280aeeef628 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -38,8 +38,8 @@
>  #define MASK_ARRAY_SIZE_MIN    16
>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>
> +#define MC_DEFAULT_HASH_ENTRIES        256
>  #define MC_HASH_SHIFT          8
> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
>
>  static struct kmem_cache *flow_cache;
> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>         }
>  }
>
> +static void __mask_cache_destroy(struct mask_cache *mc)
> +{
> +       if (mc->mask_cache)
> +               free_percpu(mc->mask_cache);
free_percpu() on NULL is safe; we can remove the "if".
> +       kfree(mc);
> +}
> +
> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
> +{
> +       struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
> +
> +       __mask_cache_destroy(mc);
> +}
> +
> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
> +{
> +       struct mask_cache_entry __percpu *cache = NULL;
> +       struct mask_cache *new;
> +
> +       /* Only allow size to be 0, or a power of 2, and does not exceed
> +        * percpu allocation size.
> +        */
> +       if ((!is_power_of_2(size) && size != 0) ||
> +           (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
> +               return NULL;
> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
> +       if (!new)
> +               return NULL;
> +
> +       new->cache_size = size;
> +       if (new->cache_size > 0) {
> +               cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
> +                                                 new->cache_size),
> +                                      __alignof__(struct mask_cache_entry));
> +               if (!cache) {
> +                       kfree(new);
> +                       return NULL;
> +               }
> +       }
> +
> +       new->mask_cache = cache;
> +       return new;
> +}
> +
> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
> +{
> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> +       struct mask_cache *new;
> +
> +       if (size == mc->cache_size)
> +               return;
> +
> +       new = tbl_mask_cache_alloc(size);
> +       if (!new)
> +               return;
> +
> +       rcu_assign_pointer(table->mask_cache, new);
> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> +}
> +
>  int ovs_flow_tbl_init(struct flow_table *table)
>  {
>         struct table_instance *ti, *ufid_ti;
> +       struct mask_cache *mc;
>         struct mask_array *ma;
>
> -       table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
> -                                          MC_HASH_ENTRIES,
> -                                          __alignof__(struct mask_cache_entry));
> -       if (!table->mask_cache)
> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
> +       if (!mc)
>                 return -ENOMEM;
>
>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>         rcu_assign_pointer(table->ti, ti);
>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>         rcu_assign_pointer(table->mask_array, ma);
> +       rcu_assign_pointer(table->mask_cache, mc);
>         table->last_rehash = jiffies;
>         table->count = 0;
>         table->ufid_count = 0;
> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  free_mask_array:
>         __mask_array_destroy(ma);
>  free_mask_cache:
> -       free_percpu(table->mask_cache);
> +       __mask_cache_destroy(mc);
>         return -ENOMEM;
>  }
>
> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  {
>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>         struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> +       struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
>
> -       free_percpu(table->mask_cache);
> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>         table_instance_destroy(table, ti, ufid_ti, false);
>  }
>
> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>                                           u32 *n_mask_hit,
>                                           u32 *n_cache_hit)
>  {
> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>         struct table_instance *ti = rcu_dereference(tbl->ti);
>         struct mask_cache_entry *entries, *ce;
> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>
>         *n_mask_hit = 0;
>         *n_cache_hit = 0;
> -       if (unlikely(!skb_hash)) {
> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>                 u32 mask_index = 0;
>                 u32 cache = 0;
>
> @@ -749,11 +813,11 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>
>         ce = NULL;
>         hash = skb_hash;
> -       entries = this_cpu_ptr(tbl->mask_cache);
> +       entries = this_cpu_ptr(mc->mask_cache);
>
>         /* Find the cache entry 'ce' to operate on. */
>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> -               int index = hash & (MC_HASH_ENTRIES - 1);
> +               int index = hash & (mc->cache_size - 1);
>                 struct mask_cache_entry *e;
>
>                 e = &entries[index];
> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
>         return READ_ONCE(ma->count);
>  }
>
> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
> +{
> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> +
> +       return READ_ONCE(mc->cache_size);
> +}
> +
>  static struct table_instance *table_instance_expand(struct table_instance *ti,
>                                                     bool ufid)
>  {
> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>         for (i = 0; i < masks_entries; i++) {
>                 int index = masks_and_count[i].index;
>
> -               new->masks[new->count++] =
> -                       rcu_dereference_ovsl(ma->masks[index]);
> +               if (ovsl_dereference(ma->masks[index]))
> +                       new->masks[new->count++] = ma->masks[index];
>         }
>
>         rcu_assign_pointer(table->mask_array, new);
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index 325e939371d8..f2dba952db2f 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>         u32 mask_index;
>  };
>
> +struct mask_cache {
> +       struct rcu_head rcu;
> +       u32 cache_size;  /* Must be ^2 value. */
> +       struct mask_cache_entry __percpu *mask_cache;
> +};
> +
>  struct mask_count {
>         int index;
>         u64 counter;
> @@ -53,7 +59,7 @@ struct table_instance {
>  struct flow_table {
>         struct table_instance __rcu *ti;
>         struct table_instance __rcu *ufid_ti;
> -       struct mask_cache_entry __percpu *mask_cache;
> +       struct mask_cache __rcu *mask_cache;
>         struct mask_array __rcu *mask_array;
>         unsigned long last_rehash;
>         unsigned int count;
> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
>                         const struct sw_flow_mask *mask);
>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size);
>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
>                                        u32 *bucket, u32 *idx);
>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>
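
The power-of-2 restriction discussed above follows directly from the lookup
path: the patch indexes the per-CPU cache with hash & (mc->cache_size - 1),
which only equals hash % cache_size when the size is a power of two. Below is
a small, illustrative userspace model of the MC_HASH_SEGS segment walk (the
constants mirror those in flow_table.c; probe_slots is a made-up name):

#include <stdint.h>
#include <stdio.h>

#define MC_HASH_SHIFT	8
#define MC_HASH_SEGS	((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)	/* 4 */

/* Model of how one skb_hash probes up to MC_HASH_SEGS cache slots. */
static void probe_slots(uint32_t skb_hash, uint32_t cache_size)
{
	uint32_t hash = skb_hash;
	unsigned int seg;

	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		/* Works as a modulo only because cache_size is a power
		 * of 2; size 0 is handled earlier by the bypass path.
		 */
		uint32_t index = hash & (cache_size - 1);

		printf("seg %u -> slot %u\n", seg, index);
		hash >>= MC_HASH_SHIFT;	/* next 8-bit segment */
	}
}

int main(void)
{
	probe_slots(0xdeadbeef, 256);	/* MC_DEFAULT_HASH_ENTRIES */
	return 0;
}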
Eelco Chaudron July 30, 2020, 12:33 p.m. UTC | #2
Thanks Tonghao for the review, see comments inline below!

If I get no more reviews by the end of the week I’ll send out a v4.

//Eelco


On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:

> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com> 
> wrote:
>>
>> This patch makes the masks cache size configurable or, with
>> a size of 0, disables it.
>>
>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>> Changes in v3:
>>  - Use is_power_of_2() function
>>  - Use array_size() function
>>  - Fix remaining sparse errors
>>
>> Changes in v2:
>>  - Fix sparse warnings
>>  - Fix netlink policy items reported by Florian Westphal
>>
>>  include/uapi/linux/openvswitch.h |    1
>>  net/openvswitch/datapath.c       |   14 +++++
>>  net/openvswitch/flow_table.c     |   97 
>> +++++++++++++++++++++++++++++++++-----
>>  net/openvswitch/flow_table.h     |   10 ++++
>>  4 files changed, 108 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/uapi/linux/openvswitch.h 
>> b/include/uapi/linux/openvswitch.h
>> index 7cb76e5ca7cf..8300cc29dec8 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct 
>> ovs_dp_megaflow_stats */
>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>>         OVS_DP_ATTR_PAD,
>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>         __OVS_DP_ATTR_MAX
>>  };
>>
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index a54df1fe3ec4..114b2ddb8037 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
>>         msgsize += nla_total_size_64bit(sizeof(struct 
>> ovs_dp_megaflow_stats));
>>         msgsize += nla_total_size(sizeof(u32)); /* 
>> OVS_DP_ATTR_USER_FEATURES */
>> +       msgsize += nla_total_size(sizeof(u32)); /* 
>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>
>>         return msgsize;
>>  }
>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct 
>> datapath *dp, struct sk_buff *skb,
>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, 
>> dp->user_features))
>>                 goto nla_put_failure;
>>
>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
>> +               goto nla_put_failure;
>> +
>>         genlmsg_end(skb, ovs_header);
>>         return 0;
>>
>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp, 
>> struct nlattr *a[])
>>  #endif
>>         }
>>
>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>> +               u32 cache_size;
>> +
>> +               cache_size = 
>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>> +               ovs_flow_tbl_masks_cache_resize(&dp->table, 
>> cache_size);
> Should we return an error code if we can't change the "mask_cache"
> size? For example, -EINVAL or -ENOMEM.

Initially, I did not do this because the new value is reported, and on
failure the old value is shown.
However, thinking about it again, it makes more sense to return an error.
I will send a v4 with the following change:

-void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
+int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
  {
         struct mask_cache *mc = rcu_dereference(table->mask_cache);
         struct mask_cache *new;

         if (size == mc->cache_size)
-               return;
+               return 0;
+
+       if (!is_power_of_2(size) && size != 0)
+               return -EINVAL;

         new = tbl_mask_cache_alloc(size);
         if (!new)
-               return;
+               return -ENOMEM;

         rcu_assign_pointer(table->mask_cache, new);
         call_rcu(&mc->rcu, mask_cache_rcu_cb);
+
+       return 0;
  }

>> +       }
>> +
>>         dp->user_features = user_features;
>>
>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>> @@ -1894,6 +1906,8 @@ static const struct nla_policy 
>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 
>> IFNAMSIZ - 1 },
>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 
>> 0,
>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct 
>> mask_cache_entry)),
>>  };
>>
>>  static const struct genl_ops dp_datapath_genl_ops[] = {
>> diff --git a/net/openvswitch/flow_table.c 
>> b/net/openvswitch/flow_table.c
>> index a5912ea05352..5280aeeef628 100644
>> --- a/net/openvswitch/flow_table.c
>> +++ b/net/openvswitch/flow_table.c
>> @@ -38,8 +38,8 @@
>>  #define MASK_ARRAY_SIZE_MIN    16
>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>>
>> +#define MC_DEFAULT_HASH_ENTRIES        256
>>  #define MC_HASH_SHIFT          8
>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) / 
>> MC_HASH_SHIFT)
>>
>>  static struct kmem_cache *flow_cache;
>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table 
>> *tbl, struct sw_flow_mask *mask)
>>         }
>>  }
>>
>> +static void __mask_cache_destroy(struct mask_cache *mc)
>> +{
>> +       if (mc->mask_cache)
>> +               free_percpu(mc->mask_cache);
> free_percpu() on NULL is safe; we can remove the "if".

Makes sense, will remove the if() check.

>> +       kfree(mc);
>> +}
>> +
>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
>> +{
>> +       struct mask_cache *mc = container_of(rcu, struct mask_cache, 
>> rcu);
>> +
>> +       __mask_cache_destroy(mc);
>> +}
>> +
>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>> +{
>> +       struct mask_cache_entry __percpu *cache = NULL;
>> +       struct mask_cache *new;
>> +
>> +       /* Only allow size to be 0, or a power of 2, and does not 
>> exceed
>> +        * percpu allocation size.
>> +        */
>> +       if ((!is_power_of_2(size) && size != 0) ||
>> +           (size * sizeof(struct mask_cache_entry)) > 
>> PCPU_MIN_UNIT_SIZE)
>> +               return NULL;
>> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
>> +       if (!new)
>> +               return NULL;
>> +
>> +       new->cache_size = size;
>> +       if (new->cache_size > 0) {
>> +               cache = __alloc_percpu(array_size(sizeof(struct 
>> mask_cache_entry),
>> +                                                 new->cache_size),
>> +                                      __alignof__(struct 
>> mask_cache_entry));
>> +               if (!cache) {
>> +                       kfree(new);
>> +                       return NULL;
>> +               }
>> +       }
>> +
>> +       new->mask_cache = cache;
>> +       return new;
>> +}
>> +
>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 
>> size)
>> +{
>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +       struct mask_cache *new;
>> +
>> +       if (size == mc->cache_size)
>> +               return;
>> +
>> +       new = tbl_mask_cache_alloc(size);
>> +       if (!new)
>> +               return;
>> +
>> +       rcu_assign_pointer(table->mask_cache, new);
>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +}
>> +
>>  int ovs_flow_tbl_init(struct flow_table *table)
>>  {
>>         struct table_instance *ti, *ufid_ti;
>> +       struct mask_cache *mc;
>>         struct mask_array *ma;
>>
>> -       table->mask_cache = __alloc_percpu(sizeof(struct 
>> mask_cache_entry) *
>> -                                          MC_HASH_ENTRIES,
>> -                                          __alignof__(struct 
>> mask_cache_entry));
>> -       if (!table->mask_cache)
>> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>> +       if (!mc)
>>                 return -ENOMEM;
>>
>>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>         rcu_assign_pointer(table->ti, ti);
>>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>>         rcu_assign_pointer(table->mask_array, ma);
>> +       rcu_assign_pointer(table->mask_cache, mc);
>>         table->last_rehash = jiffies;
>>         table->count = 0;
>>         table->ufid_count = 0;
>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>  free_mask_array:
>>         __mask_array_destroy(ma);
>>  free_mask_cache:
>> -       free_percpu(table->mask_cache);
>> +       __mask_cache_destroy(mc);
>>         return -ENOMEM;
>>  }
>>
>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table 
>> *table)
>>  {
>>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>>         struct table_instance *ufid_ti = 
>> rcu_dereference_raw(table->ufid_ti);
>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +       struct mask_array *ma = 
>> rcu_dereference_ovsl(table->mask_array);
>>
>> -       free_percpu(table->mask_cache);
>> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>>         table_instance_destroy(table, ti, ufid_ti, false);
>>  }
>>
>> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct 
>> flow_table *tbl,
>>                                           u32 *n_mask_hit,
>>                                           u32 *n_cache_hit)
>>  {
>> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>>         struct table_instance *ti = rcu_dereference(tbl->ti);
>>         struct mask_cache_entry *entries, *ce;
>> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct 
>> flow_table *tbl,
>>
>>         *n_mask_hit = 0;
>>         *n_cache_hit = 0;
>> -       if (unlikely(!skb_hash)) {
>> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>>                 u32 mask_index = 0;
>>                 u32 cache = 0;
>>
>> @@ -749,11 +813,11 @@ struct sw_flow 
>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>>         ce = NULL;
>>         hash = skb_hash;
>> -       entries = this_cpu_ptr(tbl->mask_cache);
>> +       entries = this_cpu_ptr(mc->mask_cache);
>>
>>         /* Find the cache entry 'ce' to operate on. */
>>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
>> -               int index = hash & (MC_HASH_ENTRIES - 1);
>> +               int index = hash & (mc->cache_size - 1);
>>                 struct mask_cache_entry *e;
>>
>>                 e = &entries[index];
>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct 
>> flow_table *table)
>>         return READ_ONCE(ma->count);
>>  }
>>
>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>> +{
>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +
>> +       return READ_ONCE(mc->cache_size);
>> +}
>> +
>>  static struct table_instance *table_instance_expand(struct 
>> table_instance *ti,
>>                                                     bool ufid)
>>  {
>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table 
>> *table)
>>         for (i = 0; i < masks_entries; i++) {
>>                 int index = masks_and_count[i].index;
>>
>> -               new->masks[new->count++] =
>> -                       rcu_dereference_ovsl(ma->masks[index]);
>> +               if (ovsl_dereference(ma->masks[index]))
>> +                       new->masks[new->count++] = ma->masks[index];
>>         }
>>
>>         rcu_assign_pointer(table->mask_array, new);
>> diff --git a/net/openvswitch/flow_table.h 
>> b/net/openvswitch/flow_table.h
>> index 325e939371d8..f2dba952db2f 100644
>> --- a/net/openvswitch/flow_table.h
>> +++ b/net/openvswitch/flow_table.h
>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>>         u32 mask_index;
>>  };
>>
>> +struct mask_cache {
>> +       struct rcu_head rcu;
>> +       u32 cache_size;  /* Must be ^2 value. */
>> +       struct mask_cache_entry __percpu *mask_cache;
>> +};
>> +
>>  struct mask_count {
>>         int index;
>>         u64 counter;
>> @@ -53,7 +59,7 @@ struct table_instance {
>>  struct flow_table {
>>         struct table_instance __rcu *ti;
>>         struct table_instance __rcu *ufid_ti;
>> -       struct mask_cache_entry __percpu *mask_cache;
>> +       struct mask_cache __rcu *mask_cache;
>>         struct mask_array __rcu *mask_array;
>>         unsigned long last_rehash;
>>         unsigned int count;
>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, 
>> struct sw_flow *flow,
>>                         const struct sw_flow_mask *mask);
>>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow 
>> *flow);
>>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
>> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 
>> size);
>>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
>>                                        u32 *bucket, u32 *idx);
>>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>>
>
>
> -- 
> Best regards, Tonghao
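
Folding the delta Eelco posted above into the v3 function, the resize helper
would presumably read as follows. This is a sketch assembled from the
fragments in this thread, not the final v4 patch:

int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
{
	struct mask_cache *mc = rcu_dereference(table->mask_cache);
	struct mask_cache *new;

	if (size == mc->cache_size)
		return 0;

	/* Reject sizes the allocator would refuse anyway, so userspace
	 * gets -EINVAL instead of a silent failure.
	 */
	if (!is_power_of_2(size) && size != 0)
		return -EINVAL;

	new = tbl_mask_cache_alloc(size);
	if (!new)
		return -ENOMEM;

	/* Publish the new cache first; lookups still holding the old
	 * pointer finish under RCU before the old cache is freed.
	 */
	rcu_assign_pointer(table->mask_cache, new);
	call_rcu(&mc->rcu, mask_cache_rcu_cb);

	return 0;
}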
Tonghao Zhang July 30, 2020, 1:25 p.m. UTC | #3
On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com> wrote:
>
> Thanks Tonghao for the review, see comments inline below!
>
> If I get no more reviews by the end of the week I’ll send out a v4.
>
> //Eelco
>
>
> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
>
> > On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com>
> > wrote:
> >>
> >> This patch makes the masks cache size configurable or, with
> >> a size of 0, disables it.
> >>
> >> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> >> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >> ---
> >> Changes in v3:
> >>  - Use is_power_of_2() function
> >>  - Use array_size() function
> >>  - Fix remaining sparse errors
> >>
> >> Changes in v2:
> >>  - Fix sparse warnings
> >>  - Fix netlink policy items reported by Florian Westphal
> >>
> >>  include/uapi/linux/openvswitch.h |    1
> >>  net/openvswitch/datapath.c       |   14 +++++
> >>  net/openvswitch/flow_table.c     |   97
> >> +++++++++++++++++++++++++++++++++-----
> >>  net/openvswitch/flow_table.h     |   10 ++++
> >>  4 files changed, 108 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/include/uapi/linux/openvswitch.h
> >> b/include/uapi/linux/openvswitch.h
> >> index 7cb76e5ca7cf..8300cc29dec8 100644
> >> --- a/include/uapi/linux/openvswitch.h
> >> +++ b/include/uapi/linux/openvswitch.h
> >> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
> >>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
> >> ovs_dp_megaflow_stats */
> >>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
> >>         OVS_DP_ATTR_PAD,
> >> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >>         __OVS_DP_ATTR_MAX
> >>  };
> >>
> >> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> >> index a54df1fe3ec4..114b2ddb8037 100644
> >> --- a/net/openvswitch/datapath.c
> >> +++ b/net/openvswitch/datapath.c
> >> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
> >>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
> >>         msgsize += nla_total_size_64bit(sizeof(struct
> >> ovs_dp_megaflow_stats));
> >>         msgsize += nla_total_size(sizeof(u32)); /*
> >> OVS_DP_ATTR_USER_FEATURES */
> >> +       msgsize += nla_total_size(sizeof(u32)); /*
> >> OVS_DP_ATTR_MASKS_CACHE_SIZE */
> >>
> >>         return msgsize;
> >>  }
> >> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
> >> datapath *dp, struct sk_buff *skb,
> >>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
> >> dp->user_features))
> >>                 goto nla_put_failure;
> >>
> >> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
> >> +               goto nla_put_failure;
> >> +
> >>         genlmsg_end(skb, ovs_header);
> >>         return 0;
> >>
> >> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp,
> >> struct nlattr *a[])
> >>  #endif
> >>         }
> >>
> >> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
> >> +               u32 cache_size;
> >> +
> >> +               cache_size =
> >> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> >> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
> >> cache_size);
> > Should we return an error code if we can't change the "mask_cache"
> > size? For example, -EINVAL or -ENOMEM.
>
> Initially, I did not do this because the new value is reported, and on
> failure the old value is shown.
> However, thinking about it again, it makes more sense to return an error.
> I will send a v4 with the following change:
>
> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>   {
>          struct mask_cache *mc = rcu_dereference(table->mask_cache);
>          struct mask_cache *new;
>
>          if (size == mc->cache_size)
> -               return;
> +               return 0;
> +
> +       if (!is_power_of_2(size) && size != 0)
> +               return -EINVAL;
Add the check below, and then we can remove the check in the
tbl_mask_cache_alloc() function; that function will then only return
NULL if there is no memory. We can also add a comment on
tbl_mask_cache_alloc() to indicate that size should be a power of 2.
 if ((!is_power_of_2(size) && size != 0) ||
           (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)

Thanks Eelco.
Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>

>          new = tbl_mask_cache_alloc(size);
>          if (!new)
> -               return;
> +               return -ENOMEM;
>
>          rcu_assign_pointer(table->mask_cache, new);
>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
> +
> +       return 0;
>   }
>
> >> +       }
> >> +
> >>         dp->user_features = user_features;
> >>
> >>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> >> @@ -1894,6 +1906,8 @@ static const struct nla_policy
> >> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
> >>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
> >> IFNAMSIZ - 1 },
> >>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
> >>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
> >> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32,
> >> 0,
> >> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
> >> mask_cache_entry)),
> >>  };
> >>
> >>  static const struct genl_ops dp_datapath_genl_ops[] = {
> >> diff --git a/net/openvswitch/flow_table.c
> >> b/net/openvswitch/flow_table.c
> >> index a5912ea05352..5280aeeef628 100644
> >> --- a/net/openvswitch/flow_table.c
> >> +++ b/net/openvswitch/flow_table.c
> >> @@ -38,8 +38,8 @@
> >>  #define MASK_ARRAY_SIZE_MIN    16
> >>  #define REHASH_INTERVAL                (10 * 60 * HZ)
> >>
> >> +#define MC_DEFAULT_HASH_ENTRIES        256
> >>  #define MC_HASH_SHIFT          8
> >> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
> >>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
> >> MC_HASH_SHIFT)
> >>
> >>  static struct kmem_cache *flow_cache;
> >> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table
> >> *tbl, struct sw_flow_mask *mask)
> >>         }
> >>  }
> >>
> >> +static void __mask_cache_destroy(struct mask_cache *mc)
> >> +{
> >> +       if (mc->mask_cache)
> >> +               free_percpu(mc->mask_cache);
> > free_percpu() on NULL is safe; we can remove the "if".
>
> Makes sense, will remove the if() check.
>
> >> +       kfree(mc);
> >> +}
> >> +
> >> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
> >> +{
> >> +       struct mask_cache *mc = container_of(rcu, struct mask_cache,
> >> rcu);
> >> +
> >> +       __mask_cache_destroy(mc);
> >> +}
> >> +
> >> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
> >> +{
> >> +       struct mask_cache_entry __percpu *cache = NULL;
> >> +       struct mask_cache *new;
> >> +
> >> +       /* Only allow size to be 0, or a power of 2, and does not
> >> exceed
> >> +        * percpu allocation size.
> >> +        */
> >> +       if ((!is_power_of_2(size) && size != 0) ||
> >> +           (size * sizeof(struct mask_cache_entry)) >
> >> PCPU_MIN_UNIT_SIZE)
> >> +               return NULL;
> >> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
> >> +       if (!new)
> >> +               return NULL;
> >> +
> >> +       new->cache_size = size;
> >> +       if (new->cache_size > 0) {
> >> +               cache = __alloc_percpu(array_size(sizeof(struct
> >> mask_cache_entry),
> >> +                                                 new->cache_size),
> >> +                                      __alignof__(struct
> >> mask_cache_entry));
> >> +               if (!cache) {
> >> +                       kfree(new);
> >> +                       return NULL;
> >> +               }
> >> +       }
> >> +
> >> +       new->mask_cache = cache;
> >> +       return new;
> >> +}
> >> +
> >> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size)
> >> +{
> >> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >> +       struct mask_cache *new;
> >> +
> >> +       if (size == mc->cache_size)
> >> +               return;
> >> +
> >> +       new = tbl_mask_cache_alloc(size);
> >> +       if (!new)
> >> +               return;
> >> +
> >> +       rcu_assign_pointer(table->mask_cache, new);
> >> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >> +}
> >> +
> >>  int ovs_flow_tbl_init(struct flow_table *table)
> >>  {
> >>         struct table_instance *ti, *ufid_ti;
> >> +       struct mask_cache *mc;
> >>         struct mask_array *ma;
> >>
> >> -       table->mask_cache = __alloc_percpu(sizeof(struct
> >> mask_cache_entry) *
> >> -                                          MC_HASH_ENTRIES,
> >> -                                          __alignof__(struct
> >> mask_cache_entry));
> >> -       if (!table->mask_cache)
> >> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
> >> +       if (!mc)
> >>                 return -ENOMEM;
> >>
> >>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
> >> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
> >>         rcu_assign_pointer(table->ti, ti);
> >>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
> >>         rcu_assign_pointer(table->mask_array, ma);
> >> +       rcu_assign_pointer(table->mask_cache, mc);
> >>         table->last_rehash = jiffies;
> >>         table->count = 0;
> >>         table->ufid_count = 0;
> >> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
> >>  free_mask_array:
> >>         __mask_array_destroy(ma);
> >>  free_mask_cache:
> >> -       free_percpu(table->mask_cache);
> >> +       __mask_cache_destroy(mc);
> >>         return -ENOMEM;
> >>  }
> >>
> >> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
> >> *table)
> >>  {
> >>         struct table_instance *ti = rcu_dereference_raw(table->ti);
> >>         struct table_instance *ufid_ti =
> >> rcu_dereference_raw(table->ufid_ti);
> >> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >> +       struct mask_array *ma =
> >> rcu_dereference_ovsl(table->mask_array);
> >>
> >> -       free_percpu(table->mask_cache);
> >> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
> >> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
> >>         table_instance_destroy(table, ti, ufid_ti, false);
> >>  }
> >>
> >> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct
> >> flow_table *tbl,
> >>                                           u32 *n_mask_hit,
> >>                                           u32 *n_cache_hit)
> >>  {
> >> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
> >>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
> >>         struct table_instance *ti = rcu_dereference(tbl->ti);
> >>         struct mask_cache_entry *entries, *ce;
> >> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct
> >> flow_table *tbl,
> >>
> >>         *n_mask_hit = 0;
> >>         *n_cache_hit = 0;
> >> -       if (unlikely(!skb_hash)) {
> >> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
> >>                 u32 mask_index = 0;
> >>                 u32 cache = 0;
> >>
> >> @@ -749,11 +813,11 @@ struct sw_flow
> >> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
> >>
> >>         ce = NULL;
> >>         hash = skb_hash;
> >> -       entries = this_cpu_ptr(tbl->mask_cache);
> >> +       entries = this_cpu_ptr(mc->mask_cache);
> >>
> >>         /* Find the cache entry 'ce' to operate on. */
> >>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> >> -               int index = hash & (MC_HASH_ENTRIES - 1);
> >> +               int index = hash & (mc->cache_size - 1);
> >>                 struct mask_cache_entry *e;
> >>
> >>                 e = &entries[index];
> >> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
> >> flow_table *table)
> >>         return READ_ONCE(ma->count);
> >>  }
> >>
> >> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
> >> +{
> >> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >> +
> >> +       return READ_ONCE(mc->cache_size);
> >> +}
> >> +
> >>  static struct table_instance *table_instance_expand(struct
> >> table_instance *ti,
> >>                                                     bool ufid)
> >>  {
> >> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table
> >> *table)
> >>         for (i = 0; i < masks_entries; i++) {
> >>                 int index = masks_and_count[i].index;
> >>
> >> -               new->masks[new->count++] =
> >> -                       rcu_dereference_ovsl(ma->masks[index]);
> >> +               if (ovsl_dereference(ma->masks[index]))
> >> +                       new->masks[new->count++] = ma->masks[index];
> >>         }
> >>
> >>         rcu_assign_pointer(table->mask_array, new);
> >> diff --git a/net/openvswitch/flow_table.h
> >> b/net/openvswitch/flow_table.h
> >> index 325e939371d8..f2dba952db2f 100644
> >> --- a/net/openvswitch/flow_table.h
> >> +++ b/net/openvswitch/flow_table.h
> >> @@ -27,6 +27,12 @@ struct mask_cache_entry {
> >>         u32 mask_index;
> >>  };
> >>
> >> +struct mask_cache {
> >> +       struct rcu_head rcu;
> >> +       u32 cache_size;  /* Must be ^2 value. */
> >> +       struct mask_cache_entry __percpu *mask_cache;
> >> +};
> >> +
> >>  struct mask_count {
> >>         int index;
> >>         u64 counter;
> >> @@ -53,7 +59,7 @@ struct table_instance {
> >>  struct flow_table {
> >>         struct table_instance __rcu *ti;
> >>         struct table_instance __rcu *ufid_ti;
> >> -       struct mask_cache_entry __percpu *mask_cache;
> >> +       struct mask_cache __rcu *mask_cache;
> >>         struct mask_array __rcu *mask_array;
> >>         unsigned long last_rehash;
> >>         unsigned int count;
> >> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table,
> >> struct sw_flow *flow,
> >>                         const struct sw_flow_mask *mask);
> >>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow
> >> *flow);
> >>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
> >> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
> >> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size);
> >>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
> >>                                        u32 *bucket, u32 *idx);
> >>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
> >>
> >
> >
> > --
> > Best regards, Tonghao
>
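
For reference, the upper bound that both the netlink policy and
tbl_mask_cache_alloc() enforce works out as follows. This standalone model
assumes PCPU_MIN_UNIT_SIZE of 32 KiB (its usual value) and the existing
two-u32 layout of struct mask_cache_entry; the skb_hash field is not visible
in the hunks above and is an assumption here:

#include <stdio.h>
#include <stdint.h>

/* Standalone model of the size bound; PCPU_MIN_UNIT_SIZE is 32 KiB on
 * typical kernel builds.
 */
#define PCPU_MIN_UNIT_SIZE	(32 * 1024)

struct mask_cache_entry {
	uint32_t skb_hash;	/* assumed field, per the existing struct */
	uint32_t mask_index;
};

int main(void)
{
	size_t max_entries = PCPU_MIN_UNIT_SIZE /
			     sizeof(struct mask_cache_entry);

	/* 32768 / 8 = 4096: valid sizes are 0 and powers of 2 up to 4096. */
	printf("max masks cache entries: %zu\n", max_entries);
	return 0;
}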
Eelco Chaudron July 30, 2020, 1:51 p.m. UTC | #4
On 30 Jul 2020, at 15:25, Tonghao Zhang wrote:

> On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com> 
> wrote:
>>
>> Thanks Tonghao for the review, see comments inline below!
>>
>> If I get no more reviews by the end of the week I’ll send out a v4.
>>
>> //Eelco
>>
>>
>> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
>>
>>> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com>
>>> wrote:
>>>>
>>>> This patch makes the masks cache size configurable or, with
>>>> a size of 0, disables it.
>>>>
>>>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>> ---
>>>> Changes in v3:
>>>>  - Use is_power_of_2() function
>>>>  - Use array_size() function
>>>>  - Fix remaining sparse errors
>>>>
>>>> Changes in v2:
>>>>  - Fix sparse warnings
>>>>  - Fix netlink policy items reported by Florian Westphal
>>>>
>>>>  include/uapi/linux/openvswitch.h |    1
>>>>  net/openvswitch/datapath.c       |   14 +++++
>>>>  net/openvswitch/flow_table.c     |   97
>>>> +++++++++++++++++++++++++++++++++-----
>>>>  net/openvswitch/flow_table.h     |   10 ++++
>>>>  4 files changed, 108 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/include/uapi/linux/openvswitch.h
>>>> b/include/uapi/linux/openvswitch.h
>>>> index 7cb76e5ca7cf..8300cc29dec8 100644
>>>> --- a/include/uapi/linux/openvswitch.h
>>>> +++ b/include/uapi/linux/openvswitch.h
>>>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>>>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
>>>> ovs_dp_megaflow_stats */
>>>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>>>>         OVS_DP_ATTR_PAD,
>>>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>>         __OVS_DP_ATTR_MAX
>>>>  };
>>>>
>>>> diff --git a/net/openvswitch/datapath.c 
>>>> b/net/openvswitch/datapath.c
>>>> index a54df1fe3ec4..114b2ddb8037 100644
>>>> --- a/net/openvswitch/datapath.c
>>>> +++ b/net/openvswitch/datapath.c
>>>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>>>>         msgsize += nla_total_size_64bit(sizeof(struct 
>>>> ovs_dp_stats));
>>>>         msgsize += nla_total_size_64bit(sizeof(struct
>>>> ovs_dp_megaflow_stats));
>>>>         msgsize += nla_total_size(sizeof(u32)); /*
>>>> OVS_DP_ATTR_USER_FEATURES */
>>>> +       msgsize += nla_total_size(sizeof(u32)); /*
>>>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>>>
>>>>         return msgsize;
>>>>  }
>>>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
>>>> datapath *dp, struct sk_buff *skb,
>>>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
>>>> dp->user_features))
>>>>                 goto nla_put_failure;
>>>>
>>>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
>>>> +               goto nla_put_failure;
>>>> +
>>>>         genlmsg_end(skb, ovs_header);
>>>>         return 0;
>>>>
>>>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath 
>>>> *dp,
>>>> struct nlattr *a[])
>>>>  #endif
>>>>         }
>>>>
>>>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>>>> +               u32 cache_size;
>>>> +
>>>> +               cache_size =
>>>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>>>> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
>>>> cache_size);
>>> Should we return an error code if we can't change the "mask_cache"
>>> size? For example, -EINVAL or -ENOMEM.
>>
>> Initially, I did not do this because the new value is reported, and on
>> failure the old value is shown.
>> However, thinking about it again, it makes more sense to return an error.
>> I will send a v4 with the following change:
>>
>> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>>   {
>>          struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>          struct mask_cache *new;
>>
>>          if (size == mc->cache_size)
>> -               return;
>> +               return 0;
>> +
>> +       if (!is_power_of_2(size) && size != 0)
>> +               return -EINVAL;
> Add the check below, and then we can remove the check in the
> tbl_mask_cache_alloc() function; that function will then only return
> NULL if there is no memory. We can also add a comment on
> tbl_mask_cache_alloc() to indicate that size should be a power of 2.
>  if ((!is_power_of_2(size) && size != 0) ||
>            (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)

I do not think the extra check hurts as it’s not in any fast path.
The comment would easily be missed, especially if someone changes the 
default value, so I would prefer to keep it as is.

> Thanks Eelco.
> Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
>>          new = tbl_mask_cache_alloc(size);
>>          if (!new)
>> -               return;
>> +               return -ENOMEM;
>>
>>          rcu_assign_pointer(table->mask_cache, new);
>>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +
>> +       return 0;
>>   }
>>
>>>> +       }
>>>> +
>>>>         dp->user_features = user_features;
>>>>
>>>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>>>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
>>>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>>>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
>>>> IFNAMSIZ - 1 },
>>>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>>>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>>>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32,
>>>> 0,
>>>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
>>>> mask_cache_entry)),
>>>>  };
>>>>
>>>>  static const struct genl_ops dp_datapath_genl_ops[] = {
>>>> diff --git a/net/openvswitch/flow_table.c
>>>> b/net/openvswitch/flow_table.c
>>>> index a5912ea05352..5280aeeef628 100644
>>>> --- a/net/openvswitch/flow_table.c
>>>> +++ b/net/openvswitch/flow_table.c
>>>> @@ -38,8 +38,8 @@
>>>>  #define MASK_ARRAY_SIZE_MIN    16
>>>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>>>>
>>>> +#define MC_DEFAULT_HASH_ENTRIES        256
>>>>  #define MC_HASH_SHIFT          8
>>>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>>>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
>>>> MC_HASH_SHIFT)
>>>>
>>>>  static struct kmem_cache *flow_cache;
>>>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct 
>>>> flow_table
>>>> *tbl, struct sw_flow_mask *mask)
>>>>         }
>>>>  }
>>>>
>>>> +static void __mask_cache_destroy(struct mask_cache *mc)
>>>> +{
>>>> +       if (mc->mask_cache)
>>>> +               free_percpu(mc->mask_cache);
>>> free_percpu() on NULL is safe; we can remove the "if".
>>
>> Makes sense, will remove the if() check.
>>
>>>> +       kfree(mc);
>>>> +}
>>>> +
>>>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
>>>> +{
>>>> +       struct mask_cache *mc = container_of(rcu, struct 
>>>> mask_cache,
>>>> rcu);
>>>> +
>>>> +       __mask_cache_destroy(mc);
>>>> +}
>>>> +
>>>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>>>> +{
>>>> +       struct mask_cache_entry __percpu *cache = NULL;
>>>> +       struct mask_cache *new;
>>>> +
>>>> +       /* Only allow size to be 0, or a power of 2, and does not
>>>> exceed
>>>> +        * percpu allocation size.
>>>> +        */
>>>> +       if ((!is_power_of_2(size) && size != 0) ||
>>>> +           (size * sizeof(struct mask_cache_entry)) >
>>>> PCPU_MIN_UNIT_SIZE)
>>>> +               return NULL;
>>>> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
>>>> +       if (!new)
>>>> +               return NULL;
>>>> +
>>>> +       new->cache_size = size;
>>>> +       if (new->cache_size > 0) {
>>>> +               cache = __alloc_percpu(array_size(sizeof(struct
>>>> mask_cache_entry),
>>>> +                                                 new->cache_size),
>>>> +                                      __alignof__(struct
>>>> mask_cache_entry));
>>>> +               if (!cache) {
>>>> +                       kfree(new);
>>>> +                       return NULL;
>>>> +               }
>>>> +       }
>>>> +
>>>> +       new->mask_cache = cache;
>>>> +       return new;
>>>> +}
>>>> +
>>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size)
>>>> +{
>>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>>> +       struct mask_cache *new;
>>>> +
>>>> +       if (size == mc->cache_size)
>>>> +               return;
>>>> +
>>>> +       new = tbl_mask_cache_alloc(size);
>>>> +       if (!new)
>>>> +               return;
>>>> +
>>>> +       rcu_assign_pointer(table->mask_cache, new);
>>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>> +}
>>>> +
>>>>  int ovs_flow_tbl_init(struct flow_table *table)
>>>>  {
>>>>         struct table_instance *ti, *ufid_ti;
>>>> +       struct mask_cache *mc;
>>>>         struct mask_array *ma;
>>>>
>>>> -       table->mask_cache = __alloc_percpu(sizeof(struct
>>>> mask_cache_entry) *
>>>> -                                          MC_HASH_ENTRIES,
>>>> -                                          __alignof__(struct
>>>> mask_cache_entry));
>>>> -       if (!table->mask_cache)
>>>> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>>>> +       if (!mc)
>>>>                 return -ENOMEM;
>>>>
>>>>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>>>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>>>         rcu_assign_pointer(table->ti, ti);
>>>>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>>>>         rcu_assign_pointer(table->mask_array, ma);
>>>> +       rcu_assign_pointer(table->mask_cache, mc);
>>>>         table->last_rehash = jiffies;
>>>>         table->count = 0;
>>>>         table->ufid_count = 0;
>>>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>>>  free_mask_array:
>>>>         __mask_array_destroy(ma);
>>>>  free_mask_cache:
>>>> -       free_percpu(table->mask_cache);
>>>> +       __mask_cache_destroy(mc);
>>>>         return -ENOMEM;
>>>>  }
>>>>
>>>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
>>>> *table)
>>>>  {
>>>>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>>>>         struct table_instance *ufid_ti =
>>>> rcu_dereference_raw(table->ufid_ti);
>>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>>> +       struct mask_array *ma =
>>>> rcu_dereference_ovsl(table->mask_array);
>>>>
>>>> -       free_percpu(table->mask_cache);
>>>> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
>>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>>>>         table_instance_destroy(table, ti, ufid_ti, false);
>>>>  }
>>>>
>>>> @@ -724,6 +787,7 @@ struct sw_flow 
>>>> *ovs_flow_tbl_lookup_stats(struct
>>>> flow_table *tbl,
>>>>                                           u32 *n_mask_hit,
>>>>                                           u32 *n_cache_hit)
>>>>  {
>>>> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>>>>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>>>>         struct table_instance *ti = rcu_dereference(tbl->ti);
>>>>         struct mask_cache_entry *entries, *ce;
>>>> @@ -733,7 +797,7 @@ struct sw_flow 
>>>> *ovs_flow_tbl_lookup_stats(struct
>>>> flow_table *tbl,
>>>>
>>>>         *n_mask_hit = 0;
>>>>         *n_cache_hit = 0;
>>>> -       if (unlikely(!skb_hash)) {
>>>> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>>>>                 u32 mask_index = 0;
>>>>                 u32 cache = 0;
>>>>
>>>> @@ -749,11 +813,11 @@ struct sw_flow
>>>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>>>
>>>>         ce = NULL;
>>>>         hash = skb_hash;
>>>> -       entries = this_cpu_ptr(tbl->mask_cache);
>>>> +       entries = this_cpu_ptr(mc->mask_cache);
>>>>
>>>>         /* Find the cache entry 'ce' to operate on. */
>>>>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
>>>> -               int index = hash & (MC_HASH_ENTRIES - 1);
>>>> +               int index = hash & (mc->cache_size - 1);
>>>>                 struct mask_cache_entry *e;
>>>>
>>>>                 e = &entries[index];
>>>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
>>>> flow_table *table)
>>>>         return READ_ONCE(ma->count);
>>>>  }
>>>>
>>>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>>>> +{
>>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>>> +
>>>> +       return READ_ONCE(mc->cache_size);
>>>> +}
>>>> +
>>>>  static struct table_instance *table_instance_expand(struct
>>>> table_instance *ti,
>>>>                                                     bool ufid)
>>>>  {
>>>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct 
>>>> flow_table
>>>> *table)
>>>>         for (i = 0; i < masks_entries; i++) {
>>>>                 int index = masks_and_count[i].index;
>>>>
>>>> -               new->masks[new->count++] =
>>>> -                       rcu_dereference_ovsl(ma->masks[index]);
>>>> +               if (ovsl_dereference(ma->masks[index]))
>>>> +                       new->masks[new->count++] = 
>>>> ma->masks[index];
>>>>         }
>>>>
>>>>         rcu_assign_pointer(table->mask_array, new);
>>>> diff --git a/net/openvswitch/flow_table.h
>>>> b/net/openvswitch/flow_table.h
>>>> index 325e939371d8..f2dba952db2f 100644
>>>> --- a/net/openvswitch/flow_table.h
>>>> +++ b/net/openvswitch/flow_table.h
>>>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>>>>         u32 mask_index;
>>>>  };
>>>>
>>>> +struct mask_cache {
>>>> +       struct rcu_head rcu;
>>>> +       u32 cache_size;  /* Must be ^2 value. */
>>>> +       struct mask_cache_entry __percpu *mask_cache;
>>>> +};
>>>> +
>>>>  struct mask_count {
>>>>         int index;
>>>>         u64 counter;
>>>> @@ -53,7 +59,7 @@ struct table_instance {
>>>>  struct flow_table {
>>>>         struct table_instance __rcu *ti;
>>>>         struct table_instance __rcu *ufid_ti;
>>>> -       struct mask_cache_entry __percpu *mask_cache;
>>>> +       struct mask_cache __rcu *mask_cache;
>>>>         struct mask_array __rcu *mask_array;
>>>>         unsigned long last_rehash;
>>>>         unsigned int count;
>>>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table,
>>>> struct sw_flow *flow,
>>>>                         const struct sw_flow_mask *mask);
>>>>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow
>>>> *flow);
>>>>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
>>>> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table 
>>>> *table);
>>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size);
>>>>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance 
>>>> *table,
>>>>                                        u32 *bucket, u32 *idx);
>>>>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>>>>
>>>
>>>
>>> --
>>> Best regards, Tonghao
>>
>
>
> -- 
> Best regards, Tonghao
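
Given the agreement above to drop the redundant NULL check, the destructor
would presumably shrink in v4 to the following sketch:

static void __mask_cache_destroy(struct mask_cache *mc)
{
	/* free_percpu(NULL) is a no-op, so no check is needed. */
	free_percpu(mc->mask_cache);
	kfree(mc);
}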
Tonghao Zhang July 30, 2020, 2:23 p.m. UTC | #5
On Thu, Jul 30, 2020 at 9:52 PM Eelco Chaudron <echaudro@redhat.com> wrote:
>
>
>
> On 30 Jul 2020, at 15:25, Tonghao Zhang wrote:
>
> > On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com>
> > wrote:
> >>
> >> Thanks Tonghao for the review, see comments inline below!
> >>
> >> If I get no more reviews by the end of the week I’ll send out a v4.
> >>
> >> //Eelco
> >>
> >>
> >> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
> >>
> >>> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com>
> >>> wrote:
> >>>>
> >>>> This patch makes the masks cache size configurable or, with
> >>>> a size of 0, disables it.
> >>>>
> >>>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> >>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >>>> ---
> >>>> Changes in v3:
> >>>>  - Use is_power_of_2() function
> >>>>  - Use array_size() function
> >>>>  - Fix remaining sparse errors
> >>>>
> >>>> Changes in v2:
> >>>>  - Fix sparse warnings
> >>>>  - Fix netlink policy items reported by Florian Westphal
> >>>>
> >>>>  include/uapi/linux/openvswitch.h |    1
> >>>>  net/openvswitch/datapath.c       |   14 +++++
> >>>>  net/openvswitch/flow_table.c     |   97
> >>>> +++++++++++++++++++++++++++++++++-----
> >>>>  net/openvswitch/flow_table.h     |   10 ++++
> >>>>  4 files changed, 108 insertions(+), 14 deletions(-)
> >>>>
> >>>> diff --git a/include/uapi/linux/openvswitch.h
> >>>> b/include/uapi/linux/openvswitch.h
> >>>> index 7cb76e5ca7cf..8300cc29dec8 100644
> >>>> --- a/include/uapi/linux/openvswitch.h
> >>>> +++ b/include/uapi/linux/openvswitch.h
> >>>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
> >>>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
> >>>> ovs_dp_megaflow_stats */
> >>>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
> >>>>         OVS_DP_ATTR_PAD,
> >>>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >>>>         __OVS_DP_ATTR_MAX
> >>>>  };
> >>>>
> >>>> diff --git a/net/openvswitch/datapath.c
> >>>> b/net/openvswitch/datapath.c
> >>>> index a54df1fe3ec4..114b2ddb8037 100644
> >>>> --- a/net/openvswitch/datapath.c
> >>>> +++ b/net/openvswitch/datapath.c
> >>>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
> >>>>         msgsize += nla_total_size_64bit(sizeof(struct
> >>>> ovs_dp_stats));
> >>>>         msgsize += nla_total_size_64bit(sizeof(struct
> >>>> ovs_dp_megaflow_stats));
> >>>>         msgsize += nla_total_size(sizeof(u32)); /*
> >>>> OVS_DP_ATTR_USER_FEATURES */
> >>>> +       msgsize += nla_total_size(sizeof(u32)); /*
> >>>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
> >>>>
> >>>>         return msgsize;
> >>>>  }
> >>>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
> >>>> datapath *dp, struct sk_buff *skb,
> >>>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
> >>>> dp->user_features))
> >>>>                 goto nla_put_failure;
> >>>>
> >>>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >>>> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
> >>>> +               goto nla_put_failure;
> >>>> +
> >>>>         genlmsg_end(skb, ovs_header);
> >>>>         return 0;
> >>>>
> >>>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath
> >>>> *dp,
> >>>> struct nlattr *a[])
> >>>>  #endif
> >>>>         }
> >>>>
> >>>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
> >>>> +               u32 cache_size;
> >>>> +
> >>>> +               cache_size =
> >>>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> >>>> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
> >>>> cache_size);
> >>> Should we return an error code if we can't change the "mask_cache"
> >>> size? For example, -EINVAL or -ENOMEM.
> >>
> >> Initially, I did not do this because the new value is reported,
> >> and on failure, the old value is shown. However, thinking about it
> >> again, it makes more sense to return an error.
> >> Will send a v4 with the following change:
> >>
> >> -
> >> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size)
> >> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size)
> >>   {
> >>          struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >>          struct mask_cache *new;
> >>
> >>          if (size == mc->cache_size)
> >> -               return;
> >> +               return 0;
> >> +
> >> +       if (!is_power_of_2(size) && size != 0)
> >> +               return -EINVAL;
> > Add the check as below, and we can remove the check in the
> > tbl_mask_cache_alloc() function. That function will then only return
> > NULL if there is no memory. And we can add a comment to
> > tbl_mask_cache_alloc() indicating that size should be a power of 2.
> >  if ((!is_power_of_2(size) && size != 0) ||
> >            (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
>
> I do not think the extra check hurts as it’s not in any fast path.
Hi Eelco.
Yes, I agree. Sorry for not being clear. I mean that if we don't check
"(size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE" in
ovs_flow_tbl_masks_cache_resize(), then tbl_mask_cache_alloc() (which
ovs_flow_tbl_masks_cache_resize() invokes) will return NULL, and
ovs_flow_tbl_masks_cache_resize() will return -ENOMEM. I guess we should
return -EINVAL in that case.
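
To illustrate the combined direction (a sketch only, not the actual v4
code): hoisting all size validation into the resize function keeps
-EINVAL and -ENOMEM distinct, while the allocator's own check stays as
a backstop:

int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
{
        struct mask_cache *mc = rcu_dereference(table->mask_cache);
        struct mask_cache *new;

        /* Validate here so a NULL return from tbl_mask_cache_alloc()
         * can only mean out of memory.
         */
        if ((!is_power_of_2(size) && size != 0) ||
            (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
                return -EINVAL;

        if (size == mc->cache_size)
                return 0;

        new = tbl_mask_cache_alloc(size);
        if (!new)
                return -ENOMEM;

        rcu_assign_pointer(table->mask_cache, new);
        call_rcu(&mc->rcu, mask_cache_rcu_cb);

        return 0;
}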

> The comment would easily be missed, especially if someone changes the
> default value, so I would prefer to keep it as is.
>
> > Thanks Eelco.
> > Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> >>          new = tbl_mask_cache_alloc(size);
> >>          if (!new)
> >> -               return;
> >> +               return -ENOMEM;
> >>
> >>          rcu_assign_pointer(table->mask_cache, new);
> >>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >> +
> >> +       return 0;
> >>   }
> >>
> >>>> +       }
> >>>> +
> >>>>         dp->user_features = user_features;
> >>>>
> >>>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> >>>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
> >>>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
> >>>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
> >>>> IFNAMSIZ - 1 },
> >>>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
> >>>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
> >>>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32,
> >>>> 0,
> >>>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
> >>>> mask_cache_entry)),
> >>>>  };
> >>>>
> >>>>  static const struct genl_ops dp_datapath_genl_ops[] = {
> >>>> diff --git a/net/openvswitch/flow_table.c
> >>>> b/net/openvswitch/flow_table.c
> >>>> index a5912ea05352..5280aeeef628 100644
> >>>> --- a/net/openvswitch/flow_table.c
> >>>> +++ b/net/openvswitch/flow_table.c
> >>>> @@ -38,8 +38,8 @@
> >>>>  #define MASK_ARRAY_SIZE_MIN    16
> >>>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
> >>>>
> >>>> +#define MC_DEFAULT_HASH_ENTRIES        256
> >>>>  #define MC_HASH_SHIFT          8
> >>>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
> >>>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
> >>>> MC_HASH_SHIFT)
> >>>>
> >>>>  static struct kmem_cache *flow_cache;
> >>>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct
> >>>> flow_table
> >>>> *tbl, struct sw_flow_mask *mask)
> >>>>         }
> >>>>  }
> >>>>
> >>>> +static void __mask_cache_destroy(struct mask_cache *mc)
> >>>> +{
> >>>> +       if (mc->mask_cache)
> >>>> +               free_percpu(mc->mask_cache);
> >>> free_percpu() on NULL is safe; we can remove the "if".
> >>
> >> Makes sense, will remove the if() check.
> >>
> >>>> +       kfree(mc);
> >>>> +}
> >>>> [...]


--
Best regards, Tonghao
Eelco Chaudron July 30, 2020, 2:28 p.m. UTC | #6
On 30 Jul 2020, at 16:23, Tonghao Zhang wrote:

> On Thu, Jul 30, 2020 at 9:52 PM Eelco Chaudron <echaudro@redhat.com> 
> wrote:
>>
>>
>>
>> On 30 Jul 2020, at 15:25, Tonghao Zhang wrote:
>>
>>> On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com>
>>> wrote:
>>>>
>>>> Thanks Tonghao for the review, see comments inline below!
>>>>
>>>> If I get no more reviews by the end of the week I’ll send out a 
>>>> v4.
>>>>
>>>> //Eelco
>>>>
>>>>
>>>> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
>>>>
>>>>> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron 
>>>>> <echaudro@redhat.com>
>>>>> wrote:
>>>>>>
>>>>>> This patch makes the masks cache size configurable or, with
>>>>>> a size of 0, disables it.
>>>>>>
>>>>>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
>>>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>>>> ---
>>>>>> Changes in v3:
>>>>>>  - Use is_power_of_2() function
>>>>>>  - Use array_size() function
>>>>>>  - Fix remaining sparse errors
>>>>>>
>>>>>> Changes in v2:
>>>>>>  - Fix sparse warnings
>>>>>>  - Fix netlink policy items reported by Florian Westphal
>>>>>>
>>>>>>  include/uapi/linux/openvswitch.h |    1
>>>>>>  net/openvswitch/datapath.c       |   14 +++++
>>>>>>  net/openvswitch/flow_table.c     |   97
>>>>>> +++++++++++++++++++++++++++++++++-----
>>>>>>  net/openvswitch/flow_table.h     |   10 ++++
>>>>>>  4 files changed, 108 insertions(+), 14 deletions(-)
>>>>>>
>>>>>> diff --git a/include/uapi/linux/openvswitch.h
>>>>>> b/include/uapi/linux/openvswitch.h
>>>>>> index 7cb76e5ca7cf..8300cc29dec8 100644
>>>>>> --- a/include/uapi/linux/openvswitch.h
>>>>>> +++ b/include/uapi/linux/openvswitch.h
>>>>>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>>>>>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
>>>>>> ovs_dp_megaflow_stats */
>>>>>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>>>>>>         OVS_DP_ATTR_PAD,
>>>>>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>>>>         __OVS_DP_ATTR_MAX
>>>>>>  };
>>>>>>
>>>>>> diff --git a/net/openvswitch/datapath.c
>>>>>> b/net/openvswitch/datapath.c
>>>>>> index a54df1fe3ec4..114b2ddb8037 100644
>>>>>> --- a/net/openvswitch/datapath.c
>>>>>> +++ b/net/openvswitch/datapath.c
>>>>>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>>>>>>         msgsize += nla_total_size_64bit(sizeof(struct
>>>>>> ovs_dp_stats));
>>>>>>         msgsize += nla_total_size_64bit(sizeof(struct
>>>>>> ovs_dp_megaflow_stats));
>>>>>>         msgsize += nla_total_size(sizeof(u32)); /*
>>>>>> OVS_DP_ATTR_USER_FEATURES */
>>>>>> +       msgsize += nla_total_size(sizeof(u32)); /*
>>>>>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>>>>>
>>>>>>         return msgsize;
>>>>>>  }
>>>>>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
>>>>>> datapath *dp, struct sk_buff *skb,
>>>>>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
>>>>>> dp->user_features))
>>>>>>                 goto nla_put_failure;
>>>>>>
>>>>>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>>>> +                       
>>>>>> ovs_flow_tbl_masks_cache_size(&dp->table)))
>>>>>> +               goto nla_put_failure;
>>>>>> +
>>>>>>         genlmsg_end(skb, ovs_header);
>>>>>>         return 0;
>>>>>>
>>>>>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath
>>>>>> *dp,
>>>>>> struct nlattr *a[])
>>>>>>  #endif
>>>>>>         }
>>>>>>
>>>>>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>>>>>> +               u32 cache_size;
>>>>>> +
>>>>>> +               cache_size =
>>>>>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>>>>>> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
>>>>>> cache_size);
>>>>> Should we return an error code if we can't change the "mask_cache"
>>>>> size? For example, -EINVAL or -ENOMEM.
>>>>
>>>> Initially, I did not do this because the new value is reported,
>>>> and on failure, the old value is shown. However, thinking about it
>>>> again, it makes more sense to return an error.
>>>> Will send a v4 with the following change:
>>>>
>>>> -
>>>> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size)
>>>> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size)
>>>>   {
>>>>          struct mask_cache *mc = 
>>>> rcu_dereference(table->mask_cache);
>>>>          struct mask_cache *new;
>>>>
>>>>          if (size == mc->cache_size)
>>>> -               return;
>>>> +               return 0;
>>>> +
>>>> +       if (!is_power_of_2(size) && size != 0)
>>>> +               return -EINVAL;
>>> Add the check as below, and we can remove the check in the
>>> tbl_mask_cache_alloc() function. That function will then only return
>>> NULL if there is no memory. And we can add a comment to
>>> tbl_mask_cache_alloc() indicating that size should be a power of 2.
>>>  if ((!is_power_of_2(size) && size != 0) ||
>>>            (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
>>
>> I do not think the extra check hurts as it’s not in any fast path.
> Hi Eelco.
> Yes, I agree. Sorry for not being clear. I mean that if we don't check
> "(size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE" in
> ovs_flow_tbl_masks_cache_resize(), then tbl_mask_cache_alloc() (which
> ovs_flow_tbl_masks_cache_resize() invokes) will return NULL, and
> ovs_flow_tbl_masks_cache_resize() will return -ENOMEM. I guess we should
> return -EINVAL in that case.

Thanks, got it now. I will add the additional check and not remove it
from tbl_mask_cache_alloc().
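
As an aside (a sketch only, not part of this series), a build-time
guard in ovs_flow_tbl_init() would also catch it if someone later
changes the default to something invalid:

        /* Hypothetical guard: fails the build if the default is not a
         * power of 2 or no longer fits the percpu allocation bound.
         */
        BUILD_BUG_ON(MC_DEFAULT_HASH_ENTRIES &
                     (MC_DEFAULT_HASH_ENTRIES - 1));
        BUILD_BUG_ON(MC_DEFAULT_HASH_ENTRIES *
                     sizeof(struct mask_cache_entry) > PCPU_MIN_UNIT_SIZE);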

>> The comment would easily be missed, especially if someone changes the
>> default value, so I would prefer to keep it as is.
>>
>>> Thanks Eelco.
>>> Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>
>>>>          new = tbl_mask_cache_alloc(size);
>>>>          if (!new)
>>>> -               return;
>>>> +               return -ENOMEM;
>>>>
>>>>          rcu_assign_pointer(table->mask_cache, new);
>>>>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>> +
>>>> +       return 0;
>>>>   }
>>>>
>>>>>> +       }
>>>>>> +
>>>>>>         dp->user_features = user_features;
>>>>>>
>>>>>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>>>>>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
>>>>>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>>>>>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
>>>>>> IFNAMSIZ - 1 },
>>>>>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>>>>>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>>>>>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  
>>>>>> NLA_POLICY_RANGE(NLA_U32,
>>>>>> 0,
>>>>>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
>>>>>> mask_cache_entry)),
>>>>>>  };
>>>>>>
>>>>>>  static const struct genl_ops dp_datapath_genl_ops[] = {
>>>>>> diff --git a/net/openvswitch/flow_table.c
>>>>>> b/net/openvswitch/flow_table.c
>>>>>> index a5912ea05352..5280aeeef628 100644
>>>>>> --- a/net/openvswitch/flow_table.c
>>>>>> +++ b/net/openvswitch/flow_table.c
>>>>>> @@ -38,8 +38,8 @@
>>>>>>  #define MASK_ARRAY_SIZE_MIN    16
>>>>>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>>>>>>
>>>>>> +#define MC_DEFAULT_HASH_ENTRIES        256
>>>>>>  #define MC_HASH_SHIFT          8
>>>>>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>>>>>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
>>>>>> MC_HASH_SHIFT)
>>>>>>
>>>>>>  static struct kmem_cache *flow_cache;
>>>>>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct
>>>>>> flow_table
>>>>>> *tbl, struct sw_flow_mask *mask)
>>>>>>         }
>>>>>>  }
>>>>>>
>>>>>> +static void __mask_cache_destroy(struct mask_cache *mc)
>>>>>> +{
>>>>>> +       if (mc->mask_cache)
>>>>>> +               free_percpu(mc->mask_cache);
>>>>> free_percpu() on NULL is safe; we can remove the "if".
>>>>
>>>> Makes sense, will remove the if() check.
>>>>
>>>>>> +       kfree(mc);
>>>>>> +}
>>>>>> [...]
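
For reference, a user-space sketch of exercising the new attribute over
generic netlink with libnl-3 (a hypothetical helper; it assumes the
datapath's ifindex is known, and most error handling is elided):

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/openvswitch.h>

static int set_masks_cache_size(int dp_ifindex, uint32_t size)
{
        struct nl_sock *sk = nl_socket_alloc();
        struct nl_msg *msg = nlmsg_alloc();
        struct ovs_header *hdr;
        int family, err;

        genl_connect(sk);
        family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);

        hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
                          sizeof(*hdr), 0, OVS_DP_CMD_SET,
                          OVS_DATAPATH_VERSION);
        hdr->dp_ifindex = dp_ifindex;

        /* 0 disables the cache; any other value must be a power of 2
         * within the PCPU_MIN_UNIT_SIZE-based policy range.
         */
        nla_put_u32(msg, OVS_DP_ATTR_MASKS_CACHE_SIZE, size);

        err = nl_send_auto(sk, msg);
        nlmsg_free(msg);
        nl_socket_free(sk);
        return err < 0 ? err : 0;
}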

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 7cb76e5ca7cf..8300cc29dec8 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -86,6 +86,7 @@  enum ovs_datapath_attr {
 	OVS_DP_ATTR_MEGAFLOW_STATS,	/* struct ovs_dp_megaflow_stats */
 	OVS_DP_ATTR_USER_FEATURES,	/* OVS_DP_F_*  */
 	OVS_DP_ATTR_PAD,
+	OVS_DP_ATTR_MASKS_CACHE_SIZE,
 	__OVS_DP_ATTR_MAX
 };
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a54df1fe3ec4..114b2ddb8037 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1498,6 +1498,7 @@  static size_t ovs_dp_cmd_msg_size(void)
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
 
 	return msgsize;
 }
@@ -1535,6 +1536,10 @@  static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
+			ovs_flow_tbl_masks_cache_size(&dp->table)))
+		goto nla_put_failure;
+
 	genlmsg_end(skb, ovs_header);
 	return 0;
 
@@ -1599,6 +1604,13 @@  static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 #endif
 	}
 
+	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
+		u32 cache_size;
+
+		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
+		ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
+	}
+
 	dp->user_features = user_features;
 
 	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
@@ -1894,6 +1906,8 @@  static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
 	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
 	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
+		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
 };
 
 static const struct genl_ops dp_datapath_genl_ops[] = {
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index a5912ea05352..5280aeeef628 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -38,8 +38,8 @@ 
 #define MASK_ARRAY_SIZE_MIN	16
 #define REHASH_INTERVAL		(10 * 60 * HZ)
 
+#define MC_DEFAULT_HASH_ENTRIES	256
 #define MC_HASH_SHIFT		8
-#define MC_HASH_ENTRIES		(1u << MC_HASH_SHIFT)
 #define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
 
 static struct kmem_cache *flow_cache;
@@ -341,15 +341,75 @@  static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 	}
 }
 
+static void __mask_cache_destroy(struct mask_cache *mc)
+{
+	if (mc->mask_cache)
+		free_percpu(mc->mask_cache);
+	kfree(mc);
+}
+
+static void mask_cache_rcu_cb(struct rcu_head *rcu)
+{
+	struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
+
+	__mask_cache_destroy(mc);
+}
+
+static struct mask_cache *tbl_mask_cache_alloc(u32 size)
+{
+	struct mask_cache_entry __percpu *cache = NULL;
+	struct mask_cache *new;
+
+	/* Only allow size to be 0 or a power of 2 that does not exceed
+	 * the percpu allocation size.
+	 */
+	if ((!is_power_of_2(size) && size != 0) ||
+	    (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
+		return NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	new->cache_size = size;
+	if (new->cache_size > 0) {
+		cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
+						  new->cache_size),
+				       __alignof__(struct mask_cache_entry));
+		if (!cache) {
+			kfree(new);
+			return NULL;
+		}
+	}
+
+	new->mask_cache = cache;
+	return new;
+}
+
+void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
+{
+	struct mask_cache *mc = rcu_dereference(table->mask_cache);
+	struct mask_cache *new;
+
+	if (size == mc->cache_size)
+		return;
+
+	new = tbl_mask_cache_alloc(size);
+	if (!new)
+		return;
+
+	rcu_assign_pointer(table->mask_cache, new);
+	call_rcu(&mc->rcu, mask_cache_rcu_cb);
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
 	struct table_instance *ti, *ufid_ti;
+	struct mask_cache *mc;
 	struct mask_array *ma;
 
-	table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
-					   MC_HASH_ENTRIES,
-					   __alignof__(struct mask_cache_entry));
-	if (!table->mask_cache)
+	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
+	if (!mc)
 		return -ENOMEM;
 
 	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
@@ -367,6 +427,7 @@  int ovs_flow_tbl_init(struct flow_table *table)
 	rcu_assign_pointer(table->ti, ti);
 	rcu_assign_pointer(table->ufid_ti, ufid_ti);
 	rcu_assign_pointer(table->mask_array, ma);
+	rcu_assign_pointer(table->mask_cache, mc);
 	table->last_rehash = jiffies;
 	table->count = 0;
 	table->ufid_count = 0;
@@ -377,7 +438,7 @@  int ovs_flow_tbl_init(struct flow_table *table)
 free_mask_array:
 	__mask_array_destroy(ma);
 free_mask_cache:
-	free_percpu(table->mask_cache);
+	__mask_cache_destroy(mc);
 	return -ENOMEM;
 }
 
@@ -453,9 +514,11 @@  void ovs_flow_tbl_destroy(struct flow_table *table)
 {
 	struct table_instance *ti = rcu_dereference_raw(table->ti);
 	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
+	struct mask_cache *mc = rcu_dereference(table->mask_cache);
+	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
 
-	free_percpu(table->mask_cache);
-	call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
+	call_rcu(&mc->rcu, mask_cache_rcu_cb);
+	call_rcu(&ma->rcu, mask_array_rcu_cb);
 	table_instance_destroy(table, ti, ufid_ti, false);
 }
 
@@ -724,6 +787,7 @@  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 					  u32 *n_mask_hit,
 					  u32 *n_cache_hit)
 {
+	struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
 	struct mask_array *ma = rcu_dereference(tbl->mask_array);
 	struct table_instance *ti = rcu_dereference(tbl->ti);
 	struct mask_cache_entry *entries, *ce;
@@ -733,7 +797,7 @@  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 
 	*n_mask_hit = 0;
 	*n_cache_hit = 0;
-	if (unlikely(!skb_hash)) {
+	if (unlikely(!skb_hash || mc->cache_size == 0)) {
 		u32 mask_index = 0;
 		u32 cache = 0;
 
@@ -749,11 +813,11 @@  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 
 	ce = NULL;
 	hash = skb_hash;
-	entries = this_cpu_ptr(tbl->mask_cache);
+	entries = this_cpu_ptr(mc->mask_cache);
 
 	/* Find the cache entry 'ce' to operate on. */
 	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
-		int index = hash & (MC_HASH_ENTRIES - 1);
+		int index = hash & (mc->cache_size - 1);
 		struct mask_cache_entry *e;
 
 		e = &entries[index];
@@ -867,6 +931,13 @@  int ovs_flow_tbl_num_masks(const struct flow_table *table)
 	return READ_ONCE(ma->count);
 }
 
+u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
+{
+	struct mask_cache *mc = rcu_dereference(table->mask_cache);
+
+	return READ_ONCE(mc->cache_size);
+}
+
 static struct table_instance *table_instance_expand(struct table_instance *ti,
 						    bool ufid)
 {
@@ -1095,8 +1166,8 @@  void ovs_flow_masks_rebalance(struct flow_table *table)
 	for (i = 0; i < masks_entries; i++) {
 		int index = masks_and_count[i].index;
 
-		new->masks[new->count++] =
-			rcu_dereference_ovsl(ma->masks[index]);
+		if (ovsl_dereference(ma->masks[index]))
+			new->masks[new->count++] = ma->masks[index];
 	}
 
 	rcu_assign_pointer(table->mask_array, new);
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 325e939371d8..f2dba952db2f 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -27,6 +27,12 @@  struct mask_cache_entry {
 	u32 mask_index;
 };
 
+struct mask_cache {
+	struct rcu_head rcu;
+	u32 cache_size;  /* Must be 0 or a power of 2. */
+	struct mask_cache_entry __percpu *mask_cache;
+};
+
 struct mask_count {
 	int index;
 	u64 counter;
@@ -53,7 +59,7 @@  struct table_instance {
 struct flow_table {
 	struct table_instance __rcu *ti;
 	struct table_instance __rcu *ufid_ti;
-	struct mask_cache_entry __percpu *mask_cache;
+	struct mask_cache __rcu *mask_cache;
 	struct mask_array __rcu *mask_array;
 	unsigned long last_rehash;
 	unsigned int count;
@@ -77,6 +83,8 @@  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			const struct sw_flow_mask *mask);
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
 int  ovs_flow_tbl_num_masks(const struct flow_table *table);
+u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
+void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size);
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
 				       u32 *bucket, u32 *idx);
 struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,