diff mbox series

[ovs-dev,ovs-dev,v3,1/4] dpif-netdev: Expand the meters supported number.

Message ID 20200523103320.47497-2-xiangxia.m.yue@gmail.com
State Changes Requested
Headers show
Series expand the meter table and fix bug. | expand

Commit Message

Tonghao Zhang May 23, 2020, 10:33 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

For now, ovs-vswitchd uses an array of dp_meter structs
to store meter data, and at most 65536 (defined by MAX_METERS)
meters can be used. But in some cases, for example on an edge
gateway, we need at least 200,000 meters for per-IP-address
bandwidth limitation. Every IP address uses two meters, one for
its rx path and one for its tx path[1]. In addition, ovs-vswitchd
should support meter offload (the rte_mtr_xxx API introduced by
DPDK), and some hardware, such as the Mellanox ConnectX-6,
supports more than 65536 meters.

This patch still uses an array to manage the meters, but the array can be expanded.

[1].
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

Cc: Ilya Maximets <i.maximets@ovn.org>
Cc: William Tu <u9012063@gmail.com>
Cc: Jarno Rajahalme <jarno@ovn.org>
Cc: Ben Pfaff <blp@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Cc: Pravin Shelar <pshelar@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
v3:
* rename n_meters -> n_allocated in dp_meter_instance
* rename count -> n_used in dp_meter_table
* rename "ti" -> "meter_inst" or "inst" in different functions/structures
* remove parentheses for sizeof
* rename dp_netdev_meter_destroy/init to dp_netdev_meter_table_destroy/init
* fix OVS_REQUIRES style
v2:
* add comments for dp_meter_instance
* change the log
* remove extra newline
* I didn't move dp_netdev_meter_init/destroy up, because
  they depend on other meter functions, and keeping all the meter
  functions together makes the code cleaner.
---
 lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 250 insertions(+), 69 deletions(-)

Comments

Tonghao Zhang Feb. 24, 2021, 12:31 p.m. UTC | #1
This patch is at v3 and has been waiting for a long time. Will any maintainer
continue to review it?  Thanks!

On Sat, May 23, 2020 at 6:33 PM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> For now, ovs-vswitchd use the array of the dp_meter struct
> to store meter's data, and at most, there are only 65536
> (defined by MAX_METERS) meters that can be used. But in some
> case, for example, in the edge gateway, we should use 200,000,
> at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx
> path[1]. In other way, ovs-vswitchd should support meter-offload
> (rte_mtr_xxx api introduced by dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
>
> This patch use array to manage the meter, but it can ben expanded.
>
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
>
> Cc: Ilya Maximets <i.maximets@ovn.org>
> Cc: William Tu <u9012063@gmail.com>
> Cc: Jarno Rajahalme <jarno@ovn.org>
> Cc: Ben Pfaff <blp@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Cc: Pravin Shelar <pshelar@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
> v3:
> * rename n_meters -> n_allocated in dp_meter_instance
> * rename count -> n_used in dp_meter_table
> * rename "ti" -> "meter_inst" or "inst" in different functions/struction
> * remove parenthesize for sizeof
> * rename dp_netdev_meter_destroy/init to dp_netdev_meter_table_destroy/init
> * fix OVS_REQUIRES style
> v2:
> * add comments for dp_meter_instance
> * change the log
> * remove extra newline
> * I don't move the dp_netdev_meter_init/destroy up. because
>   them depends other meters function and put all meter function
>   together may make the codes clean.
> ---
>  lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 250 insertions(+), 69 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 51c888501bdf..920fef3ec572 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -99,9 +99,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
>
>  /* Configuration parameters. */
>  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> +
> +/* Maximum number of meters in the table. */
> +#define METER_ENTRY_MAX (200000ULL)
> +/* Maximum number of bands / meter. */
> +#define METER_BAND_MAX  (8)
> +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
>
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -284,12 +287,26 @@ struct dp_meter {
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> +    uint32_t id;
> +    struct ovs_mutex lock;
>      uint64_t used;
>      uint64_t packet_count;
>      uint64_t byte_count;
>      struct dp_meter_band bands[];
>  };
>
> +struct dp_meter_instance {
> +    uint32_t n_allocated;
> +    /* Followed by struct dp_meter[n]; where n is the n_allocated. */
> +    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
> +};
> +
> +struct dp_meter_table {
> +    OVSRCU_TYPE(struct dp_meter_instance *) meter_inst;
> +    uint32_t n_used;
> +    struct ovs_mutex lock;
> +};
> +
>  struct pmd_auto_lb {
>      bool auto_lb_requested;     /* Auto load balancing requested by user. */
>      bool is_enabled;            /* Current status of Auto load balancing. */
> @@ -330,8 +347,7 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct dp_meter_table meter_tbl;
>
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -379,19 +395,6 @@ struct dp_netdev {
>      struct pmd_auto_lb pmd_alb;
>  };
>
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1524,6 +1527,9 @@ choose_port(struct dp_netdev *dp, const char *name)
>      return ODPP_NONE;
>  }
>
> +static void dp_netdev_meter_table_init(struct dp_meter_table *tbl);
> +static void dp_netdev_meter_table_destroy(struct dp_meter_table *tbl);
> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp)
> @@ -1557,9 +1563,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_table_init(&dp->meter_tbl);
>
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1648,16 +1652,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
>   * through the 'dp_netdevs' shash while freeing 'dp'. */
>  static void
> @@ -1695,16 +1689,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_table_destroy(&dp->meter_tbl);
>
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -5714,14 +5699,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
>
>
>  /* Meters */
> +static uint32_t
> +meter_hash(struct dp_meter_instance *inst, uint32_t id)
> +{
> +    uint32_t n_allocated = inst->n_allocated;
> +
> +    return id % n_allocated;
> +}
> +
> +static void
> +dp_meter_free(struct dp_meter *meter)
> +{
> +    ovs_mutex_destroy(&meter->lock);
> +    free(meter);
> +}
> +
> +static struct dp_meter_instance *
> +dp_meter_instance_alloc(const uint32_t size)
> +{
> +    struct dp_meter_instance *inst;
> +
> +    inst = xzalloc(sizeof *inst + sizeof(struct dp_meter *) * size);
> +    inst->n_allocated = size;
> +
> +    return inst;
> +}
> +
> +static void
> +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
> +{
> +    struct dp_meter_instance *new_inst;
> +    struct dp_meter_instance *inst;
> +    int n_meters;
> +    int i;
> +
> +    new_inst = dp_meter_instance_alloc(size);
> +
> +    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
> +    n_meters = MIN(size, inst->n_allocated);
> +
> +    for (i = 0; i < n_meters; i++) {
> +        if (ovsrcu_get(struct dp_meter *, &inst->dp_meters[i])) {
> +            new_inst->dp_meters[i] = inst->dp_meters[i];
> +        }
> +    }
> +
> +    ovsrcu_set(&tbl->meter_inst, new_inst);
> +    ovsrcu_postpone(free, inst);
> +}
> +
> +static void
> +dp_meter_instance_insert(struct dp_meter_instance *inst,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(inst, meter->id);
> +    ovsrcu_set(&inst->dp_meters[hash], meter);
> +}
> +
> +static void
> +dp_meter_instance_remove(struct dp_meter_instance *inst,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(inst, meter->id);
> +    ovsrcu_set(&inst->dp_meters[hash], NULL);
> +}
> +
> +static void
> +dp_netdev_meter_table_init(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *inst;
> +
> +    inst = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> +    ovsrcu_set(&tbl->meter_inst, inst);
> +
> +    ovs_mutex_init(&tbl->lock);
> +    tbl->n_used = 0;
> +}
> +
> +static void
> +dp_netdev_meter_table_destroy(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *inst;
> +    int i;
> +
> +    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
> +    for (i = 0; i < inst->n_allocated; i++) {
> +        struct dp_meter *meter;
> +
> +        meter = ovsrcu_get(struct dp_meter *, &inst->dp_meters[i]);
> +        if (meter) {
> +            ovsrcu_postpone(dp_meter_free, meter);
> +        }
> +    }
> +
> +    ovsrcu_postpone(free, inst);
> +    ovs_mutex_destroy(&tbl->lock);
> +}
> +
> +static struct dp_meter *
> +dp_meter_lookup(struct dp_meter_table *tbl, uint32_t meter_id)
> +{
> +    struct dp_meter_instance *meter_inst;
> +    struct dp_meter *meter;
> +    uint32_t hash;
> +
> +    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
> +    hash = meter_hash(meter_inst, meter_id);
> +
> +    meter = ovsrcu_get(struct dp_meter *, &meter_inst->dp_meters[hash]);
> +    if (meter && meter->id == meter_id) {
> +        return meter;
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct dp_meter_table *tbl, uint32_t meter_id)
> +    OVS_REQUIRES(tbl->lock)
> +{
> +    struct dp_meter_instance *meter_inst;
> +    struct dp_meter *meter;
> +
> +    meter = dp_meter_lookup(tbl, meter_id);
> +    if (!meter) {
> +        return;
> +    }
> +
> +    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
> +    dp_meter_instance_remove(meter_inst, meter);
> +    ovsrcu_postpone(dp_meter_free, meter);
> +
> +    tbl->n_used--;
> +    /* Shrink the meter array if necessary. */
> +    if (meter_inst->n_allocated > DP_METER_ARRAY_SIZE_MIN &&
> +        tbl->n_used <= (meter_inst->n_allocated / 4)) {
> +        int half_size = meter_inst->n_allocated / 2;
> +        int i;
> +
> +        /* Avoid hash collision, don't move slots to other place.
> +         * Make sure there are no references of meters in array
> +         * which will be released.
> +         */
> +        for (i = half_size; i < meter_inst->n_allocated; i++) {
> +            if (ovsrcu_get(struct dp_meter *, &meter_inst->dp_meters[i])) {
> +                return;
> +            }
> +        }
> +
> +        dp_meter_instance_realloc(tbl, half_size);
> +    }
> +}
> +
> +static int
> +dp_meter_attach(struct dp_meter_table *tbl, struct dp_meter *meter)
> +    OVS_REQUIRES(tbl->lock)
> +{
> +    struct dp_meter_instance *meter_inst;
> +    uint32_t hash;
> +
> +    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
> +    hash = meter_hash(meter_inst, meter->id);
> +
> +    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
> +                                &meter_inst->dp_meters[hash]))) {
> +        VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n",
> +                  meter->id, hash, meter_inst->n_allocated);
> +        return EBUSY;
> +    }
> +
> +    dp_meter_instance_insert(meter_inst, meter);
> +
> +    tbl->n_used++;
> +    if (tbl->n_used >= meter_inst->n_allocated) {
> +        dp_meter_instance_realloc(tbl, meter_inst->n_allocated * 2);
> +    }
> +
> +    return 0;
> +}
> +
>  static void
>  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
>                                 struct ofputil_meter_features *features)
>  {
> -    features->max_meters = MAX_METERS;
> +    features->max_meters = METER_ENTRY_MAX;
>      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
>      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> -    features->max_bands = MAX_BANDS;
> +    features->max_bands = METER_BAND_MAX;
>      features->max_color = 0;
>  }
>
> @@ -5743,14 +5911,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      uint32_t exceeded_rate[NETDEV_MAX_BURST];
>      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
>
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return;
>      }
>
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>
>      /* Initialize as negative values. */
> @@ -5758,6 +5925,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>
> +    ovs_mutex_lock(&meter->lock);
>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>
> @@ -5875,8 +6043,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>
>  /* Meter set/get/del processing is still single-threaded. */
> @@ -5885,11 +6053,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>                        struct ofputil_meter_config *config)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      uint32_t mid = meter_id.uint32;
>      struct dp_meter *meter;
> -    int i;
> +    int err, i;
>
> -    if (mid >= MAX_METERS) {
> +    if (mid >= METER_ENTRY_MAX) {
>          return EFBIG; /* Meter_id out of range. */
>      }
>
> @@ -5897,7 +6066,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          return EBADF; /* Unsupported flags set */
>      }
>
> -    if (config->n_bands > MAX_BANDS) {
> +    if (config->n_bands > METER_BAND_MAX) {
>          return EINVAL;
>      }
>
> @@ -5918,6 +6087,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) {
> @@ -5943,12 +6114,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          }
>      }
>
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&meter_tbl->lock);
> +
> +    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
> +    err = dp_meter_attach(meter_tbl, meter);
> +    if (err) {
> +        goto unlock_out;
> +    }
> +
> +    ovs_mutex_unlock(&meter_tbl->lock);
>
>      return 0;
> +
> +unlock_out:
> +    ovs_mutex_unlock(&meter_tbl->lock);
> +    dp_meter_free(meter);
> +    return err;
>  }
>
>  static int
> @@ -5956,23 +6137,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return EFBIG;
>      }
>
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
>          int i = 0;
>
> +        ovs_mutex_lock(&meter->lock);
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count;
>
> @@ -5980,13 +6161,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].packet_count = meter->bands[i].packet_count;
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
> +        ovs_mutex_unlock(&meter->lock);
>
>          stats->n_bands = i;
>      }
>
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>
>  static int
> @@ -5995,15 +6175,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      int error;
>
>      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&meter_tbl->lock);
> +        dp_meter_detach_free(meter_tbl, meter_id);
> +        ovs_mutex_unlock(&meter_tbl->lock);
>      }
>      return error;
>  }
> --
> 2.26.1
>
Ilya Maximets Feb. 24, 2021, 6:17 p.m. UTC | #2
On 2/24/21 1:31 PM, Tonghao Zhang wrote:
> Now this patch version is v3. and stay a long time. Any maintainer
> will continue to review this patch ?  Thanks!

Sorry for the long delays.  I have it on my TODO list for this week along
with the overflow fix v2.

Best regards, Ilya Maximets.
Tonghao Zhang April 14, 2021, 1:55 a.m. UTC | #3
On Thu, Feb 25, 2021 at 2:17 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 2/24/21 1:31 PM, Tonghao Zhang wrote:
> > Now this patch version is v3. and stay a long time. Any maintainer
> > will continue to review this patch ?  Thanks!
>
> Sorry for long dalays.  I have it on my TODO list for this week along
> with the overflow fix v2.
Hi Ilya, Ben
Do you have comments on
1/4 dpif-netdev: Expand the meters supported number ?

The other patches were applied to master. If you plan to review
this patch, I will rebase the code and send it again.

> Best regards, Ilya Maximets.
Ilya Maximets April 14, 2021, 5:04 p.m. UTC | #4
On 4/14/21 3:55 AM, Tonghao Zhang wrote:
> On Thu, Feb 25, 2021 at 2:17 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 2/24/21 1:31 PM, Tonghao Zhang wrote:
>>> Now this patch version is v3. and stay a long time. Any maintainer
>>> will continue to review this patch ?  Thanks!
>>
>> Sorry for long dalays.  I have it on my TODO list for this week along
>> with the overflow fix v2.
> Hi Ilya, Ben
> Do you have comments on
> 1/4 dpif-netdev: Expand the meters supported number ?

Hi.

The main problem I have with the patch is the way the new data structure
is introduced.  And I'm not really comfortable adding a new RCU-based structure
without a full set of actual unit tests, including tests of its parallel
operation.

You mentioned before that cmap has 1% overhead compared with an array.  Maybe
it's not that bad?  I mean, I'd sacrifice 1% of performance for the
simplicity of the implementation.  Maybe we can even optimize something in
your implementation or save this 1% in some different place.

WDYT?

Best regards, Ilya Maximets.
Tonghao Zhang April 26, 2021, 12:04 p.m. UTC | #5
On Thu, Apr 15, 2021 at 1:04 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 4/14/21 3:55 AM, Tonghao Zhang wrote:
> > On Thu, Feb 25, 2021 at 2:17 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>
> >> On 2/24/21 1:31 PM, Tonghao Zhang wrote:
> >>> Now this patch version is v3. and stay a long time. Any maintainer
> >>> will continue to review this patch ?  Thanks!
> >>
> >> Sorry for long dalays.  I have it on my TODO list for this week along
> >> with the overflow fix v2.
> > Hi Ilya, Ben
> > Do you have comments on
> > 1/4 dpif-netdev: Expand the meters supported number ?
>
> Hi.
>
> The main problem I have with the patch is the way how new data structure
> introduced.  And I'm not really comfortable adding a new RCU-based structure
> without full set of actual unit tests including tests of it's parallel
> operation.
>
> You mentioned before that cmap has 1% overhead in compare with array.  Maybe
> it's not that bad?  I mean, I'd sacrifice 1% of performance for the
> simplicity of the implementation.  Maybe we can even optimize something in
> you implementation or save this 1% at some different place.
>
> WDYT?
The original patches were implemented with cmap, and added a cache using a meters array:
http://patchwork.ozlabs.org/project/openvswitch/patch/1584180230-89020-1-git-send-email-xiangxia.m.yue@gmail.com/
http://patchwork.ozlabs.org/project/openvswitch/patch/1584180230-89020-2-git-send-email-xiangxia.m.yue@gmail.com/

Ben suggested that I use the idpool; the kernel does this in the same way.
Ben, what do you think?
Both versions look good to me.

> Best regards, Ilya Maximets.
Ilya Maximets April 26, 2021, 5:11 p.m. UTC | #6
On 4/26/21 2:04 PM, Tonghao Zhang wrote:
> On Thu, Apr 15, 2021 at 1:04 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 4/14/21 3:55 AM, Tonghao Zhang wrote:
>>> On Thu, Feb 25, 2021 at 2:17 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>>
>>>> On 2/24/21 1:31 PM, Tonghao Zhang wrote:
>>>>> Now this patch version is v3. and stay a long time. Any maintainer
>>>>> will continue to review this patch ?  Thanks!
>>>>
>>>> Sorry for long dalays.  I have it on my TODO list for this week along
>>>> with the overflow fix v2.
>>> Hi Ilya, Ben
>>> Do you have comments on
>>> 1/4 dpif-netdev: Expand the meters supported number ?
>>
>> Hi.
>>
>> The main problem I have with the patch is the way how new data structure
>> introduced.  And I'm not really comfortable adding a new RCU-based structure
>> without full set of actual unit tests including tests of it's parallel
>> operation.
>>
>> You mentioned before that cmap has 1% overhead in compare with array.  Maybe
>> it's not that bad?  I mean, I'd sacrifice 1% of performance for the
>> simplicity of the implementation.  Maybe we can even optimize something in
>> you implementation or save this 1% at some different place.
>>
>> WDYT?
> The original patches implemented as cmap, and add a cache using meters array:
> http://patchwork.ozlabs.org/project/openvswitch/patch/1584180230-89020-1-git-send-email-xiangxia.m.yue@gmail.com/

Thanks for the pointer.  I prefer this implementation with cmap
and I see some possible small performance improvements that could
be done in the patch above.  I'll review it.

> http://patchwork.ozlabs.org/project/openvswitch/patch/1584180230-89020-2-git-send-email-xiangxia.m.yue@gmail.com/

This patch with a special cache seems too complex for the performance
benefit it has.  I'd rather not have it and try to optimize simple
cmap implementation instead.

> 
> Ben suggested I use the idpool, the kernel do this in the same way.
> Ben  what do you think ?
> The two versions look good to me.
> 
>> Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 51c888501bdf..920fef3ec572 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -99,9 +99,12 @@  DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
-enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
-enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
+
+/* Maximum number of meters in the table. */
+#define METER_ENTRY_MAX (200000ULL)
+/* Maximum number of bands / meter. */
+#define METER_BAND_MAX  (8)
+#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
 
 COVERAGE_DEFINE(datapath_drop_meter);
 COVERAGE_DEFINE(datapath_drop_upcall_error);
@@ -284,12 +287,26 @@  struct dp_meter {
     uint16_t flags;
     uint16_t n_bands;
     uint32_t max_delta_t;
+    uint32_t id;
+    struct ovs_mutex lock;
     uint64_t used;
     uint64_t packet_count;
     uint64_t byte_count;
     struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+    uint32_t n_allocated;
+    /* Followed by struct dp_meter[n]; where n is the n_allocated. */
+    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
+};
+
+struct dp_meter_table {
+    OVSRCU_TYPE(struct dp_meter_instance *) meter_inst;
+    uint32_t n_used;
+    struct ovs_mutex lock;
+};
+
 struct pmd_auto_lb {
     bool auto_lb_requested;     /* Auto load balancing requested by user. */
     bool is_enabled;            /* Current status of Auto load balancing. */
@@ -330,8 +347,7 @@  struct dp_netdev {
     atomic_uint32_t tx_flush_interval;
 
     /* Meters. */
-    struct ovs_mutex meter_locks[N_METER_LOCKS];
-    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+    struct dp_meter_table meter_tbl;
 
     /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
     OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
@@ -379,19 +395,6 @@  struct dp_netdev {
     struct pmd_auto_lb pmd_alb;
 };
 
-static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                     odp_port_t)
     OVS_REQUIRES(dp->port_mutex);
@@ -1524,6 +1527,9 @@  choose_port(struct dp_netdev *dp, const char *name)
     return ODPP_NONE;
 }
 
+static void dp_netdev_meter_table_init(struct dp_meter_table *tbl);
+static void dp_netdev_meter_table_destroy(struct dp_meter_table *tbl);
+
 static int
 create_dp_netdev(const char *name, const struct dpif_class *class,
                  struct dp_netdev **dpp)
@@ -1557,9 +1563,7 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     dp->reconfigure_seq = seq_create();
     dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
 
-    for (int i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_table_init(&dp->meter_tbl);
 
     /* Disable upcalls by default. */
     dp_netdev_disable_upcall(dp);
@@ -1648,16 +1652,6 @@  dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
     fat_rwlock_destroy(&dp->upcall_rwlock);
 }
 
-static void
-dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
-    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    if (dp->meters[meter_id]) {
-        free(dp->meters[meter_id]);
-        dp->meters[meter_id] = NULL;
-    }
-}
-
 /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
  * through the 'dp_netdevs' shash while freeing 'dp'. */
 static void
@@ -1695,16 +1689,7 @@  dp_netdev_free(struct dp_netdev *dp)
     /* Upcalls must be disabled at this point */
     dp_netdev_destroy_upcall_lock(dp);
 
-    int i;
-
-    for (i = 0; i < MAX_METERS; ++i) {
-        meter_lock(dp, i);
-        dp_delete_meter(dp, i);
-        meter_unlock(dp, i);
-    }
-    for (i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_destroy(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_table_destroy(&dp->meter_tbl);
 
     free(dp->pmd_cmask);
     free(CONST_CAST(char *, dp->name));
@@ -5714,14 +5699,197 @@  dp_netdev_disable_upcall(struct dp_netdev *dp)
 
 
 /* Meters */
+/* Maps meter 'id' to a slot index of 'inst': the id modulo the number of
+ * slots allocated in 'inst'.  'inst->n_allocated' must be nonzero, as the
+ * result of a modulo by zero is undefined. */
+static uint32_t
+meter_hash(struct dp_meter_instance *inst, uint32_t id)
+{
+    return id % inst->n_allocated;
+}
+
+/* Destroys the mutex embedded in 'meter' and then releases the memory of
+ * 'meter' itself.  'meter' must not be used after this call. */
+static void
+dp_meter_free(struct dp_meter *meter)
+{
+    ovs_mutex_destroy(&meter->lock);
+    free(meter);
+}
+
+/* Allocates and returns a zero-filled meter instance with room for 'size'
+ * meter slots, and records 'size' in its 'n_allocated' member.
+ *
+ * NOTE(review): the result is used unchecked, which assumes that xzalloc()
+ * never returns NULL -- confirm against its definition. */
+static struct dp_meter_instance *
+dp_meter_instance_alloc(const uint32_t size)
+{
+    struct dp_meter_instance *inst;
+
+    /* Size the array by its real element type instead of by a raw pointer:
+     * the slots are read and written through ovsrcu_get()/ovsrcu_set(), so
+     * they are RCU-wrapped pointers rather than plain 'struct dp_meter *'. */
+    inst = xzalloc(sizeof *inst + sizeof inst->dp_meters[0] * size);
+    inst->n_allocated = size;
+
+    return inst;
+}
+
+/* Replaces the meter instance of 'tbl' with a freshly allocated one that has
+ * 'size' slots, moving every meter of the old instance into the new one.
+ *
+ * A meter lives in slot 'id % n_allocated', so its slot depends on the array
+ * size.  Each meter is therefore re-inserted at the slot computed against the
+ * new instance; copying slot 'i' to slot 'i' would leave meters where lookups
+ * no longer search for them once the array has grown.
+ *
+ * Growing to a multiple of the old size cannot create collisions: two ids
+ * that differ modulo the old size also differ modulo the new one.  When
+ * shrinking, the caller must guarantee that no two meters map to the same
+ * slot of the smaller array, otherwise one of them would be overwritten.
+ *
+ * The old instance is handed to ovsrcu_postpone() instead of being freed
+ * right away. */
+static void
+dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
+{
+    struct dp_meter_instance *new_inst;
+    struct dp_meter_instance *inst;
+    uint32_t i;
+
+    new_inst = dp_meter_instance_alloc(size);
+
+    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+
+    for (i = 0; i < inst->n_allocated; i++) {
+        struct dp_meter *meter;
+
+        meter = ovsrcu_get(struct dp_meter *, &inst->dp_meters[i]);
+        if (meter) {
+            /* Rehash against the new size, see the comment above. */
+            uint32_t slot = meter_hash(new_inst, meter->id);
+
+            ovsrcu_set(&new_inst->dp_meters[slot], meter);
+        }
+    }
+
+    /* Publish the fully populated array before retiring the old one. */
+    ovsrcu_set(&tbl->meter_inst, new_inst);
+    ovsrcu_postpone(free, inst);
+}
+
+/* Stores 'meter' in the slot of 'inst' that 'meter->id' maps to.  Whatever
+ * pointer the slot held before is overwritten without being examined, so the
+ * caller has to check beforehand that the slot is free. */
+static void
+dp_meter_instance_insert(struct dp_meter_instance *inst,
+                         struct dp_meter *meter)
+{
+    const uint32_t slot = meter_hash(inst, meter->id);
+
+    ovsrcu_set(&inst->dp_meters[slot], meter);
+}
+
+/* Clears the slot of 'inst' that 'meter->id' maps to by storing NULL in it.
+ * 'meter' itself is neither freed nor modified; releasing it is up to the
+ * caller. */
+static void
+dp_meter_instance_remove(struct dp_meter_instance *inst,
+                         struct dp_meter *meter)
+{
+    const uint32_t slot = meter_hash(inst, meter->id);
+
+    ovsrcu_set(&inst->dp_meters[slot], NULL);
+}
+
+/* Initializes 'tbl' as an empty meter table: the used-meter counter is reset,
+ * the table mutex is initialized and an instance with
+ * DP_METER_ARRAY_SIZE_MIN slots is installed as the current array. */
+static void
+dp_netdev_meter_table_init(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *initial;
+
+    tbl->n_used = 0;
+    ovs_mutex_init(&tbl->lock);
+
+    initial = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+    ovsrcu_set(&tbl->meter_inst, initial);
+}
+
+/* Tears down 'tbl'.  Every meter still stored in the current instance, and
+ * then the instance itself, is queued for release through ovsrcu_postpone()
+ * rather than freed on the spot.  The table mutex is destroyed last. */
+static void
+dp_netdev_meter_table_destroy(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *inst
+        = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    int slot;
+
+    for (slot = 0; slot < inst->n_allocated; slot++) {
+        struct dp_meter *meter
+            = ovsrcu_get(struct dp_meter *, &inst->dp_meters[slot]);
+
+        if (!meter) {
+            continue;           /* Unused slot. */
+        }
+        ovsrcu_postpone(dp_meter_free, meter);
+    }
+
+    ovsrcu_postpone(free, inst);
+    ovs_mutex_destroy(&tbl->lock);
+}
+
+/* Returns the meter with id 'meter_id' from the current instance of 'tbl',
+ * or NULL if the slot that 'meter_id' maps to is empty or is occupied by a
+ * meter with a different id. */
+static struct dp_meter *
+dp_meter_lookup(struct dp_meter_table *tbl, uint32_t meter_id)
+{
+    struct dp_meter_instance *inst;
+    struct dp_meter *candidate;
+    uint32_t slot;
+
+    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    slot = meter_hash(inst, meter_id);
+    candidate = ovsrcu_get(struct dp_meter *, &inst->dp_meters[slot]);
+
+    /* Different ids can map to the same slot, so verify the occupant. */
+    if (!candidate || candidate->id != meter_id) {
+        return NULL;
+    }
+
+    return candidate;
+}
+
+/* Removes the meter with id 'meter_id' from 'tbl', if there is one, and
+ * queues it for release through ovsrcu_postpone().  Does nothing when no
+ * such meter is found.
+ *
+ * After a removal the array is halved when it is larger than
+ * DP_METER_ARRAY_SIZE_MIN, at most a quarter of its slots are accounted as
+ * used, and its upper half holds no meter at all. */
+static void
+dp_meter_detach_free(struct dp_meter_table *tbl, uint32_t meter_id)
+    OVS_REQUIRES(tbl->lock)
+{
+    struct dp_meter *meter = dp_meter_lookup(tbl, meter_id);
+    struct dp_meter_instance *inst;
+    int new_size;
+    int slot;
+
+    if (!meter) {
+        return;
+    }
+
+    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    dp_meter_instance_remove(inst, meter);
+    ovsrcu_postpone(dp_meter_free, meter);
+    tbl->n_used--;
+
+    /* Keep the current array unless it is both larger than the minimum and
+     * sparsely populated. */
+    if (inst->n_allocated <= DP_METER_ARRAY_SIZE_MIN
+        || tbl->n_used > (inst->n_allocated / 4)) {
+        return;
+    }
+
+    /* Only the lower half survives a shrink.  Meters are never moved to
+     * another slot here, so give up as soon as any slot in the half that
+     * would be dropped is still occupied. */
+    new_size = inst->n_allocated / 2;
+    for (slot = new_size; slot < inst->n_allocated; slot++) {
+        if (ovsrcu_get(struct dp_meter *, &inst->dp_meters[slot])) {
+            return;
+        }
+    }
+
+    dp_meter_instance_realloc(tbl, new_size);
+}
+
+/* Inserts 'meter' into 'tbl' at the slot that 'meter->id' maps to.
+ *
+ * Returns 0 on success.  Returns EBUSY, after logging a warning and without
+ * modifying 'tbl', when that slot is already occupied; 'meter' then still
+ * belongs to the caller.
+ *
+ * On success the used-meter counter is incremented, and the array is doubled
+ * once the counter reaches the number of allocated slots. */
+static int
+dp_meter_attach(struct dp_meter_table *tbl, struct dp_meter *meter)
+    OVS_REQUIRES(tbl->lock)
+{
+    struct dp_meter_instance *meter_inst;
+    uint32_t hash;
+
+    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    hash = meter_hash(meter_inst, meter->id);
+
+    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
+                                &meter_inst->dp_meters[hash]))) {
+        /* No trailing newline: the logging layer terminates each record. */
+        VLOG_WARN("Failed to attach meter id %u to slot %u/%u.",
+                  meter->id, hash, meter_inst->n_allocated);
+        return EBUSY;
+    }
+
+    dp_meter_instance_insert(meter_inst, meter);
+
+    tbl->n_used++;
+    if (tbl->n_used >= meter_inst->n_allocated) {
+        dp_meter_instance_realloc(tbl, meter_inst->n_allocated * 2);
+    }
+
+    return 0;
+}
+
+/* Fills in 'features' with the meter limits and capabilities reported by
+ * this datapath: maximum number of meters and of bands per meter, supported
+ * band types and flags, and a 'max_color' of 0.  'dpif' is not used. */
 static void
 dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
                                struct ofputil_meter_features *features)
 {
-    features->max_meters = MAX_METERS;
+    features->max_meters = METER_ENTRY_MAX;
     features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
     features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
-    features->max_bands = MAX_BANDS;
+    features->max_bands = METER_BAND_MAX;
     features->max_color = 0;
 }
 
@@ -5743,14 +5911,13 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     uint32_t exceeded_rate[NETDEV_MAX_BURST];
     int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return;
     }
 
-    meter_lock(dp, meter_id);
-    meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        goto out;
+        return;
     }
 
     /* Initialize as negative values. */
@@ -5758,6 +5925,7 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     /* Initialize as zeroes. */
     memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
 
+    ovs_mutex_lock(&meter->lock);
     /* All packets will hit the meter at the same time. */
     long_delta_t = now / 1000 - meter->used / 1000; /* msec */
 
@@ -5875,8 +6043,8 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
             dp_packet_batch_refill(packets_, packet, j);
         }
     }
- out:
-    meter_unlock(dp, meter_id);
+
+    ovs_mutex_unlock(&meter->lock);
 }
 
 /* Meter set/get/del processing is still single-threaded. */
@@ -5885,11 +6053,12 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
                       struct ofputil_meter_config *config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     uint32_t mid = meter_id.uint32;
     struct dp_meter *meter;
-    int i;
+    int err, i;
 
-    if (mid >= MAX_METERS) {
+    if (mid >= METER_ENTRY_MAX) {
         return EFBIG; /* Meter_id out of range. */
     }
 
@@ -5897,7 +6066,7 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         return EBADF; /* Unsupported flags set */
     }
 
-    if (config->n_bands > MAX_BANDS) {
+    if (config->n_bands > METER_BAND_MAX) {
         return EINVAL;
     }
 
@@ -5918,6 +6087,8 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
     meter->n_bands = config->n_bands;
     meter->max_delta_t = 0;
     meter->used = time_usec();
+    meter->id = mid;
+    ovs_mutex_init(&meter->lock);
 
     /* set up bands */
     for (i = 0; i < config->n_bands; ++i) {
@@ -5943,12 +6114,22 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         }
     }
 
-    meter_lock(dp, mid);
-    dp_delete_meter(dp, mid); /* Free existing meter, if any */
-    dp->meters[mid] = meter;
-    meter_unlock(dp, mid);
+    ovs_mutex_lock(&meter_tbl->lock);
+
+    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
+    err = dp_meter_attach(meter_tbl, meter);
+    if (err) {
+        goto unlock_out;
+    }
+
+    ovs_mutex_unlock(&meter_tbl->lock);
 
     return 0;
+
+unlock_out:
+    ovs_mutex_unlock(&meter_tbl->lock);
+    dp_meter_free(meter);
+    return err;
 }
 
 static int
@@ -5956,23 +6137,23 @@  dpif_netdev_meter_get(const struct dpif *dpif,
                       ofproto_meter_id meter_id_,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
-    const struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev *dp = get_dp_netdev(dpif);
     uint32_t meter_id = meter_id_.uint32;
-    int retval = 0;
+    const struct dp_meter *meter;
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return EFBIG;
     }
 
-    meter_lock(dp, meter_id);
-    const struct dp_meter *meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        retval = ENOENT;
-        goto done;
+        return ENOENT;
     }
+
     if (stats) {
         int i = 0;
 
+        ovs_mutex_lock(&meter->lock);
         stats->packet_in_count = meter->packet_count;
         stats->byte_in_count = meter->byte_count;
 
@@ -5980,13 +6161,12 @@  dpif_netdev_meter_get(const struct dpif *dpif,
             stats->bands[i].packet_count = meter->bands[i].packet_count;
             stats->bands[i].byte_count = meter->bands[i].byte_count;
         }
+        ovs_mutex_unlock(&meter->lock);
 
         stats->n_bands = i;
     }
 
-done:
-    meter_unlock(dp, meter_id);
-    return retval;
+    return 0;
 }
 
 static int
@@ -5995,15 +6175,16 @@  dpif_netdev_meter_del(struct dpif *dpif,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     int error;
 
     error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
     if (!error) {
         uint32_t meter_id = meter_id_.uint32;
 
-        meter_lock(dp, meter_id);
-        dp_delete_meter(dp, meter_id);
-        meter_unlock(dp, meter_id);
+        ovs_mutex_lock(&meter_tbl->lock);
+        dp_meter_detach_free(meter_tbl, meter_id);
+        ovs_mutex_unlock(&meter_tbl->lock);
     }
     return error;
 }